amdidio.github.io

Project - Neural Networks for Character Recognition

In this project, I implemented an artificial neural network for Optical Character Recognition (OCR). The network is a so-called Multilayer Perceptron (MLP) with a single hidden layer.
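
Concretely, the network maps each 42-pixel image x to two outputs through a hidden layer of 10 units (the layer sizes are those used in the code in Part 2), with a shifted sigmoid activation applied elementwise:

$$ y(x) = \sigma\bigl(W\,\sigma(Vx)\bigr), \qquad V \in \mathbb{R}^{10\times 42}, \quad W \in \mathbb{R}^{2\times 10}, \qquad \sigma(z) = \frac{1}{1+e^{0.5-z}}. $$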

The code is implemented in four parts.

using PyPlot, Random, LinearAlgebra, Optim   # Packages needed

Preliminaries

Each character is an image of size 7-by-6, represented as a vector of 42 values. For this project, I used the 4 characters “MATH”. In the network, these characters are encoded using two output variables y = (y1,y2), where y = (0,0) represents “M”, y = (0,1) represents “A”, and so on. The code below defines these images and creates the following variables:

charstr = """
          OOOOOO  OOOOOO  OOOOOO  OO..OO
          OOOOOO  OOOOOO  OOOOOO  OO..OO
          O.OO.O  OO..OO  ..OO..  OO..OO
          O.OO.O  OO..OO  ..OO..  OOOOOO
          O....O  OOOOOO  ..OO..  OO..OO
          O....O  OO..OO  ..OO..  OO..OO
          O....O  OO..OO  ..OO..  OO..OO
          """

training = reshape(collect(charstr), :, 7)                    # one column of characters per text line
training = Int.(training[[1:6;9:14;17:22;25:30],:] .== 'O')   # keep the 4 letter blocks; 'O' -> 1, '.' -> 0
training = reshape(training', 7*6, 4)                         # one 42-pixel column per character
target = [0 0; 0 1; 1 0; 1 1]'                                # 2-bit output code for each character
mapstr = "MATH";

Defined below is the plotting function, which takes an array images with 42 rows and one column for each image, and shows the images in a grid:

function plot_chars(images)
    gray()                                    # grayscale colormap
    n_images = size(images,2)
    for j = 1:n_images
        subplot(ceil(Int, n_images/4), 4, j)  # grid with 4 images per row
        im = 1 .- reshape(images[:,j], 7, 6)  # invert so character pixels plot dark
        imshow(im); axis("off")
    end
end
plot_chars(training)

[Figure: the four training characters “MATH” plotted in a grid]

Part 1 - Generating noisy test characters

To test the trained OCR code, I artificially produce noisy characters by modifying the true character images in training as follows:

The first 4 images (columns) of testdata are identical copies of the training array. The next 4 images are generated by randomly choosing 2 pixels in each training image and flipping their values (that is, 0 becomes 1 and 1 becomes 0). For the next 4 images, 2×2 = 4 random pixels are flipped in each image, then 2×3 = 6 pixels, and finally 2×4 = 8 pixels. This gives a total of 16 new perturbed images, and 20 columns in total including the original images.

function make_testdata(training)
    testdata = zeros(Int64, 42, 20)
    testdata[:, 1:4] = training                 # columns 1-4: exact copies
    for level = 1:4                             # columns 5-8, 9-12, 13-16, 17-20
        for j = 1:4
            newcol = copy(training[:, j])
            for _ = 1:2*level                   # flip 2, 4, 6, then 8 pixels
                p = rand(1:42)                  # pixels are drawn with replacement
                newcol[p] = 1 - newcol[p]       # flip 0 <-> 1
            end
            testdata[:, j + 4*level] = newcol
        end
    end
    return testdata
end
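
Since the perturbations are random, the test set differs from run to run. For reproducible figures, one can seed the global random number generator before calling make_testdata (an illustrative use of the Random package; the seed value is arbitrary):

Random.seed!(1234)   # arbitrary seed, chosen here only for reproducibility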

For example, the following input:

testdata = make_testdata(training);
plot_chars(testdata)

will produce a randomly generated output, such as:

[Figure: the 20 test characters in a 5-by-4 grid: the original images in the first row, followed by copies with 2, 4, 6, and 8 flipped pixels]

Part 2 - Training using Stochastic Gradient Descent

train_sgd initializes V and W to normally distributed random numbers and performs maxiter stochastic gradient descent iterations, using the step size rate as the learning rate. The function returns V, W.
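
For reference, the update steps below implement the chain rule for the squared error of a single randomly chosen training pair (x, t) and output component k (a sketch; note that the shifted sigmoid still satisfies σ′(z) = σ(z)(1 − σ(z))):

$$ r = \sigma(Vx), \qquad y = \sigma(W_{k,:}\,r), \qquad q = (y - t_k)\,y\,(1-y), $$

$$ \frac{\partial E}{\partial W_{k,:}} = q\,r, \qquad \frac{\partial E}{\partial V} = q\,\bigl(W_{k,:} \odot r \odot (1-r)\bigr)\,x^{\top}. $$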

function train_sgd(; maxiter=10000, rate=1)
    V = randn(10,42)                      # input -> hidden weights
    W = randn(2,10)                       # hidden -> output weights
    sig(x) = 1/(1 + exp(.5-x))            # shifted sigmoid activation
    for i = 1:maxiter
        j = rand(1:4)                     # random training character
        k = rand(1:2)                     # random output component
        x = training[:,j]
        t = target[:,j]
        r = sig.(V*x)                     # hidden activations (10-vector)
        y = sig(dot(W[k,:],r))            # k-th output (scalar)
        q = (y - t[k])*y*(1-y)            # output error times sigmoid derivative
        u = (W[k,:]).*r.*(1 .- r)         # backpropagated hidden sensitivities
        W_kgrad = q.*r                    # gradient w.r.t. W[k,:]
        Vgrad = q*u*(x')                  # gradient w.r.t. V (10x42)
        W[k,:] .-= W_kgrad*rate           # gradient descent steps
        V .-= Vgrad*rate
    end
    return V, W
end
V,W = train_sgd()
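
After training, it is worth checking that the network actually fits the four training characters. A minimal sanity check (illustrative; this computes the same error that the function f in Part 4 minimizes):

sig(x) = 1/(1 + exp(.5-x))
E = sum(norm(sig.(W*sig.(V*training[:,j])) - target[:,j])^2 for j = 1:4) / 2
println("training error: ", E)   # should be near 0 for a well-trained network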

Part 3 - Predict output for test characters

The function predict(testdata, V, W) returns a 5-by-4 character array with the predicted characters. The ideal output would be 5 rows of ‘M’, ‘A’, ‘T’, ‘H’.

function predict(testdata,V,W)
    sig(x) = 1/(1 + exp(.5-x))
    y_(x) = sig.(W*sig.(V*x))                    # forward pass through the network
    results = []
    for i = 1:20
        y = round.(Int64, y_(testdata[:,i]))     # round outputs to a binary code
        if y == [0,0]
            push!(results, mapstr[1])            # 'M'
        elseif y == [0,1]
            push!(results, mapstr[2])            # 'A'
        elseif y == [1,0]
            push!(results, mapstr[3])            # 'T'
        elseif y == [1,1]
            push!(results, mapstr[4])            # 'H'
        end
    end
    return permutedims(reshape(results,4,5))     # 5 rows of 4 predicted characters
end

Calling predict(testdata, V, W) on the noisy test characters generated above then produces output such as:

[Output: a 5-by-4 character array; ideally every row reads M, A, T, H]
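
To quantify the result rather than inspecting it by eye, the predictions can be compared against the known ground truth (an illustrative sketch; truth is a helper array introduced here, not part of the project code):

truth = repeat(permutedims(collect(mapstr)), 5)        # 5-by-4 grid of expected characters
accuracy = count(predict(testdata,V,W) .== truth) / length(truth)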

Part 4 - True gradient descent

Alternatively, the network can be trained with true (full-batch) gradient descent on the total error over all four characters, using the Optim package:

function f(V_W)
    V = reshape(V_W[1:420], 10, 42)            # unpack the flattened weights
    W = reshape(V_W[421:440], 2, 10)
    sig(x) = 1/(1 + exp(.5-x))
    y_(x) = sig.(W*sig.(V*x))
    E = 0
    for j = 1:4                                # sum squared errors over all 4 characters
        E += norm(y_(training[:,j]) - target[:,j])^2
    end
    return E/2
end
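
As a quick cross-check (illustrative, assuming V and W still hold the SGD weights from Part 2), f can be evaluated at those weights by flattening them the same way:

f([vec(V); vec(W)])   # total squared training error of the SGD solution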

function train_optim()
    V = randn(10,42)
    W = randn(2,10)
    V_W = [vec(V); vec(W)]                     # flatten both weight matrices into one vector
    res = optimize(f, V_W, GradientDescent(); autodiff=:forward)
    opt = Optim.minimizer(res)
    opt_V = reshape(opt[1:420], 10, 42)        # unpack the optimized weights
    opt_W = reshape(opt[421:440], 2, 10)
    return opt_V, opt_W
end

Just as with the SGD implementation in Part 2, the following input:

plot_chars(testdata)
V,W = train_optim()
predict(testdata, V, W)

produces equivalent output:

[Output: a 5-by-4 character array of predicted characters, analogous to the SGD result]