From be55de113c7886af49bac2c902cfdb4980a1d73b Mon Sep 17 00:00:00 2001
From: xdrm-brackets
Date: Thu, 11 Oct 2018 23:26:36 +0200
Subject: [PATCH] backprop (?) | refactor methods (?)

---
 network.go      | 254 +++++++++++++++++++++++++++---------------------
 network_test.go |  22 ++---
 2 files changed, 153 insertions(+), 123 deletions(-)

diff --git a/network.go b/network.go
index a98f6c2..2c6bbd6 100644
--- a/network.go
+++ b/network.go
@@ -1,9 +1,7 @@
 package nn
 
 import (
-	"fmt"
 	"gonum.org/v1/gonum/mat"
-	"math"
 	"math/rand"
 	"time"
 )
@@ -13,9 +11,9 @@ type Network struct {
 	fed bool // whether the network has the state fed by a forward pass
 
-	Neurons []*mat.VecDense // neuron value vector for each layer (size=L)
-	Biases  []*mat.VecDense // neuron bias vector for each layer (size=L-1)
-	Weights []*mat.Dense    // weights between each 2-layers (size=L-1)
+	Neurons []*mat.Dense // neuron value vector for each layer (size=L)
+	Biases  []*mat.Dense // neuron bias vector for each layer (size=L-1)
+	Weights []*mat.Dense // weights between each 2-layers (size=L-1)
 }
 
 const MaxLayerCount = 255
@@ -36,8 +34,8 @@ func Empty(_layers ...uint) (*Network, error) {
 	net := &Network{
 		layers:  _layers,
 		fed:     false,
-		Neurons: make([]*mat.VecDense, 0),
-		Biases:  make([]*mat.VecDense, 0),
+		Neurons: make([]*mat.Dense, 0),
+		Biases:  make([]*mat.Dense, 0),
 		Weights: make([]*mat.Dense, 0),
 	}
 
@@ -49,7 +47,7 @@ func Empty(_layers ...uint) (*Network, error) {
 		}
 
 		// create neurons
-		net.Neurons = append(net.Neurons, mat.NewVecDense(int(layer), nil))
+		net.Neurons = append(net.Neurons, mat.NewDense(int(layer), 1, nil))
 
 		// do not create weights nor biases for first layer
 		// (no previous layer to bound to)
@@ -63,7 +61,7 @@ func Empty(_layers ...uint) (*Network, error) {
 			rand.Seed(time.Now().UnixNano())
 			biases = append(biases, rand.Float64())
 		}
-		biasesVec := mat.NewVecDense(int(layer), biases)
+		biasesVec := mat.NewDense(int(layer), 1, biases)
 		net.Biases = append(net.Biases, biasesVec)
 
 		rows, cols := int(layer), int(_layers[i-1])
@@ -86,31 +84,31 @@ func (net *Network) reset() {
 	net.fed = false
 
 	for i, _ := range net.Neurons {
-		net.Neurons[i] = mat.NewVecDense(int(net.layers[i]), nil)
+		net.Neurons[i] = mat.NewDense(int(net.layers[i]), 1, nil)
 	}
 }
 
-// Forward processes a forward propagation from an input vector
+// forward processes a forward propagation from an input vector
 // and lets the network in the final processing state
-func (net *Network) Forward(_input ...float64) ([]float64, error) {
+func (net *Network) forward(_input ...float64) error {
 
 	// check input size
-	if len(_input) < net.Neurons[0].Len() {
-		return nil, ErrMissingInput
+	if len(_input) < net.Neurons[0].ColView(0).Len() {
+		return ErrMissingInput
 	}
 
 	// reset neuron values
 	net.reset()
 
 	// forward input to first layer
-	for n, l := 0, net.Neurons[0].Len(); n < l; n++ {
-		net.Neurons[0].SetVec(n, _input[n])
+	for n, l := 0, net.Neurons[0].ColView(0).Len(); n < l; n++ {
+		net.Neurons[0].Set(n, 0, _input[n])
 	}
 
 	// process each layer from the previous one
 	for l, ll := 1, len(net.layers); l < ll; l++ {
 
 		// Z = w^l . a^(l-1) + b^l
-		z := new(mat.Dense)
+		z := net.Neurons[l]
 		a := net.Neurons[l-1] // neurons of previous layer
 		w := net.Weights[l-1] // shifted by 1 because no weights between layers -1 and 0
@@ -119,146 +117,178 @@ func (net *Network) Forward(_input ...float64) ([]float64, error) {
 		z.Mul(w, a)
 		z.Add(z, b)
 		z.Apply(sigmoid, z)
-
-		// copy values (first line = vector)
-		net.Neurons[l].CloneVec(z.ColView(0))
 	}
 
 	net.fed = true
 
-	// format output
-	outputLayer := net.Neurons[len(net.Neurons)-1]
-	output := make([]float64, 0, net.layers[len(net.layers)-1])
-	for n, l := 0, outputLayer.Len(); n < l; n++ {
-		output = append(output, outputLayer.AtVec(n))
-	}
-
-	return output, nil
+	return nil
 }
 
 // Cost returns the cost from the given output
 func (net *Network) Cost(_expect ...float64) (float64, error) {
-	outputLayer := net.Neurons[len(net.Neurons)-1]
-
-	// check output size
-	if len(_expect) < outputLayer.Len() {
-		return 0, ErrMissingOutput
+	costVec, err := net.costVec(_expect...)
+	if err != nil {
+		return 0, err
 	}
-
-	var Cost float64
-
-	// process cost
-	for n, l := 0, outputLayer.Len(); n < l; n++ {
-		Cost += math.Pow(outputLayer.AtVec(n)-_expect[n], 2) * LearningRate
-	}
-
-	return Cost, nil
+	return mat.Sum(costVec), nil
 }
 
-// CostDerVec returns the cost derivative for each output (as a vector)
+// costVec returns the cost for each output (as a vector)
 // from the given _expect data
-func (net *Network) CostDerVec(_expect ...float64) (*mat.VecDense, error) {
+func (net *Network) costVec(_expect ...float64) (*mat.Dense, error) {
+
+	out := net.Neurons[len(net.Neurons)-1]
+
+	// check output size
+	if len(_expect) < out.ColView(0).Len() {
+		return nil, ErrMissingOutput
+	}
+
+	// build expect vector
+	expect := mat.NewDense(len(_expect), 1, _expect)
+
+	// process cost = 1/2 * learningRate * (out - expect)^2
+	cost := new(mat.Dense)
+	cost.Sub(out, expect)              // out - expect
+	cost.MulElem(cost, cost)           // (out - expect)^2
+	cost.Scale(0.5*LearningRate, cost) // 1/2 * learningRate * (out - expect)^2
+
+	return cost, nil
+}
+
+// errorVec returns the cost derivative (also called ERROR) for each
+// output (as a vector) from the given _expect data
+func (net *Network) errorVec(_expect ...float64) (*mat.Dense, error) {
 	outLayer := net.Neurons[len(net.Neurons)-1]
 
 	// check output size
-	if len(_expect) < outLayer.Len() {
+	if len(_expect) < outLayer.ColView(0).Len() {
 		return nil, ErrMissingOutput
 	}
 
-	Cost := mat.NewVecDense(outLayer.Len(), nil)
+	// build expect vector
+	expect := mat.NewDense(len(_expect), 1, _expect)
 
-	// process cost
-	for n, expect := range _expect {
-		Cost.SetVec(n, LearningRate*2*(outLayer.AtVec(n)-expect))
-	}
+	// calc cost derivative = 2 * learningRate * (expect - out)
+	cost := new(mat.Dense)
+	cost.Sub(expect, outLayer)
+	cost.Scale(2*LearningRate, cost)
 
-	return Cost, nil
+	// return diff (derivative of cost)
+	return cost, nil
 }
 
-// Backward processes the backpropagation from the current network state
+// backward processes the backpropagation from the current network state
 // and the expected data : _expect
-func (net *Network) Backward(_expect ...float64) error {
+func (net *Network) backward(_expect ...float64) error {
 
-	// 0. fail on no state (no forward pass applied first)
+	out := net.Neurons[len(net.Neurons)-1]
+
+	// fail on no state (no forward pass applied first)
 	if !net.fed {
 		return ErrNoState
 	}
 
-	// 1. Prepare receiver network
-	delta, err := Empty(net.layers...)
+	// fail on invalid _expect size
+	if len(_expect) != out.ColView(0).Len() {
+		return ErrMissingOutput
+	}
+
+	// calc ERROR = 2 * learningRate * (expect - out)
+	// (it is in fact the cost derivative)
+	errors, err := net.errorVec(_expect...)
 	if err != nil {
 		return err
 	}
 
-	// 2. Get cost
-	cost, err := net.CostDerVec(_expect...)
-	if err != nil {
-		return err
-	}
-	// replace delta neuron values with the cost derivative
-	deltaOutLayer := delta.Neurons[len(delta.Neurons)-1]
-	for n, nl := 0, deltaOutLayer.Len(); n < nl; n++ {
-		deltaOutLayer.SetVec(n, cost.AtVec(n))
-	}
-
-	// 3. for each layer (except last)
+	// FOR EACH LAYER (from last to 1)
 	for l := len(net.layers) - 1; l > 0; l-- {
 
-		// process weights/biases between l and (l-1)
-		for prev := 0; prev < int(net.layers[l-1]); prev++ {
+		neurons := net.Neurons[l]
+		previous := net.Neurons[l-1]
+		weights := net.Weights[l-1] // from l-1 to l
+		biases := net.Biases[l-1]   // at l
 
-			// init sum to get the previous layers' neuron cost derivative
-			prevCostDer := float64(0)
+		// calc GRADIENTS = learningRate * sigmoid'( neuron[l] ) * errors
+		gradients := new(mat.Dense)
+		gradients.Apply(derivateSigmoid, neurons)
+		gradients.MulElem(gradients, errors)
+		gradients.Scale(LearningRate, gradients)
 
-			for cur := 0; cur < int(net.layers[l]); cur++ {
+		// calc WEIGHT DELTAS = gradients . previous^T
+		wdeltas := new(mat.Dense)
+		wdeltas.Mul(gradients, previous.T())
 
-				sigmoidDer := sigmoidToDerivative(net.Neurons[l].AtVec(cur))
-				curCostDer := delta.Neurons[l].AtVec(cur)
+		// propagate ERRORS to the previous layer
+		// (must use the weights BEFORE they are updated below)
+		previousErrors := new(mat.Dense)
+		previousErrors.Mul(weights.T(), errors)
+		errors = previousErrors
 
-				// bias = sigmoid' . (cost derivative of current neuron)
-				if prev == 0 {
-					bias := sigmoidDer
-					bias *= curCostDer
-					delta.Biases[l-1].SetVec(cur, bias)
-				}
+		// update weights
+		weights.Add(weights, wdeltas)
 
-				// weight = a^prev . sigmoid' . (cost derivative of current neuron)
-				weight := net.Neurons[l-1].AtVec(prev)
-				weight *= sigmoidDer
-				weight *= curCostDer
-				delta.Weights[l-1].Set(cur, prev, weight)
-
-				// add each weight to derivative of the previous neuron : weight * sigmoid' * (cost derivative of current neuron)
-				prevCostDer += weight * sigmoidDer * curCostDer
-
-			}
-
-			// update previous layer neuron cost derivative
-			delta.Neurons[l-1].SetVec(prev, prevCostDer)
-
-		}
-
-	}
-
-	// 4. Apply backpropagation
-
-	// each bias
-	for b, bias := range net.Biases {
-		bias.SubVec(bias, delta.Biases[b])
-	}
-	// each weight
-	for w, weight := range net.Weights {
-		weight.Sub(weight, delta.Weights[w])
-	}
-
-	outLayer := net.Neurons[len(net.Neurons)-1]
-	for i, l := 0, deltaOutLayer.Len(); i < l; i++ {
-		fmt.Printf("[out.%d.deriv] = %f - %f = %f\n", i, outLayer.AtVec(i), delta.Neurons[len(delta.Neurons)-1].AtVec(i), _expect[i])
+		// adjust biases
+		biases.Add(biases, gradients)
 	}
 
 	return nil
 }
+
+// Guess feeds _input through the network and returns its output
+func (net *Network) Guess(_input ...float64) ([]float64, error) {
+
+	// process feed forward
+	err := net.forward(_input...)
+	if err != nil {
+		return nil, err
+	}
+
+	// extract output
+	return net.Output()
+
+}
+
+// Train feeds the network with _input, then backpropagates so that the
+// network learns to produce _expect
+func (net *Network) Train(_input []float64, _expect []float64) error {
+
+	out := net.Neurons[len(net.Neurons)-1]
+
+	// check output size
+	if len(_expect) != out.ColView(0).Len() {
+		return ErrMissingOutput
+	}
+
+	// process guess subroutine
+	_, err := net.Guess(_input...)
+	if err != nil {
+		return err
+	}
+
+	// process backward propagation
+	return net.backward(_expect...)
+
+}
+
+// Output returns the output data (only if the network has been fed)
+func (net *Network) Output() ([]float64, error) {
+
+	if !net.fed {
+		return nil, ErrNoState
+	}
+
+	out := net.Neurons[len(net.Neurons)-1]
+	output := make([]float64, 0, net.layers[len(net.layers)-1])
+	for n, l := 0, out.ColView(0).Len(); n < l; n++ {
+		output = append(output, out.At(n, 0))
+	}
+
+	return output, nil
+
+}
diff --git a/network_test.go b/network_test.go
index 1af82ff..956d5ff 100644
--- a/network_test.go
+++ b/network_test.go
@@ -86,15 +86,15 @@ func TestEmptyNetworkSizes(t *testing.T) {
 
 	// 4. Check each neuron layer count
 	for n, neuron := range net.Neurons {
-		if uint(neuron.Len()) != test[n] {
-			t.Errorf("Expected %d neurons on layer %d, got %d", test[n], n, neuron.Len())
+		if uint(neuron.ColView(0).Len()) != test[n] {
+			t.Errorf("Expected %d neurons on layer %d, got %d", test[n], n, neuron.ColView(0).Len())
 		}
 	}
 
 	// 5. Check each bias layer count
 	for b, bias := range net.Biases {
-		if uint(bias.Len()) != test[b+1] {
-			t.Errorf("Expected %d biases on layer %d, got %d", test[b+1], b, bias.Len())
+		if uint(bias.ColView(0).Len()) != test[b+1] {
+			t.Errorf("Expected %d biases on layer %d, got %d", test[b+1], b, bias.ColView(0).Len())
 		}
 	}
 
@@ -141,7 +141,7 @@ func TestForwardPass(t *testing.T) {
 		}
 
 		// apply forward pass
-		_, err = net.Forward(test.X...)
+		_, err = net.Guess(test.X...)
 		if err != nil {
 			t.Errorf("Unexpected error <%s>", err)
 			break
@@ -151,19 +151,19 @@ func TestForwardPass(t *testing.T) {
 		for l, ll := 1, len(net.layers); l < ll; l++ {
 
 			// each neuron = ( each previous neuron times its weight ) + neuron bias
-			for n, nl := 0, net.Neurons[l].Len(); n < nl; n++ {
-				sum := net.Biases[l-1].AtVec(n)
+			for n, nl := 0, net.Neurons[l].ColView(0).Len(); n < nl; n++ {
+				sum := net.Biases[l-1].At(n, 0)
 
 				// sum each previous neuron*its weight
-				for i, il := 0, net.Neurons[l-1].Len(); i < il; i++ {
-					sum += net.Neurons[l-1].AtVec(i) * net.Weights[l-1].At(n, i)
+				for i, il := 0, net.Neurons[l-1].ColView(0).Len(); i < il; i++ {
+					sum += net.Neurons[l-1].At(i, 0) * net.Weights[l-1].At(n, i)
 				}
 
 				sum = sigmoid(0, 0, sum)
 
 				// check sum
-				if !floats.EqualWithinAbs(net.Neurons[l].AtVec(n), sum, 1e9) {
-					t.Fatalf("Expected neuron %d.%d to be %f, got %f", l, n, sum, net.Neurons[l].AtVec(n))
+				if !floats.EqualWithinAbs(net.Neurons[l].At(n, 0), sum, 1e-9) {
+					t.Fatalf("Expected neuron %d.%d to be %f, got %f", l, n, sum, net.Neurons[l].At(n, 0))
 				}
 			}
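
Reviewer note (not part of the patch): a minimal usage sketch of the refactored
public API (Empty / Train / Guess) introduced above. The import path and the
2-3-1 topology are hypothetical placeholders chosen for illustration only.

	package main

	import (
		"fmt"
		"log"

		nn "example.com/nn" // hypothetical import path for this package
	)

	func main() {
		// build an empty 2-3-1 network with randomized biases and weights
		net, err := nn.Empty(2, 3, 1)
		if err != nil {
			log.Fatal(err)
		}

		// Train = forward pass (Guess) + backward pass on one sample
		for i := 0; i < 1000; i++ {
			if err := net.Train([]float64{0, 1}, []float64{1}); err != nil {
				log.Fatal(err)
			}
		}

		// read the network's guess for the same input
		out, err := net.Guess(0, 1)
		if err != nil {
			log.Fatal(err)
		}
		fmt.Println(out)
	}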