backprop (?) | refactor methods (?)

Adrien Marquès 2018-10-11 23:26:36 +02:00
parent e31b864c32
commit be55de113c
2 changed files with 153 additions and 123 deletions

View File

@@ -1,9 +1,7 @@
package nn
import (
"fmt"
"gonum.org/v1/gonum/mat"
"math"
"math/rand"
"time"
)
@@ -13,9 +11,9 @@ type Network struct {
fed bool // whether the network has the state fed by a forward pass
Neurons []*mat.VecDense // neuron value vector for each layer (size=L)
Biases []*mat.VecDense // neuron bias vector for each layer (size=L-1)
Weights []*mat.Dense // weights between each 2-layers (size=L-1)
Neurons []*mat.Dense // neuron value vector for each layer (size=L)
Biases []*mat.Dense // neuron bias vector for each layer (size=L-1)
Weights []*mat.Dense // weights between each 2-layers (size=L-1)
}
const MaxLayerCount = 255
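Aside (not part of the commit): the core of this change is holding every per-layer vector as a single-column *mat.Dense instead of a *mat.VecDense, so all containers share the *mat.Dense API. A minimal standalone sketch of the two representations, with made-up values:

package main

import (
	"fmt"

	"gonum.org/v1/gonum/mat"
)

func main() {
	// old representation: a dedicated vector type
	v := mat.NewVecDense(3, []float64{0.1, 0.2, 0.3})
	// new representation used by this commit: a 3x1 matrix
	m := mat.NewDense(3, 1, []float64{0.1, 0.2, 0.3})

	fmt.Println(v.AtVec(1))         // 0.2
	fmt.Println(m.At(1, 0))         // 0.2 (replaces AtVec)
	fmt.Println(m.ColView(0).Len()) // 3   (replaces Len)
}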
@@ -36,8 +34,8 @@ func Empty(_layers ...uint) (*Network, error) {
net := &Network{
layers: _layers,
fed: false,
Neurons: make([]*mat.VecDense, 0),
Biases: make([]*mat.VecDense, 0),
Neurons: make([]*mat.Dense, 0),
Biases: make([]*mat.Dense, 0),
Weights: make([]*mat.Dense, 0),
}
@@ -49,7 +47,7 @@ func Empty(_layers ...uint) (*Network, error) {
}
// create neurons
net.Neurons = append(net.Neurons, mat.NewVecDense(int(layer), nil))
net.Neurons = append(net.Neurons, mat.NewDense(int(layer), 1, nil))
// do not create weights nor biases for first layer
// (no previous layer to bound to)
@@ -63,7 +61,7 @@ func Empty(_layers ...uint) (*Network, error) {
rand.Seed(time.Now().UnixNano())
biases = append(biases, rand.Float64())
}
biasesVec := mat.NewVecDense(int(layer), biases)
biasesVec := mat.NewDense(int(layer), 1, biases)
net.Biases = append(net.Biases, biasesVec)
rows, cols := int(layer), int(_layers[i-1])
@@ -86,31 +84,31 @@ func (net *Network) reset() {
net.fed = false
for i, _ := range net.Neurons {
net.Neurons[i] = mat.NewVecDense(int(net.layers[i]), nil)
net.Neurons[i] = mat.NewDense(int(net.layers[i]), 1, nil)
}
}
// Forward processes a forward propagation from an input vector
// forward processes a forward propagation from an input vector
// and leaves the network in the final processing state
func (net *Network) Forward(_input ...float64) ([]float64, error) {
func (net *Network) forward(_input ...float64) error {
// check input size
if len(_input) < net.Neurons[0].Len() {
return nil, ErrMissingInput
if len(_input) < net.Neurons[0].ColView(0).Len() {
return ErrMissingInput
}
// reset neuron values
net.reset()
// forward input to first layer
for n, l := 0, net.Neurons[0].Len(); n < l; n++ {
net.Neurons[0].SetVec(n, _input[n])
for n, l := 0, net.Neurons[0].ColView(0).Len(); n < l; n++ {
net.Neurons[0].Set(n, 0, _input[n])
}
// process each layer from the previous one
for l, ll := 1, len(net.layers); l < ll; l++ {
// Z = w^l . a^(l-1) + b^l
z := new(mat.Dense)
z := net.Neurons[l]
a := net.Neurons[l-1] // neurons of previous layer
w := net.Weights[l-1] // shifted by 1 because no weights between layers -1 and 0
@@ -119,146 +117,178 @@ func (net *Network) Forward(_input ...float64) ([]float64, error) {
z.Mul(w, a)
z.Add(z, b)
z.Apply(sigmoid, z)
// copy values (first line = vector)
net.Neurons[l].CloneVec(z.ColView(0))
}
net.fed = true
// format output
outputLayer := net.Neurons[len(net.Neurons)-1]
output := make([]float64, 0, net.layers[len(net.layers)-1])
for n, l := 0, outputLayer.Len(); n < l; n++ {
output = append(output, outputLayer.AtVec(n))
}
return output, nil
return nil
}
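Aside (not part of the commit): the per-layer step above computes Z = w^l . a^(l-1) + b^l and then applies the sigmoid element-wise. A standalone sketch with made-up sizes (2 inputs, 3 neurons), inlining the sigmoid instead of using the package's own helper:

package main

import (
	"fmt"
	"math"

	"gonum.org/v1/gonum/mat"
)

func main() {
	a := mat.NewDense(2, 1, []float64{1, 0})                   // a^(l-1): 2x1
	w := mat.NewDense(3, 2, []float64{.1, .2, .3, .4, .5, .6}) // w^l: 3x2 (rows = layer l, cols = layer l-1)
	b := mat.NewDense(3, 1, []float64{.01, .02, .03})          // b^l: 3x1

	z := new(mat.Dense)
	z.Mul(w, a) // 3x1
	z.Add(z, b)
	z.Apply(func(_, _ int, v float64) float64 { return 1 / (1 + math.Exp(-v)) }, z)
	fmt.Println(mat.Formatted(z)) // becomes a^l
}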
// Cost returns the cost from the given output
func (net *Network) Cost(_expect ...float64) (float64, error) {
outputLayer := net.Neurons[len(net.Neurons)-1]
// check output size
if len(_expect) < outputLayer.Len() {
return 0, ErrMissingOutput
costVec, err := net.costVec(_expect...)
if err != nil {
return 0, err
}
var Cost float64
// process cost
for n, l := 0, outputLayer.Len(); n < l; n++ {
Cost += math.Pow(outputLayer.AtVec(n)-_expect[n], 2) * LearningRate
}
return Cost, nil
return mat.Sum(costVec), nil
}
// CostDerVec returns the cost derivative for each output (as a vector)
// costVec returns the cost for each output (as a vector)
// from the given _expect data
func (net *Network) CostDerVec(_expect ...float64) (*mat.VecDense, error) {
func (net *Network) costVec(_expect ...float64) (*mat.Dense, error) {
out := net.Neurons[len(net.Neurons)-1]
// check output size
if len(_expect) < out.ColView(0).Len() {
return nil, ErrMissingOutput
}
// build expect vector
expect := mat.NewDense(len(_expect), 1, _expect)
// process cost = 1/2 * learningRate * (out - expect)^2
cost := new(mat.Dense)
cost.Sub(out, expect) // out - expect
cost.MulElem(cost, cost) // (out - expect)^2
cost.Mul(cost, mat.NewDense(1, 1, []float64{0.5 * LearningRate})) // 1/2 * learningRate * (out - expect)^2
return cost, nil
}
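As a quick worked example of the formula above: with LearningRate = 1, a single output neuron at 0.8 and an expected value of 1.0, costVec yields 0.5 * 1 * (0.8 - 1.0)^2 = 0.02, and Cost simply sums these per-output entries.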
// errorVec returns the cost derivative (also called ERROR) for each
// output (as a vector) from the given _expect data
func (net *Network) errorVec(_expect ...float64) (*mat.Dense, error) {
outLayer := net.Neurons[len(net.Neurons)-1]
// check output size
if len(_expect) < outLayer.Len() {
if len(_expect) < outLayer.ColView(0).Len() {
return nil, ErrMissingOutput
}
Cost := mat.NewVecDense(outLayer.Len(), nil)
// build expect vector
expect := mat.NewDense(len(_expect), 1, _expect)
// process cost
for n, expect := range _expect {
Cost.SetVec(n, LearningRate*2*(outLayer.AtVec(n)-expect))
}
// calc cost derivative = 2 * learningRate * (expect - out)
cost := new(mat.Dense)
cost.Sub(expect, outLayer)
cost.Mul(cost, mat.NewDense(1, 1, []float64{2 * LearningRate}))
return Cost, nil
// return diff (derivative of cost)
return cost, nil
}
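With the same made-up numbers (LearningRate = 1, output 0.8, expected 1.0), errorVec returns 2 * 1 * (1.0 - 0.8) = 0.4; this is the per-output quantity that backward starts from.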
// Backward processes the backpropagation from the current network state
// backward processes the backpropagation from the current network state
// and the expected data : _expect
func (net *Network) Backward(_expect ...float64) error {
func (net *Network) backward(_expect ...float64) error {
// 0. fail on no state (no forward pass applied first)
out := net.Neurons[len(net.Neurons)-1]
// 1. fail on no state (no forward pass applied first)
if !net.fed {
return ErrNoState
}
// 1. Prepare receiver network
delta, err := Empty(net.layers...)
// fail on invalid _expect size
if len(_expect) != out.ColView(0).Len() {
return ErrMissingOutput
}
// calc ERROR = 2 * learningRate * (expect - out)
// *it is in fact the cost derivative
errors, err := net.errorVec(_expect...)
if err != nil {
return err
}
// 2. Get cost
cost, err := net.CostDerVec(_expect...)
if err != nil {
return err
}
// replace delta neuron values with the cost derivative
deltaOutLayer := delta.Neurons[len(delta.Neurons)-1]
for n, nl := 0, deltaOutLayer.Len(); n < nl; n++ {
deltaOutLayer.SetVec(n, cost.AtVec(n))
}
// 3. for each layer (except last)
// FOR EACH LAYER (from last to 1)
for l := len(net.layers) - 1; l > 0; l-- {
// process weights/biases between l and (l-1)
for prev := 0; prev < int(net.layers[l-1]); prev++ {
neurons := net.Neurons[l]
previous := net.Neurons[l-1]
weights := net.Weights[l-1] // from l-1 to l
biases := net.Biases[l-1] // at l
// init sum to get the previous layers' neuron cost derivative
prevCostDer := float64(0)
// calc GRADIENTS = learningRate * sigmoid'( neuron[l] ) * errors (element-wise)
gradients := new(mat.Dense)
gradients.Apply(derivateSigmoid, neurons)
gradients.MulElem(gradients, errors)
gradients.Mul(gradients, mat.NewDense(1, 1, []float64{LearningRate}))
for cur := 0; cur < int(net.layers[l]); cur++ {
// calc WEIGHTS DELTAS = gradients . previous^T
wdeltas := new(mat.Dense)
wdeltas.Mul(gradients, previous.T())
sigmoidDer := sigmoidToDerivative(net.Neurons[l].AtVec(cur))
curCostDer := delta.Neurons[l].AtVec(cur)
// update weights
weights.Add(weights, wdeltas)
// bias = sigmoid' . (cost derivative of current neuron)
if prev == 0 {
bias := sigmoidDer
bias *= curCostDer
delta.Biases[l-1].SetVec(cur, bias)
}
// adjust biases
biases.Add(biases, gradients)
// weight = a^prev . sigmoid' . (cost derivative of current neuron)
weight := net.Neurons[l-1].AtVec(prev)
weight *= sigmoidDer
weight *= curCostDer
delta.Weights[l-1].Set(cur, prev, weight)
// add each weight to derivative of the previous neuron : weight * sigmoid' * (cost derivative of current neuron)
prevCostDer += weight * sigmoidDer * curCostDer
}
// update previous layer neuron cost derivative
delta.Neurons[l-1].SetVec(prev, prevCostDer)
}
}
// 4. Apply backpropagation
// each bias
for b, bias := range net.Biases {
bias.SubVec(bias, delta.Biases[b])
}
// each weight
for w, weight := range net.Weights {
weight.Sub(weight, delta.Weights[w])
}
outLayer := net.Neurons[len(net.Neurons)-1]
for i, l := 0, deltaOutLayer.Len(); i < l; i++ {
fmt.Printf("[out.%d.deriv] = %f - %f = %f\n", i, outLayer.AtVec(i), delta.Neurons[len(delta.Neurons)-1].AtVec(i), _expect[i])
// update ERRORS
previousErrors := new(mat.Dense)
previousErrors.Clone(errors)
errors.Reset()
errors.Mul(weights.T(), previousErrors)
}
return nil
}
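Reading the loop above as update rules applied from the last layer down to layer 1 (with "." the matrix product, as in the comments):

	gradients = learningRate * sigmoid'(a^l) * errors   (element-wise)
	w^l      += gradients . (a^(l-1))^T
	b^l      += gradients
	errors    = (w^l)^T . errors                         (propagated to layer l-1)

Note that the deltas are now added in place rather than accumulated in a separate receiver network as before, and that the error is pushed back through the already-updated weights.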
// Guess uses the trained network to guess output from an input
func (net *Network) Guess(_input ...float64) ([]float64, error) {
// process feed forward
err := net.forward(_input...)
if err != nil {
return nil, err
}
// extract output
return net.Output()
}
// Train feeds the network with _input, then backpropagates so that it
// learns to produce _expect
func (net *Network) Train(_input []float64, _expect []float64) error {
out := net.Neurons[len(net.Neurons)-1]
// check output size
if len(_expect) != out.ColView(0).Len() {
return ErrMissingOutput
}
// process guess subroutine
_, err := net.Guess(_input...)
if err != nil {
return err
}
// process backward propagation
return net.backward(_expect...)
}
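For context, a hypothetical caller of the refactored public surface (Empty, Train, Guess); the import path, layer sizes and training pair below are made up for illustration:

package main

import (
	"fmt"
	"log"

	nn "example.invalid/nn" // hypothetical import path; use this package's real module path
)

func main() {
	net, err := nn.Empty(2, 3, 1) // made-up layer sizes
	if err != nil {
		log.Fatal(err)
	}
	// repeatedly train on a single made-up input/expected pair
	for i := 0; i < 1000; i++ {
		if err := net.Train([]float64{0, 1}, []float64{1}); err != nil {
			log.Fatal(err)
		}
	}
	out, err := net.Guess(0, 1) // forward pass, then Output()
	fmt.Println(out, err)
}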
// Output returns the output data (only if the network has been fed)
func (net Network) Output() ([]float64, error) {
if !net.fed {
return nil, ErrNoState
}
out := net.Neurons[len(net.Neurons)-1]
output := make([]float64, 0, net.layers[len(net.layers)-1])
for n, l := 0, out.ColView(0).Len(); n < l; n++ {
output = append(output, out.At(n, 0))
}
return output, nil
}

View File

@@ -86,15 +86,15 @@ func TestEmptyNetworkSizes(t *testing.T) {
// 4. Check each neuron layer count
for n, neuron := range net.Neurons {
if uint(neuron.Len()) != test[n] {
t.Errorf("Expected %d neurons on layer %d, got %d", test[n], n, neuron.Len())
if uint(neuron.ColView(0).Len()) != test[n] {
t.Errorf("Expected %d neurons on layer %d, got %d", test[n], n, neuron.ColView(0).Len())
}
}
// 5. Check each bias layer count
for b, bias := range net.Biases {
if uint(bias.Len()) != test[b+1] {
t.Errorf("Expected %d biases on layer %d, got %d", test[b+1], b, bias.Len())
if uint(bias.ColView(0).Len()) != test[b+1] {
t.Errorf("Expected %d biases on layer %d, got %d", test[b+1], b, bias.ColView(0).Len())
}
}
@@ -141,7 +141,7 @@ func TestForwardPass(t *testing.T) {
}
// apply forward pass
_, err = net.Forward(test.X...)
_, err = net.Guess(test.X...)
if err != nil {
t.Errorf("Unexpected error <%s>", err)
break
@@ -151,19 +151,19 @@ func TestForwardPass(t *testing.T) {
for l, ll := 1, len(net.layers); l < ll; l++ {
// each neuron = ( each previous neuron times its weight ) + neuron bias
for n, nl := 0, net.Neurons[l].Len(); n < nl; n++ {
sum := net.Biases[l-1].AtVec(n)
for n, nl := 0, net.Neurons[l].ColView(0).Len(); n < nl; n++ {
sum := net.Biases[l-1].At(n, 0)
// sum each previous neuron*its weight
for i, il := 0, net.Neurons[l-1].Len(); i < il; i++ {
sum += net.Neurons[l-1].AtVec(i) * net.Weights[l-1].At(n, i)
for i, il := 0, net.Neurons[l-1].ColView(0).Len(); i < il; i++ {
sum += net.Neurons[l-1].At(i, 0) * net.Weights[l-1].At(n, i)
}
sum = sigmoid(0, 0, sum)
// check sum
if !floats.EqualWithinAbs(net.Neurons[l].AtVec(n), sum, 1e-9) {
t.Fatalf("Expected neuron %d.%d to be %f, got %f", l, n, sum, net.Neurons[l].AtVec(n))
if !floats.EqualWithinAbs(net.Neurons[l].At(n, 0), sum, 1e-9) {
t.Fatalf("Expected neuron %d.%d to be %f, got %f", l, n, sum, net.Neurons[l].At(n, 0))
}
}