backprop (?) | refactor methods (?)
This commit is contained in:
parent
e31b864c32
commit
be55de113c
250
network.go
250
network.go
|
@ -1,9 +1,7 @@
|
||||||
package nn
|
package nn
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
"gonum.org/v1/gonum/mat"
|
"gonum.org/v1/gonum/mat"
|
||||||
"math"
|
|
||||||
"math/rand"
|
"math/rand"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
@ -13,8 +11,8 @@ type Network struct {
|
||||||
|
|
||||||
fed bool // whether the network has the state fed by a forward pass
|
fed bool // whether the network has the state fed by a forward pass
|
||||||
|
|
||||||
Neurons []*mat.VecDense // neuron value vector for each layer (size=L)
|
Neurons []*mat.Dense // neuron value vector for each layer (size=L)
|
||||||
Biases []*mat.VecDense // neuron bias vector for each layer (size=L-1)
|
Biases []*mat.Dense // neuron bias vector for each layer (size=L-1)
|
||||||
Weights []*mat.Dense // weights between each 2-layers (size=L-1)
|
Weights []*mat.Dense // weights between each 2-layers (size=L-1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -36,8 +34,8 @@ func Empty(_layers ...uint) (*Network, error) {
|
||||||
net := &Network{
|
net := &Network{
|
||||||
layers: _layers,
|
layers: _layers,
|
||||||
fed: false,
|
fed: false,
|
||||||
Neurons: make([]*mat.VecDense, 0),
|
Neurons: make([]*mat.Dense, 0),
|
||||||
Biases: make([]*mat.VecDense, 0),
|
Biases: make([]*mat.Dense, 0),
|
||||||
Weights: make([]*mat.Dense, 0),
|
Weights: make([]*mat.Dense, 0),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -49,7 +47,7 @@ func Empty(_layers ...uint) (*Network, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// create neurons
|
// create neurons
|
||||||
net.Neurons = append(net.Neurons, mat.NewVecDense(int(layer), nil))
|
net.Neurons = append(net.Neurons, mat.NewDense(int(layer), 1, nil))
|
||||||
|
|
||||||
// do not create weights nor biases for first layer
|
// do not create weights nor biases for first layer
|
||||||
// (no previous layer to bind to)
|
// (no previous layer to bind to)
|
||||||
|
@ -63,7 +61,7 @@ func Empty(_layers ...uint) (*Network, error) {
|
||||||
rand.Seed(time.Now().UnixNano())
|
rand.Seed(time.Now().UnixNano())
|
||||||
biases = append(biases, rand.Float64())
|
biases = append(biases, rand.Float64())
|
||||||
}
|
}
|
||||||
biasesVec := mat.NewVecDense(int(layer), biases)
|
biasesVec := mat.NewDense(int(layer), 1, biases)
|
||||||
net.Biases = append(net.Biases, biasesVec)
|
net.Biases = append(net.Biases, biasesVec)
|
||||||
|
|
||||||
rows, cols := int(layer), int(_layers[i-1])
|
rows, cols := int(layer), int(_layers[i-1])
|
||||||
|
@ -86,31 +84,31 @@ func (net *Network) reset() {
|
||||||
net.fed = false
|
net.fed = false
|
||||||
|
|
||||||
for i, _ := range net.Neurons {
|
for i, _ := range net.Neurons {
|
||||||
net.Neurons[i] = mat.NewVecDense(int(net.layers[i]), nil)
|
net.Neurons[i] = mat.NewDense(int(net.layers[i]), 1, nil)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Forward processes a forward propagation from an input vector
|
// forward processes a forward propagation from an input vector
|
||||||
// and leaves the network in the final processing state
|
// and leaves the network in the final processing state
|
||||||
func (net *Network) Forward(_input ...float64) ([]float64, error) {
|
func (net *Network) forward(_input ...float64) error {
|
||||||
|
|
||||||
// check input size
|
// check input size
|
||||||
if len(_input) < net.Neurons[0].Len() {
|
if len(_input) < net.Neurons[0].ColView(0).Len() {
|
||||||
return nil, ErrMissingInput
|
return ErrMissingInput
|
||||||
}
|
}
|
||||||
// reset neuron values
|
// reset neuron values
|
||||||
net.reset()
|
net.reset()
|
||||||
|
|
||||||
// forward input to first layer
|
// forward input to first layer
|
||||||
for n, l := 0, net.Neurons[0].Len(); n < l; n++ {
|
for n, l := 0, net.Neurons[0].ColView(0).Len(); n < l; n++ {
|
||||||
net.Neurons[0].SetVec(n, _input[n])
|
net.Neurons[0].Set(n, 0, _input[n])
|
||||||
}
|
}
|
||||||
|
|
||||||
// process each layer from the previous one
|
// process each layer from the previous one
|
||||||
for l, ll := 1, len(net.layers); l < ll; l++ {
|
for l, ll := 1, len(net.layers); l < ll; l++ {
|
||||||
|
|
||||||
// Z = w^l . a^(l-1) + b^l
|
// Z = w^l . a^(l-1) + b^l
|
||||||
z := new(mat.Dense)
|
z := net.Neurons[l]
|
||||||
|
|
||||||
a := net.Neurons[l-1] // neurons of previous layer
|
a := net.Neurons[l-1] // neurons of previous layer
|
||||||
w := net.Weights[l-1] // shifted by 1 because no weights between layers -1 and 0
|
w := net.Weights[l-1] // shifted by 1 because no weights between layers -1 and 0
|
||||||
|
@ -119,146 +117,178 @@ func (net *Network) Forward(_input ...float64) ([]float64, error) {
|
||||||
z.Mul(w, a)
|
z.Mul(w, a)
|
||||||
z.Add(z, b)
|
z.Add(z, b)
|
||||||
z.Apply(sigmoid, z)
|
z.Apply(sigmoid, z)
|
||||||
|
|
||||||
// copy values (first line = vector)
|
|
||||||
net.Neurons[l].CloneVec(z.ColView(0))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
net.fed = true
|
net.fed = true
|
||||||
|
|
||||||
// format output
|
return nil
|
||||||
outputLayer := net.Neurons[len(net.Neurons)-1]
|
|
||||||
output := make([]float64, 0, net.layers[len(net.layers)-1])
|
|
||||||
for n, l := 0, outputLayer.Len(); n < l; n++ {
|
|
||||||
output = append(output, outputLayer.AtVec(n))
|
|
||||||
}
|
|
||||||
|
|
||||||
return output, nil
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cost returns the cost from the given output
|
// Cost returns the cost from the given output
|
||||||
func (net *Network) Cost(_expect ...float64) (float64, error) {
|
func (net *Network) Cost(_expect ...float64) (float64, error) {
|
||||||
|
|
||||||
outputLayer := net.Neurons[len(net.Neurons)-1]
|
costVec, err := net.costVec(_expect...)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return mat.Sum(costVec), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// costVec returns the cost for each output (as a vector)
|
||||||
|
// from the given _expect data
|
||||||
|
func (net *Network) costVec(_expect ...float64) (*mat.Dense, error) {
|
||||||
|
|
||||||
|
out := net.Neurons[len(net.Neurons)-1]
|
||||||
|
|
||||||
// check output size
|
// check output size
|
||||||
if len(_expect) < outputLayer.Len() {
|
if len(_expect) < out.ColView(0).Len() {
|
||||||
return 0, ErrMissingOutput
|
return nil, ErrMissingOutput
|
||||||
}
|
}
|
||||||
|
|
||||||
var Cost float64
|
// build expect vector
|
||||||
|
expect := mat.NewDense(len(_expect), 1, _expect)
|
||||||
|
|
||||||
// process cost
|
// process cost = 1/2 * learningRate * (out - expect)^2
|
||||||
for n, l := 0, outputLayer.Len(); n < l; n++ {
|
cost := new(mat.Dense)
|
||||||
Cost += math.Pow(outputLayer.AtVec(n)-_expect[n], 2) * LearningRate
|
cost.Sub(out, expect) // out - expect
|
||||||
|
cost.MulElem(cost, cost) // (out - expect)^2
|
||||||
|
cost.Mul(cost, mat.NewDense(1, 1, []float64{0.5 * LearningRate})) // 1/2 *learningRate * (out - expect)^2
|
||||||
|
|
||||||
|
return cost, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return Cost, nil
|
// errorVec returns the cost derivative (also called ERROR) for each
|
||||||
}
|
// output (as a vector) from the given _expect data
|
||||||
|
func (net *Network) errorVec(_expect ...float64) (*mat.Dense, error) {
|
||||||
// CostDerVec returns the cost derivative for each output (as a vector)
|
|
||||||
// from the given _expect data
|
|
||||||
func (net *Network) CostDerVec(_expect ...float64) (*mat.VecDense, error) {
|
|
||||||
|
|
||||||
outLayer := net.Neurons[len(net.Neurons)-1]
|
outLayer := net.Neurons[len(net.Neurons)-1]
|
||||||
|
|
||||||
// check output size
|
// check output size
|
||||||
if len(_expect) < outLayer.Len() {
|
if len(_expect) < outLayer.ColView(0).Len() {
|
||||||
return nil, ErrMissingOutput
|
return nil, ErrMissingOutput
|
||||||
}
|
}
|
||||||
|
|
||||||
Cost := mat.NewVecDense(outLayer.Len(), nil)
|
// build expect vector
|
||||||
|
expect := mat.NewDense(len(_expect), 1, _expect)
|
||||||
|
|
||||||
// process cost
|
// calc cost derivative = 2 * learningRate * (expect - out)
|
||||||
for n, expect := range _expect {
|
cost := new(mat.Dense)
|
||||||
Cost.SetVec(n, LearningRate*2*(outLayer.AtVec(n)-expect))
|
cost.Sub(expect, outLayer)
|
||||||
|
cost.Mul(cost, mat.NewDense(1, 1, []float64{2 * LearningRate}))
|
||||||
|
|
||||||
|
// return diff (derivative of cost)
|
||||||
|
return cost, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return Cost, nil
|
// backward processes the backpropagation from the current network state
|
||||||
}
|
|
||||||
|
|
||||||
// Backward processes the backpropagation from the current network state
|
|
||||||
// and the expected data : _expect
|
// and the expected data : _expect
|
||||||
func (net *Network) Backward(_expect ...float64) error {
|
func (net *Network) backward(_expect ...float64) error {
|
||||||
|
|
||||||
// 0. fail on no state (no forward pass applied first)
|
out := net.Neurons[len(net.Neurons)-1]
|
||||||
|
|
||||||
|
// 1. fail on no state (no forward pass applied first)
|
||||||
if !net.fed {
|
if !net.fed {
|
||||||
return ErrNoState
|
return ErrNoState
|
||||||
}
|
}
|
||||||
|
|
||||||
// 1. Prepare receiver network
|
// fail on invalid _expect size
|
||||||
delta, err := Empty(net.layers...)
|
if len(_expect) != out.ColView(0).Len() {
|
||||||
|
return ErrMissingOutput
|
||||||
|
}
|
||||||
|
|
||||||
|
// calc ERROR = 2 * learningRate * (expect - out)
|
||||||
|
// *it is in fact the cost derivative
|
||||||
|
errors, err := net.errorVec(_expect...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2. Get cost
|
// FOR EACH LAYER (from last to 1)
|
||||||
cost, err := net.CostDerVec(_expect...)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
// replace delta neuron values with the cost derivative
|
|
||||||
deltaOutLayer := delta.Neurons[len(delta.Neurons)-1]
|
|
||||||
for n, nl := 0, deltaOutLayer.Len(); n < nl; n++ {
|
|
||||||
deltaOutLayer.SetVec(n, cost.AtVec(n))
|
|
||||||
}
|
|
||||||
|
|
||||||
// 3. for each layer (except last)
|
|
||||||
for l := len(net.layers) - 1; l > 0; l-- {
|
for l := len(net.layers) - 1; l > 0; l-- {
|
||||||
|
|
||||||
// process weights/biases between l and (l-1)
|
neurons := net.Neurons[l]
|
||||||
for prev := 0; prev < int(net.layers[l-1]); prev++ {
|
previous := net.Neurons[l-1]
|
||||||
|
weights := net.Weights[l-1] // from l-1 to l
|
||||||
|
biases := net.Biases[l-1] // at l
|
||||||
|
|
||||||
// init sum to get the previous layers' neuron cost derivative
|
// calc GRADIENTS = learningRate * sigmoid'( neuron[l] ) * ERRORS (element-wise)
|
||||||
prevCostDer := float64(0)
|
gradients := new(mat.Dense)
|
||||||
|
gradients.Apply(derivateSigmoid, neurons)
|
||||||
|
gradients.MulElem(gradients, errors)
|
||||||
|
gradients.Mul(gradients, mat.NewDense(1, 1, []float64{LearningRate}))
|
||||||
|
|
||||||
for cur := 0; cur < int(net.layers[l]); cur++ {
|
// calc WEIGHTS DELTAS = gradients . previous^T
|
||||||
|
wdeltas := new(mat.Dense)
|
||||||
|
wdeltas.Mul(gradients, previous.T())
|
||||||
|
|
||||||
sigmoidDer := sigmoidToDerivative(net.Neurons[l].AtVec(cur))
|
// update weights
|
||||||
curCostDer := delta.Neurons[l].AtVec(cur)
|
weights.Add(weights, wdeltas)
|
||||||
|
|
||||||
// bias = sigmoid' . (cost derivative of current neuron)
|
// adjust biases
|
||||||
if prev == 0 {
|
biases.Add(biases, gradients)
|
||||||
bias := sigmoidDer
|
|
||||||
bias *= curCostDer
|
|
||||||
delta.Biases[l-1].SetVec(cur, bias)
|
|
||||||
}
|
|
||||||
|
|
||||||
// weight = a^prev . sigmoid' . (cost derivative of current neuron)
|
// update ERRORS
|
||||||
weight := net.Neurons[l-1].AtVec(prev)
|
previousErrors := new(mat.Dense)
|
||||||
weight *= sigmoidDer
|
previousErrors.Clone(errors)
|
||||||
weight *= curCostDer
|
errors.Reset()
|
||||||
delta.Weights[l-1].Set(cur, prev, weight)
|
errors.Mul(weights.T(), previousErrors)
|
||||||
|
|
||||||
// add each weight to derivative of the previous neuron : weight * sigmoid' * (cost derivative of current neuron)
|
|
||||||
prevCostDer += weight * sigmoidDer * curCostDer
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// update previous layer neuron cost derivative
|
|
||||||
delta.Neurons[l-1].SetVec(prev, prevCostDer)
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// 4. Apply backpropagation
|
|
||||||
|
|
||||||
// each bias
|
|
||||||
for b, bias := range net.Biases {
|
|
||||||
bias.SubVec(bias, delta.Biases[b])
|
|
||||||
}
|
|
||||||
// each weight
|
|
||||||
for w, weight := range net.Weights {
|
|
||||||
weight.Sub(weight, delta.Weights[w])
|
|
||||||
}
|
|
||||||
|
|
||||||
outLayer := net.Neurons[len(net.Neurons)-1]
|
|
||||||
for i, l := 0, deltaOutLayer.Len(); i < l; i++ {
|
|
||||||
fmt.Printf("[out.%d.deriv] = %f - %f = %f\n", i, outLayer.AtVec(i), delta.Neurons[len(delta.Neurons)-1].AtVec(i), _expect[i])
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Guess uses the trained network to guess output from an input
|
||||||
|
func (net *Network) Guess(_input ...float64) ([]float64, error) {
|
||||||
|
|
||||||
|
// process feed forward
|
||||||
|
err := net.forward(_input...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// extract output
|
||||||
|
return net.Output()
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Train uses the trained network to train with the _input and tries to learn
|
||||||
|
// to guess the _expect instead
|
||||||
|
func (net *Network) Train(_input []float64, _expect []float64) error {
|
||||||
|
|
||||||
|
out := net.Neurons[len(net.Neurons)-1]
|
||||||
|
|
||||||
|
// check output size
|
||||||
|
if len(_expect) != out.ColView(0).Len() {
|
||||||
|
return ErrMissingOutput
|
||||||
|
}
|
||||||
|
|
||||||
|
// process guess subroutine
|
||||||
|
_, err := net.Guess(_input...)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// process backward propagation
|
||||||
|
return net.backward(_expect...)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Output returns the output data (only if the network has been fed)
|
||||||
|
func (net Network) Output() ([]float64, error) {
|
||||||
|
|
||||||
|
if !net.fed {
|
||||||
|
return nil, ErrNoState
|
||||||
|
}
|
||||||
|
|
||||||
|
out := net.Neurons[len(net.Neurons)-1]
|
||||||
|
output := make([]float64, 0, net.layers[len(net.layers)-1])
|
||||||
|
for n, l := 0, out.ColView(0).Len(); n < l; n++ {
|
||||||
|
output = append(output, out.At(n, 0))
|
||||||
|
}
|
||||||
|
|
||||||
|
return output, nil
|
||||||
|
|
||||||
|
}
|
||||||
|
|
|
@ -86,15 +86,15 @@ func TestEmptyNetworkSizes(t *testing.T) {
|
||||||
|
|
||||||
// 4. Check each neuron layer count
|
// 4. Check each neuron layer count
|
||||||
for n, neuron := range net.Neurons {
|
for n, neuron := range net.Neurons {
|
||||||
if uint(neuron.Len()) != test[n] {
|
if uint(neuron.ColView(0).Len()) != test[n] {
|
||||||
t.Errorf("Expected %d neurons on layer %d, got %d", test[n], n, neuron.Len())
|
t.Errorf("Expected %d neurons on layer %d, got %d", test[n], n, neuron.ColView(0).Len())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// 5. Check each bias layer count
|
// 5. Check each bias layer count
|
||||||
for b, bias := range net.Biases {
|
for b, bias := range net.Biases {
|
||||||
|
|
||||||
if uint(bias.Len()) != test[b+1] {
|
if uint(bias.ColView(0).Len()) != test[b+1] {
|
||||||
t.Errorf("Expected %d biases on layer %d, got %d", test[b+1], b, bias.Len())
|
t.Errorf("Expected %d biases on layer %d, got %d", test[b+1], b, bias.ColView(0).Len())
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -141,7 +141,7 @@ func TestForwardPass(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// apply forward pass
|
// apply forward pass
|
||||||
_, err = net.Forward(test.X...)
|
_, err = net.Guess(test.X...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("Unexpected error <%s>", err)
|
t.Errorf("Unexpected error <%s>", err)
|
||||||
break
|
break
|
||||||
|
@ -151,19 +151,19 @@ func TestForwardPass(t *testing.T) {
|
||||||
for l, ll := 1, len(net.layers); l < ll; l++ {
|
for l, ll := 1, len(net.layers); l < ll; l++ {
|
||||||
|
|
||||||
// each neuron = ( each previous neuron times its weight ) + neuron bias
|
// each neuron = ( each previous neuron times its weight ) + neuron bias
|
||||||
for n, nl := 0, net.Neurons[l].Len(); n < nl; n++ {
|
for n, nl := 0, net.Neurons[l].ColView(0).Len(); n < nl; n++ {
|
||||||
sum := net.Biases[l-1].AtVec(n)
|
sum := net.Biases[l-1].At(n, 0)
|
||||||
|
|
||||||
// sum each previous neuron*its weight
|
// sum each previous neuron*its weight
|
||||||
for i, il := 0, net.Neurons[l-1].Len(); i < il; i++ {
|
for i, il := 0, net.Neurons[l-1].ColView(0).Len(); i < il; i++ {
|
||||||
sum += net.Neurons[l-1].AtVec(i) * net.Weights[l-1].At(n, i)
|
sum += net.Neurons[l-1].At(i, 0) * net.Weights[l-1].At(n, i)
|
||||||
}
|
}
|
||||||
|
|
||||||
sum = sigmoid(0, 0, sum)
|
sum = sigmoid(0, 0, sum)
|
||||||
|
|
||||||
// check sum
|
// check sum
|
||||||
if !floats.EqualWithinAbs(net.Neurons[l].AtVec(n), sum, 1e9) {
|
if !floats.EqualWithinAbs(net.Neurons[l].At(n, 0), sum, 1e9) {
|
||||||
t.Fatalf("Expected neuron %d.%d to be %f, got %f", l, n, sum, net.Neurons[l].AtVec(n))
|
t.Fatalf("Expected neuron %d.%d to be %f, got %f", l, n, sum, net.Neurons[l].At(n, 0))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue