Introduction
A neural network model, also known as an artificial neural network (ANN), is a class of nonlinear models that predicts outputs from a given set of input variables. The earliest examples date back to the early 1940s, when they were used for classification problems; in that sense they can be viewed as a family of nonlinear regression methods. In recent years they have been applied extensively in deep learning. This article sets out to understand the underlying principles and, with reference to the literature, implement a neural network in Java.
Since there are already plenty of articles introducing neural networks online, this post only names the key concepts without expanding on their details and goes straight to the implementation. If any of the following is unfamiliar, please look it up first:
- Multi-layer feed-forward networks
- Activation functions
- The backpropagation algorithm
Building the ANN
Constructing Activation Functions
We mainly use the sigmoid and tanh functions here, chiefly because their ranges are bounded and their derivatives are easy to compute. In a neural network the derivative should ideally be computable from f(x) itself, because the value of f(x) is recorded during the forward pass; reusing it greatly improves efficiency. You can also implement an activation function of your own.
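Concretely, both derivatives can be written in terms of the value that f already produced on the forward pass, so df never has to recompute the exponential:

$$\sigma_k(x) = \frac{1}{1 + e^{-kx}}, \qquad \sigma_k'(x) = k\,\sigma_k(x)\bigl(1 - \sigma_k(x)\bigr)$$

$$\tanh'(x) = 1 - \tanh^2(x)$$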
```java
package com.chengsluo.ann;

public abstract class Activation {

    public abstract double f(double x);

    // df receives the recorded output f(x) (Layer.backprop passes v[i]),
    // so the derivative is computed from that value directly.
    public abstract double df(double x);

    // Logistic (sigmoid) activation with steepness k.
    public static Activation logit(final double k) {
        return new Activation() {
            @Override
            public double f(double x) {
                return 1.0 / (1.0 + Math.exp(-k * x));
            }

            @Override
            public double df(double x) {
                // x is already f(z), so sigma'(z) = k * sigma(z) * (1 - sigma(z)).
                return k * x * (1.0 - x);
            }
        };
    }

    public static Activation sigmoid(double k) { return logit(k); }
    public static Activation logit() { return logit(1.0); }
    public static Activation sigmoid() { return logit(); }

    public static Activation tanh() {
        return new Activation() {
            @Override
            public double f(double x) {
                return Math.tanh(x);
            }

            @Override
            public double df(double x) {
                // x is already tanh(z), so tanh'(z) = 1 - tanh(z)^2.
                return 1.0 - x * x;
            }
        };
    }
}
```
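One practical note on ranges: logit/sigmoid maps into (0, 1) while tanh maps into (−1, 1), so the output layer's activation should be chosen so that the training targets fall inside its range.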
Building a Single Layer
To keep things simple when constructing a layer, all units in a layer share a single activation function, which is the usual default.
```java
package com.chengsluo.ann;

import java.util.Random;

public class Layer {

    // Utility that prints a vector as "{a,b,c}".
    public static String printVec(double[] x) {
        StringBuilder sb = new StringBuilder();
        sb.append("{");
        for (int i = 0; i < x.length; i++) {
            if (i > 0) sb.append(",");
            sb.append(x[i]);
        }
        sb.append("}");
        return sb.toString();
    }

    double[] v;          // activated outputs of this layer
    double[][] w;        // w[i][j]: weight from input j to unit i
    double[] bW = null;  // bias weights, null if the layer has no bias

    Activation fn;

    double[] err;        // per-unit error terms (deltas)
    double E;            // squared error accumulated in the last backprop

    public Layer(int units, int inputs, Activation fn, boolean bias) {
        this.fn = fn;
        v = new double[units];
        w = new double[units][];
        for (int i = 0; i < v.length; i++) w[i] = new double[inputs];
        if (bias) bW = new double[units];
        err = new double[units];
    }

    public int units() { return v.length; }
    public double[] value() { return v; }
    public double[] errors() { return err; }
    public double error() { return E; }

    // Forward pass: v = f(Wx + b).
    public double[] feed(double[] x) {
        for (int i = 0; i < v.length; i++) {
            double[] W = w[i];
            v[i] = bW != null ? bW[i] : 0.0;
            for (int j = 0; j < W.length; j++) v[i] += W[j] * x[j];
            v[i] = fn.f(v[i]);
        }
        return v;
    }

    // Backward pass: turn the incoming error signal s into per-unit
    // deltas and propagate the error back to the previous layer.
    public double[] backprop(double[] s) {
        double[] out = new double[w[0].length];
        E = 0;
        for (int i = 0; i < v.length; i++) {
            err[i] = fn.df(v[i]) * s[i];
            E += err[i] * err[i];
            double[] W = w[i];
            for (int j = 0; j < W.length; j++) out[j] += W[j] * err[i];
        }
        return out;
    }

    // Plain gradient step with learning rate r, given this layer's input o.
    public double[] update(double[] o, double r) {
        for (int i = 0; i < v.length; i++) {
            if (bW != null) bW[i] += r * err[i];
            double[] W = w[i];
            for (int j = 0; j < W.length; j++) W[j] += r * err[i] * o[j];
        }
        return v;
    }

    // Initialize all weights uniformly in [-1, 1].
    public void initialize(Random rng) {
        for (int i = 0; i < v.length; i++) {
            for (int j = 0; j < w[i].length; j++) w[i][j] = 2 * rng.nextDouble() - 1;
            if (bW != null) bW[i] = 2 * rng.nextDouble() - 1; // guard: bias may be absent
        }
    }

    public void initialize() { initialize(new Random()); }
}
```
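For reference, backprop and update implement the standard delta rule. Writing $\delta_i$ for err[i], $s_i$ for the incoming error signal, $o_j$ for the layer's input, and $r$ for the learning rate:

$$\delta_i = f'(v_i)\,s_i, \qquad s_j^{\mathrm{prev}} = \sum_i w_{ij}\,\delta_i, \qquad \Delta w_{ij} = r\,\delta_i\,o_j, \qquad \Delta b_i = r\,\delta_i$$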
Assembling the Network
```java
package com.chengsluo.ann;

import java.util.ArrayList;

public class NeuralNetwork {

    int inputUnits = 0;
    ArrayList<Layer> layers = new ArrayList<Layer>();
    double learningRate = 0.2;

    public NeuralNetwork learningRate(double l) { learningRate = l; return this; }

    public NeuralNetwork inputs(int inputUnits) { this.inputUnits = inputUnits; return this; }

    // Append a momentum layer; its input width is the previous layer's size.
    public NeuralNetwork momentumLayer(int units, Activation fn, boolean bias) {
        int inputs = (layers.size() == 0) ? this.inputUnits : layers.get(layers.size() - 1).units();
        layers.add(new MomentumLayer(units, inputs, fn, bias));
        return this;
    }
    public NeuralNetwork momentumLayer(int units, Activation fn) { return momentumLayer(units, fn, true); }
    public NeuralNetwork momentumLayer(int units) { return momentumLayer(units, Activation.tanh()); }

    // Append a plain layer.
    public NeuralNetwork layer(int units, Activation fn, boolean bias) {
        int inputs = (layers.size() == 0) ? this.inputUnits : layers.get(layers.size() - 1).units();
        layers.add(new Layer(units, inputs, fn, bias));
        return this;
    }
    public NeuralNetwork layer(int units, Activation fn) { return layer(units, fn, true); }
    public NeuralNetwork layer(int units) { return layer(units, Activation.tanh()); }

    // Forward pass through all layers.
    public double[] feed(double[] x) {
        for (Layer l : layers) x = l.feed(x);
        return x;
    }

    public double[] output() { return layers.get(layers.size() - 1).value(); }

    public double error() {
        double E = 0;
        for (Layer l : layers) E += l.error();
        return E;
    }

    // One online training step: feed forward, backpropagate the output
    // error, then update the weights layer by layer.
    public NeuralNetwork train(double[] x, double[] y) {
        double[] out = feed(x);
        double[] s = new double[out.length];
        for (int i = 0; i < y.length; i++) s[i] = y[i] - out[i];
        System.out.println(Layer.printVec(y) + " - " + Layer.printVec(out) + " => " + Layer.printVec(s));
        for (int i = layers.size() - 1; i >= 0; i--) s = layers.get(i).backprop(s);
        for (Layer l : layers) x = l.update(x, learningRate); // update() returns each layer's output
        return this;
    }

    public NeuralNetwork initialize() {
        for (Layer l : layers) l.initialize();
        return this;
    }

    public static NeuralNetwork build() { return new NeuralNetwork(); }
}
```
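Putting the pieces together, here is a minimal usage sketch of the builder API above; the 2-3-1 layer sizes are arbitrary illustration choices:

```java
// Build a 2-3-1 network with the default tanh activations, initialize
// the weights randomly, then run one online training step and read the output.
NeuralNetwork nn = NeuralNetwork.build()
        .inputs(2)
        .layer(3)        // hidden layer with 3 units
        .layer(1)        // single output unit
        .initialize();

nn.train(new double[]{0, 1}, new double[]{1});
double[] out = nn.feed(new double[]{0, 1});
System.out.println(Layer.printVec(out));
```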
Improving Performance (A Layer with Momentum)
Instead of updating the weights with the basic rule above, this layer implementation keeps a separate matrix of weight deltas and uses it as a momentum term.
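In equations, with momentum coefficient $m$ the delta applied at step $t$ blends the current gradient step with the previous delta, so consecutive steps in the same direction accumulate and oscillations are damped:

$$\Delta w_{ij}^{(t)} = (1 - m)\,r\,\delta_i\,o_j + m\,\Delta w_{ij}^{(t-1)}$$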
```java
package com.chengsluo.ann;

public class MomentumLayer extends Layer {

    double dW[][];   // previous weight deltas
    double dbW[];    // previous bias deltas
    double m = 0.1;  // momentum coefficient

    public MomentumLayer(int units, int inputs, Activation fn, boolean bias) {
        super(units, inputs, fn, bias);
        dW = new double[units][];
        for (int i = 0; i < v.length; i++) dW[i] = new double[inputs];
        if (bias) dbW = new double[units];
    }

    // Each delta blends the current gradient step with the previous delta.
    @Override
    public double[] update(double[] o, double r) {
        for (int i = 0; i < v.length; i++) {
            if (bW != null) {
                dbW[i] = (1 - m) * r * err[i] + m * dbW[i];
                bW[i] += dbW[i];
            }
            double[] W = w[i];
            for (int j = 0; j < W.length; j++) {
                dW[i][j] = (1 - m) * r * err[i] * o[j] + m * dW[i][j];
                W[j] += dW[i][j];
            }
        }
        return v;
    }
}
```
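Note the (1 − m) factor on the fresh gradient: m interpolates between plain gradient descent (m = 0) and pure inertia (m = 1), making each delta an exponential moving average of gradient steps. The classic momentum formulation adds the full gradient step to the accumulated delta instead, but for a small coefficient such as m = 0.1 the two behave almost identically.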
Testing the ANN
Training an XOR Model
XOR is the classic example of a function that a single-layer network cannot learn: its truth table ((0,0)→0, (1,0)→1, (0,1)→1, (1,1)→0) is not linearly separable, so the single-layer `bad` network below should fail to converge while the two-layer `nn` succeeds.
```java
package com.chengsluo.ann;

import org.junit.Test;

public class XorTest {

    // A single (input, target) observation.
    public static final class Obs {
        public double[] x;
        public double[] y;
        public Obs(double[] x, double[] y) { this.x = x; this.y = y; }
    }

    public static Obs obs(double[] x, double[] y) { return new Obs(x, y); }

    @Test
    public void test() {
        // The standard XOR truth table.
        Obs[] xorData = new Obs[] {
            obs(new double[]{0, 0}, new double[]{0}),
            obs(new double[]{1, 0}, new double[]{1}),
            obs(new double[]{0, 1}, new double[]{1}),
            obs(new double[]{1, 1}, new double[]{0}),
        };
        // A two-layer network that can learn XOR, and a single-layer one that cannot.
        NeuralNetwork nn  = NeuralNetwork.build().inputs(2).layer(3).layer(1);
        NeuralNetwork bad = NeuralNetwork.build().inputs(2).layer(1);
        System.out.println("nn\tbad");
        nn.initialize();
        bad.initialize();
        double lastErr = Double.POSITIVE_INFINITY;
        for (int i = 0; i < 1000; i++) {
            double err = 0.0;
            double errBad = 0.0;
            for (Obs o : xorData) {
                nn.train(o.x, o.y);
                bad.train(o.x, o.y);
                err += nn.error();
                errBad += bad.error();
            }
            System.out.println(err + "\t" + errBad);
            if (Math.abs(lastErr - err) < 1e-6) break; // stop once the error plateaus
            lastErr = err;
        }
    }
}
```
Time-Series Test
This test predicts the next value of a noisy sine wave from a sliding window of the previous k observations.
```java
package com.chengsluo.ann;

import java.util.Random;

import org.junit.Test;

public class TimeSeriesTest {
    @Test
    public void test() {
        int k = 14;                 // sliding-window size
        double[] x = new double[k];
        double[] y = new double[1];
        double t = 0.0;
        Random twist = new Random();
        NeuralNetwork nn = NeuralNetwork.build().inputs(k).layer(5).layer(1).initialize();
        for (int i = 0; i < 1000; i++) {
            double base = Math.sin(t);
            y[0] = base + 0.5 * twist.nextGaussian(); // noisy sine wave
            if (i >= x.length) {
                nn.train(x, y);
                System.out.println(nn.output()[0] + "\t" + y[0] + "\t" + base);
                // Slide the window one step to the left and append the new value.
                for (int j = 0; j < x.length - 1; j++) x[j] = x[j + 1];
                x[x.length - 1] = y[0];
            } else {
                x[i] = y[0];        // still filling the initial window
            }
            t += 2.0 * Math.PI / 60.0;
        }
    }
}
```
Momentum-Layer Speedup Test
This test trains five plain networks and five momentum networks on the same XOR data and prints their per-epoch errors side by side.
```java
package com.chengsluo.ann;

import org.junit.Test;

public class MomentumComparisonTest {

    public static final class Obs {
        public double[] x;
        public double[] y;
        public Obs(double[] x, double[] y) { this.x = x; this.y = y; }
    }

    public static Obs obs(double[] x, double[] y) { return new Obs(x, y); }

    @Test
    public void test() {
        // The standard XOR truth table.
        Obs[] xorData = new Obs[] {
            obs(new double[]{0, 0}, new double[]{0}),
            obs(new double[]{1, 0}, new double[]{1}),
            obs(new double[]{0, 1}, new double[]{1}),
            obs(new double[]{1, 1}, new double[]{0}),
        };
        // Five plain networks and five momentum networks with the same topology.
        NeuralNetwork[] nn = new NeuralNetwork[10];
        for (int i = 0; i < 5; i++)  nn[i] = NeuralNetwork.build().inputs(2).layer(3).layer(1);
        for (int i = 5; i < 10; i++) nn[i] = NeuralNetwork.build().inputs(2).momentumLayer(3).momentumLayer(1);
        double[] err = new double[10];
        for (int i = 0; i < 10; i++) nn[i].initialize();
        for (int j = 0; j < 300; j++) {
            for (int i = 0; i < 10; i++) {
                NeuralNetwork n = nn[i];
                err[i] = 0.0;
                for (Obs o : xorData) {
                    n.train(o.x, o.y);
                    err[i] += n.error();
                }
            }
            // Print the ten errors for this epoch on one tab-separated line.
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < 10; i++) {
                if (i > 0) sb.append("\t");
                sb.append(err[i]);
            }
            System.out.println(sb.toString());
        }
    }
}
```
References
Byron Ellis. Real-Time Analytics (实时分析) [M]. 机械工业出版社, 2016.
CS231n: Convolutional Neural Networks for Visual Recognition. http://cs231n.stanford.edu
Source Code
https://github.com/chengsluo/code-java/tree/master/ArtificialNeuralNetwork