LSTM: A C++ Implementation

Posted on 2017-04-26 | Category: DL

Reposted from "A Detailed Derivation of the LSTM Network with a C++ Implementation". Related posts: the RNN derivation, an RNN derivation with a Python implementation, RNN and LSTM, and the CNN derivation. The errors in the original article have been corrected here.

```cpp
#include <iostream>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <cstring>
#include <vector>

using namespace std;

#define innode  2        //number of input units: the two addend bits
#define hidenode  26     //number of hidden units: these store the "carry"
#define outnode  1       //number of output units: one predicted sum bit
#define alpha  0.1       //learning rate
#define binary_dim 8     //maximum length of the binary numbers

#define randval(high) ( (double)rand() / RAND_MAX * high )
#define uniform_plus_minus_one ( (double)( 2.0 * rand() ) / ((double)RAND_MAX + 1.0) - 1.0 )  //uniform in [-1, 1)

int largest_number = (int)pow(2, binary_dim);  //largest decimal value representable in binary_dim bits

//activation function
double sigmoid(double x)
{
    return 1.0 / (1.0 + exp(-x));
}

//derivative of the activation function; y is the sigmoid value itself
double dsigmoid(double y)
{
    return y * (1.0 - y);
}

//derivative of tanh; y is the tanh value itself
double dtanh(double y)
{
    return 1.0 - y * y;
}

//convert a decimal integer to binary; arr[0] holds the least significant bit
void int2binary(int n, int *arr)
{
    int i = 0;
    while(n)
    {
        arr[i++] = n % 2;
        n /= 2;
    }
    while(i < binary_dim)
        arr[i++] = 0;
}

class RNN
{
public:
    RNN();
    virtual ~RNN();
    void train();

public:
    double W_I[innode][hidenode];     //weights from the input to the input gate
    double U_I[hidenode][hidenode];   //weights from the previous hidden output to the input gate
    double W_F[innode][hidenode];     //weights from the input to the forget gate
    double U_F[hidenode][hidenode];   //weights from the previous hidden output to the forget gate
    double W_O[innode][hidenode];     //weights from the input to the output gate
    double U_O[hidenode][hidenode];   //weights from the previous hidden output to the output gate
    double W_G[innode][hidenode];     //weights from the input to the candidate ("new memory")
    double U_G[hidenode][hidenode];   //weights from the previous hidden output to the candidate
    double W_out[hidenode][outnode];  //weights from the hidden layer to the output layer

    double *x;     //layer-0 values, set directly from the input vector
    double *y;     //output-layer values
};

void winit(double w[], int n)  //weight initialization
{
    for(int i=0; i<n; i++)
        w[i] = uniform_plus_minus_one;  //uniform random values
}

RNN::RNN()
{
    x = new double[innode];
    y = new double[outnode];
    winit((double*)W_I, innode * hidenode);
    winit((double*)U_I, hidenode * hidenode);
    winit((double*)W_F, innode * hidenode);
    winit((double*)U_F, hidenode * hidenode);
    winit((double*)W_O, innode * hidenode);
    winit((double*)U_O, hidenode * hidenode);
    winit((double*)W_G, innode * hidenode);
    winit((double*)U_G, hidenode * hidenode);
    winit((double*)W_out, hidenode * outnode);
}

RNN::~RNN()
{
    delete [] x;   //fixed: arrays allocated with new[] must be freed with delete[]
    delete [] y;
}

void RNN::train()
{
    int epoch, i, j, k, m, p;
    vector<double*> I_vector;  //input gate values, one array per time step
    vector<double*> F_vector;  //forget gate values
    vector<double*> O_vector;  //output gate values
    vector<double*> G_vector;  //candidate ("new memory") values
    vector<double*> S_vector;  //cell states
    vector<double*> h_vector;  //hidden outputs
    vector<double> y_delta;    //derivative of the error w.r.t. the output layer

    for(epoch=0; epoch<16000; epoch++)  //number of training iterations
    {
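        //A sketch of the forward recurrences computed in this loop, using the
        //weight matrices defined above (note: this implementation has no bias
        //terms, and the candidate g uses a sigmoid where tanh is more common):
        //  i_t = sigmoid(x_t W_I + h_{t-1} U_I)    input gate
        //  f_t = sigmoid(x_t W_F + h_{t-1} U_F)    forget gate
        //  o_t = sigmoid(x_t W_O + h_{t-1} U_O)    output gate
        //  g_t = sigmoid(x_t W_G + h_{t-1} U_G)    candidate memory
        //  s_t = f_t . s_{t-1} + i_t . g_t         cell state ('.' is element-wise)
        //  h_t = o_t . tanh(s_t)                   hidden output
        //  y_t = sigmoid(h_t W_out)                predicted bit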
        double e = 0.0;  //accumulated error for this example

        int predict[binary_dim];  //predicted bit for each position
        memset(predict, 0, sizeof(predict));

        int a_int = (int)randval(largest_number/2.0);  //random addend a
        int a[binary_dim];
        int2binary(a_int, a);  //binary representation of a

        int b_int = (int)randval(largest_number/2.0);  //random addend b
        int b[binary_dim];
        int2binary(b_int, b);  //binary representation of b

        int c_int = a_int + b_int;  //the true sum c
        int c[binary_dim];
        int2binary(c_int, c);  //binary representation of c

        //at time 0 there is no previous hidden layer, so initialize one filled with zeros
        double *S = new double[hidenode];  //cell state
        double *h = new double[hidenode];  //hidden output
        for(i=0; i<hidenode; i++)
        {
            S[i] = 0;
            h[i] = 0;
        }
        S_vector.push_back(S);
        h_vector.push_back(h);

        //forward pass
        for(p=0; p<binary_dim; p++)  //walk over the binary arrays, least significant bit first
        {
            x[0] = a[p];
            x[1] = b[p];
            double t = (double)c[p];  //target value

            double *in_gate = new double[hidenode];      //input gate
            double *out_gate = new double[hidenode];     //output gate
            double *forget_gate = new double[hidenode];  //forget gate
            double *g_gate = new double[hidenode];       //candidate ("new memory")
            double *state = new double[hidenode];        //cell state
            double *h = new double[hidenode];            //hidden output

            for(j=0; j<hidenode; j++)
            {
                //propagate the input layer to the hidden layer
                double inGate = 0.0;
                double outGate = 0.0;
                double forgetGate = 0.0;
                double gGate = 0.0;

                for(m=0; m<innode; m++)
                {
                    inGate += x[m] * W_I[m][j];
                    outGate += x[m] * W_O[m][j];
                    forgetGate += x[m] * W_F[m][j];
                    gGate += x[m] * W_G[m][j];
                }

                double *h_pre = h_vector.back();      //previous step's hidden output
                double *state_pre = S_vector.back();  //previous step's cell state
                for(m=0; m<hidenode; m++)
                {
                    inGate += h_pre[m] * U_I[m][j];
                    outGate += h_pre[m] * U_O[m][j];
                    forgetGate += h_pre[m] * U_F[m][j];
                    gGate += h_pre[m] * U_G[m][j];
                }

                in_gate[j] = sigmoid(inGate);
                out_gate[j] = sigmoid(outGate);
                forget_gate[j] = sigmoid(forgetGate);
                g_gate[j] = sigmoid(gGate);  //sigmoid candidate; the backward pass matches this choice

                //fixed: the cell update must use the previous time step's state,
                //state_pre[j], which is what the backward pass assumes; the original
                //used state[j-1], the neighbouring unit's state within the same step
                state[j] = forget_gate[j] * state_pre[j] + g_gate[j] * in_gate[j];

                //h[j] = in_gate[j] * tanh(state[j]);  //this line was wrong in the original article
                h[j] = out_gate[j] * tanh(state[j]);
            }

            for(k=0; k<outnode; k++)
            {
                //propagate the hidden layer to the output layer
                double out = 0.0;
                for(j=0; j<hidenode; j++)
                    out += h[j] * W_out[j][k];
                y[k] = sigmoid(out);  //output of each output unit
            }

            predict[p] = (int)floor(y[0] + 0.5);  //record the predicted bit

            //save this step's values for the next step and for backpropagation
            I_vector.push_back(in_gate);
            F_vector.push_back(forget_gate);
            O_vector.push_back(out_gate);
            S_vector.push_back(state);
            G_vector.push_back(g_gate);
            h_vector.push_back(h);

            //save the derivative of the error w.r.t. the output layer
            y_delta.push_back( (t - y[0]) * dsigmoid(y[0]) );
            e += fabs(t - y[0]);  //error
        }

        //error backpropagation

        //hidden-layer error, computed from the next time step's hidden-layer
        //error and the current output-layer error
        double h_delta[hidenode];

        //errors carried back from the time step after the current one
        double *O_future_delta = new double[hidenode];
        double *I_future_delta = new double[hidenode];
        double *F_future_delta = new double[hidenode];
        double *G_future_delta = new double[hidenode];
        double *state_future_delta = new double[hidenode];
        double *forget_gate_future = new double[hidenode];
        for(j=0; j<hidenode; j++)
        {
            O_future_delta[j] = 0;
            I_future_delta[j] = 0;
            F_future_delta[j] = 0;
            G_future_delta[j] = 0;
            state_future_delta[j] = 0;
            forget_gate_future[j] = 0;
        }

        for(p=binary_dim-1; p>=0 ; p--)
        {
            //fixed: allocate fresh delta buffers each step; the original reused
            //single buffers, so writing the current deltas overwrote the
            //*_future_delta values that were still being read below
            double *O_delta = new double[hidenode];
            double *I_delta = new double[hidenode];
            double *F_delta = new double[hidenode];
            double *G_delta = new double[hidenode];
            double *state_delta = new double[hidenode];

            x[0] = a[p];
            x[1] = b[p];

            //current step's saved values
            double *in_gate = I_vector[p];      //input gate
            double *out_gate = O_vector[p];     //output gate
            double *forget_gate = F_vector[p];  //forget gate
            double *g_gate = G_vector[p];       //candidate
            double *state = S_vector[p+1];      //cell state
            double *h = h_vector[p+1];          //hidden output

            //previous step's saved values
            double *h_pre = h_vector[p];
            double *state_pre = S_vector[p];

            for(k=0; k<outnode; k++)  //for every output unit, update the weights
            {
                //update the weights between the hidden and output layers
                for(j=0; j<hidenode; j++)
                    W_out[j][k] += alpha * y_delta[p] * h[j];
            }
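            //A sketch of the backward recurrences implemented below, in the
            //same notation as the forward sketch; primes (') mark deltas
            //carried back from the time step after this one:
            //  dh = W_out dy + U_I di' + U_F df' + U_O do' + U_G dg'
            //  do = dh . tanh(s) . o(1-o)
            //  ds = dh . o . (1 - tanh^2(s)) + ds' . f'
            //  df = ds . s_{t-1} . f(1-f)
            //  di = ds . g . i(1-i)
            //  dg = ds . i . g(1-g)        (g is a sigmoid here, hence g(1-g))
            //Each weight then moves by alpha * (its delta) * (its input).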
            //for every hidden unit, compute the error terms and update the weights
            for(j=0; j<hidenode; j++)
            {
                h_delta[j] = 0.0;
                for(k=0; k<outnode; k++)
                {
                    h_delta[j] += y_delta[p] * W_out[j][k];
                }
                for(k=0; k<hidenode; k++)
                {
                    h_delta[j] += I_future_delta[k] * U_I[j][k];
                    h_delta[j] += F_future_delta[k] * U_F[j][k];
                    h_delta[j] += O_future_delta[k] * U_O[j][k];
                    h_delta[j] += G_future_delta[k] * U_G[j][k];
                }

                //error terms for the hidden layer
                O_delta[j] = h_delta[j] * tanh(state[j]) * dsigmoid(out_gate[j]);
                //fixed: dtanh expects the tanh value, not the raw state, so the
                //derivative of tanh(state) is dtanh(tanh(state)); the original
                //passed state[j] directly
                state_delta[j] = h_delta[j] * out_gate[j] * dtanh(tanh(state[j])) +
                                 state_future_delta[j] * forget_gate_future[j];
                F_delta[j] = state_delta[j] * state_pre[j] * dsigmoid(forget_gate[j]);
                I_delta[j] = state_delta[j] * g_gate[j] * dsigmoid(in_gate[j]);
                G_delta[j] = state_delta[j] * in_gate[j] * dsigmoid(g_gate[j]);

                //update the weights between the previous and current hidden layers
                for(k=0; k<hidenode; k++)
                {
                    U_I[k][j] += alpha * I_delta[j] * h_pre[k];
                    U_F[k][j] += alpha * F_delta[j] * h_pre[k];
                    U_O[k][j] += alpha * O_delta[j] * h_pre[k];
                    U_G[k][j] += alpha * G_delta[j] * h_pre[k];
                }

                //update the weights between the input and hidden layers
                for(k=0; k<innode; k++)
                {
                    W_I[k][j] += alpha * I_delta[j] * x[k];
                    W_F[k][j] += alpha * F_delta[j] * x[k];
                    W_O[k][j] += alpha * O_delta[j] * x[k];
                    W_G[k][j] += alpha * G_delta[j] * x[k];
                }
            }

            //hand the current deltas to the next (earlier) step, freeing the old ones
            delete [] O_future_delta;
            delete [] I_future_delta;
            delete [] F_future_delta;
            delete [] G_future_delta;
            delete [] state_future_delta;
            if(p == binary_dim-1)
                delete [] forget_gate_future;  //only the initial one is a scratch buffer;
                                               //later ones belong to F_vector
            O_future_delta = O_delta;
            I_future_delta = I_delta;
            F_future_delta = F_delta;
            G_future_delta = G_delta;
            state_future_delta = state_delta;
            forget_gate_future = forget_gate;
        }
        delete [] O_future_delta;
        delete [] I_future_delta;
        delete [] F_future_delta;
        delete [] G_future_delta;
        delete [] state_future_delta;

        if(epoch % 200 == 0)
        {
            cout << "error:" << e << endl;

            cout << "pred:" ;
            for(k=binary_dim-1; k>=0; k--)
                cout << predict[k];
            cout << endl;

            cout << "true:" ;
            for(k=binary_dim-1; k>=0; k--)
                cout << c[k];
            cout << endl;

            int out = 0;
            for(k=binary_dim-1; k>=0; k--)
                out += predict[k] * pow(2, k);
            cout << a_int << " + " << b_int << " = " << out << endl << endl;
        }

        //free the per-step arrays saved during the forward pass
        for(i=0; i<I_vector.size(); i++)
            delete [] I_vector[i];
        for(i=0; i<F_vector.size(); i++)
            delete [] F_vector[i];
        for(i=0; i<O_vector.size(); i++)
            delete [] O_vector[i];
        for(i=0; i<G_vector.size(); i++)
            delete [] G_vector[i];
        for(i=0; i<S_vector.size(); i++)
            delete [] S_vector[i];
        for(i=0; i<h_vector.size(); i++)
            delete [] h_vector[i];

        I_vector.clear();
        F_vector.clear();
        O_vector.clear();
        G_vector.clear();
        S_vector.clear();
        h_vector.clear();
        y_delta.clear();
    }
}

int main()
{
    srand(time(NULL));
    RNN rnn;
    rnn.train();
    return 0;
}
```

End of article. Thanks for reading.

Author: GonewithGt. If you have questions, please leave a comment or message me on Weibo.
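A quick way to try the listing (my own note, not part of the original post): save it as, say, `lstm.cpp`, build it with `g++ lstm.cpp -o lstm`, and run `./lstm`. Every 200 epochs the program prints the accumulated error on the current random example, the predicted and true 8-bit sums (most significant bit first), and the decoded decimal result; as training progresses the predicted sum should come to match the true one.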