// GPUMLib 0.2.2 — GPU Machine Learning Library — BackPropagation.h
/*
	Noel Lopes is an Assistant Professor at the Polytechnic Institute of Guarda, Portugal
	Copyright (C) 2009, 2010, 2011, 2012 Noel de Jesus Mendonça Lopes

	This file is part of GPUMLib.

	GPUMLib is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

21 #ifndef BackPropagation_h
22 #define BackPropagation_h
23 
24 #define INITIAL_LEARNING_RATE (CUDA_VALUE(0.7))
25 
26 #include "../common/CudaDefinitions.h"
27 #include "../common/CudaStreams.h"
28 #include "../memory/DeviceArray.h"
29 #include "../memory/DeviceMatrix.h"
30 #include "../memory/DeviceAccessibleVariable.h"
31 
32 namespace GPUMLib {
33 
36 
39  private:
40  class Layer {
41  friend class BackPropagation;
42  friend class MultipleBackPropagation;
43 
44  private:
45  static int totalNeuronsWithSelectiveActivation;
46  static int patterns;
47 
48  DeviceArray<cudafloat> d_weights;
49  DeviceArray<cudafloat> d_bestWeights;
50  DeviceArray<cudafloat> d_learnRate;
51  DeviceArray<cudafloat> d_lastDelta;
52  DeviceArray<cudafloat> d_lastDeltaWithoutLearningMomentum;
53  DeviceArray<cudafloat> d_outputs;
54  DeviceArray<cudafloat> d_localGradient;
55 
56  int neurons;
57  int connections;
58  int inputsWithoutBias;
59  int mOffset;
60 
61  cudafloat * d_inputs;
62  cudafloat * d_desOutputs;
63  cudafloat * d_m;
64  cudafloat * d_localGradSpaceNet;
65  cudafloat * d_rms;
66 
67  dim3 dimInputsNeurons;
68  dim3 dimOutputsNeurons;
69 
70  int inputsBlockSize;
71  int sharedMemFire;
72  int sharedMemGradients;
73 
74  bool isOutputLayer;
75 
76  void RandomizeWeights(cudafloat minValue, cudafloat maxValue, cudafloat initialLearningRate);
77  void Init(int neurons, int inputs, int nextLayerNeurons, cudafloat initialLearningRate, cudafloat * layerInputs, bool isOutputLayer, cudafloat * m = nullptr, cudafloat * localGradSpaceNet = nullptr, int mOffset = 0);
78 
79  void Fire(cudaStream_t stream);
80 
81  void CalculateLocalGradient(cudaStream_t stream, cudafloat * rms, cudafloat * bestRMS, cudafloat rmsGrowToApplyRobustLearning, Layer & nextLayer);
82 
83  void CorrectWeights(cudaStream_t stream, int patternsBlockSize, cudafloat * rms, cudafloat * bestRMS, cudafloat rmsGrowToApplyRobustLearning, cudafloat robustFactor, cudafloat momentum, cudafloat u, cudafloat d, cudafloat maxStepSize);
84 
85  void CorrectWeights(cudaStream_t stream, int patternsBlockSize, cudafloat * rms, cudafloat * bestRMS, cudafloat rmsGrowToApplyRobustLearning, cudafloat robustFactor, cudafloat momentum, cudafloat u, cudafloat d);
86  };
87 
88  class SelectiveInputLayer {
89  friend class BackPropagation;
90  friend class MultipleBackPropagation;
91 
92  private:
93  int patterns;
94  int neurons;
95 
96  DeviceArray<cudafloat> d_weights;
98  DeviceArray<cudafloat> d_bestWeights;
99  DeviceArray<cudafloat> d_bestBias;
100  DeviceArray<cudafloat> d_learnRate;
101  DeviceArray<cudafloat> d_learnRateBias;
102  DeviceArray<cudafloat> d_lastDelta;
103  DeviceArray<cudafloat> d_lastDeltaBias;
104  DeviceArray<cudafloat> d_lastDeltaWithoutLearningMomentum;
105  DeviceArray<cudafloat> d_lastDeltaWithoutLearningMomentumBias;
106  DeviceArray<cudafloat> d_outputs;
107  DeviceArray<cudafloat> d_localGradient;
108 
109  cudafloat * d_inputs;
110 
111  dim3 dimOutputsNeurons;
112 
113  int fireBlockSize;
114  int fireBlocks;
115 
116  int sharedMemGradients;
117 
118  void RandomizeWeights(cudafloat minValue, cudafloat maxValue, cudafloat initialLearningRate, HostArray<bool> & selectiveInputs);
119 
120  SelectiveInputLayer(int patterns, HostArray<bool> & selectiveInputs, int nextLayerNeurons, cudafloat * inputs, cudafloat initialLearningRate) :
121  d_outputs(patterns * selectiveInputs.Length()),
122  dimOutputsNeurons(nextLayerNeurons, selectiveInputs.Length()),
123  d_localGradient(selectiveInputs.Length() * patterns)
124  {
125  this->patterns = patterns;
126  this->neurons = selectiveInputs.Length();
127 
128  sharedMemGradients = (nextLayerNeurons * (neurons + 1)) * sizeof(cudafloat);
129 
130  this->d_inputs = inputs;
131 
132  RandomizeWeights(CUDA_VALUE(-1.0), CUDA_VALUE(1.0), initialLearningRate, selectiveInputs);
133  }
134 
135  void Fire(cudaStream_t stream);
136  void CalculateLocalGradient(cudaStream_t stream, cudafloat * rms, cudafloat * bestRMS, cudafloat rmsGrowToApplyRobustLearning, Layer & nextLayer);
137 
138  void CorrectWeights(cudaStream_t stream, cudafloat * rms, cudafloat * bestRMS, cudafloat rmsGrowToApplyRobustLearning, cudafloat robustFactor, cudafloat momentum, cudafloat u, cudafloat d, cudafloat maxStepSize);
139  };
140 
141  DeviceMatrix<cudafloat> d_inputs;
142  DeviceMatrix<cudafloat> d_desOutputs;
143 
144  int maxNumberWeigths;
145 
146  cudafloat initialLearningRate;
147  cudafloat momentum;
148  cudafloat u;
149  cudafloat d;
150  cudafloat maxStepSize;
151 
153  DeviceArray<cudafloat> d_bestRMS;
154  DeviceArray<cudafloat> d_rmsOut;
156 
157  CudaStream streamKernels;
158  CudaStream streamRMS;
159 
160  int patternsBlockSize;
161  cudafloat numberPatternsNeurons;
162 
163  int epoch;
164 
165  // Robust learning
166  DeviceArray<int> d_numberWeightsLayer;
167 
168  DeviceArray<cudafloat *> d_weightsLayers;
169  DeviceArray<cudafloat *> d_bestWeightsLayers;
170  DeviceArray<cudafloat *> d_learnRatesLayers;
171  DeviceArray<cudafloat *> d_lastDeltaLayers;
172  DeviceArray<cudafloat *> d_lastDeltaWithoutLMlayers;
173 
174  bool robustLearning;
175  int layersRobustTraining;
176  cudafloat rmsGrowToApplyRobustLearning;
177  cudafloat robustFactor;
178 
179  HostArray<Layer> layers;
180  SelectiveInputLayer * selectiveInputLayer;
181 
182  HostArray<bool> selectiveInputs;
183 
184  void Fire();
185 
186  protected:
187  HostArray<Layer> spaceLayers;
188  SelectiveInputLayer * selectiveInputLayerSpaceNetwork;
189 
190  void CreateNetwork(HostArray<int> & sizeLayers, HostArray<int> * sizeSpaceLayers, HostArray<bool> * selectiveNeurons, HostMatrix<cudafloat> & trainInputPatterns, HostMatrix<cudafloat> & trainDesiredOutputPatterns, cudafloat initialLearningRate);
191  BackPropagation() {}
192 
193  public:
200  BackPropagation(HostArray<int> & sizeLayers, HostMatrix<cudafloat> & trainInputPatterns, HostMatrix<cudafloat> & trainDesiredOutputPatterns, cudafloat initialLearningRate = INITIAL_LEARNING_RATE);
201 
204  if (selectiveInputLayerSpaceNetwork != nullptr) delete selectiveInputLayerSpaceNetwork;
205  if (selectiveInputLayer != nullptr) delete selectiveInputLayer;
206  }
207 
211  void RandomizeWeights(cudafloat minValue, cudafloat maxValue);
212 
215  bool GetRobustLearning() const;
216 
219  void SetRobustLearning(bool value);
220 
224 
228 
231  cudafloat GetRobustFactor() const;
232 
235  void SetRobustFactor(cudafloat value);
236 
239  cudafloat GetMomentum() const;
240 
243  void SetMomentum(cudafloat value);
244 
248 
251  void SetUpStepSizeFactor(cudafloat value);
252 
256 
259  void SetDownStepSizeFactor(cudafloat value);
260 
263  cudafloat GetMaxStepSize() const;
264 
267  void SetMaxStepSize(cudafloat value);
268 
271  int GetEpoch() const;
272 
275  int GetNumberLayers() const;
276 
279  int GetNumberInputs() const;
280 
283  int GetNumberOutputs() const;
284 
288  int GetNumberNeurons(int layer) const;
289 
294 
299  cudafloat GetRMS();
300 
302  void TrainOneEpoch();
303 
306  void Train(int epochs);
307 
311  void Train(int epochs, cudafloat rmsStop);
312 
317 
322 
326  void SetLayerWeights(int layer, HostArray<cudafloat> & weights);
327 
332  void SetLayerWeights(int layer, HostMatrix<cudafloat> & weights, HostArray<cudafloat> & bias);
333 
336  bool HasSelectiveInputs() const {
337  return (selectiveInputLayer != nullptr);
338  }
339 
343 
347 
351 
355 };
356 
361 
363 
364 }
365 
366 #endif
/*
	NOTE(review): the text below is Doxygen cross-reference residue carried over by the
	extraction that produced this file; it is not part of the original header. It is kept
	(commented out) because it records signatures of declarations that were lost above:

	cudafloat GetMaxPercentageRMSGrow() const
	void SetSelectiveInputBias(HostArray< cudafloat > &bias)
	Represents a feed-forward network that can be trained using the CUDA implementation of the Back-Propagation algorithm.
	cudafloat GetDownStepSizeFactor() const
	void SetMaxStepSize(cudafloat value)
	HostArray< cudafloat > GetSelectiveInputWeights()
	void SetMomentum(cudafloat value)
	cudafloat GetRobustFactor() const
	bool HasSelectiveInputs() const
	void TrainOneEpoch()
	Trains the network one epoch.
	void SetLayerWeights(int layer, HostArray< cudafloat > &weights)
	void SetSelectiveInputWeights(HostArray< cudafloat > &weights)
	KERNEL CalculateLocalGradient(cudafloat *rmsF, cudafloat *bestRMS, cudafloat maxErrorGrowth, cudafloat *outputs, cudafloat *weights, cudafloat *m, int mOffset, int totalNeuronsWithSelectiveActivation, cudafloat *localGradientNextLayer, cudafloat *localGradient, cudafloat *localGradientSpaceNet)
	void SetDownStepSizeFactor(cudafloat value)
	Represents a CUDA stream. (CudaStreams.h:32)
	cudafloat GetMomentum() const
	void RandomizeWeights(cudafloat minValue, cudafloat maxValue)
	HostArray< cudafloat > GetLayerWeights(int layer)
	int GetNumberNeurons(int layer) const
	void SetUpStepSizeFactor(cudafloat value)
	cudafloat GetUpStepSizeFactor() const
	int Length() const (BaseArray.h:63)
	cudafloat GetMaxStepSize() const
	void SetMaxPercentageRMSGrow(cudafloat value)
	void SetRobustFactor(cudafloat value)
	void SetRobustLearning(bool value)
	HostArray< cudafloat > GetSelectiveInputBias()
	HostMatrix< cudafloat > GetOutputs(HostMatrix< cudafloat > &inputs)
	#define CUDA_VALUE(X)
	Represents a multiple feed-forward network that can be trained using the CUDA implementation of the Multiple Back-Propagation algorithm.
	float cudafloat
	~BackPropagation()
	Destructor.
*/