GPUMLib  0.2.2
GPU Machine Learning Library
RBMCorrectWeights.cu
1 /*
2  Noel Lopes is an Assistant Professor at the Polytechnic Institute of Guarda, Portugal
3  Copyright (C) 2009, 2010, 2011, 2012 Noel de Jesus Mendonça Lopes
4 
5  This file is part of GPUMLib.
6 
7  GPUMLib is free software: you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation, either version 3 of the License, or
10  (at your option) any later version.
11 
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  GNU General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License
18  along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #include "RBMconfig.h"
22 #include "../reduction/SumWarp.h"
23 
24 namespace GPUMLib {
25 
// Applies one momentum-based update to the parameter stored at weights[w].
//
// learningRate - step size for this parameter.
// momentum     - base momentum; it is multiplied by learningRate and then
//                clamped to the interval [0.1, 0.9] before being used.
// delta        - gradient estimate for this parameter.
// lastDelta    - per-parameter history; entry w is read and then overwritten.
// lastDeltaWithoutLearningMomentum - per-parameter history; entry w is reset
//                to zero when the update is rejected.
// weights      - parameter array; entry w is updated in place.
// w            - index of the parameter being updated.
//
// If the candidate value is Inf or NaN the parameter is left untouched and
// both history entries for w are cleared.
__device__ __forceinline__ void UpdateWeight(cudafloat learningRate, cudafloat momentum, cudafloat delta, cudafloat * lastDelta, cudafloat * lastDeltaWithoutLearningMomentum, cudafloat * weights, int w) {
    // Effective momentum: scaled by the step size, then clamped.
    momentum *= learningRate;
    if (momentum < CUDA_VALUE(0.1)) {
        momentum = CUDA_VALUE(0.1);
    } else if (momentum > CUDA_VALUE(0.9)) {
        momentum = CUDA_VALUE(0.9);
    }

    const cudafloat previousDelta = lastDelta[w];

    const cudafloat candidate = weights[w] + learningRate * delta + momentum * previousDelta;

    // NOTE(review): the value kept in lastDelta does not include the
    // learningRate factor that is applied to the weight itself - confirm
    // this is intended.
    cudafloat storedDelta = delta + momentum * previousDelta;

    if (IsInfOrNaN(candidate)) {
        // Reject the step and forget the history for this parameter.
        storedDelta = CUDA_VALUE(0.0);
        lastDeltaWithoutLearningMomentum[w] = CUDA_VALUE(0.0);
    } else {
        weights[w] = candidate;
    }

    lastDelta[w] = storedDelta;
}
43 
// Adapts the individual step size of parameter w and records the new delta.
//
// lr    - per-parameter step sizes; entry w is read and overwritten.
// lastDeltaWithoutLearningMomentum - previous delta of each parameter;
//         entry w is compared against delta and then replaced by it.
// delta - current delta of parameter w.
// w     - index of the parameter.
// u     - factor applied when SAME_DIRECTION(previous delta, delta) holds.
// d     - factor applied otherwise.
//
// Returns the new step size, which never exceeds MAX_STEP_SIZE.
__device__ __forceinline__ cudafloat UpdateLearningRate(cudafloat * lr, cudafloat * lastDeltaWithoutLearningMomentum, cudafloat delta, int w, cudafloat u, cudafloat d) {
    cudafloat stepSize = lr[w];

    if (SAME_DIRECTION(lastDeltaWithoutLearningMomentum[w], delta)) {
        stepSize *= u;
    } else {
        stepSize *= d;
    }

    // Only an upper bound is enforced on the step size.
    if (stepSize > MAX_STEP_SIZE) {
        stepSize = MAX_STEP_SIZE;
    }

    lr[w] = stepSize;
    lastDeltaWithoutLearningMomentum[w] = delta;

    return stepSize;
}
55 
// Accumulates, over all samples, the difference between the data-driven and
// the reconstruction-driven statistics and uses it to update the weights and
// both bias vectors (a and b), each with its own adaptive step size.
//
// Launch layout (as used by the indexing below):
//   - 2D grid of 2D blocks; the x dimension indexes i in [0, I) and the
//     y dimension indexes j in [0, J).
//   - blockDim.x and blockDim.y must not exceed 16: the shared tiles hold
//     16 entries and are indexed directly by threadIdx.x / threadIdx.y.
//   - Uses 4 * 16 * sizeof(cudafloat) bytes of static shared memory.
//
// Memory layout:
//   - v_data / v_recon: entry (s, i) is stored at s * I + i.
//   - h_data / h_recon: entry (s, j) is stored at s * J + j.
//   - weights and its learningRateW / lastDelta*W companions: entry (j, i)
//     is stored at j * I + i.
//   - a and its companions are indexed by i; b and its companions by j.
//   - errors[i] receives the sum over samples of (v_data - v_recon)^2 for i.
//     Every block row writes the same value to it.
//
// u and d are the step-size increase / decrease factors passed on to
// UpdateLearningRate; momentum is passed on to UpdateWeight.
KERNEL CorrectWeightsRBM(cudafloat * v_data, cudafloat * h_data, cudafloat * v_recon, cudafloat * h_recon, int samples, cudafloat * learningRateW, cudafloat * lastDeltaWithoutLearningMomentumW, cudafloat * lastDeltaW, cudafloat * learningRateB, cudafloat * lastDeltaWithoutLearningMomentumB, cudafloat * lastDeltaB, cudafloat * learningRateA, cudafloat * lastDeltaWithoutLearningMomentumA, cudafloat * lastDeltaA, cudafloat u, cudafloat d, cudafloat momentum, cudafloat * weights, cudafloat * b, cudafloat * a, cudafloat * errors, int I, int J) {
    // Per-sample tiles shared by the whole block.
    __shared__ cudafloat vd[16];
    __shared__ cudafloat vr[16];
    __shared__ cudafloat hd[16];
    __shared__ cudafloat hr[16];

    int i = blockIdx.x * blockDim.x + threadIdx.x;
    int j = blockIdx.y * blockDim.y + threadIdx.y;

    cudafloat error = CUDA_VALUE(0.0);
    cudafloat deltaW = CUDA_VALUE(0.0);
    cudafloat deltaB = CUDA_VALUE(0.0);
    cudafloat deltaA = CUDA_VALUE(0.0);

    for (int s = 0; s < samples; s++) {
        // The first row of threads loads the values for column i and
        // accumulates the statistics that depend on i only.
        if (threadIdx.y == 0 && i < I) {
            cudafloat dat = v_data[s * I + i];
            cudafloat rec = v_recon[s * I + i];

            vd[threadIdx.x] = dat;
            vr[threadIdx.x] = rec;

            cudafloat e = dat - rec;
            deltaA += e;

            error += e * e;
        }

        // The first column of threads loads the values for row j and
        // accumulates the statistics that depend on j only.
        if (threadIdx.x == 0 && j < J) {
            cudafloat dat = h_data[s * J + j];
            cudafloat rec = h_recon[s * J + j];

            hd[threadIdx.y] = dat;
            hr[threadIdx.y] = rec;

            deltaB += dat - rec;
        }

        // Make the tiles of sample s visible to every thread of the block.
        __syncthreads();

        // Only in-range threads consume the tiles; out-of-range threads
        // would otherwise read shared entries that were never written.
        if (i < I && j < J) {
            deltaW += vd[threadIdx.x] * hd[threadIdx.y] - vr[threadIdx.x] * hr[threadIdx.y];
        }

        // Every thread must be done reading the tiles of sample s before any
        // thread overwrites them with sample s + 1. Without this barrier a
        // fast warp races with slower warps still reading the old values.
        __syncthreads();
    }

    // update weights
    if (i < I && j < J) {
        deltaW /= samples;

        int w = j * I + i;

        cudafloat learningRate = UpdateLearningRate(learningRateW, lastDeltaWithoutLearningMomentumW, deltaW, w, u, d);
        UpdateWeight(learningRate, momentum, deltaW, lastDeltaW, lastDeltaWithoutLearningMomentumW, weights, w);
    }

    if (i < I && threadIdx.y == 0) {
        errors[i] = error;

        // Update a (only once per i: the thread with j == 0 does it)
        if (j == 0) {
            deltaA /= samples;

            cudafloat learningRate = UpdateLearningRate(learningRateA, lastDeltaWithoutLearningMomentumA, deltaA, i, u, d);
            UpdateWeight(learningRate, momentum, deltaA, lastDeltaA, lastDeltaWithoutLearningMomentumA, a, i);
        }
    }

    // Update b (only once per j: the thread with i == 0 does it)
    if (i == 0 && j < J) {
        deltaB /= samples;

        cudafloat learningRate = UpdateLearningRate(learningRateB, lastDeltaWithoutLearningMomentumB, deltaB, j, u, d);
        UpdateWeight(learningRate, momentum, deltaB, lastDeltaB, lastDeltaWithoutLearningMomentumB, b, j);
    }
}
129 
130 }
#define SAME_DIRECTION(X, Y)
Verifies if X and Y have the same signal.
#define KERNEL
Defines the type of a kernel function.
#define CUDA_VALUE(X)
float cudafloat