GPUMLib  0.2.2
GPU Machine Learning Library
LocalGradientKernel.cu
/*
	Noel Lopes is an Assistant Professor at the Polytechnic Institute of Guarda, Portugal
	Copyright (C) 2009, 2010, 2011, 2012 Noel de Jesus Mendonça Lopes

	This file is part of GPUMLib.

	GPUMLib is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include "MBPkernels.h"

// Thread mapping: x indexes the next layer's (output) neurons, y this layer's neurons
#define OUTPUT_NEURON threadIdx.x
#define OUTPUT_INCLUDING_BIAS (threadIdx.x + 1)
#define NUM_OUTPUTS blockDim.x

#define NEURON threadIdx.y
#define NUM_NEURONS blockDim.y

// Each next-layer neuron has NUM_NEURONS inputs plus a bias
#define NUM_INPUTS_OUTPUT_NEURON (NUM_NEURONS + 1)

// One block processes one training pattern
#define PATTERN blockIdx.x

namespace GPUMLib {

// Back-propagates the local gradients of the next layer to compute the local
// gradients of the current layer. When selective activation is used (m != nullptr),
// the local gradients of the corresponding space network are also computed.
KERNEL CalculateLocalGradient(cudafloat * rmsF, cudafloat * bestRMS, cudafloat maxErrorGrowth, cudafloat * outputs, cudafloat * weights, cudafloat * m, int mOffset, int totalNeuronsWithSelectiveActivation, cudafloat * localGradientNextLayer, cudafloat * localGradient, cudafloat * localGradientSpaceNet) {
	extern __shared__ cudafloat lg[];

	// Robust learning: if the current RMS error grew too much relative to the best
	// RMS, this training step will be discarded, so skip the gradient computation.
	if (bestRMS != nullptr) {
		__shared__ cudafloat rms;
		__shared__ cudafloat bRMS;

		rms = *rmsF;
		bRMS = *bestRMS;
		if (rms >= bRMS * maxErrorGrowth) return;
	}

	// The shared array lg holds the NUM_OUTPUTS * NUM_NEURONS weighted gradients,
	// followed by the NUM_OUTPUTS local gradients of the next layer.
	cudafloat * lgNextLayer = (lg + (NUM_OUTPUTS * NUM_NEURONS));

	// One thread per next-layer neuron loads that neuron's local gradient.
	if (NEURON == 0) lgNextLayer[OUTPUT_NEURON] = localGradientNextLayer[PATTERN * NUM_OUTPUTS + OUTPUT_NEURON];

	// Weight connecting NEURON to OUTPUT_NEURON (the + 1 skips the bias weight).
	int connection = OUTPUT_NEURON * NUM_INPUTS_OUTPUT_NEURON + NEURON + 1;
	int threadId = (NEURON * NUM_OUTPUTS + OUTPUT_NEURON);

	__syncthreads();

	lg[threadId] = weights[connection] * lgNextLayer[OUTPUT_NEURON];
	__syncthreads();

	// Reduction: for each neuron, sum the weighted gradients over all of the next
	// layer's neurons, halving the number of elements at each step (odd counts are
	// handled by rounding the offset up).
	int numberElemSum = NUM_OUTPUTS;
	for(int sumUpTo = (numberElemSum >> 1); numberElemSum > 1; sumUpTo = (numberElemSum >> 1)) {
		int nextNumberElemSum = sumUpTo;
		if (numberElemSum & 1) nextNumberElemSum++;

		if (OUTPUT_NEURON < sumUpTo) lg[threadId] += lg[threadId + nextNumberElemSum];

		numberElemSum = nextNumberElemSum;

		__syncthreads();
	}

	// The first thread of each row now holds the summed gradient for its neuron.
	if (OUTPUT_NEURON == 0) {
		int n = PATTERN * NUM_NEURONS + NEURON;

		cudafloat Fh = outputs[n];

		cudafloat lgn = lg[threadId];

		if (m != nullptr) {
			// Selective activation: the output was scaled by the importance M given by
			// the space network, so undo the scaling and compute the space network's
			// local gradient as well.
			int nSelAct = PATTERN * totalNeuronsWithSelectiveActivation + NEURON + mOffset;

			cudafloat M = m[nSelAct];
			if (M == CUDA_VALUE(0.0)) {
				localGradientSpaceNet[nSelAct] = CUDA_VALUE(0.0);
			} else {
				Fh = Fh / M;
				localGradientSpaceNet[nSelAct] = lgn * Fh * CUDA_SIGMOID_DERIVATE(M);
			}
			lgn *= M;
		}

		// Local gradient = (summed weighted gradients) * sigmoid derivative.
		localGradient[n] = lgn * CUDA_SIGMOID_DERIVATE(Fh);
	}
}

}
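Although the host code is not shown on this page, the launch configuration is implied by the indexing above: one block per training pattern, blockDim.x equal to the number of neurons in the next layer, blockDim.y equal to the number of neurons in the current layer, and enough dynamic shared memory for the NUM_OUTPUTS * NUM_NEURONS weighted gradients plus the NUM_OUTPUTS next-layer gradients. Below is a minimal host-side sketch under those assumptions; the helper name, the variable names, and the choice to disable selective activation (m = nullptr) are illustrative, not GPUMLib's actual host code.

#include "MBPkernels.h"

// Hypothetical helper showing how CalculateLocalGradient could be launched.
// All device pointers are assumed to be allocated and filled beforehand.
void launchLocalGradient(cudafloat * d_rmsF, cudafloat * d_bestRMS, cudafloat maxErrorGrowth,
                         cudafloat * d_outputs, cudafloat * d_weights,
                         cudafloat * d_lgNextLayer, cudafloat * d_localGradient,
                         int numPatterns, int numNeurons, int numOutputs) {
	dim3 block(numOutputs, numNeurons); // x: next-layer neurons, y: current-layer neurons
	dim3 grid(numPatterns);             // one block per training pattern

	// lg[] holds numOutputs * numNeurons products plus the numOutputs next-layer gradients.
	size_t sharedMem = (numOutputs * numNeurons + numOutputs) * sizeof(cudafloat);

	GPUMLib::CalculateLocalGradient<<<grid, block, sharedMem>>>(
		d_rmsF, d_bestRMS, maxErrorGrowth, d_outputs, d_weights,
		nullptr, 0, 0, // no selective activation in this sketch (m == nullptr)
		d_lgNextLayer, d_localGradient, nullptr);
}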