GPUMLib  0.2.2
GPU Machine Learning Library
CalcLocalGradSelectiveInputs.cu
/*
	Noel Lopes is an Assistant Professor at the Polytechnic Institute of Guarda, Portugal
	Copyright (C) 2009, 2010, 2011, 2012 Noel de Jesus Mendonça Lopes

	This file is part of GPUMLib.

	GPUMLib is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include "MBPkernels.h"

#define OUTPUT_NEURON threadIdx.x
#define OUTPUT_INCLUDING_BIAS (threadIdx.x + 1)
#define NUM_OUTPUTS blockDim.x

#define NEURON threadIdx.y
#define NUM_NEURONS blockDim.y

#define NUM_INPUTS_OUTPUT_NEURON (NUM_NEURONS + 1)

#define SAMPLE blockIdx.x

namespace GPUMLib {

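// Each block processes one training sample (SAMPLE = blockIdx.x):
// threadIdx.x indexes the neurons of the next layer (outputs) and
// threadIdx.y indexes the selective input neurons.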
KERNEL CalcLocalGradSelectiveInputs(cudafloat * rmsF, cudafloat * bestRMS, cudafloat maxErrorGrowth, cudafloat * inputs, cudafloat * selectiveNeuronsWeights, cudafloat * selectiveNeuronsBias, cudafloat * weights, cudafloat * localGradientNextLayer, cudafloat * localGradient) {
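	// Dynamic shared memory layout: NUM_OUTPUTS * NUM_NEURONS partial
	// gradients, followed by NUM_OUTPUTS cached next-layer local gradients.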
	extern __shared__ cudafloat lg[];

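	// Early exit: when the RMS error has grown past the allowed factor of
	// the best RMS found so far, skip the gradient computation entirely.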
	if (bestRMS != nullptr) {
		__shared__ cudafloat rms;
		__shared__ cudafloat bRMS;

		rms = *rmsF;
		bRMS = *bestRMS;
		if (rms >= bRMS * maxErrorGrowth) return;
	}

	cudafloat * lgNextLayer = (lg + (NUM_OUTPUTS * NUM_NEURONS));

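	// One row of threads caches this sample's next-layer local gradients.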
	if (NEURON == 0) lgNextLayer[OUTPUT_NEURON] = localGradientNextLayer[SAMPLE * NUM_OUTPUTS + OUTPUT_NEURON];

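	// connection skips each output neuron's bias weight (hence the + 1);
	// threadId linearizes the 2-D thread index in row-major order.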
	int connection = OUTPUT_NEURON * NUM_INPUTS_OUTPUT_NEURON + NEURON + 1;
	int threadId = (NEURON * NUM_OUTPUTS + OUTPUT_NEURON);

	__syncthreads();

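	// Each thread weighs the next-layer gradient of its output neuron by
	// the connection from its selective input neuron.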
	lg[threadId] = weights[connection] * lgNextLayer[OUTPUT_NEURON];
	__syncthreads();

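	// Pairwise parallel reduction along the output dimension, summing each
	// neuron's row of lg[]; when the element count is odd, the stride is
	// widened by one so the middle element survives to the next pass.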
	int numberElemSum = NUM_OUTPUTS;
	for(int sumUpTo = (numberElemSum >> 1); numberElemSum > 1; sumUpTo = (numberElemSum >> 1)) {
		int nextNumberElemSum = sumUpTo;
		if (numberElemSum & 1) nextNumberElemSum++;

		if (OUTPUT_NEURON < sumUpTo) lg[threadId] += lg[threadId + nextNumberElemSum];

		numberElemSum = nextNumberElemSum;

		__syncthreads();
	}

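	// The first thread of each row writes the local gradient: zero when the
	// input is NaN/Inf or when weight and bias are both zero (missing
	// values), otherwise the summed gradient times the activation derivative.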
	if (OUTPUT_NEURON == 0) {
		cudafloat lgn = CUDA_VALUE(0.0);

		int n = SAMPLE * NUM_NEURONS + NEURON;

		cudafloat i = inputs[n];

		if (!IsInfOrNaN(i)) {
			cudafloat w = selectiveNeuronsWeights[NEURON];
			cudafloat b = selectiveNeuronsBias[NEURON];

			if (w != CUDA_VALUE(0.0) || b != CUDA_VALUE(0.0)) { // input may have missing values
				cudafloat coshfx = CUDA_COSH(i * w + b);
				lgn = lg[threadId] / (coshfx * coshfx); // derivative of tanh = 1 / (coshfx * coshfx)
			}
		}

		localGradient[n] = lgn;
	}
}

}
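The launch configuration follows from the index macros: one block per sample, blockDim.x equal to the number of next-layer outputs, blockDim.y equal to the number of selective input neurons, and dynamic shared memory for NUM_OUTPUTS * NUM_NEURONS + NUM_OUTPUTS floats. The host-side sketch below is not part of GPUMLib; the wrapper name and parameters are illustrative, assuming cudafloat is float and all pointers refer to device memory.

#include <cuda_runtime.h>
#include "MBPkernels.h" // declares the kernel (KERNEL is assumed to expand to __global__ void)

void LaunchCalcLocalGradSelectiveInputs(cudafloat * rmsF, cudafloat * bestRMS,
	cudafloat maxErrorGrowth, cudafloat * inputs,
	cudafloat * selectiveNeuronsWeights, cudafloat * selectiveNeuronsBias,
	cudafloat * weights, cudafloat * localGradientNextLayer,
	cudafloat * localGradient, int numSamples, int numOutputs, int numNeurons) {

	// x: next-layer outputs, y: selective input neurons; their product must
	// not exceed the device's maximum number of threads per block.
	dim3 block(numOutputs, numNeurons);
	dim3 grid(numSamples); // one block per training sample

	// lg[] = NUM_OUTPUTS * NUM_NEURONS partial gradients plus NUM_OUTPUTS
	// cached next-layer gradients (see the lgNextLayer pointer above).
	size_t sharedMem = (numOutputs * numNeurons + numOutputs) * sizeof(cudafloat);

	GPUMLib::CalcLocalGradSelectiveInputs<<<grid, block, sharedMem>>>(
		rmsF, bestRMS, maxErrorGrowth, inputs, selectiveNeuronsWeights,
		selectiveNeuronsBias, weights, localGradientNextLayer, localGradient);
}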