// GPUMLib  0.2.2
// GPU Machine Learning Library
// RBMstatusSmall.cu
1 /*
2  Noel Lopes is an Assistant Professor at the Polytechnic Institute of Guarda, Portugal
3  Copyright (C) 2009, 2010, 2011, 2012 Noel de Jesus Mendonša Lopes
4 
5  This file is part of GPUMLib.
6 
7  GPUMLib is free software: you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation, either version 3 of the License, or
10  (at your option) any later version.
11 
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  GNU General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License
18  along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #include "../common/CudaDefinitions.h"
22 
23 namespace GPUMLib {
24 
25 #define INPUT threadIdx.x
26 #define NUM_INPUTS blockDim.x
27 
28 #define NEURON threadIdx.y
29 #define NUM_NEURONS blockDim.y
30 
31 #define SAMPLE blockIdx.x
32 
33 KERNEL ComputeStatusHiddenUnitsSmallRBM(cudafloat * v, cudafloat * weights, cudafloat * b, cudafloat * h, float * randomValues) {
34  extern __shared__ cudafloat iw[];
35 
36  int connection = NEURON * NUM_INPUTS + INPUT;
37 
38  /*******
39  For each each input connection of all layer neurons, calculate the weight * input.
40  Results will be held in iw[]. This is done for the current sample.
41  *******/
42  cudafloat w = weights[connection];
43  iw[connection] = w * v[SAMPLE * NUM_INPUTS + INPUT];
44  __syncthreads();
45 
46  /*******
47  For each layer neuron, calculate its activation: sum(weight * input).
48  Results for neuron n will held on iw[n * NUM_INPUTS].
49  This is done for the current sample.
50  *******/
51  int numberElemSum = NUM_INPUTS;
52  for(int sumUpTo = (numberElemSum >> 1); numberElemSum > 1; sumUpTo = (numberElemSum >> 1)) {
53  int nextNumberElemSum = sumUpTo;
54  if (numberElemSum & 1) nextNumberElemSum++;
55 
56  if (INPUT < sumUpTo) iw[connection] += iw[connection + nextNumberElemSum];
57  numberElemSum = nextNumberElemSum;
58 
59  __syncthreads();
60  }
61 
62  /*******
63  Calculate the neurons output
64  *******/
65  __shared__ cudafloat output;
66  if (INPUT == 0) {
67  output = CUDA_SIGMOID(iw[connection] + b[NEURON]);
68  int idx = SAMPLE * NUM_NEURONS + NEURON;
69  if (randomValues != nullptr) output = (output > randomValues[idx]) ? CUDA_VALUE(1.0) : CUDA_VALUE(0.0);
70  h[idx] = output;
71  }
72 }
73 
74 KERNEL ComputeStatusVisibleUnitsSmallRBM(cudafloat * h, cudafloat * weights, cudafloat * a, cudafloat * v, float * randomValues) {
75  extern __shared__ cudafloat sum[];
76 
77  int connection = NEURON * NUM_INPUTS + INPUT;
78 
79  sum[connection] = h[SAMPLE * NUM_INPUTS + INPUT] * weights[INPUT * NUM_NEURONS + NEURON];
80  __syncthreads();
81 
83  //For each layer neuron, calculate its activation
84  //Results for neuron n will held on sum[n * NUM_INPUTS].
85  //This is done for the current sample.
86  //*******/
87  int numberElemSum = NUM_INPUTS;
88  for(int sumUpTo = (numberElemSum >> 1); numberElemSum > 1; sumUpTo = (numberElemSum >> 1)) {
89  int nextNumberElemSum = sumUpTo;
90  if (numberElemSum & 1) nextNumberElemSum++;
91 
92  if (INPUT < sumUpTo) sum[connection] += sum[connection + nextNumberElemSum];
93  numberElemSum = nextNumberElemSum;
94 
95  __syncthreads();
96  }
97 
99  //Calculate the neurons output
100  //*******/
101  if (INPUT == 0) {
102  cudafloat output = CUDA_SIGMOID(sum[connection] + a[NEURON]);
103  int idx = SAMPLE * NUM_NEURONS + NEURON;
104  if (randomValues != nullptr) output = (output > randomValues[idx]) ? CUDA_VALUE(1.0) : CUDA_VALUE(0.0);
105  v[idx] = output;
106  }
107 }
108 
109 }
// External definitions provided by ../common/CudaDefinitions.h:
//   KERNEL          - defines the type of a kernel function
//   CUDA_SIGMOID(X) - sigmoid activation function (presumably 1/(1+exp(-X)))
//   CUDA_VALUE(X)   - cudafloat-typed literal
//   cudafloat       - floating point type used on the device (float)