GPUMLib  0.2.2
GPU Machine Learning Library
CalculateRMS.cu
1 /*
2  Noel Lopes is an Assistant Professor at the Polytechnic Institute of Guarda, Portugal
3  Copyright (C) 2009, 2010, 2011, 2012 Noel de Jesus Mendonça Lopes
4 
5  This file is part of GPUMLib.
6 
7  GPUMLib is free software: you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation, either version 3 of the License, or
10  (at your option) any later version.
11 
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  GNU General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License
18  along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #include "../reduction/SumWarp.h"
22 #include "MBPkernels.h"
23 
24 namespace GPUMLib {
25 
/// Kernel that collapses the values stored in \p rms into a single
/// root-mean-square figure and writes it to \p *rmsF.
///
/// Launch expectations (as used by KernelCalculateRMS in this file):
///  - a single block whose thread count equals \p blockSize;
///  - dynamic shared memory holding one cudafloat per thread.
///
/// \tparam blockSize Number of threads in the block; forwarded to the
///         SumBeforeWarp / SumWarp reduction helpers.
/// \param rms Input values; elements [0, numberPatterns) are read.
/// \param rmsF Output location; receives sqrt(sum / numberPatternsNeurons) / 2.
/// \param numberPatterns Number of elements of \p rms to accumulate.
/// \param numberPatternsNeurons Divisor applied to the sum. It is also the
///        value stored in \p *rmsF when the computed result is Inf or NaN.
template <int blockSize> KERNEL CalculateRMS(cudafloat * rms, cudafloat * rmsF, int numberPatterns, cudafloat numberPatternsNeurons) {
    extern __shared__ cudafloat shared_rms[];

    // Each thread walks the input with a stride of blockDim.x and keeps a
    // private running total, which is then published to its shared slot.
    cudafloat partial = CUDA_VALUE(0.0);
    for (int pattern = threadIdx.x; pattern < numberPatterns; pattern += blockDim.x) {
        partial += rms[pattern];
    }
    shared_rms[threadIdx.x] = partial;
    __syncthreads();

    // NOTE(review): SumBeforeWarp / SumWarp come from ../reduction/SumWarp.h
    // and are not visible here; presumably they reduce the shared array so
    // that shared_rms[0] ends up holding the block-wide sum - confirm there.
    SumBeforeWarp<blockSize>(shared_rms);

    // Only the first 32 threads take part in the final reduction stage.
    if (threadIdx.x >= 32) return;
    SumWarp<blockSize>(shared_rms);

    // A single thread turns the total into the final value and stores it.
    if (threadIdx.x != 0) return;

    cudafloat result = CUDA_SQRT(shared_rms[0] / numberPatternsNeurons) / CUDA_VALUE(2.0);
    if (IsInfOrNaN(result)) result = numberPatternsNeurons;
    *rmsF = result;
}
45 
/// Launches the CalculateRMS kernel on \p stream using one block and one
/// cudafloat of dynamic shared memory per thread.
///
/// The kernel is instantiated only for power-of-two block sizes (1 to 512,
/// plus 1024 when FERMI is defined). A requested \p blockSize that is not one
/// of those values is rounded DOWN to the largest supported size, so the
/// launch always happens and \p *rmsOut is always written. Previously such a
/// request silently launched nothing and left \p *rmsOut untouched. Each
/// thread strides over the input by the actual block dimension, so a smaller
/// block still covers every element.
///
/// \param stream CUDA stream on which the kernel is enqueued.
/// \param blockSize Requested number of threads. Values below 1 launch
///        nothing (as before).
/// \param rms Device input values (numberPatterns elements are read).
/// \param rmsOut Device location that receives the result.
/// \param numberPatterns Number of elements of \p rms to accumulate.
/// \param numberPatternsNeurons Divisor used by the kernel; see CalculateRMS.
void KernelCalculateRMS(cudaStream_t stream, int blockSize, cudafloat * rms, cudafloat * rmsOut, int numberPatterns, cudafloat numberPatternsNeurons) {
    // A block needs at least one thread; there is nothing valid to launch.
    if (blockSize < 1) return;

    #ifdef FERMI
    const int maxBlockSize = 1024;
    #else
    const int maxBlockSize = 512;
    #endif

    // Largest supported power of two that does not exceed the request.
    // For every size the original switch handled, threads == blockSize.
    int threads = 1;
    while (threads < maxBlockSize && threads * 2 <= blockSize) threads *= 2;

    switch(threads) {
        #ifdef FERMI
        case 1024:
            CalculateRMS<1024><<<1, threads, threads * sizeof(cudafloat), stream>>>(rms, rmsOut, numberPatterns, numberPatternsNeurons);
            break;
        #endif
        case 512:
            CalculateRMS<512><<<1, threads, threads * sizeof(cudafloat), stream>>>(rms, rmsOut, numberPatterns, numberPatternsNeurons);
            break;

        case 256:
            CalculateRMS<256><<<1, threads, threads * sizeof(cudafloat), stream>>>(rms, rmsOut, numberPatterns, numberPatternsNeurons);
            break;

        case 128:
            CalculateRMS<128><<<1, threads, threads * sizeof(cudafloat), stream>>>(rms, rmsOut, numberPatterns, numberPatternsNeurons);
            break;

        case 64:
            CalculateRMS<64><<<1, threads, threads * sizeof(cudafloat), stream>>>(rms, rmsOut, numberPatterns, numberPatternsNeurons);
            break;

        case 32:
            CalculateRMS<32><<<1, threads, threads * sizeof(cudafloat), stream>>>(rms, rmsOut, numberPatterns, numberPatternsNeurons);
            break;

        case 16:
            CalculateRMS<16><<<1, threads, threads * sizeof(cudafloat), stream>>>(rms, rmsOut, numberPatterns, numberPatternsNeurons);
            break;

        case 8:
            CalculateRMS<8><<<1, threads, threads * sizeof(cudafloat), stream>>>(rms, rmsOut, numberPatterns, numberPatternsNeurons);
            break;

        case 4:
            CalculateRMS<4><<<1, threads, threads * sizeof(cudafloat), stream>>>(rms, rmsOut, numberPatterns, numberPatternsNeurons);
            break;

        case 2:
            CalculateRMS<2><<<1, threads, threads * sizeof(cudafloat), stream>>>(rms, rmsOut, numberPatterns, numberPatternsNeurons);
            break;

        case 1:
            CalculateRMS<1><<<1, threads, threads * sizeof(cudafloat), stream>>>(rms, rmsOut, numberPatterns, numberPatternsNeurons);
            break;
    }
}
94 
95 }
void KernelCalculateRMS(cudaStream_t stream, int blockSize, cudafloat *rms, cudafloat *rmsOut, int numberPatterns, cudafloat numberPatternsNeurons)
Definition: CalculateRMS.cu:46
#define KERNEL
Defines the type of a kernel function.
#define CUDA_VALUE(X)
float cudafloat
#define CUDA_SQRT