GPUMLib  0.2.2
GPU Machine Learning Library
NMFquality.cu
1 /*
2  Noel Lopes is an Assistant Professor at the Polytechnic Institute of Guarda, Portugal
3  Copyright (C) 2009, 2010, 2011, 2012 Noel de Jesus Mendonça Lopes
4 
5  This file is part of GPUMLib.
6 
7  GPUMLib is free software: you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation, either version 3 of the License, or
10  (at your option) any later version.
11 
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  GNU General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License
18  along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #include "NMFkernels.h"
22 
23 namespace GPUMLib {
24 
27 
28 template <int blockSize> KERNEL NMFquality(cudafloat * V, cudafloat * WH, int n, cudafloat * quality) {
29  extern __shared__ cudafloat sum[];
30 
31  sum[threadIdx.x] = CUDA_VALUE(0.0);
32  for(int k = threadIdx.x; k < n; k += blockSize) {
33  cudafloat wh = WH[k];
34  sum[threadIdx.x] += (V[k] * log10(wh + SMALL_VALUE_TO_ADD_DENOMINATOR) - wh);
35  }
36  __syncthreads();
37 
38  if (blockSize >= 1024) {
39  if (threadIdx.x < 512) sum[threadIdx.x] += sum[threadIdx.x + 512];
40  __syncthreads();
41  }
42 
43  if (blockSize >= 512) {
44  if (threadIdx.x < 256) sum[threadIdx.x] += sum[threadIdx.x + 256];
45  __syncthreads();
46  }
47 
48  if (blockSize >= 256) {
49  if (threadIdx.x < 128) sum[threadIdx.x] += sum[threadIdx.x + 128];
50  __syncthreads();
51  }
52 
53  if (blockSize >= 128) {
54  if (threadIdx.x < 64) sum[threadIdx.x] += sum[threadIdx.x + 64];
55  __syncthreads();
56  }
57 
58  if (threadIdx.x < 32) {
59  volatile cudafloat * _sum = sum;
60 
61  if (blockSize >= 64) _sum[threadIdx.x] += _sum[threadIdx.x + 32];
62  if (blockSize >= 32) _sum[threadIdx.x] += _sum[threadIdx.x + 16];
63  if (blockSize >= 16) _sum[threadIdx.x] += _sum[threadIdx.x + 8];
64  if (blockSize >= 8) _sum[threadIdx.x] += _sum[threadIdx.x + 4];
65  if (blockSize >= 4) _sum[threadIdx.x] += _sum[threadIdx.x + 2];
66  if (blockSize >= 2) _sum[threadIdx.x] += _sum[threadIdx.x + 1];
67 
68  if (threadIdx.x == 0) *quality = sum[0];
69  }
70 }
71 
72 void KernelNMFquality(int blockSize, cudafloat * V, cudafloat * WH, int n, cudafloat * quality) {
73  switch(blockSize) {
74  #ifdef FERMI
75  case 1024:
76  NMFquality<1024><<<1, blockSize, blockSize * sizeof(cudafloat)>>>(V, WH, n, quality);
77  break;
78  #endif
79  case 512:
80  NMFquality<512><<<1, blockSize, blockSize * sizeof(cudafloat)>>>(V, WH, n, quality);
81  break;
82  case 256:
83  NMFquality<256><<<1, blockSize, blockSize * sizeof(cudafloat)>>>(V, WH, n, quality);
84  break;
85  case 128:
86  NMFquality<128><<<1, blockSize, blockSize * sizeof(cudafloat)>>>(V, WH, n, quality);
87  break;
88  case 64:
89  NMFquality<64><<<1, blockSize, blockSize * sizeof(cudafloat)>>>(V, WH, n, quality);
90  break;
91  case 32:
92  NMFquality<32><<<1, blockSize, blockSize * sizeof(cudafloat)>>>(V, WH, n, quality);
93  break;
94  case 16:
95  NMFquality<16><<<1, blockSize, blockSize * sizeof(cudafloat)>>>(V, WH, n, quality);
96  break;
97  case 8:
98  NMFquality<8><<<1, blockSize, blockSize * sizeof(cudafloat)>>>(V, WH, n, quality);
99  break;
100  case 4:
101  NMFquality<4><<<1, blockSize, blockSize * sizeof(cudafloat)>>>(V, WH, n, quality);
102  break;
103  case 2:
104  NMFquality<2><<<1, blockSize, blockSize * sizeof(cudafloat)>>>(V, WH, n, quality);
105  break;
106  case 1:
107  NMFquality<1><<<1, blockSize, blockSize * sizeof(cudafloat)>>>(V, WH, n, quality);
108  break;
109  }
110 }
111 
113 
114 }
void KernelNMFquality(int blockSize, cudafloat *V, cudafloat *WH, int n, cudafloat *quality)
Definition: NMFquality.cu:72
#define SMALL_VALUE_TO_ADD_DENOMINATOR
Small value added to the denominator of a fraction to prevent division by zero.
Definition: NMFkernels.h:32
#define KERNEL
Defines the type of a kernel function.
#define CUDA_VALUE(X)
float cudafloat