GPUMLib  0.2.2
GPU Machine Learning Library
EuclidianDistanceKernel.cu
1 /*
2  Ricardo Quintas is a MSc Student at the University of Coimbra, Portugal
3  Copyright (C) 2009, 2010 Ricardo Quintas
4 
5  This file is part of GPUMLib.
6 
7  GPUMLib is free software: you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation, either version 3 of the License, or
10  (at your option) any later version.
11 
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  GNU General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License
18  along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #include "../Common/CudaDefinitions.h"
22 #include "RBFkernels.h"
23 
24 
/**
 * KERNEL Euclidian Distance
 *
 * Each thread whose global coordinates (idnx, idny) fall inside the
 * output_width x output_height range writes exactly one output element:
 *
 *   Output[idnx + idny * output_width] =
 *       sqrt( sum over i in [0, centers_width) of
 *             (Centers[idnx * centers_width + i] - Input[idny + i * input_width])^2 )
 *
 * Launch layout: 2D grid of 2D blocks; the x dimension spans output_width and
 * the y dimension spans output_height. Threads outside that range do nothing,
 * so the grid may be rounded up to a whole number of blocks.
 * No shared memory is used.
 *
 * @param Output         Destination buffer, indexed as idnx + idny * output_width.
 * @param output_height  Number of valid y coordinates (upper bound for idny).
 * @param output_width   Number of valid x coordinates (upper bound for idnx) and row stride of Output.
 * @param Input          Source buffer, read at idny + i * input_width.
 * @param input_width    Stride between consecutive i values in Input.
 * @param Centers        Source buffer, read at idnx * centers_width + i.
 * @param centers_width  Number of terms summed per output element and row stride of Centers.
 */
KERNEL EuclidianDistance(cudafloat *Output, int output_height, int output_width, cudafloat *Input, int input_width, cudafloat *Centers, int centers_width){

    const int idnx = blockIdx.x * blockDim.x + threadIdx.x;
    const int idny = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads in the rounded-up tail of the grid have no element to compute.
    if(idnx >= output_width || idny >= output_height) return;

    // Flat offsets are formed in size_t so that large matrices cannot
    // overflow 32-bit int arithmetic.
    const cudafloat *center = Centers + (size_t)idnx * centers_width;
    const cudafloat *input  = Input + idny;

    // The sum is accumulated in double precision and narrowed only on the final store.
    double sum = 0;

    for(int i = 0; i < centers_width; i++){
        const double a = center[i];
        const double b = input[(size_t)i * input_width];
        const double diff = a - b;

        // Plain multiply instead of a general-purpose pow(diff, 2) call.
        sum += diff * diff;
    }

    Output[(size_t)idny * output_width + idnx] = sqrt(sum);
}
55 
/**
 * Host-side launcher for the EuclidianDistance kernel.
 *
 * Covers the output_width x output_height output with 16x16 thread blocks,
 * rounding the grid up in each dimension, and launches the kernel on the
 * default stream. The launch is not followed by a synchronization or an
 * error query here; callers that need either must do it themselves.
 *
 * If either output dimension is non-positive there is nothing to compute and
 * no kernel is launched (a zero-sized grid dimension would be an invalid
 * launch configuration).
 *
 * All pointer arguments are forwarded unchanged to the kernel.
 *
 * @param Output         Destination buffer passed to the kernel.
 * @param output_height  Extent covered by the grid's y dimension.
 * @param output_width   Extent covered by the grid's x dimension.
 * @param Input          Input buffer passed to the kernel.
 * @param input_width    Input stride passed to the kernel.
 * @param Centers        Centers buffer passed to the kernel.
 * @param centers_width  Number of terms per distance, passed to the kernel.
 */
extern "C" void KernelEuclidianDistance(cudafloat *Output, int output_height, int output_width, cudafloat *Input, int input_width, cudafloat *Centers, int centers_width)
{
    // Empty (or nonsensical negative) output: avoid an invalid-configuration launch.
    if(output_width <= 0 || output_height <= 0) return;

    const int blockSize = 16;

    // Ceiling division written without "+ blockSize - 1" so it cannot overflow int.
    const int wBlocks = output_width / blockSize + ((output_width % blockSize != 0) ? 1 : 0);
    const int hBlocks = output_height / blockSize + ((output_height % blockSize != 0) ? 1 : 0);

    dim3 grid(wBlocks, hBlocks);
    dim3 threads(blockSize, blockSize);
    EuclidianDistance<<<grid,threads>>>(Output, output_height, output_width, Input, input_width, Centers, centers_width);
}
#define KERNEL
Defines the type of a kernel function.
void KernelEuclidianDistance(cudafloat *d_C, cudafloat *d_A, cudafloat *d_B, int uiWA, int uiWB, int uiWC, int uiHC)
float cudafloat