GPUMLib  0.2.2
GPU Machine Learning Library
KMeans.cpp
1 /*
2  Ricardo Quintas is an MSc Student at the University of Coimbra, Portugal
3  Copyright (C) 2009, 2010 Ricardo Quintas
4 
5  This file is part of GPUMLib.
6 
7  GPUMLib is free software: you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation, either version 3 of the License, or
10  (at your option) any later version.
11 
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  GNU General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License
18  along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #include "KMeans.h"
22 #include "kmeanskernels.h"
23 
24 namespace GPUMLib {
25 
27  this->seed = (unsigned) time(0);
28 
29  changed_var.Value() = 1;
30  changed_bool.Value() = false;
31 }
32 
33 KMeans::~KMeans(){
34 }
35 
36 void KMeans::arrayShuffle(int *array, int length) {
37 
38  srand(this->seed);
39 
40  for(int i = 0; i < length; i++) {
41  int r = rand() % length;
42  int t = array[r];
43  array[r] = array[i];
44  array[i] = t;
45  }
46 }
47 
48 bool sort_pred(const my_pair& left, const my_pair& right)
49 {
50  return left.second < right.second;
51 }
52 
53 
54 int KMeans::randint(int lowest, int highest){
55 
56  srand(this->seed);
57 
58  int range=(highest-lowest)+1;
59 
60  return lowest+int(range*rand()/(RAND_MAX + 1.0));
61 }
62 
63 
65 
66  int i; //,j;
67 
68  /*Random initialization of centers*/
69 
70  HostArray<int> used(Input.Rows());
71 
72  for(i = 0; i < Input.Rows(); i++) {
73  used[i]= i;
74  }
75 
76  arrayShuffle(used.Pointer(), used.Length());
77 
78  DeviceMatrix<float> dCenters(kneighbours,Input.Columns());
79 
80  for(int i = 0; i < kneighbours; i++){
81  cudaMemcpy(&(dCenters.Pointer()[i*dCenters.Columns()]),&(Input.Pointer()[used[i]*Input.Columns()]),sizeof(float)*Input.Columns(),cudaMemcpyDeviceToDevice);
82  }
83 
84  /*Selection of centers with k-means*/
85  bool changed = true;
86 
87  DeviceArray<float> UpperBounds(Input.Rows());
88  DeviceArray<int> CenterAttrib(Input.Rows());
89 
90  DeviceMatrix<float> DistanceBeetweenCenters(dCenters.Rows(),dCenters.Rows());
91 
92  KernelEuclidianDistance(DistanceBeetweenCenters.Pointer(),dCenters.Pointer(),dCenters.Pointer(),dCenters.Columns(),dCenters.Columns(),DistanceBeetweenCenters.Columns(),DistanceBeetweenCenters.Rows());
93 
94 
95  DeviceMatrix<float> InitialDistances(Input.Rows(),dCenters.Rows());
96  DeviceMatrix<float> Buffer(Input.Rows(),dCenters.Rows());
97  DeviceMatrix<float> LowerBounds(Input.Rows(),dCenters.Rows());
98 
99  KernelEuclidianDistance(InitialDistances.Pointer(),Input.Pointer(),dCenters.Pointer(),Input.Columns(),dCenters.Columns(),InitialDistances.Columns(),InitialDistances.Rows());
100  cudaMemcpy(LowerBounds.Pointer(),InitialDistances.Pointer(),sizeof(float)*InitialDistances.Elements(),cudaMemcpyDeviceToDevice);
101 
102  KernelCenterAttribution_Bounds(InitialDistances.Pointer(),InitialDistances.Rows(),InitialDistances.Columns(),CenterAttrib.Pointer(),UpperBounds.Pointer());
103 
104 
105  DeviceMatrix<float> NewCenters(dCenters.Rows(),dCenters.Columns());
106 
107  //Copy data to new centers, averaging data
108 
109  KernelCopyCenters2(NewCenters.Pointer(),NewCenters.Rows(),NewCenters.Columns(),Input.Pointer(),Input.Rows(),CenterAttrib.Pointer());
110 
111  DeviceArray<float> S(dCenters.Rows());
112 
113  DeviceArray<bool> R(Input.Rows());
114  cudaMemset(R.Pointer(),true,sizeof(bool)*R.Length());
115 
116 
117  KernelEuclidianDistance(DistanceBeetweenCenters.Pointer(),dCenters.Pointer(),NewCenters.Pointer(),dCenters.Columns(),NewCenters.Columns(),DistanceBeetweenCenters.Columns(),DistanceBeetweenCenters.Rows());
118 
119 
120  bool flag = true;
121 
122  while(changed){
123 
124  KernelS(DistanceBeetweenCenters.Pointer(),DistanceBeetweenCenters.Rows(),DistanceBeetweenCenters.Columns(),S.Pointer());
125 
126 
127  KernelStep3(Input.Pointer(),Input.Rows(),UpperBounds.Pointer(),S.Pointer(),R.Pointer(),CenterAttrib.Pointer(),LowerBounds.Pointer(),DistanceBeetweenCenters.Pointer(),InitialDistances.Pointer(),NewCenters.Pointer(),NewCenters.Rows(),NewCenters.Columns());
128 
129  KernelCopyCenters2(NewCenters.Pointer(),NewCenters.Rows(),NewCenters.Columns(),Input.Pointer(),Input.Rows(),CenterAttrib.Pointer());
130 
131 
132  changed = false;
133 
134  KernelReduce_bool(R.Pointer(),R.Pointer(),R.Length());
135 
136  if (cudaStreamQuery(streamChanged) == cudaSuccess) changed_bool.UpdateValueAsync(R.Pointer(), streamChanged);
137 
138  if(!changed_bool.Value()){
139  changed = true;
140  }
141 
142  KernelEuclidianDistance(DistanceBeetweenCenters.Pointer(),dCenters.Pointer(),NewCenters.Pointer(),dCenters.Columns(),NewCenters.Columns(),DistanceBeetweenCenters.Columns(),DistanceBeetweenCenters.Rows());
143  KernelStep5(Input.Rows(),UpperBounds.Pointer(),R.Pointer(),CenterAttrib.Pointer(),LowerBounds.Pointer(),DistanceBeetweenCenters.Pointer(),InitialDistances.Pointer(),NewCenters.Pointer(),NewCenters.Rows(),NewCenters.Columns());
144 
145  //replace each center
146  dCenters = NewCenters;
147 
148  }
149 
150  return dCenters;
151 
152 }
153 
154 
156 
157  int i; //,j;
158  /*Random initialization of centers*/
159 
160  int *used;
161  used = (int*) malloc(sizeof(int)*device_X.Rows());
162 
163  for(i = 0; i < device_X.Rows(); i++) {
164  used[i]= i;
165  }
166  arrayShuffle(used, device_X.Rows());
167 
168  DeviceMatrix<float> device_Centers(kneighbours,device_X.Columns());
169 
170  for(int i = 0; i < kneighbours; i++){
171  cudaMemcpy(&(device_Centers.Pointer()[i*device_Centers.Columns()]),&(device_X.Pointer()[used[i]*device_X.Columns()]),sizeof(float)*device_X.Columns(),cudaMemcpyDeviceToDevice);
172  }
173 
174  free(used);
175 
176  /*Selection of centers with k-means*/
177  bool changed = true;
178 
179  DeviceMatrix<float> device_output2(device_X.Rows(),device_Centers.Rows());
180 
181  DeviceArray<int>device_attrib_center(device_output2.Rows());
182  DeviceArray<int>device_attrib_center_old(device_output2.Rows());
183  DeviceArray<int>device_attrib_center_out(device_output2.Rows());
184 
185  cudaMemset(device_attrib_center_old.Pointer(),0,sizeof(int)*device_attrib_center_old.Length());
186 
187  DeviceMatrix<float> device_newCenters(device_Centers.Rows(),device_Centers.Columns());
188 
189  while(changed){
190  KernelEuclidianDistance(device_output2.Pointer(),device_X.Pointer(),device_Centers.Pointer(),device_X.Columns(),device_Centers.Columns(),device_output2.Columns(),device_output2.Rows());
191 
192  /*Attribution of samples to centers*/
193  cudaMemset(device_attrib_center.Pointer(),0,sizeof(int)*device_attrib_center.Length());
194  KernelCenterAttribution(device_output2.Pointer(),device_output2.Rows(), device_output2.Columns(),device_attrib_center.Pointer());
195 
196  cudaMemset(device_output2.Pointer(),0,sizeof(float)*device_output2.Elements());
197  KernelPrepareCenterCopy(device_output2.Pointer(),device_output2.Rows(),device_output2.Columns(),device_attrib_center.Pointer());
198 
199  /*Copy data to new centers, averaging data*/
200  cudaMemset(device_newCenters.Pointer(),0,sizeof(float)*device_newCenters.Elements());
201  KernelCopyCenters(device_newCenters.Pointer(),device_newCenters.Rows(),device_newCenters.Columns(),device_X.Pointer(),device_X.Rows(),device_attrib_center.Pointer(),device_output2.Pointer(),device_output2.Rows(), device_output2.Columns());
202 
203  /*Check if centers changed*/
204  changed = false;
205 
206  KernelReduce2(device_attrib_center_out.Pointer(),device_attrib_center.Pointer(),device_attrib_center_old.Pointer(),device_attrib_center.Length());
207 
208  if (cudaStreamQuery(streamChanged) == cudaSuccess) changed_var.UpdateValueAsync(device_attrib_center_out.Pointer(), streamChanged);
209 
210  if(changed_var.Value() > 0){
211  changed = true;
212  }
213 
214  cudaMemcpy(device_attrib_center_old.Pointer(),device_attrib_center.Pointer(),sizeof(int)*device_attrib_center.Length(),cudaMemcpyDeviceToDevice);
215  device_Centers = device_newCenters;
216 
217  }
218 
219  return device_Centers;
220 }
221 
222 }
void KernelReduce_bool(bool *output, bool *input, int n)
void UpdateValueAsync(Type *deviceValue, cudaStream_t stream)
void KernelCopyCenters2(cudafloat *Output, int output_height, int output_width, cudafloat *Input, int input_width, int *attrib_center)
DeviceMatrix< float > Execute(DeviceMatrix< float > &Input, int kneighbours)
Definition: KMeans.cpp:155
Create an array of any type, on the host, that automatically manages the memory used to hold its elem...
Definition: HostArray.h:40
void KernelReduce2(int *output, int *input, int *g_idata_old, int n)
void KernelCopyCenters(cudafloat *Output, int output_height, int output_width, cudafloat *Input, int input_width, int *attrib_center, cudafloat *Indexes, int idx_height, int idx_width)
void KernelS(cudafloat *Output, int output_height, int output_width, float *S)
Type * Pointer() const
Definition: BaseMatrix.h:88
int Columns() const
Definition: BaseMatrix.h:80
void KernelCenterAttribution(cudafloat *Output, int output_height, int output_width, int *attrib_center)
DeviceMatrix< float > Execute_TI(DeviceMatrix< float > &Input, int kneighbours)
Definition: KMeans.cpp:64
bool sort_pred(const my_pair &left, const my_pair &right)
Sorting predicate, for use with the sort function.
Definition: rbf.h:65
void KernelStep5(int input_height, float *Upperbounds, bool *R, int *CenterAttrib, float *LowerBounds, float *DistanceBeetweenCenters, float *InitialDistances, float *NewCenters, int centers_height, int centers_width)
KMeans()
Base class for the KMeans algorithm.
Definition: KMeans.cpp:26
int Rows() const
Definition: BaseMatrix.h:74
void KernelCenterAttribution_Bounds(cudafloat *Output, int output_height, int output_width, int *attrib_center, float *upperbound)
void KernelStep3(float *Input, int input_height, float *Upperbounds, float *S, bool *R, int *CenterAttrib, float *LowerBounds, float *DistanceBeetweenCenters, float *InitialDistances, float *NewCenters, int centers_height, int centers_width)
void KernelEuclidianDistance(cudafloat *d_C, cudafloat *d_A, cudafloat *d_B, int uiWA, int uiWB, int uiWC, int uiHC)
void KernelPrepareCenterCopy(cudafloat *Output, int output_height, int output_width, int *attrib_center)