GPUMLib  0.2.2
GPU Machine Learning Library
ResourceAllocatingNetwork.cpp
1 /*
2  Ricardo Quintas is an MSc Student at the University of Coimbra, Portugal
3  Copyright (C) 2009, 2010 Ricardo Quintas
4 
5  This file is part of GPUMLib.
6 
7  GPUMLib is free software: you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation, either version 3 of the License, or
10  (at your option) any later version.
11 
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  GNU General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License
18  along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #include "ResourceAllocatingNetwork.h"
22 
23 namespace GPUMLib {
24 
25 ResourceAllocatingNetwork::ResourceAllocatingNetwork(float scale_of_interest_max, float desired_accuracy, float overlap_factor, int Rows, int Columns, int NumClasses){
26 
27 
28  this->scale_of_interest_max = scale_of_interest_max;
29  // this->scale_of_interest_min = scale_of_interest_min;
30  this->scale_of_interest = scale_of_interest_max;
31 
32  //this->decay = decay;
33  this->desired_accuracy = desired_accuracy;
34 
35  //this->alpha = alpha;
36  this->overlap_factor = overlap_factor;
37 
38  this->MaxWidth = 1;
39 
40  this->start = clock();
41 
42  this->times[0] = 0;
43  this->times[1] = 0;
44  this->times[2] = 0;
45  this->times[3] = 0;
46 
47 
48  this->Columns = Columns;
49  this->NumCenters = 0;
50  this->NumMemoryItems = 1;
51  this->NumClasses = NumClasses;
52 
53 
54  this->dCenters = DeviceMatrix<float>(Rows,Columns);
55  this->dWeights = DeviceMatrix<float>(Rows,NumClasses);
56  this->dWidths = DeviceArray<float>(Rows);
57 
58  this->dIMemory = DeviceMatrix<float>(Rows,Columns);
59  this->dTMemory_2 = DeviceMatrix<float>(Rows,NumClasses);
60 
61  this->CounterCenters = (int*)malloc(sizeof(int)*Rows);
62  memset(this->CounterCenters,0,sizeof(int)*Rows);
63 
64 
65  cudaMalloc((void **) &(this->derror),sizeof(float));
66  cudaMalloc((void **) &(this->ddistance),sizeof(float));
67 
68 
69  cudaMalloc((void **) &result, NumClasses * sizeof(float));
70 
71 
72  this->Distances = DeviceArray<float>(Rows);
73  this->error_arr = DeviceArray<float>(NumClasses);
74 
75 
76 }
77 
78 ResourceAllocatingNetwork::~ResourceAllocatingNetwork(){
79  free(this->CounterCenters);
80  cudaFree(this->derror);
81  cudaFree(this->ddistance);
82 }
83 
84 
86 
87  DeviceMatrix<float> device_output2(device_X.Rows(),device_X.Rows());
88 
89  KernelEuclidianDistance(device_output2.Pointer(), device_output2.Rows(), device_output2.Columns(), device_X.Pointer(), device_X.Columns(), device_X.Pointer(), device_X.Columns());
90 
91  DeviceArray<float> dMin(device_output2.Rows());
92  DeviceArray<int> dIdx(device_output2.Rows());
93 
94  //Min
95  FindMin(device_output2.Pointer(),device_output2.Rows(), device_output2.Columns(),dMin.Pointer(),dIdx.Pointer(),Y.Pointer());
96 
97  HostArray<float> hMin(dMin);
98  //HostArray<int> hIdx(dIdx);
99 
100  std::sort(hMin.Pointer(), hMin.Pointer() + hMin.Length());
101 
102  //Median
103  MaxWidth = hMin[(int)(device_output2.Rows()/2)];
104 
105  return MaxWidth;
106 }
107 
108 void ResourceAllocatingNetwork::FindNearestCenter(cudafloat *Sample, int Length,float *value, int* index,cudafloat* output){
109  KernelFindNearestCenter(output, NumCenters, Sample, dCenters.Pointer(), dCenters.Columns(),value);
110 }
111 
113 
114  DeviceMatrix<float> output(NumCenters,NumClasses);
115 
116  KernelCalculateNetworkActivation(output.Pointer(),Sample,Length,dCenters.Pointer(),NumCenters,dWeights.Pointer(),NumClasses,dWidths.Pointer(),overlap_factor);
117  KernelSumActivations(output.Pointer(),NumClasses,NumCenters);
118 
119  cudaMemcpy(result,output.Pointer(),sizeof(float)*NumClasses,cudaMemcpyDeviceToDevice);
120 
121  return result;
122 }
123 
124 void ResourceAllocatingNetwork::AddCenter(cudafloat *Sample,int Length, float* Width, float* Weight){
125 
126  cudaMemcpy(&(dCenters.Pointer()[NumCenters*dCenters.Columns()]),Sample,sizeof(float)*Length,cudaMemcpyDeviceToDevice);
127  cudaMemcpy(&(dWeights.Pointer()[NumCenters*dWeights.Columns()]),Weight,sizeof(float)*NumClasses,cudaMemcpyDeviceToDevice);
128  cudaMemcpy(&(dWidths.Pointer()[NumCenters]),Width,sizeof(float),cudaMemcpyDeviceToDevice);
129 
130  // if(Width < MaxWidth)
131 
132  // else
133  // cudaMemcpy(&(dWidths.Pointer()[NumCenters]),&MaxWidth,sizeof(float),cudaMemcpyHostToDevice);
134 
135  CounterCenters[NumCenters] = 1;
136 
137  NumCenters = NumCenters + 1;
138 
139 }
140 
141 
142 void ResourceAllocatingNetwork::AddMemory(cudafloat *Sample,int Length, float* Target){
143 
144  cudaMemcpy(&(dIMemory.Pointer()[NumMemoryItems*dIMemory.Columns()]),Sample,sizeof(float)*Length,cudaMemcpyDeviceToDevice);
145  cudaMemcpy(&(dTMemory_2.Pointer()[NumMemoryItems*dTMemory_2.Columns()]),Target,sizeof(float)*NumClasses,cudaMemcpyDeviceToDevice);
146 
147  NumMemoryItems = NumMemoryItems + 1;
148 
149 }
150 
151 void ResourceAllocatingNetwork::UpdateWeights(cudafloat *Sample,int Length, float* Target){
152 
153  cudaMemcpy(dIMemory.Pointer(),Sample,sizeof(float)*Length,cudaMemcpyDeviceToDevice);
154  cudaMemcpy(dTMemory_2.Pointer(),Target,sizeof(float)*NumClasses,cudaMemcpyDeviceToDevice);
155 
156  DeviceMatrix<float> device_output2(NumMemoryItems,NumCenters,ColumnMajor);
157 
158  KernelActivationMatrix(device_output2.Pointer(), device_output2.Rows(), device_output2.Columns(),
159  dIMemory.Pointer(), dIMemory.Columns(),
160  dCenters.Pointer(), dCenters.Columns(),
161  dWidths.Pointer(),overlap_factor);
162 
163  UTILS::pseudoinverse2(device_output2);
164 
165  matmul(dWeights.Pointer(), device_output2.Pointer(), dTMemory_2.Pointer(),device_output2.Rows(),dTMemory_2.Columns(), dTMemory_2.Columns(), device_output2.Columns());
166 }
167 
168 void ResourceAllocatingNetwork::Train(cudafloat *Sample,int Length,float Target, cudafloat* dTargetArr){
169 
170  if(NumCenters == 0){
171 
172  float* dMaxWidth; cudaMalloc((void **) &dMaxWidth,sizeof(float));
173  cudaMemcpy(dMaxWidth,&MaxWidth,sizeof(float),cudaMemcpyHostToDevice);
174 
175  AddCenter(Sample,Length,dMaxWidth,dTargetArr);
176  AddMemory(Sample,Length,dTargetArr);
177 
178  cudaFree(dMaxWidth);
179 
180  return;
181  }
182 
183  //float distance;
184  int index;
185  //float error;
186  // UTILS::checkGpuMem("3");
187 
188  CalculateNetworkActivation(Sample,Length);
189 
190 
191 
192  FindNearestCenter(Sample,Length,ddistance,&index,Distances.Pointer());
193  var_distance.UpdateValue(ddistance);
194 
195  KernelCalculateError(result,dTargetArr,error_arr.Pointer(),NumClasses,derror);
196  var_error.UpdateValue(derror);
197 
198 
199  //std::cout << Target << " " << var_error.Value() << " " << var_distance.Value() << std::endl;
200 
201 
202  if(var_error.Value() > desired_accuracy && var_distance.Value() > scale_of_interest){
203 
204  AddCenter(Sample,Length,ddistance,error_arr.Pointer());
205  KernelUpdateWidths(dWidths.Pointer(),Distances.Pointer(),NumCenters-1);
206  AddMemory(Sample,Length,dTargetArr);
207 
208  }else{
209 
210  UpdateWeights(Sample,Length,dTargetArr);
211 
212  //run input through the network again (step 5 of IncrementaLearningofFeatureSpace-Seiichi)
213  CalculateNetworkActivation(Sample,Length);
214 
215  FindNearestCenter(Sample,Length,ddistance,&index,Distances.Pointer());
216  KernelCalculateError(result,dTargetArr,error_arr.Pointer(),NumClasses,derror);
217 
218  var_error.UpdateValue(derror);
219 
220  //std::cout << "--> error " << var_error.Value() << std::endl;
221 
222  if(var_error.Value() > desired_accuracy){
223 
224  AddCenter(Sample,Length,ddistance,error_arr.Pointer());
225  KernelUpdateWidths(dWidths.Pointer(),Distances.Pointer(),NumCenters-1);
226  AddMemory(Sample,Length,dTargetArr);
227 
228  }
229 
230 
231  }
232 
233 }
234 
235 }
Type * Pointer() const
Definition: BaseArray.h:70
float * CalculateNetworkActivation(cudafloat *Sample, int Length)
Type * Pointer() const
Definition: BaseMatrix.h:88
int Columns() const
Definition: BaseMatrix.h:80
ResourceAllocatingNetwork(float scale_of_interest_max, float desired_accuracy, float overlap_factor, int Rows, int Columns, int NumClasses)
int Rows() const
Definition: BaseMatrix.h:74
void Train(cudafloat *Sample, int Length, float Target, float *dTargetArr)
void KernelEuclidianDistance(cudafloat *d_C, cudafloat *d_A, cudafloat *d_B, int uiWA, int uiWB, int uiWC, int uiHC)
float cudafloat
float FindMaxWidth(DeviceMatrix< float > &X, DeviceMatrix< float > &Y)