% Benchmark script comparing CPU and GPU execution times of funGPU.
%
% Steps performed by this script:
%   1. Detect every GPU device and time its initial reset.
%   2. Ask the user for the test, matrix, operation, inputs and outputs modes.
%   3. For each usable GPU: time funGPU on host arrays and on gpuArray inputs,
%      in single then double precision, repeat the single precision test
%      through mdce_run, plot the speed gains and save the timings.
%
% Project functions called (defined outside this file): funGPU, mdce_run,
% clustermode_luke, load_structures_yp, graph1D_jd, print_jd.
%
close all
clc
clear all

p_opt = 2;      % first argument handed to print_jd for every exported figure

flag_GPU = 0;   % becomes 1 as soon as at least one supported GPU is initialised

disp('Test of the GPU calculations. Search GPU devices...');
disp('----------------------------------------------------------------------');

% gpuDevice (no argument) only returns the currently selected device, so its
% length is always 1; gpuDeviceCount is the actual number of devices.
try
    ngpu = gpuDeviceCount;
catch
    ngpu = 0;   % GPU support is not available at all on this installation
end

etime_init = zeros(1,ngpu);

for igpu = 1:ngpu
    % Plain-struct snapshot: it stays readable after another device is selected.
    gpuStatus(igpu).Name = ['#',int2str(igpu)];
    gpuStatus(igpu).DeviceSupported = false;
    try
        tic,reset(gpuDevice(igpu));
        gpuInfo = gpuDevice(igpu);
        etime_init(igpu) = toc;

        gpuStatus(igpu).Name = gpuInfo.Name;
        gpuStatus(igpu).DeviceSupported = gpuInfo.DeviceSupported;

        disp(['The initial phase (memory reset + CUDA compatibility) of the GPU card #',int2str(igpu),' last ',num2str(etime_init(igpu)),' (s).']);
        disp('----------------------------------------------------------------------');
    catch err
        % One unusable card must not prevent the other cards from being tested.
        disp(['The GPU card #',int2str(igpu),' cannot be used: ',err.message]);
        disp('----------------------------------------------------------------------');
    end
end

if ngpu > 0 && any([gpuStatus.DeviceSupported])
    flag_GPU = 1;
end

if ~flag_GPU
    error(['No supported GPU device was found on this computer']);
end

test_mode = input('GPU test mode (1): repetitive scan (same matrix), (2): progressive scan (matrix size is growing to the max. allowed by the GPU memory) ? ','s');
matrix_type = input('Matrix type (1): all coefficients are set to 1, (2): random coefficients ? ','s');
fun_mode = input('Operation type (1): dot-product between matrix coefficients, (2): full matrix product ? ','s');
inputs_mode = input('Inputs type (1): basic, (2): basic + table of cells, (3): basic + table of cells and structures ? ','s');
outputs_mode = input('Outputs type (0): none, (1): ones tensor, (2): two tensors, (3): one structure with two fields, (4): table of cells (5): all data ? ','s');

% Any answer other than '1', '2' or '3' falls back to the basic inputs.
if ~any(strcmp(inputs_mode,{'1','2','3'}))
    inputs_mode = '1';
end

for igpu = 1:ngpu
    if flag_GPU && gpuStatus(igpu).DeviceSupported

        % Make sure the benchmark runs on device #igpu. The device is only
        % re-selected when needed, since selecting a device also resets it.
        gpuInfo = gpuDevice;
        if gpuInfo.Index ~= igpu
            gpuInfo = gpuDevice(igpu);
        end

        gpu_name = gpuInfo.Name;
        gpu_free = gpuInfo.FreeMemory;

        % Output names keep their historical form with a single GPU; a suffix
        % is appended only when several cards would otherwise overwrite files.
        if ngpu > 1
            gpu_tag = ['_gpu',int2str(igpu)];
        else
            gpu_tag = '';
        end

        disp('----------------------------------------------------------------------');
        disp(['Calculations on the GPU unit #',int2str(igpu),' ''',gpu_name,''' are supported']);
        GPUmem = [num2str(round(gpu_free/1e6)),' MBytes'];
        disp(['Max. GPU free memory available : ',GPUmem]);
        disp('----------------------------------------------------------------------');

        % ----- funGPU arguments ------------------------------------------
        a = 2;
        b = 3;

        if strcmp(fun_mode,'2')
            % NOTE(review): presumably funGPU switches to the full matrix
            % product when a is complex - confirm against funGPU.
            a = a + 1i;
        end

        c{1} = pi;c{2} = 3.0*1i;c{3} = 2.0;
        d(1).a = 4.0;d(2).a = 3.0;d(1).b = 1.0+1i;

        % Optional trailing arguments selected by the inputs mode.
        switch inputs_mode
            case '2'
                extra_args = {c};
            case '3'
                extra_args = {c,d};
            otherwise
                extra_args = {};
        end

        % Largest square matrix sizes tested; identical for all inputs modes.
        % NOTE(review): the /8, /16 and /3 factors are empirical - their
        % rationale is not visible in this file.
        nmax_single = round(sqrt(gpu_free/8)/3);
        nmax_double = round(sqrt(gpu_free/16)/3);

        % ----- single precision ------------------------------------------
        nm_single = 100:100:nmax_single;

        if strcmp(test_mode,'1')
            nm_single = ones(size(nm_single))*round(nmax_single/2);
        end

        etime_single = zeros(1,length(nm_single));
        etime_single_GPU = zeros(1,length(nm_single));

        sizeX_single = zeros(1,length(nm_single));

        if strcmp(matrix_type,'1')
            X = ones(nmax_single,'single');
        else
            X = rand(nmax_single,'single');
        end

        for im = 1:length(nm_single)

            if strcmp(test_mode,'2')
                % Progressive scan: a new matrix of growing size each time.
                if strcmp(matrix_type,'1')
                    X = ones(nm_single(im),'single');
                else
                    X = rand(nm_single(im),'single');
                end
            end

            sizeX = whos('X');
            sizeX_single(im) = sizeX.bytes/1e6;

            % Host (CPU) reference timing.
            tic,[Ys0,Zs0,Ts0,Ws0] = funGPU(X,a,b,extra_args{:});etime_single(im) = toc;

            % GPU timing: host-to-device transfer, computation, requested
            % device-to-host transfers.
            tic,[Y_GPU,Z_GPU,T_GPU,W_GPU] = funGPU(gpuArray(X),a,b,extra_args{:});

            switch outputs_mode
                case '1'
                    Ys1 = gather(Y_GPU);
                case '2'
                    Ys1 = gather(Y_GPU);
                    Zs1 = gather(Z_GPU);
                case '3'
                    Ts1 = gather(T_GPU);
                case '4'
                    Ws1 = gather(W_GPU);
                case '5'
                    Ys1 = gather(Y_GPU);
                    Zs1 = gather(Z_GPU);
                    Ts1 = gather(T_GPU);
                    Ws1 = gather(W_GPU);
            end

            % GPU work is asynchronous: wait for completion before reading
            % the timer, otherwise outputs mode '0' would not time it.
            wait(gpuInfo);
            etime_single_GPU(im) = toc;

            disp(['Single precision calculations, iteration #',int2str(im),'/',int2str(length(nm_single)),', tfun = ',num2str(etime_single(im)),', tfunGPU = ',num2str(etime_single_GPU(im))])

        end

        gain_single_GPU = etime_single./etime_single_GPU;

        % ----- double precision ------------------------------------------
        nm_double = 100:100:nmax_double;

        if strcmp(test_mode,'1')
            nm_double = ones(size(nm_double))*round(nmax_double/2);
        end

        etime_double = zeros(1,length(nm_double));
        etime_double_GPU = zeros(1,length(nm_double));

        sizeX_double = zeros(1,length(nm_double));

        if strcmp(matrix_type,'1')
            X = ones(nmax_double,'double');
        else
            X = rand(nmax_double,'double');
        end

        for im = 1:length(nm_double)

            if strcmp(test_mode,'2')
                if strcmp(matrix_type,'1')
                    X = ones(nm_double(im),'double');
                else
                    X = rand(nm_double(im),'double');
                end
            end

            sizeX = whos('X');
            sizeX_double(im) = sizeX.bytes/1e6;

            tic,[Yd0,Zd0,Td0,Wd0] = funGPU(X,a,b,extra_args{:});etime_double(im) = toc;

            tic,[Y_GPU,Z_GPU,T_GPU,W_GPU] = funGPU(gpuArray(X),a,b,extra_args{:});

            switch outputs_mode
                case '1'
                    Yd1 = gather(Y_GPU);
                case '2'
                    Yd1 = gather(Y_GPU);
                    Zd1 = gather(Z_GPU);
                case '3'
                    Td1 = gather(T_GPU);
                case '4'
                    Wd1 = gather(W_GPU);
                case '5'
                    Yd1 = gather(Y_GPU);
                    Zd1 = gather(Z_GPU);
                    Td1 = gather(T_GPU);
                    Wd1 = gather(W_GPU);
            end

            wait(gpuInfo);   % same synchronisation as in the single precision loop
            etime_double_GPU(im) = toc;

            disp(['Double precision calculations, iteration #',int2str(im),'/',int2str(length(nm_double)),', tfun = ',num2str(etime_double(im)),', tfunGPU = ',num2str(etime_double_GPU(im))])

        end

        gain_double_GPU = etime_double./etime_double_GPU;

        % ----- same single precision test through mdce_run -----------------
        dkepath = load_structures_yp('dkepath','','');

        % One input matrix per iteration. It is rebuilt for every GPU and in
        % every test mode, so that mdce_run never sees an undefined Xmdce.
        Xmdce = cell(1,length(nm_single));

        if strcmp(test_mode,'2')
            for im = 1:length(nm_single)
                if strcmp(matrix_type,'1')
                    Xmdce{im} = ones(nm_single(im),'single');
                else
                    Xmdce{im} = rand(nm_single(im),'single');
                end
            end
        else
            % Repetitive scan: the same matrix, of the size used by the direct
            % single precision test, is shared by every cell.
            if strcmp(matrix_type,'1')
                Xrep = ones(nmax_single,'single');
            else
                Xrep = rand(nmax_single,'single');
            end
            Xmdce(:) = {Xrep};
        end

        etime_single_tot = zeros(1,length(nm_single));
        etime_single_GPU_tot = zeros(1,length(nm_single));

        % mdce_mode: the real part goes to scheduler.mode and the imaginary
        % part to scheduler.gpu.
        mdce_mode = 0;

        clustermode.funGPU.scheduler.mode = real(mdce_mode);
        if ~isreal(mdce_mode)
            clustermode.funGPU.scheduler.gpu = imag(mdce_mode);
        else
            clustermode.funGPU.scheduler.gpu = 0;
        end

        dkecluster = clustermode_luke(clustermode,'funGPU',dkepath);

        % Entry j is the time needed to process the first j matrices.
        for j = 1:length(nm_single)
            tic,[flag,Y_GPU,Z_GPU,T_GPU,W_GPU] = mdce_run(@funGPU,{NaN,a,b},1,Xmdce(1:j),dkecluster);etime_single_tot(j) = toc;
        end

        mdce_mode = 1i;   % sets scheduler.gpu to 1 below

        clustermode.funGPU.scheduler.mode = real(mdce_mode);
        if ~isreal(mdce_mode)
            clustermode.funGPU.scheduler.gpu = imag(mdce_mode);
        else
            clustermode.funGPU.scheduler.gpu = 0;
        end

        dkecluster = clustermode_luke(clustermode,'funGPU',dkepath);

        for j = 1:length(nm_single)
            tic,[flag,Y_GPU,Z_GPU,T_GPU,W_GPU] = mdce_run(@funGPU,{NaN,a,b},1,Xmdce(1:j),dkecluster);etime_single_GPU_tot(j) = toc;
        end

        disp(['Single precision calculations, ',int2str(length(nm_single)),' iterations, tfun_mdce = ',num2str(etime_single_tot(end)),', tfunGPU_mdce = ',num2str(etime_single_GPU_tot(end))])

        % ----- figures -----------------------------------------------------
        gpu_title = [gpu_name,', ',GPUmem];

        if strcmp(matrix_type,'2')
            figure('Name','Random matrix size'),
            graph1D_jd(nm_single,gain_single_GPU,0,0,'Random number matrix size: (n*n)','GPU speed gain',gpu_title,'Single',NaN,NaN,'-','none','r',2,20,gca,0.9,0.7,0.7);

            graph1D_jd(nm_double,gain_double_GPU,0,0,'','','','Double',NaN,NaN,'-','none','b',2,20,gca,0.9,0.7,0.7);hold on

            legend({'Single','Double'});

            print_jd(p_opt,['fig_GPU_MatrixSize',gpu_tag],'./figures',1)

            figure('Name','Random matrix memory size')
            graph1D_jd(sizeX_single,gain_single_GPU,0,0,'Random number matrix (MBytes)','GPU speed gain',gpu_title,'Single',NaN,NaN,'-','none','r',2,20,gca,0.9,0.7,0.7);

            graph1D_jd(sizeX_double,gain_double_GPU,0,0,'','','','Double',NaN,NaN,'-','none','b',2,20,gca,0.9,0.7,0.7);hold on

            legend({'Single','Double'});

            print_jd(p_opt,['fig_GPU_MatrixMemorySize',gpu_tag],'./figures',1)
        else
            figure('Name','Uniform matrix size'),
            graph1D_jd(1:length(gain_single_GPU),gain_single_GPU,0,0,'Number of iterations','GPU speed gain',gpu_title,'Single',NaN,NaN,'-','none','r',2,20,gca,0.9,0.7,0.7);

            graph1D_jd(1:length(gain_double_GPU),gain_double_GPU,0,0,'','','','Double',NaN,NaN,'-','none','b',2,20,gca,0.9,0.7,0.7);hold on

            legend({'Single','Double'});

            print_jd(p_opt,['fig_GPU_MatrixSize',gpu_tag],'./figures',1)
        end

        figure('Name','Cumulative time'),
        graph1D_jd(1:length(nm_single),cumsum(etime_single),0,0,'Iteration number (n/100)','Cumulative time (s)',gpu_title,'Single',NaN,NaN,'-','none','r',2,20,gca,0.9,0.7,0.7);
        graph1D_jd(1:length(nm_single),etime_single_tot,0,0,'Iteration number (n/100)','Cumulative time (s)',gpu_title,'Single',NaN,NaN,'--','none','r',2,20,gca,0.9,0.7,0.7);
        % The direct GPU curve includes the initialisation time of this card.
        graph1D_jd(1:length(nm_single),etime_init(igpu)+cumsum(etime_single_GPU),0,0,'','','','Double',NaN,NaN,'-','none','b',2,20,gca,0.9,0.7,0.7);hold on
        graph1D_jd(1:length(nm_single),etime_single_GPU_tot,0,0,'','','','Double',NaN,NaN,'--','none','b',2,20,gca,0.9,0.7,0.7);hold on
        % Cell array of labels: rows of unequal length cannot form a char matrix.
        legend({'CPU - Single','CPU(LUKE) - Single','GPU - Single','GPU(LUKE) - Single'});

        print_jd(p_opt,['fig_GPU_Cumulative_time',gpu_tag],'./figures',1)

        % ----- save the timings ----------------------------------------------
        filename = ['GPU_test_',test_mode,'_matrix_',matrix_type,'_fun_',fun_mode,'_inputs_',inputs_mode,'_outputs_',outputs_mode,gpu_tag,'.mat'];
        save(filename,'etime_init','nm_single','etime_single','etime_single_GPU','nm_double','etime_double','etime_double_GPU','etime_single_tot','etime_single_GPU_tot');
        disp(['Results saved in ',filename]);

    else
        % Skip this card but keep testing the remaining ones.
        disp('----------------------------------------------------------------------');
        warning(['The GPU calculations on the ',gpuStatus(igpu).Name,' graphic card is not supported']);
    end
end