testGPU

PURPOSE ^

LUKE - Test function for LUKE GPU computing

SYNOPSIS ^

This is a script file.

DESCRIPTION ^

LUKE - Test function for LUKE GPU computing

 by Y. Peysson (CEA-IRFM) <yves.peysson@cea.fr> and J. Decker (CEA-IRFM) <joan.decker@cea.fr>

CROSS-REFERENCE INFORMATION ^

This function calls: (none listed)

This function is called by: (none listed)

SOURCE CODE ^

%LUKE - Test function for LUKE GPU computing
%
% Interactive benchmark script. For every GPU unit reported by gpuDeviceCount
% it times funGPU on the CPU and on the GPU (input wrapped in gpuArray), in
% single and then in double precision, and repeats the single precision case
% through mdce_run with the scheduler in CPU mode (mdce_mode = 0) and in GPU
% mode (mdce_mode = 1i). The speed gains and cumulative times are plotted
% with graph1D_jd, printed with print_jd and the timings are saved in a
% .mat file.
%
% Interactive inputs (all read as strings):
%   test_mode    - '1' repetitive scan, '2' progressive scan
%   matrix_type  - '1' matrix of ones, otherwise random matrix
%   fun_mode     - '2' makes the parameter a complex (a = 2 + 1i)
%   inputs_mode  - '1' funGPU(X,a,b), '2' funGPU(X,a,b,c), '3' funGPU(X,a,b,c,d)
%                  (any other answer is replaced by '1')
%   outputs_mode - selects which GPU outputs are gathered back to the host
%
% by Y. Peysson (CEA-IRFM) <yves.peysson@cea.fr> and J. Decker (CEA-IRFM) <joan.decker@cea.fr>
%
close all
clc
clear all
%
p_opt = 2;%first argument passed to print_jd
%
flag_GPU = 0;
%
disp(['Test of the GPU calculations. Search GPU devices...']);
disp('----------------------------------------------------------------------');
%
try
    ngpu = gpuDeviceCount;%gpuDevice alone only returns the currently selected unit
    if ngpu < 1,
        error('gpuDeviceCount returned 0');
    end
    %
    etime_init = zeros(1,ngpu);
    %
    for igpu = 1:ngpu,
        tic,reset(gpuDevice(igpu));%reset memory on each GPU unit (should be done to have the largest memory available)
        gpuStatus(igpu) = gpuDevice(igpu);%call each GPU unit to get the graphics card status, and the CUDA compatibility
        etime_init(igpu) = toc;
        %
        disp(['The initial phase (memory reset + CUDA compatibility) of the GPU card #',int2str(igpu),' last ',num2str(etime_init(igpu)),' (s).']);
        disp('----------------------------------------------------------------------');
    end
    flag_GPU = 1;
catch err
    %keep the original message but report the underlying cause as well
    error('No supported GPU device was found on this computer (%s)',err.message);
end
%
test_mode = input('GPU test mode (1): repetitive scan (same matrix), (2): progressive scan (matrix size is growing to the max. allowed by the GPU memory) ? ','s');
matrix_type = input('Matrix type (1): all coefficients are set to 1, (2): random coefficients ? ','s');
fun_mode = input('Operation type (1): dot-product between matrix coefficients, (2): full matrix product ? ','s');
inputs_mode = input('Inputs type (1): basic, (2): basic + table of cells, (3): basic + table of cells and structures ? ','s');
outputs_mode = input('Outputs type (0): none, (1): ones tensor, (2): two tensors, (3): one structure with two fields, (4): table of cells (5): all data ? ','s');
%
if ~any(strcmp(inputs_mode,{'1','2','3'})),
    inputs_mode = '1';%unknown answer: fall back to the basic inputs
end
%
for igpu = 1:ngpu,%loop over the number of available GPU units
    %
    if ~(flag_GPU && gpuStatus(igpu).DeviceSupported),
        disp('----------------------------------------------------------------------');
        error(['The GPU calculations on the ',gpuStatus(igpu).Name,' graphic card is not supported']);
    end
    %
    dev = gpuDevice(igpu);%select the unit under test so that gpuArray data are created on it
    gpuName = dev.Name;
    %
    disp('----------------------------------------------------------------------');
    disp(['Calculations on the GPU unit #',int2str(igpu),' ''',gpuName,''' are supported']);
    GPUmem = [num2str(round(dev.FreeMemory/1e6)),' MBytes'];
    disp(['Max. GPU free memory available : ',GPUmem]);
    disp('----------------------------------------------------------------------');
    %
    a = 2;%for parametric dependence of funGPU
    b = 3;%for parametric dependence of funGPU
    %
    if strcmp(fun_mode,'2'),
        a = a + 1i;%1i instead of i: immune to a variable named i
    end
    %
    c = {pi,3.0*1i,2.0};
    d(1).a = 4.0;d(2).a = 3.0;d(1).b = 1.0+1i;
    %
    % Optional trailing arguments of funGPU selected by inputs_mode
    %
    switch inputs_mode
        case '2'
            extra = {c};
        case '3'
            extra = {c,d};
        otherwise
            extra = {};
    end
    %
    nmax_single = round(sqrt(dev.FreeMemory/8)/3);%the factor three comes from the number input variables in funGPU.m
    nmax_double = round(sqrt(dev.FreeMemory/16)/3);%the factor three comes from the number input variables in funGPU.m
    %
    % Single precision calculations
    %
    nm_single = 100:100:nmax_single;
    if isempty(nm_single),
        nm_single = max(nmax_single,1);%less than 100 allowed: single run at the largest size
    end
    %
    if strcmp(test_mode,'1'),
        nm_single = ones(size(nm_single))*round(nmax_single/2);
    end
    %
    nsingle = length(nm_single);
    etime_single = zeros(1,nsingle);
    etime_single_GPU = zeros(1,nsingle);
    sizeX_single = zeros(1,nsingle);
    %
    if strcmp(matrix_type,'1'),
        X = ones(round(nmax_single),'single');
    else
        X = rand(round(nmax_single),'single');
    end
    %
    for im = 1:nsingle,
        %
        if strcmp(test_mode,'2'),%progressive scan: matrix rebuilt at the current size
            if strcmp(matrix_type,'1'),
                X = ones(nm_single(im),'single');
            else
                X = rand(nm_single(im),'single');
            end
        end
        %
        sizeX = whos('X');
        sizeX_single(im) = sizeX.bytes/1e6;%Mbytes
        %
        % CPU reference
        %
        tic,[Ys0,Zs0,Ts0,Ws0] = funGPU(X,a,b,extra{:});etime_single(im) = toc;
        %
        % GPU run (host to device transfer included in the timing)
        %
        tic,[Y_GPU,Z_GPU,T_GPU,W_GPU] = funGPU(gpuArray(X),a,b,extra{:});
        %
        if strcmp(outputs_mode,'1'),
            Ys1 = gather(Y_GPU);
        elseif strcmp(outputs_mode,'2'),
            Ys1 = gather(Y_GPU);
            Zs1 = gather(Z_GPU);
        elseif strcmp(outputs_mode,'3'),
            Ts1 = gather(T_GPU);
        elseif strcmp(outputs_mode,'4'),
            Ws1 = gather(W_GPU);
        elseif strcmp(outputs_mode,'5'),
            Ys1 = gather(Y_GPU);
            Zs1 = gather(Z_GPU);
            Ts1 = gather(T_GPU);
            Ws1 = gather(W_GPU);
        end
        %
        wait(dev);%GPU calls are asynchronous: block until all pending work is done before stopping the clock
        etime_single_GPU(im) = toc;
        %
        disp(['Single precision calculations, iteration #',int2str(im),'/',int2str(nsingle),', tfun = ',num2str(etime_single(im)),', tfunGPU = ',num2str(etime_single_GPU(im))])
        %
    end
    %
    gain_single_GPU = etime_single./etime_single_GPU;
    %
    % Double precision calculations
    %
    nm_double = 100:100:nmax_double;
    if isempty(nm_double),
        nm_double = max(nmax_double,1);%less than 100 allowed: single run at the largest size
    end
    %
    if strcmp(test_mode,'1'),
        nm_double = ones(size(nm_double))*round(nmax_double/2);
    end
    %
    ndouble = length(nm_double);
    etime_double = zeros(1,ndouble);
    etime_double_GPU = zeros(1,ndouble);
    sizeX_double = zeros(1,ndouble);
    %
    if strcmp(matrix_type,'1'),
        X = ones(round(nmax_double),'double');
    else
        X = rand(round(nmax_double),'double');
    end
    %
    for im = 1:ndouble,
        %
        if strcmp(test_mode,'2'),%progressive scan: matrix rebuilt at the current size
            if strcmp(matrix_type,'1'),
                X = ones(nm_double(im),'double');
            else
                X = rand(nm_double(im),'double');
            end
        end
        %
        sizeX = whos('X');
        sizeX_double(im) = sizeX.bytes/1e6;%Mbytes
        %
        % CPU reference
        %
        tic,[Yd0,Zd0,Td0,Wd0] = funGPU(X,a,b,extra{:});etime_double(im) = toc;
        %
        % GPU run (host to device transfer included in the timing)
        %
        tic,[Y_GPU,Z_GPU,T_GPU,W_GPU] = funGPU(gpuArray(X),a,b,extra{:});
        %
        if strcmp(outputs_mode,'1'),
            Yd1 = gather(Y_GPU);
        elseif strcmp(outputs_mode,'2'),
            Yd1 = gather(Y_GPU);
            Zd1 = gather(Z_GPU);
        elseif strcmp(outputs_mode,'3'),
            Td1 = gather(T_GPU);
        elseif strcmp(outputs_mode,'4'),
            Wd1 = gather(W_GPU);
        elseif strcmp(outputs_mode,'5'),
            Yd1 = gather(Y_GPU);
            Zd1 = gather(Z_GPU);
            Td1 = gather(T_GPU);
            Wd1 = gather(W_GPU);
        end
        %
        wait(dev);%GPU calls are asynchronous: block until all pending work is done before stopping the clock
        etime_double_GPU(im) = toc;
        %
        disp(['Double precision calculations, iteration #',int2str(im),'/',int2str(ndouble),', tfun = ',num2str(etime_double(im)),', tfunGPU = ',num2str(etime_double_GPU(im))])
        %
    end
    %
    gain_double_GPU = etime_double./etime_double_GPU;
    %
    % Same single precision test through mdce_run
    %
    dkepath = load_structures_yp('dkepath','','');
    %
    % List of matrices handed to mdce_run. It must exist in every test mode:
    % progressive scan -> one matrix per size, otherwise the matrix used in
    % the direct test is repeated (repmat shares the data between the cells).
    %
    if strcmp(test_mode,'2'),
        Xmdce = cell(1,nsingle);
        for im = 1:nsingle,
            if strcmp(matrix_type,'1'),
                Xmdce{im} = ones(nm_single(im),'single');
            else
                Xmdce{im} = rand(nm_single(im),'single');
            end
        end
    else
        if strcmp(matrix_type,'1'),
            Xref = ones(round(nmax_single),'single');
        else
            Xref = rand(round(nmax_single),'single');
        end
        Xmdce = repmat({Xref},1,nsingle);
    end
    %
    etime_single_tot = zeros(1,nsingle);
    etime_single_GPU_tot = zeros(1,nsingle);
    %
    mdce_mode = 0;%real value: scheduler.gpu is set to 0
    %
    clustermode.funGPU.scheduler.mode = real(mdce_mode);
    if ~isreal(mdce_mode),
        clustermode.funGPU.scheduler.gpu = imag(mdce_mode);
    else
        clustermode.funGPU.scheduler.gpu = 0;
    end
    %
    dkecluster = clustermode_luke(clustermode,'funGPU',dkepath);
    %
    for j = 1:nsingle,%each call processes the first j matrices: the time is cumulative
        tic,[flag,Y_GPU,Z_GPU,T_GPU,W_GPU] = mdce_run(@funGPU,{NaN,a,b},1,Xmdce(1:j),dkecluster);etime_single_tot(j) = toc;
    end
    %
    mdce_mode = 1i;%imaginary part: scheduler.gpu is set to 1
    %
    clustermode.funGPU.scheduler.mode = real(mdce_mode);
    if ~isreal(mdce_mode),
        clustermode.funGPU.scheduler.gpu = imag(mdce_mode);
    else
        clustermode.funGPU.scheduler.gpu = 0;
    end
    %
    dkecluster = clustermode_luke(clustermode,'funGPU',dkepath);
    %
    for j = 1:nsingle,%each call processes the first j matrices: the time is cumulative
        tic,[flag,Y_GPU,Z_GPU,T_GPU,W_GPU] = mdce_run(@funGPU,{NaN,a,b},1,Xmdce(1:j),dkecluster);etime_single_GPU_tot(j) = toc;
    end
    %
    disp(['Single precision calculations, ',int2str(nsingle),' iterations, tfun_mdce = ',num2str(etime_single_tot(end)),', tfunGPU_mdce = ',num2str(etime_single_GPU_tot(end))])
    %
    % Figures
    %
    if strcmp(matrix_type,'2'),
        figure('Name','Random matrix size'),
        graph1D_jd(nm_single,gain_single_GPU,0,0,'Random number matrix size: (n*n)','GPU speed gain',[gpuName,', ',GPUmem],'Single',NaN,NaN,'-','none','r',2,20,gca,0.9,0.7,0.7);
        graph1D_jd(nm_double,gain_double_GPU,0,0,'','','','Double',NaN,NaN,'-','none','b',2,20,gca,0.9,0.7,0.7);hold on
        legend(['Single';'Double']);
        %
        print_jd(p_opt,'fig_GPU_MatrixSize','./figures',1)
        %
        figure('Name','Random matrix memory size')
        graph1D_jd(sizeX_single,gain_single_GPU,0,0,'Random number matrix (MBytes)','GPU speed gain',[gpuName,', ',GPUmem],'Single',NaN,NaN,'-','none','r',2,20,gca,0.9,0.7,0.7);
        graph1D_jd(sizeX_double,gain_double_GPU,0,0,'','','','Double',NaN,NaN,'-','none','b',2,20,gca,0.9,0.7,0.7);hold on
        legend(['Single';'Double']);
        %
        print_jd(p_opt,'fig_GPU_MatrixMemorySize','./figures',1)
    else
        figure('Name','Uniform matrix size'),
        graph1D_jd([1:length(gain_single_GPU)],gain_single_GPU,0,0,'Number of iterations','GPU speed gain',[gpuName,', ',GPUmem],'Single',NaN,NaN,'-','none','r',2,20,gca,0.9,0.7,0.7);
        graph1D_jd([1:length(gain_double_GPU)],gain_double_GPU,0,0,'','','','Double',NaN,NaN,'-','none','b',2,20,gca,0.9,0.7,0.7);hold on
        legend(['Single';'Double']);
        %
        print_jd(p_opt,'fig_GPU_MatrixSize','./figures',1)
    end
    %
    figure('Name','Cumulative time'),
    graph1D_jd(1:nsingle,cumsum(etime_single),0,0,'Iteration number (n/100)','Cumulative time (s)',[gpuName,', ',GPUmem],'Single',NaN,NaN,'-','none','r',2,20,gca,0.9,0.7,0.7);
    graph1D_jd(1:nsingle,etime_single_tot,0,0,'Iteration number (n/100)','Cumulative time (s)',[gpuName,', ',GPUmem],'Single',NaN,NaN,'--','none','r',2,20,gca,0.9,0.7,0.7);
    graph1D_jd(1:nsingle,etime_init(igpu)+cumsum(etime_single_GPU),0,0,'','','','Double',NaN,NaN,'-','none','b',2,20,gca,0.9,0.7,0.7);hold on
    graph1D_jd(1:nsingle,etime_single_GPU_tot,0,0,'','','','Double',NaN,NaN,'--','none','b',2,20,gca,0.9,0.7,0.7);hold on
    legend(['CPU - Single      ';'CPU(LUKE) - Single';'GPU - Single      ';'GPU(LUKE) - Single']);
    %
    print_jd(p_opt,'fig_GPU_Cumulative_time','./figures',1)
    %
    % Save the timings (file name unchanged on single GPU computers, GPU
    % index appended otherwise so that one unit does not overwrite another)
    %
    filename = ['GPU_test_',test_mode,'_matrix_',matrix_type,'_fun_',fun_mode,'_inputs_',inputs_mode,'_outputs_',outputs_mode];
    if ngpu > 1,
        filename = [filename,'_gpu',int2str(igpu)];
    end
    filename = [filename,'.mat'];
    save(filename,'etime_init','nm_single','etime_single','etime_single_GPU','nm_double','etime_double','etime_double_GPU');
    disp(['Results saved in ',filename]);
    %
end

Community support and wiki are available on Redmine. Last update: 18-Apr-2019.