function [outGPU,outHost] = gpuBench()
%GPUBENCH  Time backslash, mtimes and FFT on the GPU and on the host.
%
%   gpuBench() runs the tasks on the GPU (when one is available) and on
%   the host, then opens the page produced by gpuBenchReport in the
%   browser. Nothing is returned.
%
%   gpuData = gpuBench() runs the GPU tasks only (the host tasks are
%   skipped when exactly one output is requested) and returns the
%   collected data instead of showing a report.
%
%   [gpuData,hostData] = gpuBench() runs the GPU and the host tasks and
%   returns both data objects instead of showing a report.
%
%   When no GPU is available the user is asked whether to continue with
%   host-only measurements. If they decline, a 'GPUBench:NoGPU' warning
%   is issued and any requested outputs are returned empty.

% Environment checks implemented in the gpubench package.
gpubench.checkMATLABVersion();
gpubench.checkPCT();

% Offer to continue with host-only measurements when there is no GPU.
hasGPU = parallel.gpu.GPUDevice.isAvailable();
if ~hasGPU
    % Named dialogTitle (not "title") to avoid shadowing the built-in.
    dialogTitle = 'Continue without a GPU?';
    question = ['The GPU could not be used. ' ...
        'Do you wish to continue and collect results for your CPU?'];
    buttons = {'Collect CPU results', 'Stop'};
    answer = questdlg( question, dialogTitle, buttons{:}, buttons{end} );
    if ~strcmp( answer, buttons{1} )
        warning( 'GPUBench:NoGPU', 'No GPU was available for GPUBench to use.' );
        % Give requested outputs a value so the caller does not hit an
        % "output argument not assigned" error. Only done when outputs
        % were requested, so a statement-form call does not echo "ans".
        if nargout
            outGPU = [];
            outHost = [];
        end
        return;
    end
end

% Result containers, tagged with the release string (e.g. "R2012a").
release = regexp( version, 'R\d*[ab]', 'match' );
gpuData = gpubench.PerformanceData( ...
    release{1}, ...
    gpubench.cpuinfo(), ...
    gpubench.gpuinfo(), ...
    now() );
hostData = gpubench.PerformanceData( ...
    release{1}, ...
    gpubench.cpuinfo(), ...
    struct(), ...
    now() );
hostData.IsHostData = true;

% Host tasks run unless exactly one output was requested.
doHost = (nargout~=1);
% Six tasks (three operations x two precisions) per device that is run.
numTasks = 6*(hasGPU+doHost);
reps = 3;
progressTitle = 'Running GPUBench...';
gpubench.multiWaitbar( progressTitle, 0 );

if hasGPU
    gpuData = runBackslash( gpuData, reps, 'single', 'GPU', progressTitle, numTasks );
    gpuData = runBackslash( gpuData, reps, 'double', 'GPU', progressTitle, numTasks );

    gpuData = runMTimes( gpuData, reps, 'single', 'GPU', progressTitle, numTasks );
    gpuData = runMTimes( gpuData, reps, 'double', 'GPU', progressTitle, numTasks );

    gpuData = runFFT( gpuData, reps, 'single', 'GPU', progressTitle, numTasks );
    gpuData = runFFT( gpuData, reps, 'double', 'GPU', progressTitle, numTasks );
end

if doHost
    hostData = runBackslash( hostData, reps, 'single', 'Host', progressTitle, numTasks );
    hostData = runBackslash( hostData, reps, 'double', 'Host', progressTitle, numTasks );

    hostData = runMTimes( hostData, reps, 'single', 'Host', progressTitle, numTasks );
    hostData = runMTimes( hostData, reps, 'double', 'Host', progressTitle, numTasks );

    hostData = runFFT( hostData, reps, 'single', 'Host', progressTitle, numTasks );
    hostData = runFFT( hostData, reps, 'double', 'Host', progressTitle, numTasks );
end

gpubench.multiWaitbar( progressTitle, 'Close' );

if nargout
    % Caller asked for the data: return it and skip the report.
    outGPU = gpuData;
    outHost = hostData;
else
    % No outputs requested: report on whichever devices were measured.
    reportData = {};
    if hasGPU
        reportData{end+1} = gpuData;
    end
    if doHost
        reportData{end+1} = hostData;
    end
    web( gpuBenchReport( reportData{:} ) );
end
0119
0120
0121
function data = runFFT( data, reps, type, device, mainProgressTitle, numTasks )
%RUNFFT  Time fft on complex column vectors of increasing length.
%
%   For every length returned by getTestSizes a random complex vector of
%   class TYPE is built (and moved to the GPU when DEVICE is 'GPU'), fft is
%   timed REPS times and the fastest time is kept. Lengths that fail, or
%   that are skipped because tooCloseToTimeout says so, are dropped before
%   the result is stored with addResult.

memoryMargin = 6;
lengths = getTestSizes( type, memoryMargin, device );
bestTimes = inf( size( lengths ) );
slowestSoFar = 0;

barLabel = sprintf( 'FFT (%s, %s)', device, type );
totalWork = sum( lengths );
gpubench.multiWaitbar( barLabel, 0 );

for k = 1:numel( lengths )
    if tooCloseToTimeout( slowestSoFar, device )
        % Mark as skipped; removed from the results below.
        bestTimes(k) = nan;
    else
        len = lengths(k);
        try
            v = complex( rand( len, 1, type ), rand( len, 1, type ) );
            if strcmpi( device, 'GPU' )
                v = gpuArray( v );
            end

            for r = 1:reps
                tStart = tic();
                spectrum = fft( v );
                dt = gtoc( tStart );
                bestTimes(k) = min( bestTimes(k), dt );
                slowestSoFar = max( slowestSoFar, dt );
                clear spectrum;

                % Each repetition contributes an equal part of this
                % length's share of the progress bars.
                step = lengths(k)/(reps*totalWork);
                gpubench.multiWaitbar( barLabel, 'Increment', step );
                gpubench.multiWaitbar( mainProgressTitle, 'Increment', step/numTasks );
            end
        catch
            % Any failure for this length discards the measurement.
            bestTimes(k) = nan;
        end
    end
end
gpubench.multiWaitbar( barLabel, 'Close' );

% Drop skipped and failed lengths.
dropped = isnan( bestTimes );
lengths(dropped) = [];
bestTimes(dropped) = [];

% Operation count recorded for a length-n transform: 5*n*log2(n).
data = addResult( data, 'FFT', type, lengths, 5*lengths.*log2(lengths), bestTimes );
0171
0172
0173
function data = runMTimes( data, reps, type, device, mainProgressTitle, numTasks )
%RUNMTIMES  Time the product of two square random matrices.
%
%   For each element count returned by getTestSizes two dim-by-dim random
%   matrices of class TYPE are built, with dim = round(sqrt(count)), and
%   moved to the GPU when DEVICE is 'GPU'. A*B is timed REPS times and the
%   fastest time is kept. Sizes that fail or are skipped are dropped before
%   the result is stored with addResult.

memoryMargin = 3.5;
elementCounts = getTestSizes( type, memoryMargin, device );

bestTimes = inf( size( elementCounts ) );
slowestSoFar = 0;

barLabel = sprintf( 'MTimes (%s, %s)', device, type );
totalWork = sum( elementCounts );
gpubench.multiWaitbar( barLabel, 0 );

dims = round( sqrt( elementCounts ) );
for k = 1:numel( elementCounts )
    if tooCloseToTimeout( slowestSoFar, device )
        % Mark as skipped; removed from the results below.
        bestTimes(k) = nan;
    else
        try
            dim = dims(k);
            lhs = rand( dim, dim, type );
            rhs = rand( dim, dim, type );
            if strcmpi( device, 'GPU' )
                lhs = gpuArray( lhs );
                rhs = gpuArray( rhs );
            end
            for r = 1:reps
                tStart = tic();
                product = lhs*rhs;
                dt = gtoc( tStart );
                bestTimes(k) = min( bestTimes(k), dt );
                slowestSoFar = max( slowestSoFar, dt );
                clear product;

                step = elementCounts(k)/(reps*totalWork);
                gpubench.multiWaitbar( barLabel, 'Increment', step );
                gpubench.multiWaitbar( mainProgressTitle, 'Increment', step/numTasks );
            end
        catch
            % Any failure for this size discards the measurement.
            bestTimes(k) = nan;
        end
    end
end
gpubench.multiWaitbar( barLabel, 'Close' );

% Drop skipped and failed sizes.
dropped = isnan( bestTimes );
dims(dropped) = [];
bestTimes(dropped) = [];

% Recorded size is dim^2; recorded operation count is dim^2*(2*dim-1).
data = addResult( data, 'MTimes', type, dims.*dims, dims.*dims.*(2.*dims-1), bestTimes );
0225
0226
0227
0228
function data = runBackslash( data, reps, type, device, mainProgressTitle, numTasks )
%RUNBACKSLASH  Time the solution of a dense linear system with backslash.
%
%   For each element count returned by getTestSizes (counts above 1e8 are
%   discarded) a dim-by-dim matrix 100*eye + rand and a random right-hand
%   side of class TYPE are built, with dim = round(sqrt(count)), and moved
%   to the GPU when DEVICE is 'GPU'. A\b is timed REPS times and the fastest
%   time is kept. Sizes that fail or are skipped are dropped before the
%   result is stored with addResult.

memoryMargin = 1.5;
elementCounts = getTestSizes( type, memoryMargin, device );

% Cap the problem size at 1e8 matrix elements.
elementCounts(elementCounts>1e8) = [];

bestTimes = inf( size( elementCounts ) );
slowestSoFar = 0;

barLabel = sprintf( 'Backslash (%s, %s)', device, type );
totalWork = sum( elementCounts );
gpubench.multiWaitbar( barLabel, 0 );

dims = round( sqrt( elementCounts ) );
for k = 1:numel( elementCounts )
    if tooCloseToTimeout( slowestSoFar, device )
        % Mark as skipped; removed from the results below.
        bestTimes(k) = nan;
    else
        try
            dim = dims(k);
            coeffs = 100*eye( dim, dim, type ) + rand( dim, dim, type );
            rhs = rand( dim, 1, type );
            if strcmpi( device, 'GPU' )
                coeffs = gpuArray( coeffs );
                rhs = gpuArray( rhs );
            end
            for r = 1:reps
                tStart = tic();
                solution = coeffs\rhs;
                dt = gtoc( tStart );
                bestTimes(k) = min( bestTimes(k), dt );
                slowestSoFar = max( slowestSoFar, dt );
                clear solution;

                step = elementCounts(k)/(reps*totalWork);
                gpubench.multiWaitbar( barLabel, 'Increment', step );
                gpubench.multiWaitbar( mainProgressTitle, 'Increment', step/numTasks );
            end
        catch
            % Any failure for this size discards the measurement.
            bestTimes(k) = nan;
        end
    end
end
gpubench.multiWaitbar( barLabel, 'Close' );

% Drop skipped and failed sizes.
dropped = isnan( bestTimes );
dims(dropped) = [];
bestTimes(dropped) = [];

% Recorded size is dim^2; recorded operation count is
% round(2/3*dim^3 + 3/2*dim^2).
data = addResult( data, 'Backslash', type, dims.*dims, round(2/3*dims.^3 + 3/2*dims.^2), bestTimes );
0283
0284
0285
function data = runMandelbrot( data, reps, type, device, mainProgressTitle, numTasks )
%RUNMANDELBROT  Time a Mandelbrot-set iteration count over a square grid.
%
%   For each element count returned by getTestSizes a gridSize-by-gridSize
%   grid over [-2,0.5] x [-1.25,1.25] is built in class TYPE, with
%   gridSize = round(sqrt(count)).
%
%   DEVICE 'GPU': processMandelbrotElement is applied with arrayfun REPS
%   times and the fastest time is kept; the recorded operation count is
%   12 times the total number of iterations actually performed.
%
%   Any other DEVICE: a vectorised loop of maxIterations steps is timed
%   once; the recorded operation count is 12*numel(grid)*maxIterations.
%
%   Sizes that fail or are skipped are dropped before the result is stored
%   with addResult.

safetyFactor = 3;
sizes = getTestSizes( type, safetyFactor, device );

times = inf( size( sizes ) );
worstTime = 0;
numops = inf( size( sizes ) );
maxIterations = 200;
% Keep the region limits in the requested class so that linspace (and
% therefore the whole computation) runs in that precision.
xRange = cast( [-2, 0.5], type );
yRange = cast( [-1.25, 1.25], type );
onGPU = strcmpi( device, 'GPU' );

% Label order (device, type) matches the other tasks in this file.
progressTitle = sprintf( 'Mandelbrot (%s, %s)', device, type );
progressTotal = sum(sizes);
gpubench.multiWaitbar( progressTitle, 0 );

for ii=1:numel(sizes)
    gridSize = round( sqrt( sizes(ii) ) );

    if tooCloseToTimeout( worstTime, device )
        % Mark as skipped; removed from the results below.
        times(ii) = nan;
        continue;
    end

    % Both branches are guarded so a failure (e.g. running out of memory)
    % drops this size instead of aborting the whole run.
    try
        x = linspace( xRange(1), xRange(2), gridSize );
        y = linspace( yRange(1), yRange(2), gridSize );

        if onGPU
            % Build the grid on the device from the transferred axes.
            [xGrid,yGrid] = meshgrid( gpuArray( x ), gpuArray( y ) );

            for rr=1:reps
                t = tic();
                count = arrayfun( @processMandelbrotElement, xGrid, yGrid, maxIterations );
                elapsedTime = gtoc(t);
                times(ii) = min( times(ii), elapsedTime );
                worstTime = max( worstTime, elapsedTime );
            end

            % 12 operations are attributed to each iteration performed.
            numops(ii) = gather( sum(count(:)*12) );
            clear count;
        else
            [xGrid,yGrid] = meshgrid( x, y );
            z0 = complex(xGrid,yGrid);
            t = tic();
            z = z0;
            count = zeros(size(z));
            for n = 1:maxIterations
                inside = ((real(z).^2 + imag(z).^2) <= 4);
                count = count + inside;
                z = z.*z + z0;
            end
            times(ii) = toc(t);

            % The vectorised loop updates every element on every step.
            numops(ii) = 12*numel(z)*maxIterations;
        end

        % One progress update per size, worth that size's full share, so
        % the bars reach completion on both devices.
        inc = sizes(ii)/progressTotal;
        gpubench.multiWaitbar( progressTitle, 'Increment', inc );
        gpubench.multiWaitbar( mainProgressTitle, 'Increment', inc/numTasks );
    catch
        times(ii) = nan;
    end
end
gpubench.multiWaitbar( progressTitle, 'Close' );

% Drop skipped and failed sizes.
failed = isnan( times );
sizes(failed) = [];
numops(failed) = [];
times(failed) = [];

data = addResult( data, 'Mandelbrot', type, sizes, numops, times );
0380
0381
function elapsedTime = gtoc( timer )
%GTOC  toc that first calls wait(gpuDevice) when that call is usable.
%
%   The first call tries wait(gpuDevice) and remembers whether it
%   succeeded. Later calls repeat the wait only if that first attempt
%   worked. The elapsed time since TIMER (a value from tic) is returned.

persistent canSync;

firstCall = isempty( canSync );
if firstCall
    % Probe once; the attempt itself also serves as this call's wait.
    try
        wait( gpuDevice );
        canSync = true;
    catch
        canSync = false;
    end
end

if ~firstCall && canSync
    wait( gpuDevice );
end

elapsedTime = toc( timer );
0396
0397
function sizes = getTestSizes( type, safetyFactor, device )
%GETTESTSIZES  Element counts (powers of two) to use for a benchmark task.
%
%   The largest count is bounded by the free memory divided by
%   (element size * SAFETYFACTOR); the factor is doubled when DEVICE is
%   'Host'. The result is 2.^(10:2:p), where p is the largest whole power
%   of two not exceeding that bound. An error is raised when the bound is
%   NaN or below 1e6 elements.

bytesPerElement = gpubench.sizeof( type );

isHost = strcmpi( device, 'Host' );
if isHost
    safetyFactor = safetyFactor*2;
end

% The GPU's free memory is used whenever a GPU is available (for host
% runs too); otherwise a fixed 4*2^30 is assumed.
% NOTE(review): FreeMemory is presumably in bytes - confirm.
if parallel.gpu.GPUDevice.isAvailable()
    dev = gpuDevice();
    availableMem = dev.FreeMemory;
else
    availableMem = 4*2^30;
end

elementLimit = floor( availableMem / (bytesPerElement*safetyFactor) );
insufficient = isnan( elementLimit ) || elementLimit < 1e6;
if insufficient
    error( 'gpuBench:NotEnoughMemory', 'Not enough free device memory to run tasks' );
end

% Every second power of two from 2^10 up to the limit.
topExponent = floor( log2( elementLimit ) );
sizes = 2.^(10:2:topExponent);
0422
0423
0424
function stopNow = tooCloseToTimeout( time, device )
%TOOCLOSETOTIMEOUT  Decide whether larger GPU problems should be skipped.
%
%   Returns false when DEVICE is 'Host' (case-insensitive). Otherwise
%   returns true only if the current GPU reports KernelExecutionTimeout
%   and TIME exceeds 0.25.

stopNow = false;
if ~strcmpi( device, 'Host' )
    currentGPU = gpuDevice();
    stopNow = (currentGPU.KernelExecutionTimeout && time>0.25);
end
0437
0438
0439
0440
function count = processMandelbrotElement(x0,y0,maxIterations)
%PROCESSMANDELBROTELEMENT  Iteration count for one point c = x0 + i*y0.
%
%   Starting from z = c, repeats z = z*z + c while fewer than
%   MAXITERATIONS steps have been taken and real(z)^2 + imag(z)^2 <= 4.
%   Returns the number of steps taken.
%
%   Kept to scalar arithmetic and a plain while loop because runMandelbrot
%   applies it element-wise through arrayfun.

c = complex(x0,y0);
z = c;
count = 0;
zr = real(z);
zi = imag(z);
while (count < maxIterations) && ((zr*zr + zi*zi) <= 4)
    count = count + 1;
    z = z*z + c;
    zr = real(z);
    zi = imag(z);
end