%% MAX (Single Precision Vector)
% Benchmarks MAX on a single-precision vector and compares CPU and GPU execution times.
%
% Torben Larsen, Aalborg University, Denmark. tl@es.aau.dk. 01-FEB-2010.
% http://www.accelereyes.com/wiki/index.php?title=Torbens_Corner
%
%     0.1:  01-FEB-2010   -Basic functionality. 
% 
% Remember to set the desired GPU immediately after starting MATLAB.
% Otherwise the default GPU will be used for the GPU part of the test.
%
% A '.mat' file is saved which contains the simulation information.
% In addition the date and time (TimeStamp) is saved in the file.
clear all;                       % Reset the workspace before the benchmark


%% USER INPUT
% Identification tags. They are concatenated into the output file name and
% used for the plot title and legend later in this script.
FCT   = 'MAX';                   % Name of the benchmarked function
GPU   = 'FX3800';                % GPU tag (9600MGT / Tesla / FX3800 / 9400M)
OS    = 'W7';                    % OS tag (OSX / Ubuntu / W7 / VST)
ATP   = 'V';                     % Array type: 'V' is titled "Vector", anything else "Square Matrix"
PRC   = 'SP';                    % Precision: 'SP' is titled single precision, anything else double
L_CPU = 'Core i7-975 3.33GHz';   % Legend entry for the CPU curve
L_GPU = 'Quadro FX-3800';        % Legend entry for the GPU curve


% Problem sizes (number of vector elements), ordered from small to large.
Size = 1E6*[0.1, 0.25, 0.5, 0.75, 1, 1.5, 2:15];


%% INITIALIZATION
% Seed the global PRNG with a fixed Mersenne Twister stream so that every
% run draws the same random test data.
PrngStream = RandStream('mt19937ar','seed',1004397);
try
    % R2011a and later: setDefaultStream was renamed to setGlobalStream.
    RandStream.setGlobalStream(PrngStream);
catch
    % Older releases only provide setDefaultStream.
    RandStream.setDefaultStream(PrngStream);
end

% Time stamp of the form 'DATE: dd-mm-yyyy ##  TIME: hh.mm'
% (clock returns [year month day hour minute seconds]).
Tmp = clock;
TimeStamp = sprintf('DATE: %02i-%02i-%d ##  TIME: %02i.%02i', ...
    Tmp(3), Tmp(2), Tmp(1), Tmp(4), Tmp(5));


%% Benchmarking
%==========================================================================
% For every problem size, time max() on a CPU single-precision vector and
% on its GPU (gsingle) copy. The stored value is the mean time per call.

% Preallocate the timing results: one entry per element of Size
T_CPU = zeros(1,numel(Size));
T_GPU = zeros(1,numel(Size));

count = 0;
for sz=Size
    fprintf('Problem size:   1 x %5.2f [M-]\n', sz/1E6);
    
    % Number of repetitions: 10*(max(Size)/sz)^2, capped at 200, so small
    % problems are repeated more often than large ones.
    NoRuns = min(round(10*max(Size)^2/sz^2),200);
    
    % Index into the result vectors
    count = count + 1;
  
    % CPU based computations
    Ac = randn(1,sz,'single');
    Bc = single(0.0);                            % CPU-side result variable
    tstart1 = tic;
    for no=1:NoRuns
        Bc = max(Ac);                            % Compute MAX
    end
    T_CPU(count) = toc(tstart1)/NoRuns;          % Time in s
 
    % GPU based computations
    Ag = gsingle(Ac);                            % Same data as a GPU array
    Bg = gsingle(0.0);
    tstart2 = tic;
    for no=1:NoRuns
        Bg = max(Ag);                            % Compute MAX
        gforce(Bg);                              % Force computation of Bg
    end
    T_GPU(count) = toc(tstart2)/NoRuns;          % Time in s
end


%% SAVE DATA
% The file name is built from the identification tags, e.g.
% 'MAX_FX3800_W7_VSP.mat'. The functional form of save is used instead of
% eval so that tags containing spaces or special characters cannot break
% (or alter) the command.
Name = [FCT '_' GPU '_' OS '_' ATP PRC];
save([Name '.mat'], 'Size', 'T_CPU', 'T_GPU', 'TimeStamp', ...
    'FCT', 'GPU', 'OS', 'ATP', 'PRC', 'L_CPU', 'L_GPU');


%% PLOT RESULTS
%==========================================================================
% Set up title. strcmp is used rather than == because == compares char
% arrays element by element: it errors when the lengths differ and gives a
% vector result for multi-character strings.
if strcmp(ATP,'V')
    ATPL = 'Vector';
else
    ATPL = 'Square Matrix';
end
if strcmp(PRC,'SP')
    PRCL = 'Single Precision (SP)';
else
    PRCL = 'Double Precision (DP)';
end
F_TTL = [FCT ' - ' PRCL ', ' ATPL];

% Make figure
figure(2); clf(2);

% Upper panel: execution time per call in ms for CPU and GPU
subplot(2,1,1);
plot(Size/1E6, T_CPU*1E3, 'c-*', ...
    Size/1E6, T_GPU*1E3, 'm-x', 'LineWidth', 2);
grid;
title(F_TTL);
xlabel('Vector size   [M-]');
ylabel('Execution time   [ms]');
legend(L_CPU, L_GPU, 'Location', 'NorthWest');
 
% Lower panel: speed-up, i.e. CPU time divided by GPU time
subplot(2,1,2);
plot(Size/1E6, T_CPU ./ T_GPU, 'r-*', 'LineWidth', 2);
grid;
xlabel('Vector size   [M-]');
ylabel('Speed-up   [-]');


%% SAVE FIGURES
% Export the current figure as a JPEG (quality 99, 100 dpi) named after
% the data file.
jpgFname = sprintf('%s.jpg', Name);
print(gcf, '-djpeg99', '-r100', jpgFname);