%% TIMES (Matrix)
% Benchmark to test CPU and GPU computations and compare these.
%
% Torben Larsen, Aalborg University, Denmark. tl@es.aau.dk. 06-FEB-2010.
% http://www.accelereyes.com/wiki/index.php?title=Torben's_Corner
%
%     0.1:  02-FEB-2010   -Basic functionality.
%     0.2:  03-FEB-2010   -Control of execution/plot via RunBench.
%     0.3:  05-FEB-2010   -Fine tuning of testing procedure.
%     0.4:  06-FEB-2010   -Update of gforce'ing GPU data.
%
% The .m file MUST be named in the following way:
%
%    FCT_GPU_CPU_OS_ATP_PRC.m  >>  FCT_GPU_CPU_OS_M_PRC.m
%
% where:
%
%   FCT:     Function to be benchmarked
%   GPU:     GPU type (9600MGT / Tesla / FX3800 / 9400M / 9800GT / GTX260)
%   CPU:     CPU type (C2D28 / i7x975 / i7x820QM)
%   OS:      Operating system (OSX / Ubuntu / W7 / Vista / RHEL)
%   ATP:     Array type (MUST be M here - Matrix)
%   PRC:     Precision (SP or DP - Single Precision or Double Precision)
%
% Test procedure:
%   0. If you only want to plot previously saved benchmark data, set
%      RunBench=0 below. If you want to run a new benchmark, set
%      RunBench=1 and go to the next step.
%   1. Start a fresh MATLAB.
%   2. Run "ginfo", find the GPU to test, and run "gselect(#)", where
%      # is the number (0,1,2,...) of the GPU to be tested.
%   3. Run the file "FCT_GPU_CPU_OS_M_PRC".
%   4. Once again, run the file "FCT_GPU_CPU_OS_M_PRC".
%   5. The figure "FCT_GPU_CPU_OS_M_PRC.jpg" is now saved and the
%      simulation data is saved in the file "FCT_GPU_CPU_OS_M_PRC.mat".
%      A time stamp is also saved in the file.
%
clear('all');   % remove everything left over from earlier runs

% RunBench selects what happens when a .mat file with results already
% exists for this benchmark:
%   RunBench = 1:  the benchmark is executed anyway and the results are
%                  saved again.
%   RunBench = 0:  the benchmark is skipped; the data in the .mat file is
%                  loaded, plotted, and the plot is saved.
% If no .mat file exists, the benchmark is executed in both cases.
RunBench = 1;


%% FILE NAME EXTRACTION
% The benchmark configuration is encoded in the name of this file as
% FCT_GPU_CPU_OS_ATP_PRC. Split the name at the underscores and pick out
% the six fields.
S = regexp(mfilename, '_', 'split');

% Fail with a clear message if the name does not carry all six fields
% (this also covers an empty mfilename, e.g. when the code is not run
% from a file), instead of an index-out-of-range error further down.
if numel(S) < 6,
    error(['The file name "%s" must have the form ' ...
        'FCT_GPU_CPU_OS_ATP_PRC (six fields separated by "_").'], ...
        mfilename);
end

FCT   = S{1};     % Function to be benchmarked
GPU   = S{2};     % GPU type (9600MGT / Tesla / FX3800 / ...)
CPU   = S{3};     % CPU type (C2D28 / i7x975 / i7x820QM / ...)
OS    = S{4};     % Operating system (OSX / Ubuntu / W7 / ...)
ATP   = S{5};     % Array type (Matrix/Vector)
PRC   = S{6};     % Precision (Single Precision / Double Precision)

% This file only implements the matrix variant of the benchmark.
if ~strcmp(ATP,'M'),
    error('This benchmark file only applies to Matrix analysis');
end


%% LEGENDS FOR CPUS AND GPUS
% Lookup tables: column 1 is the short tag used in the file name, column 2
% is the descriptive text shown in the plot legend. A tag that is not
% listed is used as the legend text unchanged.
CpuTable = { ...
    'C2D28',   'Core 2 Duo (2.8GHz)'; ...
    'i7x975',  'Core i7 975 (3.33GHz)'; ...
    'X5570',   'Xeon X5570 (2.93GHz)'};
GpuTable = { ...
    '9800GT',  'GeForce 9800GT'; ...
    '9400M',   'GeForce 9400M'; ...
    '9600MGT', 'GeForce 9600M GT'; ...
    'FX3800',  'Quadro FX-3800'; ...
    'Tesla',   'C1060 Tesla'; ...
    'GTX260',  'GeForce GTX260'};

% CPU legend: default is the CPU tag itself
L_CPU = CPU;
CpuHit = strcmp(CPU, CpuTable(:,1));
if any(CpuHit)
    L_CPU = CpuTable{CpuHit,2};
end

% GPU legend: default is the GPU tag itself
L_GPU = GPU;
GpuHit = strcmp(GPU, GpuTable(:,1));
if any(GpuHit)
    L_GPU = GpuTable{GpuHit,2};
end


%% PERFORM BENCHMARK IF .MAT FILE DOES NOT EXIST OR IF BENCHMARK MUST RUN
% Results are stored in <Name>.mat. The benchmark is executed when that
% file is missing or when RunBench is set; otherwise the stored results
% are loaded into the workspace.
Name = [FCT '_' GPU '_' CPU '_' OS '_' ATP '_' PRC];
MatFile = [Name '.mat'];
if exist(MatFile, 'file')~=2 || RunBench,
    % Array sizes analyzed - from small to large. Matrices are square.
    Size = [3,5,10,25,50,75,100:100:2500];

    % Precision follows the PRC field of the file name so that the data
    % matches the precision stated in the plot title: 'DP' gives double
    % precision, anything else gives single precision.
    UseDP = strcmp(PRC,'DP');
    if UseDP
        CpuClass = 'double';
    else
        CpuClass = 'single';
    end

    % Set PRNG to ensure same starting state for reproducibility
    RandStream.setDefaultStream(RandStream('mt19937ar','seed',1004397));

    % Time stamp (DD-MM-YYYY and HH.MM)
    Tmp = clock;
    TimeStamp = ['DATE: ' num2str(Tmp(3),'%02i') '-' ...
        num2str(Tmp(2),'%02i') '-' num2str(Tmp(1)) ' ## ' ...
        ' TIME: ' num2str(Tmp(4),'%02i') '.' num2str(Tmp(5),'%02i')];

    % Preallocate the result vectors (one entry per problem size)
    T_CPU = zeros(1, numel(Size));
    T_GPU = zeros(1, numel(Size));

    % Loop-invariant part of the run-count formula
    MaxElems = max(Size)^2;

    % Benchmarking
    for count = 1:numel(Size)
        sz = Size(count);
        fprintf('Problem size:   %4.0f x %4.0f [-]\n', sz, sz);

        % Number of timed repetitions: more runs for small matrices,
        % capped at 200 (10 runs at the largest size).
        NoRuns = min(round(10*MaxElems/sz^2),200);

        % CPU based computations. One untimed evaluation is done first;
        % the reported time is the average over NoRuns evaluations.
        Ac1 = rand(sz,sz,CpuClass);
        Ac2 = rand(sz,sz,CpuClass);
        Bc = Ac1.*Ac2;
        tstart1 = tic;
        for no=1:NoRuns
            Bc = Ac1.*Ac2;
        end
        T_CPU(count) = toc(tstart1)/NoRuns;

        % GPU based computations on the same data.
        if UseDP
            Ag1 = gdouble(Ac1);
            Ag2 = gdouble(Ac2);
        else
            Ag1 = gsingle(Ac1);
            Ag2 = gsingle(Ac2);
        end
        Bg = Ag1.*Ag2;
        % NOTE(review): gforce presumably makes the GPU finish pending
        % work so that the timing covers the actual computation - confirm
        % against the GPU toolbox documentation.
        gforce(Ag1,Ag2,Bg); gforce;
        tstart2 = tic;
        for no=1:NoRuns
            Bg = Ag1.*Ag2;
            gforce(Bg);
        end
        T_GPU(count) = toc(tstart2)/NoRuns;
    end

    % Save results, time stamp and configuration. The function form of
    % save is used so the file name is passed as data rather than being
    % pasted into a command string.
    save(MatFile, 'Size', 'T_CPU', 'T_GPU', 'TimeStamp', ...
        'FCT', 'GPU', 'CPU', 'OS', 'ATP', 'PRC', 'L_CPU', 'L_GPU');
else
    % Reuse the stored results; this overwrites the workspace variables
    % of the same names with the values from the file.
    load(MatFile);
end


%% PLOT RESULTS AND SAVE FIGURE
%==========================================================================
% Figure 1 is reused and emptied. It gets two panels: execution times in
% the upper panel and the CPU/GPU time ratio in the lower panel.
hFig = figure(1);
clf(hFig);

% --- Upper panel: execution time per operation in milliseconds ----------
subplot(2,1,1);
CpuTimeMs = T_CPU*1E3;
GpuTimeMs = T_GPU*1E3;
plot(Size, CpuTimeMs, 'c-*', ...
    Size, GpuTimeMs, 'm-x', 'LineWidth', 2);
grid;

% Title: benchmarked function and precision (single precision unless the
% precision field is 'DP')
if strcmp(PRC,'DP')
    PRCL = 'Double Precision (DP)';
else
    PRCL = 'Single Precision (SP)';
end
F_TTL = [FCT ' - ' PRCL ', Matrix'];
title(F_TTL);

% Axis labels and legend
xlabel('#Rows = #Columns   [-]');
ylabel('Execution time   [ms]');
legend(L_CPU, L_GPU, 'Location', 'NorthWest');

% --- Lower panel: speed-up, i.e. CPU time divided by GPU time -----------
subplot(2,1,2);
SpeedUp = T_CPU ./ T_GPU;
plot(Size, SpeedUp, 'r-*', 'LineWidth', 2);
grid;
xlabel('#Rows = #Columns   [-]');
ylabel('Speed-up   [-]');

% --- Save the figure as <Name>.jpg --------------------------------------
jpgFname = [Name '.jpg'];
print( hFig, '-djpeg99', '-r100', jpgFname );