function [ Speedup ] = det_MD( N )
%DET_MD Time det() on an N-by-N double matrix on the CPU and on the GPU.
%   Speedup = det_MD(N) fills an N-by-N matrix with normally distributed
%   random numbers (fixed seed), times repeated calls of det() on the CPU
%   copy and on the GPU copy of that matrix, prints one result line and
%   returns the ratio (CPU time per call)/(GPU time per call).
%
%   Input:
%     N        - matrix dimension; the test matrix is N-by-N.
%
%   Output:
%     Speedup  - T_CPU/T_GPU, where each T is the loop-overhead-corrected
%                wall-clock time of a single det() call.
%
%   Each side is measured by growing the repetition count until one timed
%   loop lasts at least Tmin seconds; an identical untimed loop is run
%   first as warm-up.
%
%   NOTE(review): gdouble, gsync, geval and gpu_entry are not defined in
%   this file; they presumably come from a GPU toolbox on the path -
%   confirm before running.

%% INITIALIZATION
% Minimum execution time (seconds, as measured by tic/toc) that a timed
% benchmark loop must reach before its result is accepted
Tmin = 5;

% Max. number of repetitions in loop timing estimation. The max.
% number that can be handled in loops is MaxAvg = 2147483647.
MaxAvg = 1E9;

% Set PRNG to ensure same starting state for reproducibility.
% RandStream.setDefaultStream was superseded by RandStream.setGlobalStream;
% try the current name first and fall back to the legacy one so the
% benchmark runs on both old and new MATLAB releases.
rngStream = RandStream('mt19937ar','seed',1004397);
try
    RandStream.setGlobalStream(rngStream);
catch %#ok<CTCH>
    RandStream.setDefaultStream(rngStream);
end

% Define arrays
Ac = randn(N,N,'double');   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% HERE %%%

% Print current matrix size
fprintf('%4.0f x %4.0f   ', N, N);



%% CPU BENCHMARK
% CPU test begin --------------------------------------------------
whilecount = 0;
Telap_cpu = -1;             % -1 marks "no measurement taken yet"
while Telap_cpu < Tmin
    whilecount = whilecount + 1;
    if Telap_cpu == -1
        % First pass: time two calls to estimate the cost of one call and
        % derive a repetition count aiming at 1.5*Tmin of total run time.
        t1 = tic;
        Rc = det(Ac);         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% HERE %%%
        Rc = det(Ac);         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% HERE %%%
        Telap_cpu = toc(t1)/2;
        NoRunsCPU = ceil(1.5*Tmin/Telap_cpu);
    else
        % Previous timed loop was shorter than Tmin: rescale the count
        % from the measured time, with extra head-room per retry.
        NoRunsCPU = ceil(1.5*whilecount*NoRunsCPU/Telap_cpu*Tmin);
    end

    % Warm-up (same loop as the benchmark, not timed)
    for no=1:NoRunsCPU
        Rc = det(Ac);         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% HERE %%%
    end

    % Benchmark
    tstart1 = tic;
    for no=1:NoRunsCPU
        Rc = det(Ac);         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% HERE %%%
    end
    Telap_cpu = toc(tstart1);
end

% Determine time for CPU loop alone
T_CPU_Loop = emptyLoopTime(NoRunsCPU, MaxAvg);

% Compute CPU times. The per-call time is floored at 2.5E-10 so that it
% stays positive even if the loop-overhead estimate exceeds the measured
% time; this keeps the speed-up ratio below well defined.
T_CPU = max((Telap_cpu-T_CPU_Loop)/NoRunsCPU,2.5E-10);
T_CPU_tot = Telap_cpu;
fprintf('  |  T_CPU: %6.1f,', T_CPU_tot);
% CPU test end   --------------------------------------------------



%% GPU BENCHMARK
% GPU test begin --------------------------------------------------
% NOTE(review): presumably converts/copies Ac to a GPU double array - confirm
Ag = gdouble(Ac);

whilecount = 0;
Telap_gpu = -1;             % -1 marks "no measurement taken yet"
while Telap_gpu < Tmin
    whilecount = whilecount + 1;
    if Telap_gpu == -1
        % First pass: time two calls to estimate the cost of one call.
        % NOTE(review): gsync/geval presumably synchronise with the GPU
        % and force evaluation of the result - confirm against the
        % toolbox documentation.
        gsync;
        t1 = tic;
        Rg = det(Ag);         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% HERE %%%
        geval(Rg);
        Rg = det(Ag);         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% HERE %%%
        geval(Rg);
        gsync;
        Telap_gpu = toc(t1)/2;
        NoRunsGPU = ceil(1.5*Tmin/Telap_gpu);
    else
        % Previous timed loop was shorter than Tmin: rescale the count
        % from the measured time, with extra head-room per retry.
        NoRunsGPU = ceil(1.5*whilecount*NoRunsGPU/Telap_gpu*Tmin);
    end

    % Warm-up (same loop as the benchmark, not timed)
    gsync;
    for no=1:NoRunsGPU
        Rg = det(Ag);         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% HERE %%%
        geval(Rg);
    end

    % Benchmark
    gsync;
    tstart1 = tic;
    for no=1:NoRunsGPU
        Rg = det(Ag);         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% HERE %%%
        geval(Rg);
    end
    gsync;
    Telap_gpu = toc(tstart1);
end

% Determine time for GPU loop alone
T_GPU_Loop = emptyLoopTime(NoRunsGPU, MaxAvg);

% Compute GPU times (same positive floor as on the CPU side)
T_GPU = max((Telap_gpu-T_GPU_Loop)/NoRunsGPU,2.5E-10);
T_GPU_tot = Telap_gpu;
fprintf('   T_GPU: %6.1f', T_GPU_tot);

% Speed-up
Speedup = T_CPU/T_GPU;
fprintf('   |   Speed-up (det-MD):       %10.4f', Speedup);

% Memory
% NOTE(review): the meaning of the argument 13 is not visible here; the
% returned struct is only used for its gpu_free field, which is presumably
% a byte count given the division by 1E6 and the "MB" label - confirm.
gpu_info = gpu_entry(13);
Mem_MB = gpu_info.gpu_free/1E6;
clear gpu_hook;
fprintf('   |   Mem free [MB]:  %6.1f\n', Mem_MB);
% GPU test end   --------------------------------------------------

end


function T_Loop = emptyLoopTime( NoRuns, MaxAvg )
%EMPTYLOOPTIME Average wall-clock time of an empty for-loop of NoRuns passes.
%   The empty loop is repeated RPT times (at most 5E3, and limited so that
%   RPT*NoRuns stays around MaxAvg total iterations) and the elapsed time
%   is divided by RPT, giving the overhead of one NoRuns-iteration loop.
RPT = min(5E3,ceil(MaxAvg/NoRuns));
tstart = tic;
for AvgNo=1:RPT
    for no=1:NoRuns
    end
end
T_Loop = toc(tstart)/RPT;
end