#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <cuda.h>
#include <arrayfire.h>
using namespace af;
/*
 * Perfectly-matched-layer (PML) configuration.
 *
 * pml_vec[k] == 1 enables the absorbing layer on one side of the grid.
 * As consumed by get_pmlr():
 *   pml_vec[0] - high-index end of the x axis
 *   pml_vec[1] - low-index end of the x axis
 *   pml_vec[2] - high-index end of the z axis
 *   pml_vec[3] - low-index end of the z axis
 */
struct pml_t {
    int npml_x;      /* PML thickness in cells along x (0 disables the x profiles) */
    int npml_z;      /* PML thickness in cells along z (0 disables the z profiles) */
    int pml_vec[4];  /* per-side enable flags, see above */
};
/*
 * Parameters of the dielectric medium.
 *
 * The scalar fields are inputs to medium_params_debye(); that function also
 * fills in `dt` and `gbc` so update_s_debye_TM() can read them later.
 */
struct medium_t {
    float Mstart;     /* 1-based z index where the dielectric starts (used as Mstart-1 in a seq) */
    float epsilon_r;  /* relative permittivity written into the dielectric region */
    float chi_1;      /* numerator factor of the gbc coefficient (chi_1 * dt / tau_0) */
    float tau_0;      /* denominator of the gbc coefficient; also used in the py update */
    float sigma;      /* conductivity written into the dielectric region */
    float dt;         /* time step, stored by medium_params_debye() */
    bool back_bound;  /* when true, the last z column gets sigma = 1e6 */
    array gbc;        /* copy of the gbc coefficient array, stored by medium_params_debye() */
};
/* Source waveform selector (see get_pulse() and compute_pulse()). */
typedef enum {
    P_GAUSS = 1,  /* Gaussian pulse */
    P_SINE  = 2   /* sine wave */
} pchoice_t;

/* How the source value is injected: overwrite (hard) or accumulate (additive). */
typedef enum {
    P_HARD     = 0,
    P_ADDITIVE = 1
} ptype_t;
/*
 * Source (excitation) description.
 *
 * Az, Aw, Ac and `choice` are set by the caller; get_pulse() fills in the
 * remaining fields, and compute_pulse() reads them on every iteration.
 */
struct pulse_t {
    int Az;            /* column term of the source index: ind is built from Nx*Az + Ac - 1 */
    int Aw;            /* source width: ind spans -Aw/2 .. Aw/2 around the centre */
    int Ac;            /* 1-based centre position of the source within a column */
    seq ind;           /* linear index range the source is written to */
    pchoice_t choice;  /* waveform: P_GAUSS or P_SINE */
    ptype_t type;      /* P_HARD overwrites the field, P_ADDITIVE adds to it */
    float stdev;       /* Gaussian width, in iterations */
    float cut_off;     /* iteration count after which the source is no longer driven */
    float t0;          /* Gaussian centre, in iterations */
    float dt;          /* time step (set for the sine source only) */
    float freq_in;     /* sine frequency in Hz (set for the sine source only) */
    float amplitude;   /* peak source value */
};
/*
 * Allocate a host buffer of n floats, all set to 0.
 *
 * The caller owns the buffer and releases it with free(). The returned
 * pointer is never NULL: a negative n or an allocation failure prints a
 * diagnostic and terminates the process, instead of letting a NULL
 * pointer reach the fill code as the unchecked malloc() used to.
 */
static float *cpu_zeros(int n)
{
    if (n < 0) {
        fprintf(stderr, __FILE__":%d: invalid buffer length: %d\n", __LINE__, n);
        exit(EXIT_FAILURE);
    }
    /* Ask for at least one element so a zero-length request still yields
     * a valid pointer that can be passed to free(). */
    size_t count = (n > 0) ? (size_t)n : 1;
    float *vals = (float *)calloc(count, sizeof(*vals));
    if (vals == NULL) {
        fprintf(stderr, __FILE__":%d: out of memory allocating %d floats\n", __LINE__, n);
        exit(EXIT_FAILURE);
    }
    return vals;
}
/*
 * Allocate a host buffer of n floats, all set to 1.
 *
 * The caller owns the buffer and releases it with free(). The returned
 * pointer is never NULL: a negative n or an allocation failure prints a
 * diagnostic and terminates the process, instead of writing through a
 * NULL pointer as the unchecked malloc() used to.
 */
static float *cpu_ones(int n)
{
    if (n < 0) {
        fprintf(stderr, __FILE__":%d: invalid buffer length: %d\n", __LINE__, n);
        exit(EXIT_FAILURE);
    }
    /* Ask for at least one element so a zero-length request still yields
     * a valid pointer that can be passed to free(). */
    size_t count = (n > 0) ? (size_t)n : 1;
    float *vals = (float *)malloc(count * sizeof(*vals));
    if (vals == NULL) {
        fprintf(stderr, __FILE__":%d: out of memory allocating %d floats\n", __LINE__, n);
        exit(EXIT_FAILURE);
    }
    for (int i = 0; i < n; ++i)
        vals[i] = 1;
    return vals;
}
/*
 * Fill in the derived fields of `pulse` from its waveform choice.
 *
 * Inputs read from `pulse`: choice, Az, Aw, Ac.
 * Outputs written to `pulse`:
 *   - P_GAUSS: t0, stdev, cut_off (= 2 * t0)
 *   - P_SINE:  freq_in, cut_off (= cycles / (freq_in * dt)), dt
 *   - always:  amplitude (1), type (P_HARD), ind
 * Any other choice prints a diagnostic and exits the process.
 *
 * Nx is the length of the first array dimension; it is the stride used to
 * turn (Az, Ac) into a linear index.
 */
static void get_pulse(int Nx, float dt, struct pulse_t &pulse)
{
    switch (pulse.choice) {
    case P_GAUSS:
        pulse.t0 = 20;
        pulse.stdev = 6;
        pulse.cut_off = 2 * pulse.t0;
        break;

    case P_SINE: {
        float freq_in_MHz = 3000;
        float n_cycles = 3;  /* number of sine periods to emit */
        pulse.freq_in = 1e6f*freq_in_MHz;
        pulse.cut_off = n_cycles/(pulse.freq_in*dt);
        /* Warn when the cycle count is not a whole number. */
        if (ceil(n_cycles) != n_cycles)
            fprintf(stderr, __FILE__":%d: nonzero sine wave cuttoff\n", __LINE__);
        pulse.dt = dt;
        break;
    }

    default:
        fprintf(stderr, __FILE__":%d: undefined wave type: %d\n", __LINE__, pulse.choice);
        exit(EXIT_FAILURE);
    }

    pulse.amplitude = 1;
    pulse.type = P_HARD;

    /* Linear index of the source centre, then a window of -Aw/2..Aw/2 around it. */
    int centre = Nx*pulse.Az + pulse.Ac - 1;
    pulse.ind = centre + seq(-pulse.Aw/2, pulse.Aw/2);
}
/*
 * Build the per-cell medium coefficient arrays.
 *
 * Outputs (all created with dimensions Nx, Nz):
 *   gb  = sigma * dt / epsilon_0
 *   gbc = chi_1 * dt / tau_0 inside the dielectric region, 0 elsewhere
 *   ga  = 1 / (epsilon_r + gb + gbc), with all four edges forced to 0
 * The medium struct `m` additionally receives a copy of gbc and the time
 * step dt.
 *
 * NOTE(review): the dielectric region's upper bound is Nz - Mstart + 1.
 * That value equals the number of cells from Mstart to Nz, not the last
 * index. Confirm whether the region was meant to run to Nz - 1.
 */
static void medium_params_debye(array &ga, array &gb, array &gbc, int Nx, int Nz, float dt, struct medium_t &m)
{
    float epsilon_0 = 8.8e-12f;

    /* Column range (along the second dimension) occupied by the dielectric. */
    seq slab(m.Mstart-1, Nz-m.Mstart+1);

    /* Relative permittivity: 1 everywhere, epsilon_r inside the slab. */
    array eps_r = ones(Nx,Nz);
    eps_r(span, slab) = m.epsilon_r;

    /* Conductivity: 0 everywhere, sigma inside the slab, optional 1e6 last column. */
    array conductivity = zeros(Nx,Nz);
    conductivity(span, slab) = m.sigma;
    if (m.back_bound)
        conductivity(span,end) = 1e6;

    gb = conductivity * dt / epsilon_0;

    gbc = zeros(Nx,Nz);
    gbc(span, slab) = m.chi_1 * dt / m.tau_0;

    ga = 1/(eps_r + gb + gbc);
    /* Zero the outermost rows and columns of ga. */
    ga(0, span) = 0;
    ga(end, span) = 0;
    ga(span, 0 ) = 0;
    ga(span, end) = 0;

    /* Stash what the polarisation update needs later. */
    m.gbc = gbc;
    m.dt = dt;
}
/*
 * Fill the 1-D PML coefficient profiles for a single axis of length n.
 *
 * g1/g2/g3 and f2/f3 are host buffers of n floats, pre-filled with their
 * defaults by the caller. For k = 0..npml the graded value
 *   xn = 0.333 * ((npml - k) / npml)^3
 * is written at index k (low side) and/or n-k-1 (high side) of the g
 * buffers. The same formula with (npml - k - 0.5) is written at index k
 * and/or n-k-2 of the f buffers.
 *
 * Nothing is written when npml is 0 or when neither side is enabled.
 * If the axis is too short to hold the layer, a diagnostic is printed and
 * the process exits, rather than writing outside the buffers.
 */
static void fill_pml_axis(float *g1, float *g2, float *g3,
                          float *f2, float *f3,
                          int n, int npml, bool low_side, bool high_side)
{
    if (npml == 0 || !(low_side || high_side))
        return;

    /* Highest index touched on the low side is npml; lowest index touched
     * on the high side is n - npml - 2. */
    int min_len = npml + (high_side ? 2 : 1);
    if (npml > 0 && n < min_len) {
        fprintf(stderr, __FILE__":%d: PML of %d cells does not fit in an axis of %d cells\n",
                __LINE__, npml, n);
        exit(EXIT_FAILURE);
    }

    for (int k = 0; k <= npml; ++k) {
        float xnum = (float)npml - k;

        /* Profile sampled at whole cells (g coefficients). */
        float xxn = xnum/npml;
        float xn = .333f*pow(xxn,3);
        if (low_side) {
            g1[k] = xn;
            g2[k] = 1/(1+xn);
            g3[k] = (1-xn)/(1+xn);
        }
        if (high_side) {
            g1[n-k-1] = xn;
            g2[n-k-1] = 1/(1+xn);
            g3[n-k-1] = (1-xn)/(1+xn);
        }

        /* Profile sampled half a cell further in (f coefficients). */
        xxn = (xnum - 0.5f)/npml;
        xn = .333f*pow(xxn,3);
        if (low_side) {
            f2[k] = 1/(1+xn);
            f3[k] = (1-xn)/(1+xn);
        }
        if (high_side) {
            f2[n-k-2] = 1/(1+xn);
            f3[n-k-2] = (1-xn)/(1+xn);
        }
    }
}

/*
 * Compute the PML coefficient arrays for both axes.
 *
 * The gi and fi outputs hold a length-Nx profile tiled Nz times along the
 * second dimension. The gj and fj outputs hold a length-Nz profile tiled
 * Nx times and then transposed. Without a PML the defaults are 0 for the
 * g1 arrays and 1 for all others.
 *
 * Note the parameter order: Nz comes before Nx.
 *
 * p.pml_vec selects the sides: [1] low-x, [0] high-x, [3] low-z, [2] high-z.
 */
static void get_pmlr(array &_gi1, array &_gi2, array &_gi3,
                     array &_gj1, array &_gj2, array &_gj3,
                     array &_fi2, array &_fi3,
                     array &_fj2, array &_fj3,
                     int Nz, int Nx, pml_t p)
{
    float *gi1 = cpu_zeros(Nx), *gi2 = cpu_ones(Nx), *gi3 = cpu_ones(Nx);
    float *gj1 = cpu_zeros(Nz), *gj2 = cpu_ones(Nz), *gj3 = cpu_ones(Nz);
    float *fi2 = cpu_ones(Nx), *fi3 = cpu_ones(Nx);
    float *fj2 = cpu_ones(Nz), *fj3 = cpu_ones(Nz);

    fill_pml_axis(gi1, gi2, gi3, fi2, fi3, Nx, p.npml_x,
                  p.pml_vec[1] == 1, p.pml_vec[0] == 1);
    fill_pml_axis(gj1, gj2, gj3, fj2, fj3, Nz, p.npml_z,
                  p.pml_vec[3] == 1, p.pml_vec[2] == 1);

    /* Upload each profile, expand it to the full grid, and release the host copy. */
    _gi1 = tile(array(Nx, gi1), 1, Nz); free(gi1);
    _gi2 = tile(array(Nx, gi2), 1, Nz); free(gi2);
    _gi3 = tile(array(Nx, gi3), 1, Nz); free(gi3);
    _gj1 = tile(array(Nz, gj1), 1, Nx).T(); free(gj1);
    _gj2 = tile(array(Nz, gj2), 1, Nx).T(); free(gj2);
    _gj3 = tile(array(Nz, gj3), 1, Nx).T(); free(gj3);
    _fi2 = tile(array(Nx, fi2), 1, Nz); free(fi2);
    _fi3 = tile(array(Nx, fi3), 1, Nz); free(fi3);
    _fj2 = tile(array(Nz, fj2), 1, Nx).T(); free(fj2);
    _fj3 = tile(array(Nz, fj3), 1, Nx).T(); free(fj3);
}
/*
 * Inject the source value for iteration T into the field array d.
 *
 * d      - field array; the elements selected by params.ind are updated.
 * T      - current iteration number (the caller passes its loop counter).
 * params - source description prepared by get_pulse().
 *
 * While T < params.cut_off the selected waveform is evaluated:
 *   P_GAUSS: amplitude * exp(-0.5 * ((t0 - T) / stdev)^2)
 *   P_SINE:  amplitude * sin(2 * pi * freq_in * dt * T)
 * A hard source (P_HARD) overwrites d(ind); an additive one accumulates.
 * Once the cut-off is reached (or for an unknown waveform) a hard source
 * is held at 0 and an additive source leaves d untouched.
 */
static void compute_pulse(array &d, float T, struct pulse_t &params)
{
    float cut_off = params.cut_off;
    bool is_hard = (params.type == P_HARD);
    seq ind = params.ind;

    if (params.choice == P_GAUSS && T < cut_off) {
        float pulse = params.amplitude * exp(-0.5f * pow((params.t0-T)/params.stdev,2));
        if (is_hard) d(ind) = pulse;
        else d(ind) += pulse;
    }
    else if (params.choice == P_SINE && T < cut_off) {
        float pulse = params.amplitude * sin(2 * 3.14159f * params.freq_in * params.dt * T);
        if (is_hard) d(ind) = pulse;
        else d(ind) += pulse;
    }
    else {
        /* Source finished: clamp a hard source to zero. */
        if (is_hard) d(ind) = 0;
    }
}
/*
 * Advance the polarisation term py by one time step, in place:
 *
 *   py <- ((1 - r) * py + gbc .* ey) / (1 + r),   r = 0.5 * dt / tau_0
 *
 * dt, tau_0 and gbc are taken from `medium`; ey is read only.
 */
static void update_s_debye_TM(array &py, const array &ey, const struct medium_t &medium)
{
    float half_ratio = 0.5f * medium.dt / medium.tau_0;
    array coupling = medium.gbc;
    py = ((1 - half_ratio) * py + mul(coupling, ey)) / (1 + half_ratio);
}
// Run the finite-difference time-domain simulation for about `time_total`
// seconds of wall-clock time.
//
// console == false: the field magnitude is plotted every iteration and the
//                   iteration rate / progress is printed about once a second.
// console == true:  no plotting; only the best observed iteration rate is
//                   printed once the run finishes.
void fdtd(bool console)
{
// Wall-clock budget for the run, in seconds.
double time_total = 30;
// Grid extent along z and interior grid size before PML cells are added.
float Lz = .4f;
int Nz = 160;
int Nx = 400;
// Cell size from the interior grid; time step derived as ddx / 6e8.
float ddx = Lz/Nz;
float dt = ddx/6e8f;
// PML thickness per axis and per-side enable flags (see get_pmlr for the
// meaning of each pml_vec slot).
struct pml_t pml;
pml.npml_x = 8;
pml.npml_z = 20;
pml.pml_vec[0] = 1;
pml.pml_vec[1] = 1;
pml.pml_vec[2] = 0;
pml.pml_vec[3] = 1;
// Grow the grid by one PML thickness for every enabled side.
Nx += (pml.pml_vec[0] + pml.pml_vec[1])*pml.npml_x;
Nz += (pml.pml_vec[2] + pml.pml_vec[3])*pml.npml_z;
// Dielectric medium: starts (Lz - Ml) into the interior, offset by the z PML.
struct medium_t medium;
float Ml = .3f;
medium.Mstart = ceil((Lz - Ml)/ddx) + pml.npml_z;
medium.epsilon_r = 5;
medium.chi_1 = 30;
medium.tau_0 = 1e-11f;
medium.sigma = .01f;
medium.back_bound = false;
// Source placement: Az is offset by the z PML, Ac is the middle of the
// (already enlarged) x extent.
struct pulse_t pulse;
int Az = 10;
pulse.Az = pml.npml_z + (Az - 1);
pulse.Aw = 8;
pulse.Ac = Nx/2;
pulse.choice = P_SINE;
// PML coefficient arrays. Note get_pmlr takes Nz before Nx.
array gi1, gi2, gi3;
array gj1, gj2, gj3;
array fi2, fi3, fj2, fj3;
get_pmlr(gi1,gi2,gi3, gj1,gj2,gj3, fi2,fi3, fj2,fj3, Nz,Nx,pml);
// Medium coefficient arrays; also stores dt and gbc inside `medium`.
array ga, gb, gbc;
medium_params_debye(ga, gb, gbc, Nx, Nz, dt, medium);
// Derive the remaining source parameters (cut-off, index range, ...).
get_pulse(Nx, dt, pulse);
// All field and accumulator arrays start out as zeros of size (Nx, Nz).
array ZERO = zeros(Nx,Nz);
array hx = ZERO, hz = ZERO;
// NOTE(review): this `ey` is shadowed by the `array ey` declared inside the
// loop below and is never read afterwards.
array ey = ZERO, sy = ZERO, iy = ZERO;
array ihx = ZERO, ihz = ZERO;
array dy = ZERO, dy_hat = ZERO;
array dyhz = ZERO, dxhx = ZERO;
array cey = ZERO, cez = ZERO;
printf("Finite-difference time-domain simulation of electromagnetic field\n");
// time_start measures the whole run; time_last measures the current
// reporting interval.
timer time_start, time_last;
time_start = time_last = timer::tic();
int iter = 0, iter_last = 0;
double max_rate = 0;
while (true) {
// Differences of hz along the first dimension and of hx along the second,
// stored with a one-element offset so the first row / column stays 0.
dyhz(seq(1,end),span) = diff1(hz);
dxhx(span, seq(1,end)) = diff1(hx, 1);
// Update dy through the two PML coefficient stages (gi*, then gj*).
array dy_hat_temp = mul(gi3, dy_hat) + mul(mul(0.5f, gi2), (dyhz - dxhx));
dy = mul(gj3, dy) + mul(gj2,(dy_hat_temp - dy_hat));
dy_hat = dy_hat_temp;
// Inject the source for this iteration; iter is incremented here.
compute_pulse(dy, (float)iter++, pulse);
// Compute ey from dy and the medium accumulators, then zero its edges.
array ey = mul(ga, (dy - iy - sy));
ey(0, span) = 0;
ey(end, span) = 0;
ey(span, 0 ) = 0;
ey(span, end) = 0;
// Update the conduction accumulator and the polarisation term.
iy = iy + mul(gb, ey);
update_s_debye_TM(sy,ey,medium);
// Differences of ey (and of -ey along the second dimension); the last
// row / column of cez / cey is left unchanged (initially 0).
cez(seq(0,end-1), span) = diff1(ey);
cey(span, seq(0,end-1)) = diff1(-ey, 1);
// Update hz and hx with their PML accumulators.
ihz += mul(gj1, cez);
hz = mul(fi3 , hz) + mul(fi2 , (0.5f * cez + ihz));
ihx += mul(gi1, cey);
hx = mul(fj3 , hx) + mul(fj2 , (0.5f * cey + ihx));
// Only the magnitude is displayed.
ey = abs(ey);
if (!console) {
subfigure(2,2,2); imgplot(ey);
subfigure(2,2,4); plot(ey);
subfigure(2,1,1); plot3d(ey);
}
// About once per second: compute the iteration rate, remember the best
// one, and stop when the wall-clock budget is used up.
double elapsed = timer::toc(time_last);
if (elapsed > 1) {
double rate = (iter - iter_last) / elapsed;
double total_elapsed = timer::toc(time_start);
time_last = timer::tic();
iter_last = iter;
if (rate > max_rate)
max_rate = rate;
if (total_elapsed >= time_total)
break;
if (!console)
printf(" iterations per second: %.0f (progress %.0f%%)\n",
rate, 100.0f * total_elapsed / time_total);
}
}
if (console)
printf(" ### fdtd_example: %f Iterations per second\n", max_rate);
}
/*
 * Entry point.
 *
 * Usage: fdtd [-]
 *   no argument  - run with plotting and progress output
 *   "-"          - console mode (no plotting, print the best iteration rate)
 * Any other argument list prints the usage line and returns -1.
 *
 * Returns 0 on success and 1 when the simulation throws an af::exception,
 * so callers and scripts can detect the failure.
 */
int main(int argc, char* argv[])
{
    bool console = false;
    if (argc == 2 && !strcmp(argv[1],"-")) {
        console = true;
    } else if (argc >= 2) {
        /* Either too many arguments or a single unrecognised one. */
        printf("Usage: fdtd [-]\n");
        return -1;
    }

    int status = 0;
    try {
        fdtd(console);
    } catch (af::exception& e) {
        fprintf(stderr, "%s\n", e.what());
        status = 1;  /* report the failure through the exit code */
    }

#ifdef WIN32 // pause in Windows
    if (!console) {
        printf("hit [enter]...");
        getchar();
    }
#endif
    return status;
}