// Do more. Code less. Free software for GPU computing.
// <scroll to top>

// examples/pi/pi_cuda.cu

/*-----------------------------------------------------------------------*/

/*   Product:   ARRAYFIRE                                                */
/*   Example:   pi_example                                               */

/*   Copyright (c) AccelerEyes LLC. All rights reserved.                 */
/*   See http://www.accelereyes.com/eula for details.                    */

/*   This software is distributed WITHOUT ANY WARRANTY; without even     */
/*   the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR */
/*   PURPOSE.  See the above copyright notices for more information.     */

/*-----------------------------------------------------------------------*/

// Monte Carlo estimation of pi
#include <stdio.h>
#include <cuda.h>
#include <arrayfire.h>

// Integer ceiling division: for x >= 0 and y > 0, yields the smallest q
// with q * y >= x. Every argument and the whole expansion are parenthesized
// so the macro composes correctly inside larger expressions (for example
// divup(a, b) * 2 or divup(a, b + c)). Note that x and y are still
// evaluated more than once, so avoid arguments with side effects.
#define divup(x,y) (((x) % (y)) ? (((x) + (y) - 1) / (y)) : ((x) / (y)))

// Runs a CUDA runtime call exactly once and terminates the program if it
// did not return cudaSuccess. On failure it prints
// "<file>:<line>: cuda error: <message> (<code>)" to stderr and calls
// exit(-1). The do/while(0) wrapper makes the macro act as one statement.
#define CUDA(call) do {                                              \
        const cudaError_t status_ = (call);                          \
        if (status_ != cudaSuccess) {                                \
            fprintf(stderr, __FILE__":%d: cuda error: %s (%d)\n",    \
                    __LINE__, cudaGetErrorString(status_), status_); \
            exit(-1);                                                \
        }                                                            \
    } while (0)

// Runs an ArrayFire call exactly once and terminates the program if it did
// not return AF_SUCCESS. On failure it prints
// "<file>:<line>: arrayfire error: <message> (<code>)" to stderr, taking the
// message from af_errstr(), and calls exit(-1). The do/while(0) wrapper
// makes the macro act as one statement.
#define AF(call) do {                                                   \
        const afError status_ = (call);                                 \
        if (status_ != AF_SUCCESS) {                                    \
            fprintf(stderr, __FILE__":%d: arrayfire error: %s (%d)\n",  \
                    __LINE__, af_errstr(), status_);                    \
            exit(-1);                                                   \
        }                                                               \
    } while (0)

// Flags which sample points lie inside (or on) the unit circle.
//
//   n        : number of points; threads whose flat index is >= n do nothing
//   d_inside : output, d_inside[i] is 1.0f when x*x + y*y <= 1, else 0.0f
//   d_x, d_y : input coordinates of point i
//
// Launch layout: one thread per point with 1D blocks (only threadIdx.x is
// used). The grid may be 1D or 2D; blocks are flattened row-major as
// blockIdx.y * gridDim.x + blockIdx.x. All three pointers are dereferenced
// on the device, so they must refer to device-accessible memory.
__global__
void test_inside(int n, float *d_inside, float *d_x, float *d_y)
{
    // Build the flat index in 64 bits. The grid is normally rounded up past
    // n, and for n close to INT_MAX a 32-bit index narrowed to int could
    // wrap negative, slip past the bounds check and access out of range.
    const unsigned long long block =
        (unsigned long long)blockIdx.y * gridDim.x + blockIdx.x;
    const unsigned long long idx = block * blockDim.x + threadIdx.x;
    if (n <= 0 || idx >= (unsigned long long)n) return;

    const float x = d_x[idx];
    const float y = d_y[idx];

    // Compare the squared distance against 1: same inside/outside test as
    // sqrt(x*x + y*y) <= 1 but without the square root.
    d_inside[idx] = (x * x + y * y <= 1.0f) ? 1.0f : 0.0f;
}

// Monte Carlo estimate of pi.
//
// Fills two device buffers with n values each via af_randu_S (presumably
// uniform random samples in [0,1) -- confirm against the ArrayFire docs),
// treats them as (x, y) points, flags the points that fall inside the unit
// circle with the test_inside kernel, sums the flags with af_sum_vector_S,
// and prints 4 * inside / n.
//
// Any failing CUDA or ArrayFire call terminates the program through the
// CUDA()/AF() macros. On WIN32 builds the program waits for [enter] before
// exiting unless the first command-line argument starts with '-'.
int main(int argc, char *argv[])
{
    const int n = 2000000;
    printf("using %d samples\n", n);

    // random scattering of (x,y) points
    float *d_x = NULL, *d_y = NULL;
    const size_t bytes = sizeof(float) * n;
    CUDA(cudaMalloc(&d_x, bytes));
    CUDA(cudaMalloc(&d_y, bytes));
    AF(af_randu_S(d_x,  n));
    AF(af_randu_S(d_y,  n));

    // test if inside unit circle
    float *d_inside = NULL;
    CUDA(cudaMalloc((void **)&d_inside, bytes));

    // One thread per sample. If the 1D block count exceeds 65535, fold the
    // grid into 2D; the kernel flattens (blockIdx.x, blockIdx.y) back into
    // a linear index and bounds-checks it against n.
    dim3 threads(256);
    dim3 blocks(divup(n, threads.x));
    if (blocks.x > 65535) {
        blocks.y = divup(blocks.x, 65535);
        blocks.x = divup(blocks.x, blocks.y);
    }

    test_inside<<<blocks, threads>>>(n, d_inside, d_x, d_y);

    // A kernel launch returns no status, so always fetch launch errors
    // (e.g. an invalid configuration); otherwise the reduction below
    // would silently run on uninitialized memory.
    CUDA(cudaGetLastError());

    #ifdef DEBUG // For debugging purposes
    // Block until the kernel finishes so execution errors surface here
    CUDA(cudaDeviceSynchronize());
    #endif

    // count how many fell inside
    float h_result;
    AF(af_sum_vector_S(&h_result, n, d_inside));

    // approximate PI
    float pi = 4.0f * h_result / n;
    printf("pi %f\n", pi);

    CUDA(cudaFree(d_x));
    CUDA(cudaFree(d_y));
    CUDA(cudaFree(d_inside));

    #ifdef WIN32 // pause in Windows
    if (!(argc == 2 && argv[1][0] == '-')) {
        printf("hit [enter]...");
        getchar();
    }
    #endif
    return 0;
}
// All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines