Matrix multiply: Device pointer interface |
- Parameters:
-
| TRANSA | Must be 'N' indicating non-transposed |
| TRANSB | Must be 'N' indicating non-transposed. |
| M | Rows of A and C |
| N | Columns of B and C |
| K | Columns of A and rows of B |
| ALPHA | Must be 1.0 |
| [in] | d_A | Left matrix (A). |
| [in] | d_B | Right matrix (B). |
| [out] | d_C | Output matrix (C). |
| BETA | Must be 0.0 |
| batch_A | Number of tiles in matrix A |
| batch_B | Number of tiles in matrix B |
|
| afError | af_gemm_SS (char TRANSA, char TRANSB, int M, int N, int K, float ALPHA, const float *d_A, int LDA, const float *d_B, int LDB, float BETA, float *d_C, int LDC, unsigned batch_A=0, unsigned batch_B=0) |
| | Single precision, Real matrix multiply.
|
| afError | af_gemm_DD (char TRANSA, char TRANSB, int M, int N, int K, double ALPHA, const double *d_A, int LDA, const double *d_B, int LDB, double BETA, double *d_C, int LDC, unsigned batch_A=0, unsigned batch_B=0) |
| | Double precision, Real matrix multiply.
|
| afError | af_gemm_CC (char TRANSA, char TRANSB, int M, int N, int K, cuComplex ALPHA, const cuComplex *d_A, int LDA, const cuComplex *d_B, int LDB, cuComplex BETA, cuComplex *d_C, int LDC, unsigned batch_A=0, unsigned batch_B=0) |
| | Single precision, Complex matrix multiply.
|
| afError | af_gemm_ZZ (char TRANSA, char TRANSB, int M, int N, int K, cuDoubleComplex ALPHA, const cuDoubleComplex *d_A, int LDA, const cuDoubleComplex *d_B, int LDB, cuDoubleComplex BETA, cuDoubleComplex *d_C, int LDC, unsigned batch_A=0, unsigned batch_B=0) |
| | Double precision, Complex matrix multiply.
|
| afError | af_matmul_SS (const float *d_A, const float *d_B, float *d_C, int M, int N, int K, unsigned batch_A=0, unsigned batch_B=0) |
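| | Mixed precision, mixed complexity matrix multiply. This description applies to the whole af_matmul_XY family: the first suffix letter gives the element type of A and the second that of B (S = float, D = double, C = cuComplex, Z = cuDoubleComplex).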
|
| afError | af_matmul_DS (const double *d_A, const float *d_B, float *d_C, int M, int N, int K, unsigned batch_A=0, unsigned batch_B=0) |
| afError | af_matmul_SD (const float *d_A, const double *d_B, float *d_C, int M, int N, int K, unsigned batch_A=0, unsigned batch_B=0) |
| afError | af_matmul_DD (const double *d_A, const double *d_B, double *d_C, int M, int N, int K, unsigned batch_A=0, unsigned batch_B=0) |
| afError | af_matmul_CS (const cuComplex *d_A, const float *d_B, cuComplex *d_C, int M, int N, int K, unsigned batch_A=0, unsigned batch_B=0) |
| afError | af_matmul_ZS (const cuDoubleComplex *d_A, const float *d_B, cuComplex *d_C, int M, int N, int K, unsigned batch_A=0, unsigned batch_B=0) |
| afError | af_matmul_CD (const cuComplex *d_A, const double *d_B, cuComplex *d_C, int M, int N, int K, unsigned batch_A=0, unsigned batch_B=0) |
| afError | af_matmul_ZD (const cuDoubleComplex *d_A, const double *d_B, cuDoubleComplex *d_C, int M, int N, int K, unsigned batch_A=0, unsigned batch_B=0) |
| afError | af_matmul_SC (const float *d_A, const cuComplex *d_B, cuComplex *d_C, int M, int N, int K, unsigned batch_A=0, unsigned batch_B=0) |
| afError | af_matmul_DC (const double *d_A, const cuComplex *d_B, cuComplex *d_C, int M, int N, int K, unsigned batch_A=0, unsigned batch_B=0) |
| afError | af_matmul_SZ (const float *d_A, const cuDoubleComplex *d_B, cuComplex *d_C, int M, int N, int K, unsigned batch_A=0, unsigned batch_B=0) |
| afError | af_matmul_DZ (const double *d_A, const cuDoubleComplex *d_B, cuDoubleComplex *d_C, int M, int N, int K, unsigned batch_A=0, unsigned batch_B=0) |
| afError | af_matmul_CC (const cuComplex *d_A, const cuComplex *d_B, cuComplex *d_C, int M, int N, int K, unsigned batch_A=0, unsigned batch_B=0) |
| afError | af_matmul_ZC (const cuDoubleComplex *d_A, const cuComplex *d_B, cuComplex *d_C, int M, int N, int K, unsigned batch_A=0, unsigned batch_B=0) |
| afError | af_matmul_CZ (const cuComplex *d_A, const cuDoubleComplex *d_B, cuComplex *d_C, int M, int N, int K, unsigned batch_A=0, unsigned batch_B=0) |
| afError | af_matmul_ZZ (const cuDoubleComplex *d_A, const cuDoubleComplex *d_B, cuDoubleComplex *d_C, int M, int N, int K, unsigned batch_A=0, unsigned batch_B=0) |