00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef __AF_MATRIX_H
00010 #define __AF_MATRIX_H
00011
00012 #include <cuComplex.h>
00013 #include "defines.h"
00014
00015
/*
 * BASE_ONE is appended to the trailing `bool base_one` parameter of the
 * af_sort_S/D/I/U prototypes declared later in this header.
 *  - Compiled as C++: expands to `=false`, giving that parameter a default
 *    argument.
 *  - Compiled as C:   expands to nothing (C has no default arguments), so
 *    the parameter must always be passed explicitly.
 */
00016 #ifdef __cplusplus
00017 #define BASE_ONE =false
00018 #else
00019 #define BASE_ONE
00020 #endif
00021
/*
 * C++-only interface.
 *
 * Everything in this namespace uses C++ constructs (namespace, class,
 * references, default arguments), so it is hidden from C translation units.
 * The rest of this header already switches on __cplusplus (BASE_ONE and the
 * extern "C" wrapper), i.e. it is meant to be includable from C as well.
 */
#ifdef __cplusplus
00022 namespace af {
00023
// Forward declarations only; the full class definitions are not part of this
// header.
00024 class dim4;
00025 class array;
00026
00029
// tile, count overload: takes the source array and up to three unsigned
// counts; d1 and d2 default to 1. Returns a new array by value.
// NOTE(review): presumably d0/d1/d2 are repetition counts per dimension --
// confirm against the implementation.
00038 AFAPI array tile(const array &A, unsigned d0, unsigned d1=1, unsigned d2=1);
00039
// tile, dim4 overload: same entry point with the counts packed in a dim4.
00047 AFAPI array tile(const array &A, const dim4 &dims);
00048
// flat: takes one array and returns an array by value.
// NOTE(review): the name suggests flattening to a single dimension -- confirm.
00056 AFAPI array flat(const array &A);
00058
00061
// grid: x and y are non-const references (the only way this void function can
// hand results back); v1 and v2 are read-only inputs.
00072 AFAPI void grid(array& x, array& y, const array& v1, const array& v2);
00074
00077
// shift, scalar overload: one signed offset per dimension, each defaulting
// to 0.
00087 AFAPI array shift(const array& in, int dim0=0, int dim1=0, int dim2=0, int dim3=0);
00088
// shift, array overload: the offsets are supplied as an array.
00095 AFAPI array shift(const array& in, const array& shift);
00096
// reorder: four int arguments, each defaulting to -1.
// NOTE(review): the meaning of the -1 default (e.g. "leave this dimension
// where it is") is not visible in this header -- confirm.
00105 AFAPI array reorder(const array& in, int dim0=-1, int dim1=-1, int dim2=-1, int dim3=-1);
00106
00107
00109
00110
00111
00114
// sort, values only: returns the result by value. dim defaults to -1 and
// isdescending to false.
// NOTE(review): what dim == -1 selects is not visible here -- confirm.
00121 AFAPI array sort(const array& in, int dim=-1, bool isdescending=false);
00122
// sort, values + indices: results are written through the `values` and
// `indices` references; same dim / isdescending defaults as above.
00131 AFAPI void sort(array& values, array& indices, const array& in, int dim=-1, bool isdescending=false);
00132
// sort, key/value form: reads `keys` and `values`, writes `outkeys` and
// `outvalues`. This overload has no dim parameter.
00141 AFAPI void sort(array& outkeys, array& outvalues, const array& keys, const array& values, bool isdescending=false);
00142
00143
00144
// sortdim: takes an input array and a dimension index; returns by value.
00150 AFAPI array sortdim(const array& in, int dim);
00151
// sortdim, ordered overload: additionally takes norder ints at h_order.
// NOTE(review): the h_ prefix suggests a host-side pointer -- confirm.
00159 AFAPI array sortdim(const array& in, int dim, unsigned norder, const int* h_order);
00160
00161
00163
00164 }
#endif /* __cplusplus */
00165
00166 #ifdef __cplusplus
00167 extern "C" {
00168 #endif
00169
00185
/*
 * af_ctranspose_C / af_ctranspose_Z
 *
 * One entry point per complex element type: C = cuComplex,
 * Z = cuDoubleComplex.
 *
 * d_out  output buffer (written)
 * w, h   two unsigned extents
 * d_in   input buffer (read-only: const-qualified)
 * batch  unsigned batch count
 * Returns an afError status code.
 *
 * NOTE(review): the name suggests a conjugate transpose and the d_ prefix
 * suggests device memory; neither is established by this header -- confirm
 * against the implementation.
 */
00186 AFAPI afError af_ctranspose_C(cuComplex *d_out,
00187 unsigned w, unsigned h,
00188 const cuComplex *d_in, unsigned batch);
00190 AFAPI afError af_ctranspose_Z(cuDoubleComplex *d_out,
00191 unsigned w, unsigned h,
00192 const cuDoubleComplex *d_in, unsigned batch);
00194
00200
/*
 * af_transpose_<T>
 *
 * Same signature for every element type; the suffix selects the type:
 *   B = bool, I = int, U = unsigned, S = float, D = double,
 *   C = cuComplex, Z = cuDoubleComplex.
 *
 * d_out  output buffer (written)
 * w, h   two unsigned extents
 * d_in   input buffer (read-only: const-qualified)
 * batch  unsigned batch count
 * Returns an afError status code.
 *
 * NOTE(review): the d_ prefix presumably marks device pointers, and whether
 * w/h describe the input or the output layout is not visible here -- confirm.
 */
00201 AFAPI afError af_transpose_B(bool *d_out,
00202 unsigned w, unsigned h,
00203 const bool *d_in,
00204 unsigned batch);
00206 AFAPI afError af_transpose_I(int *d_out,
00207 unsigned w, unsigned h,
00208 const int *d_in,
00209 unsigned batch);
00211 AFAPI afError af_transpose_U(unsigned *d_out,
00212 unsigned w, unsigned h,
00213 const unsigned *d_in,
00214 unsigned batch);
00216 AFAPI afError af_transpose_S(float *d_out,
00217 unsigned w, unsigned h,
00218 const float *d_in,
00219 unsigned batch);
00221 AFAPI afError af_transpose_D(double *d_out,
00222 unsigned w, unsigned h,
00223 const double *d_in,
00224 unsigned batch);
00226 AFAPI afError af_transpose_C(cuComplex *d_out,
00227 unsigned w, unsigned h,
00228 const cuComplex *d_in,
00229 unsigned batch);
00231 AFAPI afError af_transpose_Z(cuDoubleComplex *d_out,
00232 unsigned w, unsigned h,
00233 const cuDoubleComplex *d_in,
00234 unsigned batch);
00236
00238
00239
00256
/*
 * af_tile_<T>
 *
 * Same signature for every element type; the suffix selects the type:
 *   S = float, D = double, B = bool, U = unsigned, I = int,
 *   C = cuComplex, Z = cuDoubleComplex.
 *
 * d_out        output buffer (written)
 * ndims, dims  count of entries and read-only array of unsigned extents
 * d_in         input buffer (read-only: const-qualified)
 * nreps, reps  count of entries and read-only array of unsigned values
 * batch        unsigned batch count
 * Returns an afError status code.
 *
 * NOTE(review): presumably dims describes d_in and reps holds one
 * repetition count per dimension; the required size of d_out is not stated
 * in this header -- confirm against the implementation.
 */
00257 AFAPI afError af_tile_S(float *d_out,
00258 unsigned ndims, const unsigned *dims,
00259 const float * d_in,
00260 unsigned nreps, const unsigned *reps,
00261 unsigned batch);
00263 AFAPI afError af_tile_D(double *d_out,
00264 unsigned ndims, const unsigned *dims,
00265 const double * d_in,
00266 unsigned nreps, const unsigned *reps,
00267 unsigned batch);
00269 AFAPI afError af_tile_B(bool *d_out,
00270 unsigned ndims, const unsigned *dims,
00271 const bool * d_in,
00272 unsigned nreps, const unsigned *reps,
00273 unsigned batch);
00275 AFAPI afError af_tile_U(unsigned *d_out,
00276 unsigned ndims, const unsigned *dims,
00277 const unsigned * d_in,
00278 unsigned nreps, const unsigned *reps,
00279 unsigned batch);
00281 AFAPI afError af_tile_I(int *d_out,
00282 unsigned ndims, const unsigned *dims,
00283 const int * d_in,
00284 unsigned nreps, const unsigned *reps,
00285 unsigned batch);
00287 AFAPI afError af_tile_C(cuComplex *d_out,
00288 unsigned ndims, const unsigned *dims,
00289 const cuComplex * d_in,
00290 unsigned nreps, const unsigned *reps,
00291 unsigned batch);
00293 AFAPI afError af_tile_Z(cuDoubleComplex *d_out,
00294 unsigned ndims, const unsigned *dims,
00295 const cuDoubleComplex * d_in,
00296 unsigned nreps, const unsigned *reps,
00297 unsigned batch);
00298
00301
00302
00303
00320
/*
 * af_shift_<T>
 *
 * Same signature for every element type; the suffix selects the type:
 *   B = bool, I = int, U = unsigned, S = float, D = double,
 *   C = cuComplex, Z = cuDoubleComplex.
 *
 * d_out              output buffer (written)
 * ndims, dims        count of entries and read-only array of unsigned extents
 * nshifts, h_shifts  count of entries and read-only array of signed offsets
 * d_in               input buffer (read-only: const-qualified)
 * batch              unsigned batch count
 * Returns an afError status code.
 *
 * Note that nshifts is a signed int while ndims is unsigned.
 *
 * NOTE(review): the h_ / d_ prefixes presumably distinguish host from device
 * pointers (compare the af_shiftd family, whose offsets are named d_shifts)
 * -- confirm against the implementation.
 */
00321 AFAPI afError af_shift_B(bool *d_out,
00322 unsigned ndims,const unsigned *dims,
00323 int nshifts, const int *h_shifts,
00324 const bool *d_in, unsigned batch);
00326 AFAPI afError af_shift_I(int *d_out,
00327 unsigned ndims,const unsigned *dims,
00328 int nshifts, const int *h_shifts,
00329 const int *d_in, unsigned batch);
00331 AFAPI afError af_shift_U(unsigned *d_out,
00332 unsigned ndims,const unsigned *dims,
00333 int nshifts, const int *h_shifts,
00334 const unsigned *d_in, unsigned batch);
00336 AFAPI afError af_shift_S(float *d_out,
00337 unsigned ndims,const unsigned *dims,
00338 int nshifts, const int *h_shifts,
00339 const float *d_in, unsigned batch);
00341 AFAPI afError af_shift_D(double *d_out,
00342 unsigned ndims,const unsigned *dims,
00343 int nshifts, const int *h_shifts,
00344 const double *d_in, unsigned batch);
00346 AFAPI afError af_shift_C(cuComplex *d_out,
00347 unsigned ndims,const unsigned *dims,
00348 int nshifts, const int *h_shifts,
00349 const cuComplex *d_in, unsigned batch);
00351 AFAPI afError af_shift_Z(cuDoubleComplex *d_out,
00352 unsigned ndims,const unsigned *dims,
00353 int nshifts, const int *h_shifts,
00354 const cuDoubleComplex *d_in, unsigned batch);
00355
00357
00372
/*
 * af_shiftd_<T>
 *
 * Same signature for every element type; the suffix selects the type:
 *   B = bool, I = int, U = unsigned, S = float, D = double,
 *   C = cuComplex, Z = cuDoubleComplex.
 *
 * Compared with af_shift_<T>, the offsets pointer is named d_shifts instead
 * of h_shifts and there is one extra trailing parameter, batch_shift.
 *
 * d_out              output buffer (written)
 * ndims, dims        count of entries and read-only array of unsigned extents
 * nshifts, d_shifts  count of entries and read-only array of signed offsets
 * d_in               input buffer (read-only: const-qualified)
 * batch              unsigned batch count
 * batch_shift        second unsigned count
 * Returns an afError status code.
 *
 * NOTE(review): presumably d_shifts lives in device memory and batch_shift
 * is the number of offset sets supplied; neither is established by this
 * header -- confirm against the implementation.
 */
00373 AFAPI afError af_shiftd_B(bool *d_out,
00374 unsigned ndims,const unsigned *dims,
00375 int nshifts, const int *d_shifts,
00376 const bool *d_in, unsigned batch,
00377 unsigned batch_shift);
00379 AFAPI afError af_shiftd_I(int *d_out,
00380 unsigned ndims,const unsigned *dims,
00381 int nshifts, const int *d_shifts,
00382 const int *d_in, unsigned batch,
00383 unsigned batch_shift);
00385 AFAPI afError af_shiftd_U(unsigned *d_out,
00386 unsigned ndims,const unsigned *dims,
00387 int nshifts, const int *d_shifts,
00388 const unsigned *d_in, unsigned batch,
00389 unsigned batch_shift);
00391 AFAPI afError af_shiftd_S(float *d_out,
00392 unsigned ndims,const unsigned *dims,
00393 int nshifts, const int *d_shifts,
00394 const float *d_in, unsigned batch,
00395 unsigned batch_shift);
00397 AFAPI afError af_shiftd_D(double *d_out,
00398 unsigned ndims,const unsigned *dims,
00399 int nshifts, const int *d_shifts,
00400 const double *d_in, unsigned batch,
00401 unsigned batch_shift);
00403 AFAPI afError af_shiftd_C(cuComplex *d_out,
00404 unsigned ndims,const unsigned *dims,
00405 int nshifts, const int *d_shifts,
00406 const cuComplex *d_in, unsigned batch,
00407 unsigned batch_shift);
00409 AFAPI afError af_shiftd_Z(cuDoubleComplex *d_out,
00410 unsigned ndims,const unsigned *dims,
00411 int nshifts, const int *d_shifts,
00412 const cuDoubleComplex *d_in, unsigned batch,
00413 unsigned batch_shift);
00416
00417
00434
/*
 * af_reorder_<T>
 *
 * Same signature for every element type; the suffix selects the type:
 *   B = bool, I = int, U = unsigned, S = float, D = double,
 *   C = cuComplex, Z = cuDoubleComplex.
 *
 * d_out          output buffer (written)
 * ndims, dims    count of entries and read-only array of unsigned extents
 * nperms, perms  count of entries and read-only array of signed ints
 * d_in           input buffer (read-only: const-qualified)
 * batch          unsigned batch count
 * Returns an afError status code.
 *
 * NOTE(review): presumably perms is a permutation of dimension indices and
 * resides in host memory (it has no d_ prefix); its base and the handling of
 * negative entries are not visible here -- confirm against the
 * implementation.
 */
00435 AFAPI afError af_reorder_B(bool *d_out,
00436 unsigned ndims, const unsigned *dims,
00437 int nperms, const int *perms,
00438 const bool *d_in, unsigned batch);
00440 AFAPI afError af_reorder_I(int *d_out,
00441 unsigned ndims, const unsigned *dims,
00442 int nperms, const int *perms,
00443 const int *d_in, unsigned batch);
00445 AFAPI afError af_reorder_U(unsigned *d_out,
00446 unsigned ndims, const unsigned *dims,
00447 int nperms, const int *perms,
00448 const unsigned *d_in, unsigned batch);
00450 AFAPI afError af_reorder_S(float *d_out,
00451 unsigned ndims, const unsigned *dims,
00452 int nperms, const int *perms,
00453 const float *d_in, unsigned batch);
00455 AFAPI afError af_reorder_D(double *d_out,
00456 unsigned ndims, const unsigned *dims,
00457 int nperms, const int *perms,
00458 const double *d_in, unsigned batch);
00460 AFAPI afError af_reorder_C(cuComplex *d_out,
00461 unsigned ndims, const unsigned *dims,
00462 int nperms, const int *perms,
00463 const cuComplex *d_in, unsigned batch);
00465 AFAPI afError af_reorder_Z(cuDoubleComplex *d_out,
00466 unsigned ndims, const unsigned *dims,
00467 int nperms, const int *perms,
00468 const cuDoubleComplex *d_in, unsigned batch);
00471
00472
00491
/*
 * af_sort_<T>
 *
 * Same signature for every element type; the suffix selects the type:
 *   S = float, D = double, I = int, U = unsigned.
 *
 * d_sorted     output buffer for values (written)
 * d_Indices    second output buffer; declared with the same element type as
 *              the data, not an integer type
 * ndims, dims  count of entries and read-only array of unsigned extents
 * d_input      input buffer
 * DIR          boolean direction flag
 * dim          unsigned dimension selector
 * batch        unsigned batch count
 * base_one     boolean flag; BASE_ONE (defined at the top of this header)
 *              gives it a default of false when compiled as C++ and no
 *              default when compiled as C
 * Returns an afError status code.
 *
 * NOTE(review): d_input is not const-qualified, so callers must assume it
 * may be modified; whether the implementation actually writes through it
 * cannot be determined from this header.
 * NOTE(review): which value of DIR means ascending, and whether base_one
 * switches the returned indices to 1-based, are not visible here -- confirm.
 */
00492 AFAPI afError af_sort_S(float *d_sorted, float *d_Indices,
00493 unsigned ndims, const unsigned *dims,
00494 float *d_input, bool DIR,
00495 unsigned dim, unsigned batch,
00496 bool base_one BASE_ONE);
00498 AFAPI afError af_sort_D(double *d_sorted, double *d_Indices,
00499 unsigned ndims, const unsigned *dims,
00500 double *d_input, bool DIR,
00501 unsigned dim, unsigned batch,
00502 bool base_one BASE_ONE);
00504 AFAPI afError af_sort_I(int *d_sorted, int *d_Indices,
00505 unsigned ndims, const unsigned *dims,
00506 int *d_input, bool DIR,
00507 unsigned dim, unsigned batch,
00508 bool base_one BASE_ONE);
00510 AFAPI afError af_sort_U(unsigned *d_sorted, unsigned *d_Indices,
00511 unsigned ndims, const unsigned *dims,
00512 unsigned *d_input, bool DIR,
00513 unsigned dim, unsigned batch,
00514 bool base_one BASE_ONE);
00517
00518
00519
00535
/*
 * af_issorted_S / af_issorted_D
 *
 * Declared for float (S) and double (D) input only.
 *
 * res         output, written as unsigned char (not bool)
 * rows, cols  two unsigned extents
 * d_in        input buffer (read-only: const-qualified)
 * is_row      boolean selector
 * batch       unsigned batch count
 * Returns an afError status code.
 *
 * NOTE(review): presumably is_row chooses between checking along rows and
 * along columns, and res receives one flag per batch entry; whether res is
 * host or device memory is not visible here -- confirm.
 */
00536 AFAPI afError af_issorted_S(unsigned char *res,
00537 unsigned rows, unsigned cols, const float *d_in,
00538 bool is_row, unsigned batch);
00540 AFAPI afError af_issorted_D(unsigned char *res,
00541 unsigned rows, unsigned cols, const double *d_in,
00542 bool is_row, unsigned batch);
00545
00546
00563
/*
 * af_sortrows_S / af_sortrows_D
 *
 * Declared for float (S) and double (D) data only.
 *
 * d_Y           output buffer, same element type as the input (written)
 * d_I           output buffer of unsigned values (written)
 * rows, cols    two unsigned extents
 * d_X           input buffer (read-only: const-qualified)
 * numel_C, h_C  count of entries and read-only array of signed ints
 * Returns an afError status code.
 *
 * There is no batch parameter in this family.
 *
 * NOTE(review): presumably h_C is a host array of column selectors used as
 * sort keys and d_I receives the row permutation -- confirm against the
 * implementation.
 */
00564 AFAPI afError af_sortrows_S(float* d_Y, unsigned* d_I,
00565 unsigned rows, unsigned cols, const float* d_X,
00566 unsigned numel_C, const int* h_C);
00568 AFAPI afError af_sortrows_D(double* d_Y, unsigned* d_I,
00569 unsigned rows, unsigned cols, const double* d_X,
00570 unsigned numel_C, const int* h_C);
00573
00574
00575
/*
 * af_sortByKeys_S
 *
 * Only the float (S) variant is declared in this header.
 *
 * d_okeys, d_ovals  output key and value buffers (written)
 * numel             unsigned element count
 * d_ikeys, d_ivals  input key and value buffers
 * batch             unsigned batch count
 * DIR               boolean direction flag
 * Returns an afError status code.
 *
 * NOTE(review): the input pointers are not const-qualified, so callers must
 * assume they may be modified; which value of DIR means ascending is not
 * visible here -- confirm against the implementation.
 */
00576 AFAPI afError af_sortByKeys_S(float *d_okeys, float *d_ovals,
00577 unsigned numel,
00578 float *d_ikeys, float *d_ivals,
00579 unsigned batch,
00580 bool DIR);
00581
00582
/*
 * af_hankel_<H><C><R>
 *
 * The three-letter suffix gives the element types of d_H, d_C and d_R, in
 * that order: S = float, D = double, C = cuComplex, Z = cuDoubleComplex.
 * For example af_hankel_CSC takes cuComplex d_H, float d_C, cuComplex d_R.
 *
 * d_H      first pointer argument
 * nc, d_C  unsigned count followed by a data pointer
 * nr, d_R  unsigned count followed by a data pointer
 * Returns an afError status code.
 *
 * NOTE(review): presumably d_H is the output matrix and d_C / d_R are its
 * defining column and row of lengths nc / nr; none of the pointers is
 * const-qualified, so this header does not say which ones are written --
 * confirm against the implementation.
 */
00583 AFAPI afError af_hankel_SSS(float *d_H, unsigned nc, float *d_C, unsigned nr, float *d_R);
00584 AFAPI afError af_hankel_CSC(cuComplex *d_H, unsigned nc, float *d_C, unsigned nr, cuComplex *d_R);
00585 AFAPI afError af_hankel_CCS(cuComplex *d_H, unsigned nc, cuComplex *d_C, unsigned nr, float *d_R);
00586 AFAPI afError af_hankel_CCC(cuComplex *d_H, unsigned nc, cuComplex *d_C, unsigned nr, cuComplex *d_R);
00587 AFAPI afError af_hankel_SSD(float *d_H, unsigned nc, float *d_C, unsigned nr, double *d_R);
00588 AFAPI afError af_hankel_CSZ(cuComplex *d_H, unsigned nc, float *d_C, unsigned nr, cuDoubleComplex *d_R);
00589 AFAPI afError af_hankel_CCD(cuComplex *d_H, unsigned nc, cuComplex *d_C, unsigned nr, double *d_R);
00590 AFAPI afError af_hankel_CCZ(cuComplex *d_H, unsigned nc, cuComplex *d_C, unsigned nr, cuDoubleComplex *d_R);
00591 AFAPI afError af_hankel_SDS(float *d_H, unsigned nc, double *d_C, unsigned nr, float *d_R);
00592 AFAPI afError af_hankel_CDC(cuComplex *d_H, unsigned nc, double *d_C, unsigned nr, cuComplex *d_R);
00593 AFAPI afError af_hankel_CZS(cuComplex *d_H, unsigned nc, cuDoubleComplex *d_C, unsigned nr, float *d_R);
00594 AFAPI afError af_hankel_CZC(cuComplex *d_H, unsigned nc, cuDoubleComplex *d_C, unsigned nr, cuComplex *d_R);
00595 AFAPI afError af_hankel_DSD(double *d_H, unsigned nc, float *d_C, unsigned nr, double *d_R);
00596 AFAPI afError af_hankel_ZSZ(cuDoubleComplex *d_H, unsigned nc, float *d_C, unsigned nr, cuDoubleComplex *d_R);
00597 AFAPI afError af_hankel_ZCD(cuDoubleComplex *d_H, unsigned nc, cuComplex *d_C, unsigned nr, double *d_R);
00598 AFAPI afError af_hankel_ZCZ(cuDoubleComplex *d_H, unsigned nc, cuComplex *d_C, unsigned nr, cuDoubleComplex *d_R);
00599 AFAPI afError af_hankel_DDS(double *d_H, unsigned nc, double *d_C, unsigned nr, float *d_R);
00600 AFAPI afError af_hankel_ZDC(cuDoubleComplex *d_H, unsigned nc, double *d_C, unsigned nr, cuComplex *d_R);
00601 AFAPI afError af_hankel_ZZS(cuDoubleComplex *d_H, unsigned nc, cuDoubleComplex *d_C, unsigned nr, float *d_R);
00602 AFAPI afError af_hankel_ZZC(cuDoubleComplex *d_H, unsigned nc, cuDoubleComplex *d_C, unsigned nr, cuComplex *d_R);
00603 AFAPI afError af_hankel_DDD(double *d_H, unsigned nc, double *d_C, unsigned nr, double *d_R);
00604 AFAPI afError af_hankel_ZDZ(cuDoubleComplex *d_H, unsigned nc, double *d_C, unsigned nr, cuDoubleComplex *d_R);
00605 AFAPI afError af_hankel_ZZD(cuDoubleComplex *d_H, unsigned nc, cuDoubleComplex *d_C, unsigned nr, double *d_R);
00606 AFAPI afError af_hankel_ZZZ(cuDoubleComplex *d_H, unsigned nc, cuDoubleComplex *d_C, unsigned nr, cuDoubleComplex *d_R);
00607
00608
/*
 * af_toeplitz_<A><B>
 *
 * The two-letter suffix gives the element types of TPLZol and Row, in that
 * order: S = float, D = double, C = cuComplex, Z = cuDoubleComplex.
 *
 * The element type of TPLZ follows from the pair, as declared below:
 *   - complex if either TPLZol or Row is complex, otherwise real;
 *   - double precision only when both TPLZol and Row are double precision
 *     (DD, DZ, ZD, ZZ), otherwise single precision.
 *
 * numelC, TPLZol  unsigned count followed by a data pointer
 * numelR, Row     unsigned count followed by a data pointer
 * TPLZ            third data pointer
 * NA, NB          two further unsigned values
 * Returns an afError status code.
 *
 * NOTE(review): presumably TPLZol / Row are the defining column and row and
 * TPLZ is the output matrix; none of the pointers is const-qualified and the
 * meaning of NA / NB is not visible in this header -- confirm against the
 * implementation.
 */
00609 AFAPI afError af_toeplitz_SS(unsigned numelC,
00610 float *TPLZol,
00611 unsigned numelR,
00612 float *Row,
00613 float *TPLZ,
00614 unsigned NA, unsigned NB);
00615 AFAPI afError af_toeplitz_SC(unsigned numelC,
00616 float *TPLZol,
00617 unsigned numelR,
00618 cuComplex *Row,
00619 cuComplex *TPLZ,
00620 unsigned NA, unsigned NB);
00621 AFAPI afError af_toeplitz_CS(unsigned numelC,
00622 cuComplex *TPLZol,
00623 unsigned numelR,
00624 float *Row,
00625 cuComplex *TPLZ,
00626 unsigned NA, unsigned NB);
00627 AFAPI afError af_toeplitz_CC(unsigned numelC,
00628 cuComplex *TPLZol,
00629 unsigned numelR,
00630 cuComplex *Row,
00631 cuComplex *TPLZ,
00632 unsigned NA, unsigned NB);
00633 AFAPI afError af_toeplitz_SD(unsigned numelC,
00634 float *TPLZol,
00635 unsigned numelR,
00636 double *Row,
00637 float *TPLZ,
00638 unsigned NA, unsigned NB);
00639 AFAPI afError af_toeplitz_SZ(unsigned numelC,
00640 float *TPLZol,
00641 unsigned numelR,
00642 cuDoubleComplex *Row,
00643 cuComplex *TPLZ,
00644 unsigned NA, unsigned NB);
00645 AFAPI afError af_toeplitz_CD(unsigned numelC,
00646 cuComplex *TPLZol,
00647 unsigned numelR,
00648 double *Row,
00649 cuComplex *TPLZ,
00650 unsigned NA, unsigned NB);
00651 AFAPI afError af_toeplitz_CZ(unsigned numelC,
00652 cuComplex *TPLZol,
00653 unsigned numelR,
00654 cuDoubleComplex *Row,
00655 cuComplex *TPLZ,
00656 unsigned NA, unsigned NB);
00657 AFAPI afError af_toeplitz_DS(unsigned numelC,
00658 double *TPLZol,
00659 unsigned numelR,
00660 float *Row,
00661 float *TPLZ,
00662 unsigned NA, unsigned NB);
00663 AFAPI afError af_toeplitz_DC(unsigned numelC,
00664 double *TPLZol,
00665 unsigned numelR,
00666 cuComplex *Row,
00667 cuComplex *TPLZ,
00668 unsigned NA, unsigned NB);
00669 AFAPI afError af_toeplitz_ZS(unsigned numelC,
00670 cuDoubleComplex *TPLZol,
00671 unsigned numelR,
00672 float *Row,
00673 cuComplex *TPLZ,
00674 unsigned NA, unsigned NB);
00675 AFAPI afError af_toeplitz_ZC(unsigned numelC,
00676 cuDoubleComplex *TPLZol,
00677 unsigned numelR,
00678 cuComplex *Row,
00679 cuComplex *TPLZ,
00680 unsigned NA, unsigned NB);
00681 AFAPI afError af_toeplitz_DD(unsigned numelC,
00682 double *TPLZol,
00683 unsigned numelR,
00684 double *Row,
00685 double *TPLZ,
00686 unsigned NA, unsigned NB);
00687 AFAPI afError af_toeplitz_DZ(unsigned numelC,
00688 double *TPLZol,
00689 unsigned numelR,
00690 cuDoubleComplex *Row,
00691 cuDoubleComplex *TPLZ,
00692 unsigned NA, unsigned NB);
00693 AFAPI afError af_toeplitz_ZD(unsigned numelC,
00694 cuDoubleComplex *TPLZol,
00695 unsigned numelR,
00696 double *Row,
00697 cuDoubleComplex *TPLZ,
00698 unsigned NA, unsigned NB);
00699 AFAPI afError af_toeplitz_ZZ(unsigned numelC,
00700 cuDoubleComplex *TPLZol,
00701 unsigned numelR,
00702 cuDoubleComplex *Row,
00703 cuDoubleComplex *TPLZ,
00704 unsigned NA, unsigned NB);
00705
00706
/*
 * af_triMat_<T>
 *
 * Same signature for every element type; the suffix selects the type:
 *   S = float, C = cuComplex, D = double, Z = cuDoubleComplex.
 *
 * m, n       two unsigned extents
 * d_A, d_T   two data pointers of the same element type
 * diag       signed diagonal selector
 * direction  boolean flag
 * batch      unsigned batch count
 * Returns an afError status code.
 *
 * NOTE(review): presumably d_A is the input matrix, d_T receives its
 * triangular part, and direction chooses upper versus lower; neither pointer
 * is const-qualified, so this header does not establish which one is
 * written -- confirm against the implementation.
 */
00707 AFAPI afError af_triMat_S(unsigned m, unsigned n,
00708 float *d_A, float *d_T,
00709 int diag, bool direction,
00710 unsigned batch);
00711 AFAPI afError af_triMat_C(unsigned m, unsigned n,
00712 cuComplex *d_A, cuComplex *d_T,
00713 int diag, bool direction,
00714 unsigned batch);
00715 AFAPI afError af_triMat_D(unsigned m, unsigned n,
00716 double *d_A, double *d_T,
00717 int diag, bool direction,
00718 unsigned batch);
00719 AFAPI afError af_triMat_Z(unsigned m, unsigned n,
00720 cuDoubleComplex *d_A, cuDoubleComplex *d_T,
00721 int diag, bool direction,
00722 unsigned batch);
00723
00724
00725
00726
00727
00728
00729
00730
/*
 * af_conj_inp_C / af_conj_inp_Z
 *
 * One entry point per complex element type: C = cuComplex,
 * Z = cuDoubleComplex.
 *
 * numel  unsigned element count
 * vals   non-const data pointer; there is no separate output argument
 * Returns an afError status code.
 *
 * NOTE(review): the name and the single non-const buffer suggest an
 * in-place complex conjugate -- confirm against the implementation.
 */
00731 AFAPI afError af_conj_inp_C(unsigned numel, cuComplex *vals);
00732 AFAPI afError af_conj_inp_Z(unsigned numel, cuDoubleComplex *vals);
00733
00734
/*
 * af_norm_inp_C / af_norm_inp_Z
 *
 * One entry point per complex element type: C = cuComplex,
 * Z = cuDoubleComplex. The scalar `val` is float for C and double for Z.
 *
 * numel  unsigned element count
 * vals   non-const data pointer; there is no separate output argument
 * val    real scalar of matching precision
 * Returns an afError status code.
 *
 * NOTE(review): presumably vals is updated in place using val (e.g. scaled
 * or divided by it); the exact operation is not visible in this header --
 * confirm against the implementation.
 */
00735 AFAPI afError af_norm_inp_C(unsigned numel, cuComplex *vals, float val);
00736 AFAPI afError af_norm_inp_Z(unsigned numel, cuDoubleComplex *vals, double val);
00737
00738
/*
 * af_isTrig_<T>
 *
 * Same signature for every element type; the suffix selects the input type:
 *   B = bool, S = float, C = cuComplex, D = double, Z = cuDoubleComplex.
 *
 * res            bool output (written)
 * rows, columns  two unsigned extents
 * in             input buffer (read-only: const-qualified)
 * batch          unsigned batch count
 * dir            boolean flag
 * Returns an afError status code.
 *
 * NOTE(review): presumably this tests whether the matrix is triangular, with
 * dir choosing upper versus lower; how many bools res must hold and whether
 * res / in are host or device pointers are not visible here -- confirm.
 */
00739 AFAPI afError af_isTrig_B(bool *res,
00740 unsigned rows, unsigned columns,
00741 const bool *in, unsigned batch, bool dir);
00742 AFAPI afError af_isTrig_S(bool *res,
00743 unsigned rows, unsigned columns,
00744 const float *in, unsigned batch, bool dir);
00745 AFAPI afError af_isTrig_C(bool *res,
00746 unsigned rows, unsigned columns,
00747 const cuComplex *in, unsigned batch, bool dir);
00748 AFAPI afError af_isTrig_D(bool *res,
00749 unsigned rows, unsigned columns,
00750 const double *in, unsigned batch, bool dir);
00751 AFAPI afError af_isTrig_Z(bool *res,
00752 unsigned rows, unsigned columns,
00753 const cuDoubleComplex *in, unsigned batch, bool dir);
00754
00755
/*
 * af_isSym_<T>
 *
 * Same signature for every element type; the suffix selects the input type:
 *   B = bool, S = float, C = cuComplex, D = double, Z = cuDoubleComplex.
 *
 * res    bool output (written)
 * width  single unsigned extent (no separate height is passed)
 * in     input buffer (read-only: const-qualified)
 * batch  unsigned batch count
 * Returns an afError status code.
 *
 * NOTE(review): presumably this tests a width-by-width matrix for symmetry;
 * how many bools res must hold and whether res / in are host or device
 * pointers are not visible here -- confirm.
 */
00756 AFAPI afError af_isSym_B(bool *res,
00757 unsigned width,
00758 const bool *in, unsigned batch);
00759 AFAPI afError af_isSym_S(bool *res,
00760 unsigned width,
00761 const float *in, unsigned batch);
00762 AFAPI afError af_isSym_C(bool *res,
00763 unsigned width,
00764 const cuComplex *in, unsigned batch);
00765 AFAPI afError af_isSym_D(bool *res,
00766 unsigned width,
00767 const double *in, unsigned batch);
00768 AFAPI afError af_isSym_Z(bool *res,
00769 unsigned width,
00770 const cuDoubleComplex *in, unsigned batch);
00771
00772
00773
/*
 * af_isHerm_<T>
 *
 * Same signature for every element type; the suffix selects the input type:
 *   B = bool, S = float, C = cuComplex, D = double, Z = cuDoubleComplex.
 * The parameter list is identical to that of af_isSym_<T>.
 *
 * res    bool output (written)
 * width  single unsigned extent (no separate height is passed)
 * in     input buffer (read-only: const-qualified)
 * batch  unsigned batch count
 * Returns an afError status code.
 *
 * NOTE(review): presumably this tests a width-by-width matrix for being
 * Hermitian; how the real-typed variants (B, S, D) differ from af_isSym is
 * not visible in this header -- confirm against the implementation.
 */
00774 AFAPI afError af_isHerm_B(bool *res,
00775 unsigned width,
00776 const bool *in, unsigned batch);
00777 AFAPI afError af_isHerm_S(bool *res,
00778 unsigned width,
00779 const float *in, unsigned batch);
00780 AFAPI afError af_isHerm_C(bool *res,
00781 unsigned width,
00782 const cuComplex *in, unsigned batch);
00783 AFAPI afError af_isHerm_D(bool *res,
00784 unsigned width,
00785 const double *in, unsigned batch);
00786 AFAPI afError af_isHerm_Z(bool *res,
00787 unsigned width,
00788 const cuDoubleComplex *in, unsigned batch);
00789
00790
/*
 * af_isneg_S / af_isneg_D
 *
 * Declared for float (S) and double (D) input only.
 *
 * d_in    input buffer (read-only: const-qualified)
 * n       unsigned element count
 * is_neg  bool output (written)
 * Returns an afError status code.
 *
 * Unlike most families in this header, the output pointer comes last.
 *
 * NOTE(review): whether is_neg receives a single flag for the whole buffer
 * or one flag per element is not visible here -- confirm against the
 * implementation.
 */
00791 AFAPI afError af_isneg_S(const float *d_in, unsigned n, bool *is_neg);
00792 AFAPI afError af_isneg_D(const double *d_in, unsigned n, bool *is_neg);
00793
00794 #ifdef __cplusplus
00795 }
00796 #endif
00797 #endif // __AF_MATRIX_H