/usr/include/atlas/zmm.h is in libatlas-dev 3.10.2-9.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#ifndef ZMM_H
#define ZMM_H
/*
 * Fixed tuning and blocking constants for the matrix-multiply kernels
 * declared in this header.
 * NOTE(review): these look like values emitted by ATLAS's install-time
 * search -- confirm before editing any of them by hand.
 */

/*
 * Flag macro: defined with no replacement text, so it can only be tested
 * with #ifdef / defined().
 * NOTE(review): presumably marks the kernel as using a combined
 * multiply-add form -- confirm against the ATLAS documentation.
 */
#define ATL_mmMULADD
/*
 * NOTE(review): presumably the latency (LAT) and the M/N/K unrolling
 * factors (MU/NU/KU) the kernel was generated with -- confirm.
 */
#define ATL_mmLAT 5
#define ATL_mmMU 6
#define ATL_mmNU 1
#define ATL_mmKU 72
/* Block dimensions; all three are 48, matching the 48x48x48 in the kernel names. */
#define MB 48
#define NB 48
#define KB 48
/* Precomputed pairwise products of the block dimensions: 48 * 48 = 2304. */
#define NBNB 2304
#define MBNB 2304
#define MBKB 2304
#define NBKB 2304
/* Doubled forms: NB2 = 2 * 48 and NBNB2 = 2 * 2304. */
#define NB2 96
#define NBNB2 4608
/*
 * Arithmetic helpers with the block size hard-coded as literals.
 * The argument is parenthesized, so expression arguments are safe.
 * ATL_DivByNB uses C's '/' operator, so it truncates for integer operands.
 */
#define ATL_MulByNB(N_) ((N_) * 48)
#define ATL_DivByNB(N_) ((N_) / 48)
#define ATL_MulByNBNB(N_) ((N_) * 2304)
/*
 * Prototypes for the three block kernels used by the NBmm_* macros.
 * All three share one signature and differ only in the _b0 / _b1 / _bX
 * suffix. Within this header the _b0 kernel is only ever passed ATL_rzero
 * as beta, the _b1 kernel only ATL_rone, and the _bX kernel either
 * ATL_rnone or a caller-supplied value.
 * NOTE(review): the suffix therefore presumably names the beta case the
 * kernel is specialised for (0, 1, arbitrary) -- confirm against the
 * kernel sources.
 *
 * TYPE is not defined in this header; the including file must define it
 * before this point.
 */
void ATL_zJIK48x48x48TN48x48x0_a1_b0(const int M, const int N, const int K, const TYPE alpha, const TYPE *A, const int lda, const TYPE *B, const int ldb, const TYPE beta, TYPE *C, const int ldc);
void ATL_zJIK48x48x48TN48x48x0_a1_b1(const int M, const int N, const int K, const TYPE alpha, const TYPE *A, const int lda, const TYPE *B, const int ldb, const TYPE beta, TYPE *C, const int ldc);
void ATL_zJIK48x48x48TN48x48x0_a1_bX(const int M, const int N, const int K, const TYPE alpha, const TYPE *A, const int lda, const TYPE *B, const int ldb, const TYPE beta, TYPE *C, const int ldc);
/*
 * NBmm_b1: issue the four kernel calls that update one block of C when the
 * caller's beta is one.
 *
 * A_ and B_ each address two panels, at offsets +0 and +NBNB; C_ addresses
 * two interleaved components, at offsets +0 and +1.
 * NOTE(review): assuming each kernel computes C <- A*B + beta*C, the net
 * effect is
 *     C[0] += A[NBNB]*B[NBNB] - A[0]*B[0]
 *     C[1] += A[0]*B[NBNB]    + A[NBNB]*B[0]
 * i.e. a complex product with panel +NBNB as the real part and panel +0 as
 * the imaginary part -- confirm against the kernel sources.
 *
 * Every argument that is used is evaluated exactly once, up front, into a
 * block-local temporary, so arguments with side effects (e.g. pA++) behave
 * as they would for a function call. be_ is accepted but never expanded.
 *
 * The expansion is a brace-delimited compound statement, not do/while(0):
 * a trailing ';' after the invocation forms an extra empty statement.
 */
#define NBmm_b1(m_, n_, k_, al_, A_, lda_, B_, ldb_, be_, C_, ldc_) \
{ \
   const int NBmm_m_ = (m_), NBmm_n_ = (n_), NBmm_k_ = (k_); \
   const int NBmm_lda_ = (lda_), NBmm_ldb_ = (ldb_), NBmm_ldc_ = (ldc_); \
   const TYPE NBmm_al_ = (al_); \
   const TYPE *NBmm_A_ = (A_); \
   const TYPE *NBmm_B_ = (B_); \
   TYPE *NBmm_C_ = (C_); \
   ATL_zJIK48x48x48TN48x48x0_a1_bX(NBmm_m_, NBmm_n_, NBmm_k_, NBmm_al_, \
      NBmm_A_, NBmm_lda_, NBmm_B_, NBmm_ldb_, \
      ATL_rnone, NBmm_C_, NBmm_ldc_); \
   ATL_zJIK48x48x48TN48x48x0_a1_b1(NBmm_m_, NBmm_n_, NBmm_k_, NBmm_al_, \
      NBmm_A_, NBmm_lda_, (NBmm_B_)+NBNB, NBmm_ldb_, \
      ATL_rone, (NBmm_C_)+1, NBmm_ldc_); \
   ATL_zJIK48x48x48TN48x48x0_a1_bX(NBmm_m_, NBmm_n_, NBmm_k_, NBmm_al_, \
      (NBmm_A_)+NBNB, NBmm_lda_, (NBmm_B_)+NBNB, NBmm_ldb_, \
      ATL_rnone, NBmm_C_, NBmm_ldc_); \
   ATL_zJIK48x48x48TN48x48x0_a1_b1(NBmm_m_, NBmm_n_, NBmm_k_, NBmm_al_, \
      (NBmm_A_)+NBNB, NBmm_lda_, NBmm_B_, NBmm_ldb_, \
      ATL_rone, (NBmm_C_)+1, NBmm_ldc_); \
}
/*
 * NBmm_b0: issue the four kernel calls that compute one block of C when the
 * caller's beta is zero (the first write to each component of C uses the
 * _b0 kernel with ATL_rzero).
 *
 * A_ and B_ each address two panels, at offsets +0 and +NBNB; C_ addresses
 * two interleaved components, at offsets +0 and +1.
 * NOTE(review): assuming the _b0 kernel computes C <- A*B and the others
 * compute C <- A*B + beta*C, the net effect is
 *     C[0] = A[NBNB]*B[NBNB] - A[0]*B[0]
 *     C[1] = A[0]*B[NBNB]    + A[NBNB]*B[0]
 * i.e. a complex product with panel +NBNB as the real part and panel +0 as
 * the imaginary part -- confirm against the kernel sources.
 *
 * Every argument that is used is evaluated exactly once, up front, into a
 * block-local temporary, so arguments with side effects (e.g. pA++) behave
 * as they would for a function call. be_ is accepted but never expanded.
 *
 * The expansion is a brace-delimited compound statement, not do/while(0):
 * a trailing ';' after the invocation forms an extra empty statement.
 */
#define NBmm_b0(m_, n_, k_, al_, A_, lda_, B_, ldb_, be_, C_, ldc_) \
{ \
   const int NBmm_m_ = (m_), NBmm_n_ = (n_), NBmm_k_ = (k_); \
   const int NBmm_lda_ = (lda_), NBmm_ldb_ = (ldb_), NBmm_ldc_ = (ldc_); \
   const TYPE NBmm_al_ = (al_); \
   const TYPE *NBmm_A_ = (A_); \
   const TYPE *NBmm_B_ = (B_); \
   TYPE *NBmm_C_ = (C_); \
   ATL_zJIK48x48x48TN48x48x0_a1_b0(NBmm_m_, NBmm_n_, NBmm_k_, NBmm_al_, \
      NBmm_A_, NBmm_lda_, NBmm_B_, NBmm_ldb_, \
      ATL_rzero, NBmm_C_, NBmm_ldc_); \
   ATL_zJIK48x48x48TN48x48x0_a1_b0(NBmm_m_, NBmm_n_, NBmm_k_, NBmm_al_, \
      NBmm_A_, NBmm_lda_, (NBmm_B_)+NBNB, NBmm_ldb_, \
      ATL_rzero, (NBmm_C_)+1, NBmm_ldc_); \
   ATL_zJIK48x48x48TN48x48x0_a1_bX(NBmm_m_, NBmm_n_, NBmm_k_, NBmm_al_, \
      (NBmm_A_)+NBNB, NBmm_lda_, (NBmm_B_)+NBNB, NBmm_ldb_, \
      ATL_rnone, NBmm_C_, NBmm_ldc_); \
   ATL_zJIK48x48x48TN48x48x0_a1_b1(NBmm_m_, NBmm_n_, NBmm_k_, NBmm_al_, \
      (NBmm_A_)+NBNB, NBmm_lda_, NBmm_B_, NBmm_ldb_, \
      ATL_rone, (NBmm_C_)+1, NBmm_ldc_); \
}
/*
 * NBmm_bX: issue the four kernel calls that update one block of C for a
 * caller-supplied beta (be_).
 *
 * A_ and B_ each address two panels, at offsets +0 and +NBNB; C_ addresses
 * two interleaved components, at offsets +0 and +1. The first call on C+0
 * is given -be_ and the first call on C+1 is given be_.
 * NOTE(review): assuming each kernel computes C <- A*B + beta*C, the net
 * effect is
 *     C[0] = be_*C[0] + A[NBNB]*B[NBNB] - A[0]*B[0]
 *     C[1] = be_*C[1] + A[0]*B[NBNB]    + A[NBNB]*B[0]
 * i.e. a complex product with panel +NBNB as the real part and panel +0 as
 * the imaginary part, scaled-accumulated with a real beta -- confirm
 * against the kernel sources.
 *
 * Every argument is evaluated exactly once, up front, into a block-local
 * temporary, so arguments with side effects (e.g. pA++) behave as they
 * would for a function call.
 *
 * The expansion is a brace-delimited compound statement, not do/while(0):
 * a trailing ';' after the invocation forms an extra empty statement.
 */
#define NBmm_bX(m_, n_, k_, al_, A_, lda_, B_, ldb_, be_, C_, ldc_) \
{ \
   const int NBmm_m_ = (m_), NBmm_n_ = (n_), NBmm_k_ = (k_); \
   const int NBmm_lda_ = (lda_), NBmm_ldb_ = (ldb_), NBmm_ldc_ = (ldc_); \
   const TYPE NBmm_al_ = (al_); \
   const TYPE NBmm_be_ = (be_); \
   const TYPE *NBmm_A_ = (A_); \
   const TYPE *NBmm_B_ = (B_); \
   TYPE *NBmm_C_ = (C_); \
   ATL_zJIK48x48x48TN48x48x0_a1_bX(NBmm_m_, NBmm_n_, NBmm_k_, NBmm_al_, \
      NBmm_A_, NBmm_lda_, NBmm_B_, NBmm_ldb_, \
      -(NBmm_be_), NBmm_C_, NBmm_ldc_); \
   ATL_zJIK48x48x48TN48x48x0_a1_bX(NBmm_m_, NBmm_n_, NBmm_k_, NBmm_al_, \
      NBmm_A_, NBmm_lda_, (NBmm_B_)+NBNB, NBmm_ldb_, \
      NBmm_be_, (NBmm_C_)+1, NBmm_ldc_); \
   ATL_zJIK48x48x48TN48x48x0_a1_bX(NBmm_m_, NBmm_n_, NBmm_k_, NBmm_al_, \
      (NBmm_A_)+NBNB, NBmm_lda_, (NBmm_B_)+NBNB, NBmm_ldb_, \
      ATL_rnone, NBmm_C_, NBmm_ldc_); \
   ATL_zJIK48x48x48TN48x48x0_a1_b1(NBmm_m_, NBmm_n_, NBmm_k_, NBmm_al_, \
      (NBmm_A_)+NBNB, NBmm_lda_, NBmm_B_, NBmm_ldb_, \
      ATL_rone, (NBmm_C_)+1, NBmm_ldc_); \
}
/*
 * rNBmm_* aliases: object-like macros that rename rNBmm_b1 / rNBmm_b0 /
 * rNBmm_bX to the ATL_dJIK... symbols with the matching suffix. Note the
 * 'd' prefix: these are not the ATL_zJIK... functions prototyped in this
 * header, and no declaration for them is visible here, so the including
 * code must obtain one elsewhere.
 * NOTE(review): presumably these are the real (double) kernels used
 * directly on real-valued sub-problems -- confirm against callers.
 */
#define rNBmm_b1 ATL_dJIK48x48x48TN48x48x0_a1_b1
#define rNBmm_b0 ATL_dJIK48x48x48TN48x48x0_a1_b0
#define rNBmm_bX ATL_dJIK48x48x48TN48x48x0_a1_bX
#endif
|