/usr/include/libint2/util/intrinsic_operations.h is in libint2-dev 2.3.0~beta3-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 | /*
* This file is a part of Libint.
* Copyright (C) 2004-2014 Edward F. Valeev
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Library General Public License, version 2,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*
*/
#ifndef _libint2_include_libint2intrinsicoperations_h_
#define _libint2_include_libint2intrinsicoperations_h_
#include <libint2/config.h>
#ifdef __cplusplus
namespace libint2 {
//@{ Floating-point-Multiply-Add (FMA) instructions. Redefine these operations using native FMA instructions, if available (see, e.g. vector_x86.h)
#if defined(LIBINT_GENERATE_FMA)
# if defined(LIBINT_HAS_CXX11)
/// @return x*y+z
template <typename X, typename Y, typename Z>
inline auto fma_plus(X x, Y y, Z z) -> decltype(x*y+z) {
return x*y + z;
}
/// @return x*y-z
template <typename X, typename Y, typename Z>
inline auto fma_minus(X x, Y y, Z z) -> decltype(x*y-z) {
return x*y - z;
}
# else // LIBINT_HAS_CXX11
# error "support for FMA requires compiler capable of C++11 or later"
# endif // LIBINT_HAS_CXX11
#endif // LIBINT_GENERATE_FMA
//@}
};
/**
these macros define bzero, copy, and inc operations:
*/
/** X[i] = 0 */
#define _libint2_static_api_bzero_short_(X,nelem) for(int i=0; i < (nelem); ++i) { (X)[i] = 0.0; }
/** X[i] = Y[i] */
#define _libint2_static_api_copy_short_(X,Y,nelem) for(int i=0; i < (nelem); ++i) { (X)[i] = (Y)[i]; }
/** X[i] = a*Y[i] */
#define _libint2_static_api_scale_short_(X,Y,nelem,a) for(int i=0; i < (nelem); ++i) { (X)[i] = (a) * (Y)[i]; }
/** X[i] = a*Y[i] */
#define _libint2_static_api_scale_vec_short_(X,Y,nelem,a,vl) for(int i=0, iv=0; i < (nelem)/(vl); ++i) { for(int v=0; v < (vl); ++v, ++iv) { (X)[iv] = (a)[v] * (Y)[iv]; } }
/** X[i] += a*Y[i] */
#define _libint2_static_api_inc_short_(X,Y,nelem,a) for(int i=0; i < (nelem); ++i) { (X)[i] = libint2::fma_plus((a),(Y)[i],(X)[i]); }
/** X[i] += Y[i] */
#define _libint2_static_api_inc1_short_(X,Y,nelem) for(int i=0; i < (nelem); ++i) { (X)[i] += (Y)[i]; }
#endif
#endif
|