/usr/include/superlu-dist/psymbfact.h is in libsuperlu-dist-dev 5.3.0+dfsg1-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 | /*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required
approvals from U.S. Dept. of Energy)
All rights reserved.
The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/
/*! @file
* \brief Definitions for parallel symbolic factorization routine
*
* <pre>
* -- Distributed SuperLU routine (version 2.0) --
* Lawrence Berkeley National Lab, Univ. of California Berkeley.
* March 15, 2003
* </pre>
*/
#ifndef __SUPERLU_DIST_PSYMBFACT /* allow multiple inclusions */
#define __SUPERLU_DIST_PSYMBFACT
/*
* File name: psymbfact.h
* Purpose: Definitions for parallel symbolic factorization routine
*/
/*! \brief
*
* <pre>
*-- Structure returned by the symbolic factorization routine
*
* Memory is allocated during parallel symbolic factorization
* symbfact_dist, and freed after dist_symbLU routine.
*
* (xlsub,lsub): lsub[*] contains the compressed subscript of
* rectangular supernodes; xlsub[j] points to the starting
* location of the j-th column in lsub[*]. Note that xlsub
* is indexed by column.
* Storage: row subscripts
*
* (xusub,usub): lsub[*] contains the compressed subscript of
* rectangular supernodes; xusub[j] points to the starting
* location of the j-th row in usub[*]. Note that xusub
* is indexed by rows.
* Storage: column subscripts
*
* (xsup_beg_loc,xsup_end_loc, supno_loc) describes mapping between
* supernode and column, information local to each processor:
* xsup_beg_loc[s] is the leading column of the local s-th supernode.
* xsup_end_loc[s] is the last column of the local s-th supernode.
* supno[i] is the supernode no to which column i belongs;
* </pre>
*/
typedef struct {
int_t *xlsub; /* pointer to the beginning of each column of L */
int_t *lsub; /* compressed L subscripts, stored by columns */
int_t szLsub; /* current max size of lsub */
int_t *xusub; /* pointer to the beginning of each row of U */
int_t *usub; /* compressed U subscripts, stored by rows */
int_t szUsub; /* current max size of usub */
int_t *supno_loc;
int_t *xsup_beg_loc;
int_t *xsup_end_loc;
int_t nvtcs_loc; /* number of local vertices */
int_t *globToLoc; /* global to local indexing */
int_t maxNvtcsPProc; /* max number of vertices on the processors */
} Pslu_freeable_t;
/*! \brief
*
* <pre>
*-- The structures are determined by symbfact_dist and not used thereafter.
*
* (xlsub,lsub): lsub[*] contains the compressed subscript of L, as described above
* for Pslu_freeable_t. This structure is used internally in symbfact_dist.
* (xusub,usub): usub[*] contains the compressed subscript of U, as described above
* for Pslu_freeable_t. This structure is used internally in symbfact_dist.
*
* (xlsubPr,lsubPr): contains the pruned structure of the graph of
* L, stored by rows as a linked list.
* xlsubPr[j] points to the starting location of the j-th
* row in lsub[*].
* Storage: original row subscripts.
* It contains the structure corresponding to one node in the sep_tree.
* In each independent domain formed by x vertices, xlsubPr is of size x.
* Allocated and freed during domain_symbolic.
* For the other nodes in the level tree, formed by a maximum of
* maxNvtcsNds_loc, xlsubPr is of size maxNvtcsNds_loc.
* Allocated after domain_symbolic, freed at the end of symbolic_dist
* routine.
* (xusubPr,usubPr): contains the pruned structure of the graph of
* U, stored by columns as a linked list. Similar to (xlsubPr,lsubPr),
* except that it is column oriented.
*
* This is allocated during symbolic factorization symbfact_dist.
* </pre>
*/
typedef struct {
int_t *xlsubPr; /* pointer to pruned structure of L */
int_t *lsubPr; /* pruned structure of L */
int_t szLsubPr; /* size of lsubPr array */
int_t indLsubPr; /* current index in lsubPr */
int_t *xusubPr; /* pointer to pruned structure of U */
int_t *usubPr; /* pruned structure of U */
int_t szUsubPr; /* size of usubPr array */
int_t indUsubPr; /* current index in usubPr */
int_t *xlsub_rcvd;
int_t *xlsub; /* pointer to structure of L, stored by columns */
int_t *lsub; /* structure of L, stored by columns */
int_t szLsub; /* current max size of lsub */
int_t nextl; /* pointer to current computation in lsub */
int_t *xusub_rcvd; /* */
int_t *xusub; /* pointer to structure of U, stored by rows */
int_t *usub; /* structure of U, stored by rows */
int_t szUsub; /* current max size of usub */
int_t nextu; /* pointer to current computation in usub */
int_t *cntelt_vtcs; /* size of column/row for each vertex */
int_t *cntelt_vtcsA_lvl; /* size of column/row of A for each vertex at the
current level */
LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */
int_t no_expand; /* Number of memory expansions */
int_t no_expand_pr; /* Number of memory expansions of the pruned structures */
int_t no_expcp; /* Number of memory expansions due to the right looking
overestimation approach */
} Llu_symbfact_t;
/*! \brief Local information on vertices distribution */
typedef struct {
int_t maxSzBlk; /* Max no of vertices in a block */
int_t maxNvtcsNds_loc; /* Max number of vertices of a node distributed on one
processor. The maximum is computed among all the nodes
of the sep arator tree and among all the processors */
int_t maxNeltsVtx; /* Max number of elements of a vertex,
that is condisering that the matrix is
dense */
int_t nblks_loc; /* Number of local blocks */
int_t *begEndBlks_loc; /* Begin and end vertex of each local block.
Array of size 2 * nblks_loc */
int_t curblk_loc; /* Index of current block in the level under computation */
int_t nvtcs_loc; /* Number of local vertices distributed on a processor */
int_t nvtcsLvl_loc; /* Number of local vertices for current
level under computation */
int filledSep; /* determines if curent or all separators are filled */
int_t nnz_asup_loc; /* Number of nonzeros in asup not yet consumed. Used during
symbolic factorization routine to determine how much
of xusub, usub is still used to store the input matrix AS */
int_t nnz_ainf_loc; /* Number of nonzeros in ainf. Similar to nnz_asup_loc. */
int_t xusub_nextLvl; /* Pointer to usub of the next level */
int_t xlsub_nextLvl; /* Pointer to lsub of the next level */
int_t fstVtx_nextLvl; /* First vertex of the next level */
} vtcsInfo_symbfact_t;
/*! \brief Structure used for redistributing A for the symbolic factorization algorithm */
typedef struct {
int_t *x_ainf; /* pointers to columns of Ainf */
int_t *ind_ainf; /* column indices of Ainf */
int_t *x_asup; /* pointers to rows of Asup */
int_t *ind_asup; /* row indices of Asup */
} matrix_symbfact_t;
typedef struct {
int_t *rcv_interLvl; /* from which processors iam receives data */
int_t *snd_interLvl; /* to which processors iam sends data */
int_t *snd_interSz; /* size of data to be send */
int_t *snd_LinterSz; /* size of data in L part to be send */
int_t *snd_vtxinter; /* first vertex from where to send data */
/* inter level data structures */
int_t *snd_intraLvl; /* to which processors iam sends data */
int_t snd_intraSz; /* size of data to send */
int_t snd_LintraSz; /* size of data to send */
int_t *rcv_intraLvl; /* from which processors iam receives data */
int_t *rcv_buf; /* buffer to receive data */
int_t rcv_bufSz; /* size of the buffer to receive data */
int_t *snd_buf; /* buffer to send data */
int_t snd_bufSz; /* size of the buffer to send data */
int_t *ptr_rcvBuf; /* pointer to rcv_buf, the buffer to receive data */
} comm_symbfact_t;
/* relaxation parameters used in the algorithms - for future release */
/*! \brief statistics collected during parallel symbolic factorization */
typedef struct {
int_t fill_par; /* Estimation of fill. It corresponds to sp_ienv_dist(6) */
float relax_seps; /* relaxation parameter -not used in this version */
float relax_curSep; /* relaxation parameter -not used in this version */
float relax_gen; /* relaxation parameter -not used in this version */
/* number of operations performed during parallel symbolic factorization */
float nops;
/* no of dense current separators per proc */
int_t nDnsCurSep;
/* no of dense separators up per proc */
int_t nDnsUpSeps;
float no_shmSnd; /* Number of auxiliary messages for send data */
float no_msgsSnd; /* Number of messages sending data */
int_t maxsz_msgSnd; /* Max size of messages sending data */
float sz_msgsSnd; /* Average size of messages sending data */
float no_shmRcvd; /* Number of auxiliary messages for rcvd data */
float no_msgsRcvd; /* Number of messages receiving data */
int_t maxsz_msgRcvd;/* Max size of messages receiving data */
float sz_msgsRcvd; /* Average size of messages receiving data */
float no_msgsCol; /* Number of messages sent for estimating size
of rows/columns, setup information
interLvl_symbfact, */
int_t maxsz_msgCol; /* Average size of messages counted in
no_msgsCol */
float sz_msgsCol; /* Max size of messages counted in no_msgsCol */
/* statistics on fill-in */
float fill_pelt[6];
/*
0 - average fill per elt added during right-looking factorization
1 - max fill per elt added during right-looking factorization
2 - number vertices modified during right-looking factorization
3 - average fill per elt
4 - max fill per elt
5 - number vertices computed in upper levels of separator tree
*/
/* Memory usage */
int_t estimLSz; /* size of lsub due to right looking overestimation */
int_t estimUSz; /* size of usub due to right looking overestimation */
int_t maxSzLPr; /* maximum size of pruned L */
int_t maxSzUPr; /* maximum size of pruned U */
int_t maxSzBuf; /* maximum size of the send and receive buffers */
int_t szDnsSep; /* size of memory used when there are dense separators */
float allocMem; /* size of the total memory allocated (in bytes) */
} psymbfact_stat_t;
/* MACROS */
/*
Macros for comptuting the owner of a vertex and the local index
corresponding to a vertex
*/
#define OWNER(x) ((x) / maxNvtcsPProc)
#define LOCAL_IND(x) ((x) % maxNvtcsPProc)
/* Macros for computing the available memory in lsub, usub */
#define MEM_LSUB(Llu, VInfo) (Llu->szLsub - VInfo->nnz_ainf_loc)
#define MEM_USUB(Llu, VInfo) (Llu->szUsub - VInfo->nnz_asup_loc)
#define tag_interLvl 2
#define tag_interLvl_LData 0
#define tag_interLvl_UData 1
#define tag_intraLvl_szMsg 1000
#define tag_intraLvl_LData 1001
#define tag_intraLvl_UData 1002
/* tag_intraLvl has to be the last tag number */
#define tag_intraLvl 1003
/*
* Index of diagonal element, no of elements preceding each column/row
* of L/U send to another processor
*/
#define DIAG_IND 0
#define NELTS_IND 1
#define RCVD_IND 2
#define SUCCES_RET 0 /* successful return from a routine */
#define ERROR_RET 1 /* error return code from a routine */
#define FILLED_SEP 2 /* the current separator is dense */
#define FILLED_SEPS 3 /* all the separators situated on the path from the current
separator to the root separator are dense */
/* Code for the type of the memory to expand */
#define USUB_PR 0
#define LSUB_PR 1
/* Sherry: the following are already defined in superlu_enum_const.h
#define USUB 0
#define LSUB 1
*/
/*
* Code for the type of computation - right looking (RL_SYMB); left
* looking (LL_SYMB); symbolic factorization of an independent domain
* (DOMAIN_SYMB); current separator is dense (DNS_CURSEP); all the
* separators from the current one to the root of the tree are dense
* (DNS_UPSEPS).
*/
#define RL_SYMB 0
#define DOMAIN_SYMB 1
#define LL_SYMB 2
#define DNS_UPSEPS 3
#define DNS_CURSEP 4
#endif /* __SUPERLU_DIST_PSYMBFACT */
|