/usr/include/superlu-dist/superlu_defs.h is in libsuperlu-dist-dev 5.3.0+dfsg1-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 | /*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required
approvals from U.S. Dept. of Energy)
All rights reserved.
The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/
/*! @file
* \brief Definitions which are precision-neutral
*
* <pre>
* -- Distributed SuperLU routine (version 5.2) --
* Lawrence Berkeley National Lab, Univ. of California Berkeley.
* November 1, 2007
*
* Modified:
* February 20, 2008
* October 11, 2014
* </pre>
*/
#ifndef __SUPERLU_DEFS /* allow multiple inclusions */
#define __SUPERLU_DEFS
/*
* File name: superlu_defs.h
* Purpose: Definitions which are precision-neutral
*/
#ifdef _CRAY
#include <fortran.h>
#endif
#ifdef _OPENMP
#include <omp.h>
#endif
#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
#include <string.h>
/* Following is for vtune */
#if 0
#include <ittnotify.h>
#define USE_VTUNE
#endif
/*************************************************************************
* Constants
**************************************************************************/
/*
 * Compile-time version identification.
 * Client code that must also build against older releases of SuperLU_DIST
 * can select an interface at compile time, for example:
 *     #if defined(SUPERLU_DIST_MAJOR_VERSION) && SUPERLU_DIST_MAJOR_VERSION == 5
 *         use the new interface
 *     #else
 *         use the old interface
 *     #endif
 * Versions 4.x and earlier do not #define any version numbers, so on those
 * releases the defined() test fails and the #else branch is taken.
 */
#define SUPERLU_DIST_MAJOR_VERSION 5
#define SUPERLU_DIST_MINOR_VERSION 3
#define SUPERLU_DIST_PATCH_VERSION 0
#define SUPERLU_DIST_RELEASE_DATE "January 28, 2018"
#include "superlu_dist_config.h"
/*
 * Define my integer size int_t, together with the matching MPI datatype
 * (mpi_int_t) and printf conversion (IFMT):
 *   _CRAY     : short,         MPI_SHORT          (no IFMT is defined here)
 *   _LONGINT  : long long int, MPI_LONG_LONG_INT, "%lld"
 *   otherwise : int,           MPI_INT,           "%8d"
 * NOTE(review): the _CRAY branch leaves IFMT undefined -- confirm whether
 * a definition is supplied elsewhere for code that prints with IFMT.
 */
#ifdef _CRAY
typedef short int_t;
/*#undef int Revert back to int of default size. */
#define mpi_int_t MPI_SHORT
#elif defined (_LONGINT)
typedef long long int int_t;
#define mpi_int_t MPI_LONG_LONG_INT
#define IFMT "%lld"
#else /* Default */
typedef int int_t;
#define mpi_int_t MPI_INT
#define IFMT "%8d"
#endif
#include "superlu_enum_consts.h"
#include "Cnames.h"
#include "supermatrix.h"
#include "util_dist.h"
#include "psymbfact.h"
/* Compile-time switch, defined unconditionally here. The code it guards is
 * not in this header; presumably it selects an in-library integer sort in
 * place of qsort() -- confirm against the users of ISORT. */
#define ISORT /* NOTE: qsort() has bug on Mac */
/***********************************************************************
* Constants
***********************************************************************/
/*
* For each block column of L, the index[] array contains both the row
* subscripts and the integers describing the size of the blocks.
* The organization of index[] looks like:
*
* [ BLOCK COLUMN HEADER (size BC_HEADER)
* number of blocks
* number of row subscripts, i.e., LDA of nzval[]
* BLOCK 0 <----
* BLOCK DESCRIPTOR (of size LB_DESCRIPTOR) |
* block number (global) |
* number of full rows in the block |
* actual row subscripts |
* BLOCK 1 | Repeat ...
* BLOCK DESCRIPTOR | number of blocks
* block number (global) |
* number of full rows in the block |
* actual row subscripts |
* . |
* . |
* . <----
* ]
*
* For each block row of U, the organization of index[] looks like:
*
* [ BLOCK ROW HEADER (of size BR_HEADER)
* number of blocks
* number of entries in nzval[]
* number of entries in index[]
* BLOCK 0 <----
* BLOCK DESCRIPTOR (of size UB_DESCRIPTOR) |
* block number (global) |
* number of nonzeros in the block |
* actual fstnz subscripts |
* BLOCK 1 | Repeat ...
* BLOCK DESCRIPTOR | number of blocks
* block number (global) |
* number of nonzeros in the block |
* actual fstnz subscripts |
* . |
* . |
* . <----
* ]
*
*/
/* Sizes of the fixed-length pieces of index[] laid out in the comment above,
 * counted in index[] entries. */
#define BC_HEADER 2 /* block column header of L: number of blocks, number of row subscripts */
#define LB_DESCRIPTOR 2 /* L block descriptor: block number (global), number of full rows */
#define BR_HEADER 3 /* block row header of U: number of blocks, entries in nzval[], entries in index[] */
#define UB_DESCRIPTOR 2 /* U block descriptor: block number (global), number of nonzeros */
#define NBUFFERS 5 /* NOTE(review): purpose is not stated in this header -- confirm */
/*
* Communication tags
*/
/*
 * Return the mpi_tag assuming 5 pairs of communications and MPI_TAG_UB >= 5.
 * For each supernodal column "num", the five communications are:
 *   0,1: for sending L to "right"
 *   2,3: for sending off-diagonal blocks of U "down"
 *   4  : for sending the diagonal block down (in pxgstrf2)
 *
 * id  - which of the five communications (0..4)
 * num - supernodal column number
 *
 * The result is reduced modulo tag_ub, which is NOT a macro parameter: a
 * variable named tag_ub must be in scope wherever the macro is expanded.
 * Both arguments are parenthesized so that an expression argument (for
 * example "a & b") is not regrouped by operator precedence in the expansion.
 */
#define SLU_MPI_TAG(id,num) ( (5*(num)+(id)) % tag_ub )
/* For numeric factorization. */
/* The 10000 alternative is compiled out, so NTAGS is INT_MAX (<limits.h>). */
#if 0
#define NTAGS 10000
#else
#define NTAGS INT_MAX
#endif
/* Tag values 10..15; the names suggest one tag per kind of U/L data
 * exchanged (row, subscripts, values, diagonal) -- confirm at the users. */
#define UjROW 10
#define UkSUB 11
#define UkVAL 12
#define LkSUB 13
#define LkVAL 14
#define LkkDIAG 15
/* For triangular solves. */
#define XK_H 2 /* The header preceding each X block. */
#define LSUM_H 2 /* The header preceding each MOD block. */
/* Tag values 20..23 used by the triangular solves. */
#define GSUM 20
#define Xk 21
#define Yk 22
#define LSUM 23
/*
 * Communication scopes: selectors naming which group of processes takes
 * part in an operation (presumably the whole grid, one process column, or
 * one process row -- confirm at the users).
 */
#define COMM_ALL 100
#define COMM_COLUMN 101
#define COMM_ROW 102
/*
 * Matrix distribution for sparse matrix-vector multiplication
 */
#define SUPER_LINEAR 11
#define SUPER_BLOCK 12
/*
 * No of marker arrays used in the symbolic factorization, each of size n
 */
#define NO_MARKER 3
/***********************************************************************
* Macros
***********************************************************************/
/*
 * NOTE(review): IAM is kept exactly as it was. Its expansion is a brace
 * block ending in "rank}" with no terminating semicolon, so it cannot be
 * expanded successfully as written; confirm it has no users before
 * repairing or removing it.
 */
#define IAM(comm) { int rank; MPI_Comm_rank ( comm, &rank ); rank};
/*
 * Process-grid arithmetic. "grid" is a pointer expression to an object with
 * nprow / npcol members. Every macro argument is parenthesized so that
 * expression arguments such as "&g" or "a & b" expand as one operand.
 */
#define MYROW(iam,grid) ( (iam) / (grid)->npcol ) /* row coordinate of process iam */
#define MYCOL(iam,grid) ( (iam) % (grid)->npcol ) /* column coordinate of process iam */
/*
 * The next three macros read arrays named supno[] and xsup[] from the scope
 * in which they are expanded; the arrays are not macro parameters.
 */
#define BlockNum(i) ( supno[i] )
#define FstBlockC(bnum) ( xsup[bnum] )
#define SuperSize(bnum) ( xsup[(bnum)+1]-xsup[bnum] )
#define LBi(bnum,grid) ( (bnum)/(grid)->nprow )/* Global to local block rowwise */
#define LBj(bnum,grid) ( (bnum)/(grid)->npcol )/* Global to local block columnwise*/
#define PROW(bnum,grid) ( (bnum) % (grid)->nprow )
#define PCOL(bnum,grid) ( (bnum) % (grid)->npcol )
#define PNUM(i,j,grid) ( (i)*(grid)->npcol + (j) ) /* Process number at coord(i,j) */
/* Integer quotient a/b rounded up; both arguments are evaluated more than once. */
#define CEILING(a,b) ( ((a)%(b)) ? ((a)/(b) + 1) : ((a)/(b)) )
/* For triangular solves */
/*
 * Loop header that runs i over 0 .. nrhs-1. nrhs is read from the scope in
 * which the macro is expanded; the loop body follows the macro.
 */
#define RHS_ITERATE(i) \
    for ((i) = 0; (i) < nrhs; ++(i))
/*
 * Offsets computed from ilsum[i], nrhs and the per-block header sizes XK_H /
 * LSUM_H; ilsum[] and nrhs are read from the expansion scope. The whole
 * expansion is parenthesized so the result can be used as one operand, e.g.
 * "2 * X_BLK(k)" or "n - LSUM_BLK(k)".
 */
#define X_BLK(i) \
    ( ilsum[i] * nrhs + ((i)+1) * XK_H )
#define LSUM_BLK(i) \
    ( ilsum[i] * nrhs + ((i)+1) * LSUM_H )
/* Route the generic timer name to the SuperLU_DIST-specific symbol. */
#define SuperLU_timer_ SuperLU_timer_dist_
/*
 * Base-2 logarithm of x via log10(); the caller must include <math.h>.
 * x is parenthesized so that the cast applies to the whole argument
 * (LOG2(n/2) converts the quotient, not just n).
 */
#define LOG2(x) (log10((double) (x)) / log10(2.0))
/*
 * Tracing hooks: when VAMPIR >= 1 they expand to calls of VT_traceon() /
 * VT_traceoff(); otherwise they expand to nothing, so they can be left in
 * the code unconditionally.
 */
#if ( VAMPIR>=1 )
#define VT_TRACEON VT_traceon()
#define VT_TRACEOFF VT_traceoff()
#else
#define VT_TRACEON
#define VT_TRACEOFF
#endif
/*
 * Support Windows: SUPERLU_DIST_EXPORT is the symbol-visibility decoration.
 * Unless the macro is already defined:
 *   - MSVC nonzero and SUPERLU_DIST_EXPORTS defined -> __declspec(dllexport)
 *   - MSVC nonzero, SUPERLU_DIST_EXPORTS not defined -> __declspec(dllimport)
 *   - otherwise                                      -> empty
 */
#ifndef SUPERLU_DIST_EXPORT
#if MSVC
#ifdef SUPERLU_DIST_EXPORTS
#define SUPERLU_DIST_EXPORT __declspec(dllexport)
#else
#define SUPERLU_DIST_EXPORT __declspec(dllimport)
#endif /* SUPERLU_DIST_EXPORTS */
#else
#define SUPERLU_DIST_EXPORT
#endif /* MSVC */
#endif /* SUPERLU_DIST_EXPORT */
/***********************************************************************
* New data types
***********************************************************************/
/*
* Define the 2D mapping of matrix blocks to process grid.
*
* Process grid:
* Processes are numbered (0 : P-1).
* P = Pr x Pc, where Pr, Pc are the number of process rows and columns.
* (pr,pc) is the coordinate of IAM; 0 <= pr < Pr, 0 <= pc < Pc.
*
* Matrix blocks:
* Matrix is partitioned according to supernode partitions, both
* column and row-wise.
* The k-th block columns (rows) contains columns (rows) (s:t), where
* s=xsup[k], t=xsup[k+1]-1.
* Block A(I,J) contains
* rows from (xsup[I]:xsup[I+1]-1) and
* columns from (xsup[J]:xsup[J+1]-1)
*
* Mapping of matrix entry (i,j) to matrix block (I,J):
* (I,J) = ( supno[i], supno[j] )
*
* Mapping of matrix block (I,J) to process grid (pr,pc):
* (pr,pc) = ( MOD(I,NPROW), MOD(J,NPCOL) )
*
* (xsup[nsupers],supno[n]) are replicated on all processors.
*
*/
/*-- Communication subgroup: a communicator together with its size and this
 *   process's number. gridinfo_t holds one of these for the row scope and
 *   one for the column scope. */
typedef struct {
MPI_Comm comm; /* MPI communicator */
int Np; /* number of processes */
int Iam; /* my process number */
} superlu_scope_t;
/*-- Process grid definition: an nprow x npcol arrangement of processes with
 *   a communicator for the grid plus row and column subgroups. */
typedef struct {
MPI_Comm comm; /* MPI communicator */
superlu_scope_t rscp; /* process scope in rowwise, horizontal direction */
superlu_scope_t cscp; /* process scope in columnwise, vertical direction */
int iam; /* my process number in this scope */
int_t nprow; /* number of process rows */
int_t npcol; /* number of process columns */
} gridinfo_t;
/*
*-- The structures are determined by SYMBFACT and used thereafter.
*
* (xsup,supno) describes mapping between supernode and column:
* xsup[s] is the leading column of the s-th supernode.
* supno[i] is the supernode no to which column i belongs;
* e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12)
* xsup 0 1 2 4 7 12
* Note: dfs will be performed on supernode rep. relative to the new
* row pivoting ordering
*
* This is allocated during symbolic factorization SYMBFACT.
*/
/* Supernode <-> column mapping, allocated during symbolic factorization. */
typedef struct {
int_t *xsup; /* xsup[s] is the leading column of the s-th supernode */
int_t *supno; /* supno[i] is the supernode no to which column i belongs */
} Glu_persist_t;
/*
*-- The structures are determined by SYMBFACT and used by DDISTRIBUTE.
*
* (xlsub,lsub): lsub[*] contains the compressed subscript of
* rectangular supernodes; xlsub[j] points to the starting
* location of the j-th column in lsub[*]. Note that xlsub
* is indexed by column.
* Storage: original row subscripts
*
* During the course of sparse LU factorization, we also use
* (xlsub,lsub) for the purpose of symmetric pruning. For each
* supernode {s,s+1,...,t=s+r} with first column s and last
* column t, the subscript set
* lsub[j], j=xlsub[s], .., xlsub[s+1]-1
* is the structure of column s (i.e. structure of this supernode).
* It is used for the storage of numerical values.
* Furthermore,
* lsub[j], j=xlsub[t], .., xlsub[t+1]-1
* is the structure of the last column t of this supernode.
* It is for the purpose of symmetric pruning. Therefore, the
* structural subscripts can be rearranged without making physical
* interchanges among the numerical values.
*
* However, if the supernode has only one column, then we
* only keep one set of subscripts. For any subscript interchange
* performed, similar interchange must be done on the numerical
* values.
*
* The last column structures (for pruning) will be removed
* after the numerical LU factorization phase.
*
* (xusub,usub): xusub[i] points to the starting location of column i
* in usub[]. For each U-segment, only the row index of first nonzero
* is stored in usub[].
*
* Each U column consists of a number of full segments. Each full segment
* starts from a leading nonzero, running up to the supernode (block)
* boundary. (Recall that the column-wise supernode partition is also
* imposed on the rows.) Because the segment is full, we don't store all
* the row indices. Instead, only the leading nonzero index is stored.
* The rest can be found together with xsup/supno pair.
* For example,
* xusub[j+1] - xusub[j] = number of segments in column j.
* for any i in usub[],
* supno[i] = block number in which i belongs to
* xsup[supno[i]+1] = first row of the next block
* The nonzeros of this segment are:
* i, i+1 ... xsup[supno[i]+1]-1 (only i is stored in usub[])
*
*/
/* Compressed subscript structures of L and U produced by the symbolic
 * factorization; see the layout description in the comment above. */
typedef struct {
int_t *lsub; /* compressed L subscripts */
int_t *xlsub; /* xlsub[j] points to the start of column j in lsub[] */
int_t *usub; /* compressed U subscripts */
int_t *xusub; /* xusub[i] points to the start of column i in usub[] */
int_t nzlmax; /* current max size of lsub */
int_t nzumax; /* current max size of usub */
LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */
int_t *llvl; /* keep track of level in L for level-based ILU */
int_t *ulvl; /* keep track of level in U for level-based ILU */
} Glu_freeable_t;
/*
*-- The structure used to store matrix A of the linear system and
* several vectors describing the transformations done to matrix A.
*
* A (SuperMatrix*)
* Matrix A in A*X=B, of dimension (A->nrow, A->ncol).
* The number of linear equations is A->nrow. The type of A can be:
* Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE.
*
* DiagScale (DiagScale_t)
* Specifies the form of equilibration that was done.
* = NOEQUIL: No equilibration.
* = ROW: Row equilibration, i.e., A was premultiplied by diag(R).
* = COL: Column equilibration, i.e., A was postmultiplied by diag(C).
* = BOTH: Both row and column equilibration, i.e., A was replaced
* by diag(R)*A*diag(C).
*
* R double*, dimension (A->nrow)
* The row scale factors for A.
* If DiagScale = ROW or BOTH, A is multiplied on the left by diag(R).
* If DiagScale = NOEQUIL or COL, R is not defined.
*
* C double*, dimension (A->ncol)
* The column scale factors for A.
* If DiagScale = COL or BOTH, A is multiplied on the right by diag(C).
* If DiagScale = NOEQUIL or ROW, C is not defined.
*
* perm_r (int_t*) dimension (A->nrow)
* Row permutation vector which defines the permutation matrix Pr,
* perm_r[i] = j means row i of A is in position j in Pr*A.
*
* perm_c (int_t*) dimension (A->ncol)
* Column permutation vector, which defines the
* permutation matrix Pc; perm_c[i] = j means column i of A is
* in position j in A*Pc.
*
*/
/* Scaling and permutation vectors describing the transformations applied
 * to matrix A. */
typedef struct {
DiagScale_t DiagScale; /* form of equilibration done: NOEQUIL, ROW, COL or BOTH */
double *R; /* row scale factors, dimension (A->nrow); not defined if DiagScale is NOEQUIL or COL */
double *C; /* column scale factors, dimension (A->ncol); not defined if DiagScale is NOEQUIL or ROW */
int_t *perm_r; /* row permutation: perm_r[i] = j means row i of A is in position j in Pr*A */
int_t *perm_c; /* column permutation: perm_c[i] = j means column i of A is in position j in A*Pc */
} ScalePermstruct_t;
/*-- Data structure for redistribution of B and X --
 *   Member order is part of the layout and is unchanged; each member is
 *   declared on its own line. Buffer ownership and sizes are not visible
 *   in this header. */
typedef struct {
    int   *B_to_X_SendCnt;
    int   *X_to_B_SendCnt;
    int   *ptr_to_ibuf;
    int   *ptr_to_dbuf;

    /* the following are needed in the hybrid solver PDSLin */
    int   *X_to_B_iSendCnt;
    int   *X_to_B_vSendCnt;
    int   *disp_ibuf;
    int_t *send_ibuf;
    void  *send_dbuf;

    int_t  x2b;
    int_t  b2x;
    int_t *send_ibuf2;
    int_t *recv_ibuf2;
    void  *send_dbuf2;
    void  *recv_dbuf2;
} pxgstrs_comm_t;
/*
*-- This contains the options used to control the solution process.
*
* Fact (fact_t)
* Specifies whether or not the factored form of the matrix
* A is supplied on entry, and if not, how the matrix A should
* be factorized.
* = DOFACT: The matrix A will be factorized from scratch, and the
* factors will be stored in L and U.
* = SamePattern: The matrix A will be factorized assuming
* that a factorization of a matrix with the same sparsity
* pattern was performed prior to this one. Therefore, this
* factorization will reuse column permutation vector
* ScalePermstruct->perm_c and the column elimination tree
* LUstruct->etree.
* = SamePattern_SameRowPerm: The matrix A will be factorized
* assuming that a factorization of a matrix with the same
* sparsity pattern and similar numerical values was performed
* prior to this one. Therefore, this factorization will reuse
* both row and column scaling factors R and C, both row and
* column permutation vectors perm_r and perm_c, and the
* data structure set up from the previous symbolic factorization.
* = FACTORED: On entry, L, U, perm_r and perm_c contain the
* factored form of A. If DiagScale is not NOEQUIL, the matrix
* A has been equilibrated with scaling factors R and C.
*
* Equil (yes_no_t)
* Specifies whether to equilibrate the system (scale A's row and
* columns to have unit norm).
*
* ColPerm (colperm_t)
* Specifies what type of column permutation to use to reduce fill.
* = NATURAL: use the natural ordering
* = MMD_ATA: use minimum degree ordering on structure of A'*A
* = MMD_AT_PLUS_A: use minimum degree ordering on structure of A'+A
* = COLAMD: use approximate minimum degree column ordering
* = MY_PERMC: use the ordering specified by the user
*
* Trans (trans_t)
* Specifies the form of the system of equations:
* = NOTRANS: A * X = B (No transpose)
* = TRANS: A**T * X = B (Transpose)
* = CONJ: A**H * X = B (Conjugate transpose)
*
* IterRefine (IterRefine_t)
* Specifies whether to perform iterative refinement.
* = NO: no iterative refinement
* = SINGLE: perform iterative refinement in single precision
* = DOUBLE: perform iterative refinement in double precision
* = EXTRA: perform iterative refinement in extra precision
*
* DiagPivotThresh (double, in [0.0, 1.0]) (only for serial SuperLU)
* Specifies the threshold used for a diagonal entry to be an
* acceptable pivot.
*
* SymmetricMode (yes_no_t) (only for serial SuperLU)
* Specifies whether to use symmetric mode. Symmetric mode gives
* preference to diagonal pivots, and uses an (A'+A)-based column
* permutation algorithm.
*
* PivotGrowth (yes_no_t) (only for serial SuperLU)
* Specifies whether to compute the reciprocal pivot growth.
*
* ConditionNumber (yes_no_t) (only for serial SuperLU)
* Specifies whether to compute the reciprocal condition number.
*
* RowPerm (rowperm_t) (only for SuperLU_DIST or ILU in serial SuperLU)
* Specifies whether to permute rows of the original matrix.
* = NO: not to permute the rows
* = LargeDiag: make the diagonal large relative to the off-diagonal
* = MY_PERMR: use the permutation given by the user
*
* ILU_DropRule (int) (only for serial SuperLU)
* Specifies the dropping rule:
* = DROP_BASIC: Basic dropping rule, supernodal based ILUTP(tau).
* = DROP_PROWS: Supernodal based ILUTP(p,tau), p = gamma * nnz(A)/n.
* = DROP_COLUMN: Variant of ILUTP(p,tau), for j-th column,
* p = gamma * nnz(A(:,j)).
* = DROP_AREA: Variation of ILUTP, for j-th column, use
* nnz(F(:,1:j)) / nnz(A(:,1:j)) to control memory.
* = DROP_DYNAMIC: Modify the threshold tau during factorization:
* If nnz(L(:,1:j)) / nnz(A(:,1:j)) > gamma
* tau_L(j) := MIN(tau_0, tau_L(j-1) * 2);
* Otherwise
* tau_L(j) := MAX(tau_0, tau_L(j-1) / 2);
* tau_U(j) uses the similar rule.
* NOTE: the thresholds used by L and U are separate.
* = DROP_INTERP: Compute the second dropping threshold by
* interpolation instead of sorting (default).
* In this case, the actual fill ratio is not
* guaranteed to be smaller than gamma.
* Note: DROP_PROWS, DROP_COLUMN and DROP_AREA are mutually exclusive.
* ( Default: DROP_BASIC | DROP_AREA )
*
* ILU_DropTol (double) (only for serial SuperLU)
* numerical threshold for dropping.
*
* ILU_FillFactor (double) (only for serial SuperLU)
* Gamma in the secondary dropping.
*
* ILU_Norm (norm_t) (only for serial SuperLU)
* Specify which norm to use to measure the row size in a
* supernode: infinity-norm, 1-norm, or 2-norm.
*
* ILU_FillTol (double) (only for serial SuperLU)
* numerical threshold for zero pivot perturbation.
*
* ILU_MILU (milu_t) (only for serial SuperLU)
* Specifies which version of MILU to use.
*
* ILU_MILU_Dim (double)
* Dimension of the PDE if available.
*
* ReplaceTinyPivot (yes_no_t) (only for SuperLU_DIST)
* Specifies whether to replace the tiny diagonals by
* sqrt(epsilon)*||A|| during LU factorization.
*
* SolveInitialized (yes_no_t) (only for SuperLU_DIST)
* Specifies whether the initialization has been performed to the
* triangular solve.
*
* RefineInitialized (yes_no_t) (only for SuperLU_DIST)
* Specifies whether the initialization has been performed to the
* sparse matrix-vector multiplication routine needed in iterative
* refinement.
*
* num_lookaheads (int) (only for SuperLU_DIST)
* Specifies the number of levels in the look-ahead factorization
*
* lookahead_etree (yes_no_t) (only for SuperLU_DIST)
* Specifies whether to use the elimination tree computed from the
* serial symbolic factorization to perform scheduling.
*
* SymPattern (yes_no_t) (only for SuperLU_DIST)
* Gives the scheduling algorithm a hint whether the matrix
* would have symmetric pattern.
*
*/
/* Options controlling the solution process; each field is described in the
 * comment block above. Member order is unchanged. */
typedef struct {
    fact_t       Fact;              /* whether/how A is to be factorized */
    yes_no_t     Equil;             /* equilibrate the system? */
    colperm_t    ColPerm;           /* column permutation used to reduce fill */
    trans_t      Trans;             /* form of the system of equations */
    IterRefine_t IterRefine;        /* iterative refinement mode */
    double       DiagPivotThresh;   /* (only for serial SuperLU) */
    yes_no_t     SymmetricMode;     /* (only for serial SuperLU) */
    yes_no_t     PivotGrowth;       /* (only for serial SuperLU) */
    yes_no_t     ConditionNumber;   /* (only for serial SuperLU) */
    rowperm_t    RowPerm;           /* whether to permute rows of the original matrix */
    int          ILU_DropRule;      /* dropping rule (only for serial SuperLU) */
    double       ILU_DropTol;       /* threshold for dropping */
    double       ILU_FillFactor;    /* gamma in the secondary dropping */
    norm_t       ILU_Norm;          /* infinity-norm, 1-norm, or 2-norm */
    double       ILU_FillTol;       /* threshold for zero pivot perturbation */
    milu_t       ILU_MILU;          /* which version of MILU to use */
    double       ILU_MILU_Dim;      /* Dimension of PDE (if available) */
    yes_no_t     ParSymbFact;       /* NOTE(review): not described above; presumably
                                       selects parallel symbolic factorization -- confirm */
    yes_no_t     ReplaceTinyPivot;  /* used in SuperLU_DIST */
    yes_no_t     SolveInitialized;  /* triangular solve already initialized? */
    yes_no_t     RefineInitialized; /* sparse mat-vec for refinement already initialized? */
    yes_no_t     PrintStat;         /* NOTE(review): not described above; presumably
                                       enables statistics printing -- confirm */
    int          nnzL;              /* used to store nnzs for now */
    int          nnzU;              /* used to store nnzs for now */
    int          num_lookaheads;    /* num of levels in look-ahead */
    yes_no_t     lookahead_etree;   /* use etree computed from the
                                       serial symbolic factorization */
    yes_no_t     SymPattern;        /* symmetric factorization */
} superlu_dist_options_t;
/* Memory-usage record. NOTE(review): units and exact meaning of the fields
 * are not stated in this header; the notes below are read off the names. */
typedef struct {
    float     for_lu;      /* presumably memory attributed to the LU factors */
    float     total;       /* presumably total memory used */
    int_t     expansions;  /* presumably a count of memory expansions */
    long long nnzL;        /* presumably nonzero count of L */
    long long nnzU;        /* presumably nonzero count of U */
} superlu_dist_mem_usage_t;
/*
*-- The new structures added in the hybrid CUDA + OpenMP + MPI code.
*/
/* Record of four int_t values kept per U block.
 * NOTE(review): the field notes below are inferred from the names only --
 * confirm against the code that fills this structure in. */
typedef struct {
int_t rukp; /* presumably an offset into the U nzval[] array */
int_t iukp; /* presumably an offset into the U index[] array */
int_t jb; /* presumably the block number */
int_t full_u_cols; /* presumably a count of full columns in the block */
} Ublock_info_t;
/* Record of three int_t values.
 * NOTE(review): the field notes below are inferred from the names only --
 * confirm against the code that fills this structure in. */
typedef struct {
int_t lptr; /* presumably an offset into an L index[] array */
int_t ib; /* presumably the block number */
int_t FullRow; /* presumably a count of full rows */
} Remain_info_t;
/* Node holding an (id, key) pair and an untyped pointer named next
 * (presumably the link to the following node -- confirm at the users). */
typedef struct {
    int   id;
    int   key;
    void *next;
} etree_node;
/* A pair of ints: ind followed by val (presumably an index and the value
 * it is ordered by -- confirm at the users). */
struct superlu_pair {
    int ind, val;
};
/**--------**/
/***********************************************************************
* Function prototypes
***********************************************************************/
#ifdef __cplusplus
extern "C" {
#endif
/* Options and process-grid routines. Grouping and purpose are read off the
 * routine names and argument types; the definitions are not in this header. */
extern void set_default_options_dist(superlu_dist_options_t *);
extern void superlu_gridinit(MPI_Comm, int_t, int_t, gridinfo_t *);
extern void superlu_gridmap(MPI_Comm, int_t, int_t, int_t [], int_t,
gridinfo_t *);
extern void superlu_gridexit(gridinfo_t *);
extern void print_options_dist(superlu_dist_options_t *);
extern void print_sp_ienv_dist(superlu_dist_options_t *);
/* SuperMatrix tear-down routines, one per storage format named in the
 * routine (presumably each releases the storage held by its argument --
 * confirm against the definitions). */
extern void Destroy_CompCol_Matrix_dist(SuperMatrix *);
extern void Destroy_SuperNode_Matrix_dist(SuperMatrix *);
extern void Destroy_SuperMatrix_Store_dist(SuperMatrix *);
extern void Destroy_CompCol_Permuted_dist(SuperMatrix *);
extern void Destroy_CompRowLoc_Matrix_dist(SuperMatrix *);
extern void Destroy_CompRow_Matrix_dist(SuperMatrix *);
/* Column ordering and elimination-tree routines (grouping read off the
 * names; the definitions are not in this header). */
extern void sp_colorder (superlu_dist_options_t*, SuperMatrix*, int_t*, int_t*,
SuperMatrix*);
extern int sp_symetree_dist(int_t *, int_t *, int_t *, int_t, int_t *);
extern int sp_coletree_dist (int_t *, int_t *, int_t *, int_t, int_t, int_t *);
extern void get_perm_c_dist(int_t, int_t, SuperMatrix *, int_t *);
extern void at_plus_a_dist(const int_t, const int_t, int_t *, int_t *,
int_t *, int_t **, int_t **);
/* The trailing underscore suggests a Fortran-style entry point; every
 * argument is passed by pointer. */
extern int genmmd_dist_(int_t *, int_t *, int_t *a,
int_t *, int_t *, int_t *, int_t *,
int_t *, int_t *, int_t *, int_t *, int_t *);
/* Broadcast helper and serial symbolic factorization routines (grouping
 * read off the names; the definitions are not in this header). */
extern void bcast_tree(void *, int, MPI_Datatype, int, int,
gridinfo_t *, int, int *);
extern int_t symbfact(superlu_dist_options_t *, int, SuperMatrix *, int_t *,
int_t *, Glu_persist_t *, Glu_freeable_t *);
extern int_t symbfact_SubInit(fact_t, void *, int_t, int_t, int_t, int_t,
Glu_persist_t *, Glu_freeable_t *);
extern int_t symbfact_SubXpand(int_t, int_t, int_t, MemType, int_t *,
Glu_freeable_t *);
extern int_t symbfact_SubFree(Glu_freeable_t *);
/* The two "long long int *" arguments are output-style pointers; by the
 * routine name they presumably receive nonzero counts -- confirm. */
extern void countnz_dist (const int_t, int_t *,
long long int *, long long int *,
Glu_persist_t *, Glu_freeable_t *);
extern long long int fixupL_dist (const int_t, const int_t *, Glu_persist_t *,
Glu_freeable_t *);
/* Tree post-ordering, machine-parameter, memory and block-navigation
 * helpers (grouping read off the names; definitions are not in this
 * header). */
extern int_t *TreePostorder_dist (int_t, int_t *);
extern float smach_dist(char *);
extern double dmach_dist(char *);
/* Allocation pair: a size_t-sized request returning void *, and the
 * matching release routine taking that pointer. */
extern void *superlu_malloc_dist (size_t);
extern void superlu_free_dist (void*);
/* int_t array allocators taking an element count (presumably
 * uninitialized vs. zero-filled, by analogy with malloc/calloc -- confirm). */
extern int_t *intMalloc_dist (int_t);
extern int_t *intCalloc_dist (int_t);
extern int_t mc64id_dist(int_t *);
extern void arrive_at_ublock (int_t, int_t *, int_t *, int_t *,
int_t *, int_t *, int_t, int_t,
int_t *, int_t *, int_t *, gridinfo_t *);
extern int_t estimate_bigu_size (int_t, int_t, int_t **, Glu_persist_t *,
gridinfo_t *, int_t *);
/* Auxiliary routines */
/*
 * SuperLU_timer_ is #define'd earlier in this header to SuperLU_timer_dist_.
 * It is declared with (void) so that this is a real prototype: in C an empty
 * parameter list leaves the arguments unchecked, whereas (void) makes a call
 * with arguments a compile-time error. (void) is equally valid in C++.
 */
extern double SuperLU_timer_ (void);
extern void superlu_abort_and_exit_dist(char *);
extern int_t sp_ienv_dist (int_t);
extern void ifill_dist (int_t *, int_t, int_t);
extern void super_stats_dist (int_t, int_t *);
/* Set-up / tear-down pair for a ScalePermstruct_t. */
extern void ScalePermstructInit(const int_t, const int_t,
                                ScalePermstruct_t *);
extern void ScalePermstructFree(ScalePermstruct_t *);
extern void get_diag_procs(int_t, Glu_persist_t *, gridinfo_t *, int_t *,
                           int_t **, int_t **);
extern int_t QuerySpace_dist(int_t, int_t, Glu_freeable_t *, superlu_dist_mem_usage_t *);
extern int xerr_dist (char *, int *);
extern void pxerr_dist (char *, gridinfo_t *, int_t);
/* Set-up, tear-down and printing of a SuperLUStat_t. */
extern void PStatInit(SuperLUStat_t *);
extern void PStatFree(SuperLUStat_t *);
extern void PStatPrint(superlu_dist_options_t *, SuperLUStat_t *, gridinfo_t *);
extern void log_memory(long long, SuperLUStat_t *);
extern void print_memorylog(SuperLUStat_t *, char *);
/* Three int * out-parameters; by the name they presumably receive the
 * major, minor and patch numbers -- confirm against the definition. */
extern int superlu_dist_GetVersionNumber(int *, int *, int *);
/* Prototypes for parallel symbolic factorization */
/* Takes two MPI_Comm pointers and a superlu_dist_mem_usage_t to fill in;
 * the meaning of the float result is not stated in this header. */
extern float symbfact_dist
(int, int, SuperMatrix *, int_t *, int_t *, int_t *, int_t *,
Pslu_freeable_t *, MPI_Comm *, MPI_Comm *, superlu_dist_mem_usage_t *);
/* Get the column permutation using parmetis */
extern float get_perm_c_parmetis
(SuperMatrix *, int_t *, int_t *, int, int,
int_t **, int_t **, gridinfo_t *, MPI_Comm *);
/* Auxiliary routines for memory expansions used during
the parallel symbolic factorization routine */
extern int_t psymbfact_LUXpandMem
(int_t, int_t, int_t, int_t, int_t, int_t, int_t, int_t,
Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *);
extern int_t psymbfact_LUXpand
(int_t, int_t, int_t, int_t, int_t *, int_t, int_t, int_t, int_t,
Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *);
extern int_t psymbfact_LUXpand_RL
(int_t, int_t, int_t, int_t, int_t, int_t,
Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *);
extern int_t psymbfact_prLUXpand
(int_t, int_t, int, Llu_symbfact_t *, psymbfact_stat_t *);
/*
 * Argument-less routines below are declared with (void) instead of an empty
 * parameter list, so that they are real prototypes in C and a call with
 * arguments is diagnosed. (void) is equally valid in C++.
 */
#ifdef GPU_ACC /* GPU related */
extern void gemm_division_cpu_gpu (int *, int *, int *, int,
                                   int, int, int *, int);
extern int_t get_cublas_nb (void);
extern int_t get_num_cuda_streams (void);
#endif
extern int get_thread_per_process(void);
extern int_t get_max_buffer_size (void);
extern int_t get_min (int_t *, int_t);
/* Has the comparator signature taken by qsort(). */
extern int compare_pair (const void *, const void *);
extern int_t static_partition (struct superlu_pair *, int_t, int_t *, int_t,
                               int_t *, int_t *, int);
/* Routines for debugging */
extern void print_panel_seg_dist(int_t, int_t, int_t, int_t, int_t *, int_t *);
extern void check_repfnz_dist(int_t, int_t, int_t, int_t *);
extern int_t CheckZeroDiagonal(int_t, int_t *, int_t *, int_t *);
/* Array printers: a char * (presumably a label), an element count and the
 * array. The Int10 variants take int_t data, the Int32 variants plain int. */
extern void PrintDouble5(char *, int_t, double *);
extern void PrintInt10(char *, int_t, int_t *);
extern void PrintInt32(char *, int, int *);
/* Same argument lists with a leading FILE *; these return int. */
extern int file_PrintInt10(FILE *, char *, int_t, int_t *);
extern int file_PrintInt32(FILE *, char *, int, int *);
extern int file_PrintLong10(FILE *, char *, int_t, int_t *);
#ifdef __cplusplus
}
#endif
#endif /* __SUPERLU_DEFS */
|