/usr/include/trilinos/Ifpack2_Container.hpp is in libtrilinos-ifpack2-dev 12.12.1-5.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 | /*@HEADER
// ***********************************************************************
//
// Ifpack2: Tempated Object-Oriented Algebraic Preconditioner Package
// Copyright (2009) Sandia Corporation
//
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
// license for use of this work by or on behalf of the U.S. Government.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
//
// ***********************************************************************
//@HEADER
*/
#ifndef IFPACK2_CONTAINER_HPP
#define IFPACK2_CONTAINER_HPP
/// \file Ifpack2_Container.hpp
/// \brief Ifpack2::Container class declaration
#include "Ifpack2_ConfigDefs.hpp"
#include "Tpetra_RowMatrix.hpp"
#include "Teuchos_Describable.hpp"
#include <Ifpack2_Partitioner.hpp>
#include <Tpetra_Map.hpp>
#include <Tpetra_Experimental_BlockCrsMatrix_decl.hpp>
#include <Teuchos_ParameterList.hpp>
#include <Teuchos_Time.hpp>
#include <iostream>
#include <type_traits>
#ifndef DOXYGEN_SHOULD_SKIP_THIS
namespace Teuchos {
// Forward declaration to avoid include.
class ParameterList;
} // namespace Teuchos
#endif // DOXYGEN_SHOULD_SKIP_THIS
namespace Ifpack2 {
/// \class Container
/// \brief Interface for creating and solving a local linear problem.
/// \tparam MatrixType A specialization of Tpetra::RowMatrix.
///
/// This class is mainly useful for the implementation of
/// BlockRelaxation, and other preconditioners that need to solve
/// linear systems with diagonal blocks of a sparse matrix.
///
/// Users of BlockRelaxation (and any analogous preconditioners) do
/// not normally need to interact with the Container interface.
/// However, they do need to specify a specific Container subclass to
/// use, for example as the second template parameter
/// (<tt>ContainerType</tt>) of BlockRelaxation. Implementations of
/// Container specify
/// - the kind of data structure used to store the local matrix, and
/// - how to solve a linear system with the local matrix.
///
/// For example, the SparseContainer subclass uses a sparse matrix (in
/// particular, Tpetra::CrsMatrix) to store each diagonal block, and
/// can use any given Ifpack2 Preconditioner subclass to solve linear
/// systems.
///
/// A Container can create, populate, and solve a local linear
/// system. The local linear system matrix, B, is a submatrix of the
/// local components of a distributed matrix, A. The idea of Container
/// is to specify the rows of A that are contained in B, then extract
/// B from A, and compute all it is necessary to solve a linear system
/// in B. Then, set the initial guess (if necessary) and right-hand
/// side for B, and solve the linear system in B.
///
/// If you are writing a class (comparable to BlockRelaxation) that
/// uses Container, you should use it in the following way:
/// <ol>
/// <li> Create a Container object, specifying the global matrix A and
/// the indices of the local rows of A that are contained in B.
/// The latter indices come from a Partitioner object.</li>
/// <li> Optionally, set linear solve parameters using setParameters().</li>
/// <li> Initialize the container by calling initialize().</li>
/// <li> Prepare the linear system solver using compute().</li>
/// <li> Solve the linear system using apply().</li>
/// </ol>
/// For an example of Steps 1-5 above, see the implementation of
/// BlockRelaxation::ExtractSubmatrices() in
/// Ifpack2_BlockRelaxation_def.hpp.
template<class MatrixType>
class Container : public Teuchos::Describable {
//! @name Internal typedefs (protected)
//@{
protected:
typedef typename MatrixType::scalar_type scalar_type;
typedef typename MatrixType::local_ordinal_type local_ordinal_type;
typedef typename MatrixType::global_ordinal_type global_ordinal_type;
typedef typename MatrixType::node_type node_type;
typedef Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> mv_type;
typedef Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> vector_type;
typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
typedef Teuchos::ScalarTraits<scalar_type> STS;
typedef Tpetra::Import<local_ordinal_type, global_ordinal_type, node_type> import_type;
typedef Partitioner<Tpetra::RowGraph<local_ordinal_type, global_ordinal_type, node_type> > partitioner_type;
typedef Tpetra::Experimental::BlockCrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> block_crs_matrix_type;
typedef Tpetra::RowMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> row_matrix_type;
static_assert(std::is_same<MatrixType, row_matrix_type>::value,
"Ifpack2::Container: Please use MatrixType = Tpetra::RowMatrix.");
//! Internal representation of Scalar in Kokkos::View
typedef typename Kokkos::Details::ArithTraits<scalar_type>::val_type impl_scalar_type;
//@}
public:
typedef typename mv_type::dual_view_type::t_host HostView;
/// \brief Constructor.
///
/// \brief matrix [in] The original input matrix. This Container
/// will construct local diagonal blocks from the rows given by
/// <tt>partitioner</tt>.
///
/// \param partitioner [in] The Partitioner object that assigns
/// local rows of the input matrix to blocks.
Container (const Teuchos::RCP<const row_matrix_type>& matrix,
const Teuchos::Array<Teuchos::Array<local_ordinal_type> >& partitions,
const Teuchos::RCP<const import_type>& importer,
int OverlapLevel,
scalar_type DampingFactor) :
inputMatrix_ (matrix),
OverlapLevel_ (OverlapLevel),
DampingFactor_ (DampingFactor),
Importer_ (importer)
{
using Teuchos::Ptr;
using Teuchos::RCP;
using Teuchos::Array;
using Teuchos::ArrayView;
using Teuchos::Comm;
// typedef Tpetra::Import<local_ordinal_type, global_ordinal_type, node_type> import_type; // unused
NumLocalRows_ = inputMatrix_->getNodeNumRows();
NumGlobalRows_ = inputMatrix_->getGlobalNumRows();
NumGlobalNonzeros_ = inputMatrix_->getGlobalNumEntries();
IsParallel_ = inputMatrix_->getRowMap()->getComm()->getSize() != 1;
auto global_bcrs =
Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(inputMatrix_);
hasBlockCrs_ = !global_bcrs.is_null();
if(hasBlockCrs_)
bcrsBlockSize_ = global_bcrs->getBlockSize();
else
bcrsBlockSize_ = 1;
setBlockSizes(partitions);
}
/// \brief Constructor for single block.
///
/// \brief matrix [in] The original input matrix. This Container
/// will construct a local diagonal block from the rows given by
/// <tt>localRows</tt>.
///
/// \param localRows [in] The set of (local) rows assigned to this
/// container. <tt>localRows[i] == j</tt>, where i (from 0 to
/// <tt>getNumRows() - 1</tt>) indicates the Container's row, and
/// j indicates the local row in the calling process. Subclasses
/// must always pass along these indices to the base class.
Container (const Teuchos::RCP<const row_matrix_type>& matrix,
const Teuchos::Array<local_ordinal_type>& localRows) :
inputMatrix_ (matrix),
numBlocks_ (1),
partitions_ (localRows.size()),
blockRows_ (1),
partitionIndices_ (1),
OverlapLevel_ (0),
DampingFactor_ (STS::one()),
Importer_ (Teuchos::null)
{
NumLocalRows_ = inputMatrix_->getNodeNumRows();
NumGlobalRows_ = inputMatrix_->getGlobalNumRows();
NumGlobalNonzeros_ = inputMatrix_->getGlobalNumEntries();
IsParallel_ = inputMatrix_->getRowMap()->getComm()->getSize() > 1;
blockRows_[0] = localRows.size();
partitionIndices_[0] = 0;
const block_crs_matrix_type* global_bcrs =
Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(inputMatrix_).get();
hasBlockCrs_ = global_bcrs;
if(hasBlockCrs_)
bcrsBlockSize_ = global_bcrs->getBlockSize();
else
bcrsBlockSize_ = 1;
for(local_ordinal_type i = 0; i < localRows.size(); i++)
partitions_[i] = localRows[i];
}
//! Destructor.
virtual ~Container() {};
/// \brief Local indices of the rows of the input matrix that belong to this block.
///
/// The set of (local) rows assigned to this Container is defined by
/// passing in a set of indices <tt>localRows[i] = j</tt> to the
/// constructor, where i (from 0 to <tt>getNumRows() - 1</tt>)
/// indicates the Container's row, and j indicates the local row in
/// the calling process. Subclasses must always pass along these
/// indices to the base class.
///
/// The indices are usually used to reorder the local row index (on
/// the calling process) of the i-th row in the Container.
///
/// For an example of how to use these indices, see the
/// implementation of BlockRelaxation::ExtractSubmatrices() in
/// Ifpack2_BlockRelaxation_def.hpp.
Teuchos::ArrayView<const local_ordinal_type> getLocalRows(int blockIndex) const
{
return Teuchos::ArrayView<const local_ordinal_type>
(&partitions_[partitionIndices_[blockIndex]], blockRows_[blockIndex]);
}
/// \brief Do all set-up operations that only require matrix structure.
///
/// If the input matrix's structure changes, you must call this
/// method before you may call compute(). You must then call
/// compute() before you may call apply() or weightedApply().
///
/// "Structure" refers to the graph of the matrix: the local and
/// global dimensions, and the populated entries in each row.
virtual void initialize () = 0;
//! Initialize arrays with information about block sizes.
void setBlockSizes(const Teuchos::Array<Teuchos::Array<local_ordinal_type> >& partitions)
{
//First, create a grand total of all the rows in all the blocks
//Note: If partitioner allowed overlap, this could be greater than the # of local rows
local_ordinal_type totalBlockRows = 0;
numBlocks_ = partitions.size();
blockRows_.resize(numBlocks_);
partitionIndices_.resize(numBlocks_);
for(int i = 0; i < numBlocks_; i++)
{
local_ordinal_type rowsInBlock = partitions[i].size();
blockRows_[i] = rowsInBlock;
partitionIndices_[i] = totalBlockRows;
totalBlockRows += rowsInBlock;
}
partitions_.resize(totalBlockRows);
//set partitions_: each entry is the partition/block of the row
local_ordinal_type iter = 0;
for(int i = 0; i < numBlocks_; i++)
{
for(int j = 0; j < blockRows_[i]; j++)
{
partitions_[iter++] = partitions[i][j];
}
}
}
void getMatDiag() const
{
if(Diag_.is_null())
{
Diag_ = rcp(new vector_type(inputMatrix_->getDomainMap()));
inputMatrix_->getLocalDiagCopy(*Diag_);
}
}
/// \brief Extract the local diagonal block and prepare the solver.
///
/// If any entries' values in the input matrix have changed, you
/// must call this method before you may call apply() or
/// weightedApply().
virtual void compute () = 0;
void DoJacobi(HostView& X, HostView& Y, int stride) const;
void DoOverlappingJacobi(HostView& X, HostView& Y, HostView& W, int stride) const;
void DoGaussSeidel(HostView& X, HostView& Y, HostView& Y2, int stride) const;
void DoSGS(HostView& X, HostView& Y, HostView& Y2, int stride) const;
//! Set parameters.
virtual void setParameters (const Teuchos::ParameterList& List) = 0;
//! Return \c true if the container has been successfully initialized.
virtual bool isInitialized () const = 0;
//! Return \c true if the container has been successfully computed.
virtual bool isComputed () const = 0;
/// \brief Compute <tt>Y := alpha * M^{-1} X + beta*Y</tt>.
///
/// X is in the domain Map of the original matrix (the argument to
/// compute()), and Y is in the range Map of the original matrix.
/// This method only reads resp. modifies the permuted subset of
/// entries of X resp. Y related to the diagonal block M. That
/// permuted subset is defined by the indices passed into the
/// constructor.
///
/// This method is marked \c const for compatibility with
/// Tpetra::Operator's method of the same name. This might require
/// subclasses to mark some of their instance data as \c mutable.
virtual void
apply (HostView& X,
HostView& Y,
int blockIndex,
int stride,
Teuchos::ETransp mode = Teuchos::NO_TRANS,
scalar_type alpha = Teuchos::ScalarTraits<scalar_type>::one(),
scalar_type beta = Teuchos::ScalarTraits<scalar_type>::zero()) const = 0;
//! Wrapper for apply with MVs, used in unit tests (never called by BlockRelaxation)
void applyMV (mv_type& X, mv_type& Y) const
{
HostView XView = X.template getLocalView<Kokkos::HostSpace>();
HostView YView = Y.template getLocalView<Kokkos::HostSpace>();
this->apply (XView, YView, 0, X.getStride());
}
/// \brief Compute <tt>Y := alpha * diag(D) * M^{-1} (diag(D) * X) + beta*Y</tt>.
///
/// X is in the domain Map of the original matrix (the argument to
/// compute()), and Y is in the range Map of the original matrix.
/// This method only reads resp. modifies the permuted subset of
/// entries of X resp. Y related to the diagonal block M. That
/// permuted subset is defined by the indices passed into the
/// constructor. The D scaling vector must have the same number of
/// entries on each process as X and Y, but otherwise need not have
/// the same Map. (For example, D could be locally replicated, or
/// could be a different object on each process with a local (\c
/// MPI_COMM_SELF) communicator.)
///
/// This method supports overlap techniques, such as those used in
/// Schwarz methods.
///
/// This method is marked \c const by analogy with apply(), which
/// itself is marked \c const for compatibility with
/// Tpetra::Operator's method of the same name. This might require
/// subclasses to mark some of their instance data as \c mutable.
virtual void
weightedApply (HostView& X,
HostView& Y,
HostView& W,
int blockIndex,
int stride,
Teuchos::ETransp mode = Teuchos::NO_TRANS,
scalar_type alpha = Teuchos::ScalarTraits<scalar_type>::one(),
scalar_type beta = Teuchos::ScalarTraits<scalar_type>::zero()) const = 0;
//! Wrapper for weightedApply with MVs, used in unit tests (never called by BlockRelaxation)
void weightedApplyMV (mv_type& X,
mv_type& Y,
vector_type& W)
{
HostView XView = X.template getLocalView<Kokkos::HostSpace>();
HostView YView = Y.template getLocalView<Kokkos::HostSpace>();
HostView WView = W.template getLocalView<Kokkos::HostSpace>();
weightedApply (XView, YView, WView, 0, X.getStride());
}
virtual void clearBlocks();
//! Print basic information about the container to \c os.
virtual std::ostream& print (std::ostream& os) const = 0;
//! Returns string describing the container.
//! See <tt>Details::ContainerFactory</tt>.
static std::string getName()
{
return "Generic";
}
protected:
//! The input matrix to the constructor.
Teuchos::RCP<const row_matrix_type> inputMatrix_;
//! The number of blocks (partitions) in the container.
int numBlocks_;
//! Local indices of the rows of the input matrix that belong to this block.
Teuchos::Array<local_ordinal_type> partitions_; //size: total # of local rows (in all local blocks)
//! Number of rows in each block.
Teuchos::Array<local_ordinal_type> blockRows_; //size: # of blocks
//! Starting index in partitions_ of local row indices for each block.
Teuchos::Array<local_ordinal_type> partitionIndices_;
//! Diagonal elements.
mutable Teuchos::RCP<vector_type> Diag_;
//! Whether the problem is distributed across multiple MPI processes.
bool IsParallel_;
//! Number of rows of overlap for adjacent blocks.
int OverlapLevel_;
//! Damping factor, passed to apply() as alpha.
scalar_type DampingFactor_;
//! Importer for importing off-process elements of MultiVectors.
Teuchos::RCP<const Tpetra::Import<local_ordinal_type, global_ordinal_type, node_type>> Importer_;
//! Number of local rows in input matrix.
local_ordinal_type NumLocalRows_;
//! Number of global rows in input matrix.
global_ordinal_type NumGlobalRows_;
//! Number of nonzeros in input matrix.
global_ordinal_type NumGlobalNonzeros_;
//! Whether the input matrix is a BlockCRS matrix.
bool hasBlockCrs_;
//! If hasBlockCrs_, the number of DOFs per vertex. Otherwise 1.
int bcrsBlockSize_;
};
//! Print information about the given Container to the output stream \c os.
template <class MatrixType>
inline std::ostream&
operator<< (std::ostream& os, const Ifpack2::Container<MatrixType>& obj)
{
return obj.print (os);
}
template <class MatrixType>
void Container<MatrixType>::DoJacobi(HostView& X, HostView& Y, int stride) const
{
const scalar_type one = STS::one();
// Note: Flop counts copied naively from Ifpack.
// use partitions_ and blockRows_
size_t numVecs = X.dimension_1();
// Non-overlapping Jacobi
for (local_ordinal_type i = 0; i < numBlocks_; i++)
{
// may happen that a partition is empty
if(blockRows_[i] != 1 || hasBlockCrs_)
{
if(blockRows_[i] == 0 )
continue;
apply(X, Y, i, stride, Teuchos::NO_TRANS, DampingFactor_, one);
}
else // singleton, can't access Containers_[i] as it was never filled and may be null.
{
local_ordinal_type LRID = partitions_[partitionIndices_[i]];
getMatDiag();
HostView diagView = Diag_->template getLocalView<Kokkos::HostSpace>();
impl_scalar_type d = (impl_scalar_type) one / diagView(LRID, 0);
for(size_t nv = 0; nv < numVecs; nv++)
{
impl_scalar_type x = X(LRID, nv);
Y(LRID, nv) = x * d;
}
}
}
}
template <class MatrixType>
void Container<MatrixType>::DoOverlappingJacobi(HostView& X, HostView& Y, HostView& W, int stride) const
{
// Overlapping Jacobi
for(local_ordinal_type i = 0; i < numBlocks_; i++)
{
// may happen that a partition is empty
if(blockRows_[i] == 0)
continue;
if(blockRows_[i] != 1)
weightedApply(X, Y, W, i, stride, Teuchos::NO_TRANS, DampingFactor_, STS::one());
}
}
template<class MatrixType>
void Container<MatrixType>::DoGaussSeidel(HostView& X, HostView& Y, HostView& Y2, int stride) const
{
using Teuchos::Array;
using Teuchos::ArrayRCP;
using Teuchos::ArrayView;
using Teuchos::Ptr;
using Teuchos::RCP;
using Teuchos::rcp;
using Teuchos::rcpFromRef;
// Note: Flop counts copied naively from Ifpack.
const scalar_type one = STS::one();
const size_t Length = inputMatrix_->getNodeMaxNumRowEntries();
auto numVecs = X.dimension_1();
Array<scalar_type> Values;
Array<local_ordinal_type> Indices;
Indices.resize(Length);
Values.resize(bcrsBlockSize_ * bcrsBlockSize_ * Length); //note: if A was not a BlockCRS, bcrsBlockSize_ is 1
// I think I decided I need two extra vectors:
// One to store the sum of the corrections (initialized to zero)
// One to store the temporary residual (doesn't matter if it is zeroed or not)
// My apologies for making the names clear and meaningful. (X=RHS, Y=guess?! Nice.)
HostView Resid("", X.dimension_0(), X.dimension_1());
for(local_ordinal_type i = 0; i < numBlocks_; i++)
{
if(blockRows_[i] > 1 || hasBlockCrs_)
{
if (blockRows_[i] == 0)
continue;
// update from previous block
ArrayView<const local_ordinal_type> localRows = getLocalRows (i);
const size_t localNumRows = blockRows_[i];
for(size_t j = 0; j < localNumRows; j++)
{
const local_ordinal_type LID = localRows[j]; // Containers_[i]->ID (j);
size_t NumEntries;
inputMatrix_->getLocalRowCopy(LID, Indices(), Values(), NumEntries);
for(size_t m = 0; m < numVecs; m++)
{
if(hasBlockCrs_)
{
for (int localR = 0; localR < bcrsBlockSize_; localR++)
Resid(LID * bcrsBlockSize_ + localR, m) = X(LID * bcrsBlockSize_ + localR, m);
for (size_t k = 0; k < NumEntries; ++k)
{
const local_ordinal_type col = Indices[k];
for (int localR = 0; localR < bcrsBlockSize_; localR++)
{
for(int localC = 0; localC < bcrsBlockSize_; localC++)
{
Resid(LID * bcrsBlockSize_ + localR, m) -=
Values[k * bcrsBlockSize_ * bcrsBlockSize_ + localR + localC * bcrsBlockSize_]
* Y2(col * bcrsBlockSize_ + localC, m);
}
}
}
}
else
{
Resid(LID, m) = X(LID, m);
for (size_t k = 0; k < NumEntries; ++k)
{
const local_ordinal_type col = Indices[k];
Resid(LID, m) -= Values[k] * Y2(col, m);
}
}
}
}
// solve with this block
//
// Note: I'm abusing the ordering information, knowing that X/Y
// and Y2 have the same ordering for on-proc unknowns.
//
// Note: Add flop counts for inverse apply
apply(Resid, Y2, i, stride, Teuchos::NO_TRANS, DampingFactor_, one);
}
else if(blockRows_[i] == 1)
{
// singleton, can't access Containers_[i] as it was never filled and may be null.
// a singleton calculation is exact, all residuals should be zero.
local_ordinal_type LRID = partitionIndices_[i]; // by definition, a singleton 1 row in block.
getMatDiag();
HostView diagView = Diag_->template getLocalView<Kokkos::HostSpace>();
impl_scalar_type d = (impl_scalar_type) one / diagView(LRID, 0);
for(size_t m = 0; m < numVecs; m++)
{
impl_scalar_type x = X(LRID, m);
impl_scalar_type newy = x * d;
Y2(LRID, m) = newy;
}
} // end else
} // end for numBlocks_
if(IsParallel_)
{
auto numMyRows = inputMatrix_->getNodeNumRows();
for (size_t m = 0; m < numVecs; ++m)
{
for (size_t i = 0; i < numMyRows * bcrsBlockSize_; ++i)
{
Y(i, m) = Y2(i, m);
}
}
}
}
template<class MatrixType>
void Container<MatrixType>::DoSGS(HostView& X, HostView& Y, HostView& Y2, int stride) const
{
using Teuchos::Array;
using Teuchos::ArrayRCP;
using Teuchos::ArrayView;
using Teuchos::Ptr;
using Teuchos::ptr;
using Teuchos::RCP;
using Teuchos::rcp;
using Teuchos::rcpFromRef;
const scalar_type one = STS::one();
auto numVecs = X.dimension_1();
const size_t Length = inputMatrix_->getNodeMaxNumRowEntries();
Array<scalar_type> Values;
Array<local_ordinal_type> Indices(Length);
Values.resize(bcrsBlockSize_ * bcrsBlockSize_ * Length);
// I think I decided I need two extra vectors:
// One to store the sum of the corrections (initialized to zero)
// One to store the temporary residual (doesn't matter if it is zeroed or not)
// My apologies for making the names clear and meaningful. (X=RHS, Y=guess?! Nice.)
HostView Resid("", X.dimension_0(), X.dimension_1());
// Forward Sweep
for(local_ordinal_type i = 0; i < numBlocks_; i++)
{
if(blockRows_[i] != 1 || hasBlockCrs_)
{
if(blockRows_[i] == 0)
continue; // Skip empty partitions
// update from previous block
ArrayView<const local_ordinal_type> localRows = getLocalRows(i);
for(local_ordinal_type j = 0; j < blockRows_[i]; j++)
{
const local_ordinal_type LID = localRows[j]; // Containers_[i]->ID (j);
size_t NumEntries;
inputMatrix_->getLocalRowCopy(LID, Indices(), Values(), NumEntries);
//set tmpresid = initresid - A*correction
for(size_t m = 0; m < numVecs; m++)
{
if(hasBlockCrs_)
{
for(int localR = 0; localR < bcrsBlockSize_; localR++)
Resid(LID * bcrsBlockSize_ + localR, m) = X(LID * bcrsBlockSize_ + localR, m);
for(size_t k = 0; k < NumEntries; ++k)
{
const local_ordinal_type col = Indices[k];
for (int localR = 0; localR < bcrsBlockSize_; localR++)
{
for(int localC = 0; localC < bcrsBlockSize_; localC++)
Resid(LID * bcrsBlockSize_ + localR, m) -=
Values[k * (bcrsBlockSize_ * bcrsBlockSize_) + (localR + localC * bcrsBlockSize_)]
* Y2(col * bcrsBlockSize_ + localC, m);
}
}
}
else
{
Resid(LID, m) = X(LID, m);
for(size_t k = 0; k < NumEntries; k++)
{
local_ordinal_type col = Indices[k];
Resid(LID, m) -= Values[k] * Y2(col, m);
}
}
}
}
// solve with this block
//
// Note: I'm abusing the ordering information, knowing that X/Y
// and Y2 have the same ordering for on-proc unknowns.
//
// Note: Add flop counts for inverse apply
apply(Resid, Y2, i, stride, Teuchos::NO_TRANS, DampingFactor_, one);
// operations for all getrow's
}
else // singleton, can't access Containers_[i] as it was never filled and may be null.
{
local_ordinal_type LRID = partitions_[partitionIndices_[i]];
getMatDiag();
HostView diagView = Diag_->template getLocalView<Kokkos::HostSpace>();
impl_scalar_type d = (impl_scalar_type) one / diagView(LRID, 0);
for(size_t m = 0; m < numVecs; m++)
{
impl_scalar_type x = X(LRID, m);
impl_scalar_type newy = x * d;
Y2(LRID, m) = newy;
}
} // end else
} // end forward sweep over NumLocalBlocks
// Reverse Sweep
//
// mfh 12 July 2013: The unusual iteration bounds, and the use of
// i-1 rather than i in the loop body, ensure correctness even if
// local_ordinal_type is unsigned. "i = numBlocks_-1; i >= 0;
// i--" will loop forever if local_ordinal_type is unsigned, because
// unsigned integers are (trivially) always nonnegative.
for(local_ordinal_type i = numBlocks_; i > 0; --i)
{
if(hasBlockCrs_ || blockRows_[i-1] != 1)
{
if(blockRows_[i - 1] == 0)
continue;
// update from previous block
ArrayView<const local_ordinal_type> localRows = getLocalRows(i-1);
for(local_ordinal_type j = 0; j < blockRows_[i-1]; j++)
{
const local_ordinal_type LID = localRows[j]; // Containers_[i-1]->ID (j);
size_t NumEntries;
inputMatrix_->getLocalRowCopy(LID, Indices(), Values(), NumEntries);
//set tmpresid = initresid - A*correction
for (size_t m = 0; m < numVecs; m++)
{
if(hasBlockCrs_)
{
for(int localR = 0; localR < bcrsBlockSize_; localR++)
Resid(LID * bcrsBlockSize_ + localR, m) = X(LID * bcrsBlockSize_ + localR, m);
for(size_t k = 0; k < NumEntries; ++k)
{
const local_ordinal_type col = Indices[k];
for(int localR = 0; localR < bcrsBlockSize_; localR++)
{
for(int localC = 0; localC < bcrsBlockSize_; localC++)
Resid(LID*bcrsBlockSize_+localR, m) -=
Values[k * (bcrsBlockSize_ * bcrsBlockSize_) + (localR + localC * bcrsBlockSize_)]
* Y2(col * bcrsBlockSize_ + localC, m);
}
}
}
else
{
Resid(LID, m) = X(LID, m);
for(size_t k = 0; k < NumEntries; ++k)
{
local_ordinal_type col = Indices[k];
Resid(LID, m) -= Values[k] * Y2(col, m);
}
}
}
}
// solve with this block
//
// Note: I'm abusing the ordering information, knowing that X/Y
// and Y2 have the same ordering for on-proc unknowns.
//
// Note: Add flop counts for inverse apply
apply(Resid, Y2, i - 1, stride, Teuchos::NO_TRANS, DampingFactor_, one);
// operations for all getrow's
} // end Partitioner_->numRowsInPart (i) != 1 )
// else do nothing, as by definition with a singleton, the residuals are zero.
} //end reverse sweep
if(IsParallel_)
{
auto numMyRows = inputMatrix_->getNodeNumRows();
for (size_t m = 0; m < numVecs; ++m)
{
for (size_t i = 0; i < numMyRows * bcrsBlockSize_; ++i)
{
Y(i, m) = Y2(i, m);
}
}
}
}
template<class MatrixType>
void Container<MatrixType>::clearBlocks()
{
numBlocks_ = 0;
partitions_.clear();
blockRows_.clear();
partitionIndices_.clear();
Diag_ = Teuchos::null; //Diag_ will be recreated if needed
}
} // namespace Ifpack2
namespace Teuchos {
/// \brief Partial specialization of TypeNameTraits for Ifpack2::Container.
///
/// \tparam MatrixType The template parameter of Ifpack2::Container.
/// Must be a Tpetra::RowMatrix specialization.
template<class MatrixType>
class TEUCHOSCORE_LIB_DLL_EXPORT TypeNameTraits< ::Ifpack2::Container<MatrixType> >
{
public:
static std::string name () {
return std::string ("Ifpack2::Container<") +
TypeNameTraits<MatrixType>::name () + " >";
}
static std::string concreteName (const ::Ifpack2::Container<MatrixType>&) {
return name ();
}
};
} // namespace Teuchos
#endif // IFPACK2_CONTAINER_HPP
|