This file is indexed.

/usr/include/deal.II/base/work_stream.h is in libdeal.ii-dev 6.3.1-1.1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
//---------------------------------------------------------------------------
//    $Id: work_stream.h 20677 2010-02-23 23:03:29Z kanschat $
//    Version: $Name$
//
//    Copyright (C) 2008, 2009, 2010 by the deal.II authors
//
//    This file is subject to QPL and may not be  distributed
//    without copyright and license information. Please refer
//    to the file deal.II/doc/license.html for the  text  and
//    further information on this license.
//
//---------------------------------------------------------------------------
#ifndef __deal2__work_stream_h
#define __deal2__work_stream_h


#include <base/config.h>
#include <base/multithread_info.h>
#include <base/template_constraints.h>
#include <base/std_cxx1x/function.h>
#include <base/std_cxx1x/bind.h>

#if DEAL_II_USE_MT == 1
#  include <base/thread_management.h>
#  include <tbb/pipeline.h>
#endif

#include <vector>
#include <utility>


DEAL_II_NAMESPACE_OPEN



/**
 * A class whose main template function supports running multiple
 * threads each of which operates on a subset of the given range of
 * objects. The class uses the Intel Threading Building Blocks (TBB)
 * to load balance the individual subranges onto the available
 * threads. For a lengthy discussion of the rationale of this class,
 * see the @ref threads "Parallel computing with multiple processors"
 * module.
 *
 * The class is built on the following premise: One frequently has some work
 * that needs to be done on a sequence of objects; a prototypical example is
 * assembling cell contributions to a system matrix or right hand side. In
 * many such examples, part of the work can be done entirely independently and
 * in parallel, possibly using several processor cores on a machine with
 * shared memory. However, some other part of this work may need to be
 * synchronised and be done in order. In the example of assembling a matrix,
 * the computation of local contributions can be done entirely in parallel,
 * but copying the the local contributions into the global matrix requires
 * some care: First, several threads can't write at the same time, but need to
 * synchronise writing using a mutex; secondly, we want the order in which
 * local contributions are added to the global matrix to be always the same
 * because floating point addition is not commutative and adding local
 * contributions to the global matrix in different orders leads to subtly
 * different results that can affect the number of iterations for iterative
 * solvers as well as the round-off error in the solution in random
 * ways. Consequently, we want to ensure that only one thread at a time writes
 * into the global matrix, and that results are copied in a stable and
 * reproducible order.
 *
 * This class implements a framework for this work model. It works with a
 * stream of objects given by an iterator range, runs a worker function in
 * parallel on all of these objects and then passes each object to a
 * postprocessor function that runs sequentially and gets objects in exactly
 * the order in which they appear in the input iterator range. None of the
 * synchronisation work is exposed to the user of this class.
 *
 * Internally, the range given to the run() function of this class is split
 * into a sequence of "items", which are then distributed according to some
 * %internal algorithm onto the number of available threads. An item is an
 * element of the range of iterators on which we are to operate; for example,
 * for the purpose of assembling matrices or evaluating error indicators, an
 * item could be a cell. The TBB library determines how many threads are
 * created (typically as many as there are processor cores), but the number of
 * items that may be active at any given time is specified by the argument to
 * the constructor. It should be bigger or equal to the number of processor
 * cores - the default is four times the number of cores on the current system.
 *
 * Items are created upon request by the TBB whenever one of the worker
 * threads is idle or is expected to become idle. It is then handed off to a
 * worker function, typically a member function of a main class. These worker
 * functions are run in parallel on a number of threads, and there is no
 * guarantee that they are asked to work on items in any particular order, in
 * particular not necessarily in the order in which items are generated from
 * the iterator range.
 *
 * Typically, worker functions need additional data, for example FEValues
 * objects, input data vectors, etc, some of which can not be shared among
 * threads. To this end, the run() function takes another template argument,
 * ScratchData, which designates a type objects of which are stored with
 * each item and which threads can use as private data without having to
 * share them with other threads. The run() function takes an additional
 * argument with an object of type ScratchData that is going to be copied
 * for the arguments passed to each of the worker functions.
 *
 * In addition, worker functions store their results in objects of template type
 * CopyData. These are then handed off to a separate function, called copier,
 * that may use the stored results to transfer them into permanent
 * storage. For example, it may copy the results of local contributions to a
 * matrix computed by a worker function into the global matrix. In contrast to
 * the worker function, however, only one instance of the copier is run at any
 * given time; it can therefore safely copy local contributions into the
 * global matrix without the need to lock the global object using a mutex or
 * similar means. Furthermore, it is guaranteed that the copier is run with
 * CopyData objects in the same order in which their associated items
 * were created; consequently, even if worker threads may compute results in
 * unspecified order, the copier always receives the results in exactly the
 * same order as the items were created.
 *
 * Once an item is processed by the copier, it is deleted and the
 * ScratchData and CopyData objects that were used in its computation
 * are considered unused and may be re-used for the next invokation of
 * the worker function, on this or another thread.
 *
 * This class only really works in parallel when multithread mode was selected
 * during deal.II configuration. Otherwise it simply works on each item
 * sequentially.
 *
 * @ingroup threads
 * @author Wolfgang Bangerth, 2007, 2008, 2009
 */
namespace WorkStream
{

#if DEAL_II_USE_MT == 1


  namespace internal
  {
				     /**
				      * A class that creates a sequence of
				      * items from a range of iterators.
				      */
    template <typename Iterator,
	      typename ScratchData,
	      typename CopyData>
    class IteratorRangeToItemStream : public tbb::filter
    {
      public:
					 /**
					  * A data type that we use to identify
					  * items to be worked on.
					  *
					  * The first element indicates an array
					  * of iterators to work on; the second
					  * the scratch space; the third an
					  * array of copy data spaces; and the
					  * last the number of elements to work
					  * on. This last argument is an integer
					  * between one and chunk_size. The
					  * arrays have a length of chunk_size.
					  */
	typedef
	std_cxx1x::tuple<std::vector<Iterator>,
			 ScratchData*,
			 std::vector<CopyData>,
			 unsigned int>
	ItemType;
	

					 /**
					  * Constructor. Take an iterator
					  * range, the size of a buffer that
					  * can hold items, and the sample
					  * additional data object that will
					  * be passed to each worker and
					  * copier function invokation.
					  */
	IteratorRangeToItemStream (const Iterator       &begin,
				   const Iterator       &end,
				   const unsigned int    buffer_size,
				   const unsigned int    chunk_size,
				   const ScratchData    &sample_scratch_data,
				   const CopyData       &sample_copy_data)
			:
			tbb::filter (/*is_serial=*/true),
			remaining_iterator_range (begin, end),
			ring_buffer (buffer_size),
			n_emitted_items (0),
			chunk_size (chunk_size)
	  {
					     // initialize copies of
					     // additional_data. since
					     // this is frequently
					     // expensive (creating
					     // FEValues objects etc) do
					     // that in parallel
	    Threads::TaskGroup<> tasks;
	    for (unsigned int i=0; i<ring_buffer.size(); ++i)
	      tasks += Threads::new_task (&IteratorRangeToItemStream::init_buffer_elements,
					  *this,
					  i,
					  std_cxx1x::cref(sample_scratch_data),
					  std_cxx1x::cref(sample_copy_data));
	    tasks.join_all ();
	  }

					 /**
					  * Destructor.
					  */
	~IteratorRangeToItemStream ()
	  {
	    for (unsigned int i=0; i<ring_buffer.size(); ++i)
	      delete std_cxx1x::get<1>(ring_buffer[i]);
	  }
      
					 /**
					  * Create a item and return a
					  * pointer to it.
					  */
	virtual void * operator () (void *)
	  {
					     // store the current
					     // position of the pointer
	    ItemType *current_item
	      = &ring_buffer[n_emitted_items % ring_buffer.size()];

					     // initialize the next item. it may
					     // consist of at most chunk_size
					     // elements
	    std_cxx1x::get<3>(*current_item) = 0;
	    while ((remaining_iterator_range.first !=
		    remaining_iterator_range.second)
		   &&
		   (std_cxx1x::get<3>(*current_item) < chunk_size))
	      {
		std_cxx1x::get<0>(*current_item)[std_cxx1x::get<3>(*current_item)]
		  = remaining_iterator_range.first;

		++remaining_iterator_range.first;
		++std_cxx1x::get<3>(*current_item);
	      }

	    if (std_cxx1x::get<3>(*current_item) == 0)
					       // there were no items
					       // left. terminate the pipeline
	      return 0;
	    else
	      {
		++n_emitted_items;
		return current_item;
	      }
	  }
	
      private:
					 /**
					  * The interval of iterators still to
					  * be worked on. This range will shrink
					  * over time.
					  */
	std::pair<Iterator,Iterator> remaining_iterator_range;
      
					 /**
					  * A ring buffer that will store items.
					  */
	std::vector<ItemType>        ring_buffer;

					 /**
					  * Counter for the number of emitted
					  * items. Each item may consist of up
					  * to chunk_size iterator elements.
					  */
	unsigned int                 n_emitted_items;

					 /**
					  * Number of elements of the
					  * iterator range that each
					  * thread should work on
					  * sequentially; a large number
					  * makes sure that each thread
					  * gets a significant amount of
					  * work before the next task
					  * switch happens, whereas a
					  * small number is better for
					  * load balancing.
					  */
	const unsigned int           chunk_size;

					 /**
					  * Initialize the pointers and vector
					  * elements in the specified entry of
					  * the ring buffer.
					  */
	void init_buffer_elements (const unsigned int element,
				   const ScratchData &sample_scratch_data,
				   const CopyData    &sample_copy_data)
	  {
	    Assert (std_cxx1x::get<1>(ring_buffer[element]) == 0,
		    ExcInternalError());
	      
	    std_cxx1x::get<0>(ring_buffer[element])
	      .resize (chunk_size, remaining_iterator_range.second);
	    std_cxx1x::get<1>(ring_buffer[element])
	      = new ScratchData(sample_scratch_data);
	    std_cxx1x::get<2>(ring_buffer[element])
	      .resize (chunk_size, sample_copy_data);
	  }
    };



				     /**
				      * A class that manages calling the
				      * worker function on a number of
				      * parallel threads. Note that it is, in
				      * the TBB notation, a filter that can
				      * run in parallel.
				      */
    template <typename Iterator,
	      typename ScratchData,
	      typename CopyData>
    class Worker : public tbb::filter
    {
      public:
					 /**
					  * Constructor. Takes a
					  * reference to the object on
					  * which we will operate as
					  * well as a pointer to the
					  * function that will do the
					  * assembly.
					  */
	Worker (const std_cxx1x::function<void (const Iterator &,
						ScratchData &,
						CopyData &)> &worker)
	:
	tbb::filter (/* is_serial= */ false),
	worker (worker)
	{}


					 /**
					  * Work on an item.
					  */
	void * operator () (void *item)
	  {
					     // first unpack the current item
	    typedef
	      typename IteratorRangeToItemStream<Iterator,ScratchData,CopyData>::ItemType
	      ItemType;
	    
	    ItemType *current_item = reinterpret_cast<ItemType*> (item);
	    
					     // then call the worker function on
					     // each element of the chunk we
					     // were given
	    for (unsigned int i=0; i<std_cxx1x::get<3>(*current_item); ++i)
	      worker (std_cxx1x::get<0>(*current_item)[i],
		      *std_cxx1x::get<1>(*current_item),
		      std_cxx1x::get<2>(*current_item)[i]);
	    
					     // then return the original pointer
					     // to the now modified object
	    return item;
	  }
	

      private:
					 /**
					  * Pointer to the function
					  * that does the assembling
					  * on the sequence of cells.
					  */
	const std_cxx1x::function<void (const Iterator &,
					ScratchData &,
					CopyData &)> worker;
    };



				     /**
				      * A class that manages calling the
				      * copier function. Note that it is, in
				      * the TBB notation, a filter that runs
				      * sequentially, ensuring that all items
				      * are copied in the same order in which
				      * they are created.
				      */
    template <typename Iterator,
	      typename ScratchData,
	      typename CopyData>
    class Copier : public tbb::filter
    {
      public:
					 /**
					  * Constructor. Takes a
					  * reference to the object on
					  * which we will operate as
					  * well as a pointer to the
					  * function that will do the
					  * copying from the
					  * additional data object to
					  * the global matrix or
					  * similar.
					  */
	Copier (const std_cxx1x::function<void (const CopyData &)> &copier)
			:
			tbb::filter (/* is_serial= */ true),
			copier (copier)
	  {}


					 /**
					  * Work on a single item.
					  */
	void * operator () (void *item)
	  {
					     // first unpack the current item
	    typedef
	      typename IteratorRangeToItemStream<Iterator,ScratchData,CopyData>::ItemType
	      ItemType;
	    
	    ItemType *current_item = reinterpret_cast<ItemType*> (item);

					     // initiate copying data
	    for (unsigned int i=0; i<std_cxx1x::get<3>(*current_item); ++i)
	      copier (std_cxx1x::get<2>(*current_item)[i]);

					     // return an invalid
					     // item since we are at
					     // the end of the
					     // pipeline
	    return 0;
	  }
	

      private:
					 /**
					  * Pointer to the function
					  * that does the copying of
					  * data.
					  */
	const std_cxx1x::function<void (const CopyData &)> copier;
    };
    
  }
  

#endif // DEAL_II_USE_MT



				   /**
				    * This is the main function of the
				    * WorkStream concept, doing work as
				    * described in the introduction to this
				    * namespace.
				    *
				    * This is the function that can be used
				    * for worker and copier objects that are
				    * either pointers to non-member
				    * functions or objects that allow to be
				    * called with an operator(), for example
				    * objects created by std::bind.
				    *
				    * The argument passed as @p end must be
				    * convertible to the same type as
				    * @p begin, but doesn't have to be of the
				    * same type itself. This allows to write
				    * code like
				    * <code>WorkStream().run(dof_handler.begin_active(),
				    * dof_handler.end(), ...</code> where
				    * the first is of type
				    * DoFHandler::active_cell_iterator
				    * whereas the second is of type
				    * DoFHandler::raw_cell_iterator.
				    *
				    * The two data types
				    * <tt>ScratchData</tt> and
				    * <tt>CopyData</tt> need to have a
				    * working copy
				    * constructor. <tt>ScratchData</tt>
				    * is only used in the
				    * <tt>worker</tt> function, while
				    * <tt>CopyData</tt> is the object
				    * passed from the <tt>worker</tt>
				    * to the <tt>copier</tt>.
				    *
				    * The @p queue_length argument indicates
				    * the number of items that can be live
				    * at any given time. Each item consists
				    * of @p chunk_size elements of the input
				    * stream that will be worked on by the
				    * worker and copier functions one after
				    * the other on the same thread.
				    */
  template <typename Worker,
	    typename Copier,
	    typename Iterator,
	    typename ScratchData,
	    typename CopyData>
  void
  run (const Iterator                          &begin,
       const typename identity<Iterator>::type &end,
       Worker                                   worker,
       Copier                                   copier,
       const ScratchData                       &sample_scratch_data,
       const CopyData                          &sample_copy_data,
       const unsigned int queue_length = 2*multithread_info.n_default_threads,
       const unsigned int chunk_size = 8)
  {
    Assert (queue_length > 0,
	    ExcMessage ("The queue length must be at least one, and preferably "
			"larger than the number of processors on this system."));
    Assert (chunk_size > 0,
	    ExcMessage ("The chunk_size must be at least one."));  

				     // if no work then skip. (only use
				     // operator!= for iterators since we may
				     // not have an equality comparison
				     // operator)
    if (!(begin != end))
      return;
    
#if DEAL_II_USE_MT == 1  
				     // create the three stages of the
				     // pipeline
    internal::IteratorRangeToItemStream<Iterator,ScratchData,CopyData>
      iterator_range_to_item_stream (begin, end,
				     queue_length * chunk_size,
				     chunk_size,
				     sample_scratch_data,
				     sample_copy_data);

    internal::Worker<Iterator, ScratchData, CopyData> worker_filter (worker);
    internal::Copier<Iterator, ScratchData, CopyData> copier_filter (copier);

				     // now create a pipeline from
				     // these stages
    tbb::pipeline assembly_line;
    assembly_line.add_filter (iterator_range_to_item_stream);
    assembly_line.add_filter (worker_filter);
    assembly_line.add_filter (copier_filter);

				     // and run it
    assembly_line.run (queue_length);    

    assembly_line.clear ();

#else

				     // need to copy the sample since it is
				     // marked const
    ScratchData scratch_data = sample_scratch_data;
    CopyData    copy_data    = sample_copy_data;
    
    for (Iterator i=begin; i!=end; ++i)
      {
	worker (i, scratch_data, copy_data);
	copier (copy_data);
      }  
#endif
  }



				   /**
				    * This is the main function of the
				    * WorkStream concept, doing work as
				    * described in the introduction to this
				    * namespace.
				    *
				    * This is the function that can be
				    * used for worker and copier functions
				    * that are member functions of a class.
				    *
				    * The argument passed as @p end must be
				    * convertible to the same type as
				    * @p begin, but doesn't have to be of the
				    * same type itself. This allows to write
				    * code like
				    * <code>WorkStream().run(dof_handler.begin_active(),
				    * dof_handler.end(), ...</code> where
				    * the first is of type
				    * DoFHandler::active_cell_iterator
				    * whereas the second is of type
				    * DoFHandler::raw_cell_iterator.
				    *
				    * The @p queue_length argument indicates
				    * the number of items that can be live
				    * at any given time. Each item consists
				    * of @p chunk_size elements of the input
				    * stream that will be worked on by the
				    * worker and copier functions one after
				    * the other on the same thread.
				    */
  template <typename MainClass,
	    typename Iterator,
	    typename ScratchData,
	    typename CopyData>
  void
  run (const Iterator                          &begin,
       const typename identity<Iterator>::type &end,
       MainClass                               &main_object,
       void (MainClass::*worker) (const Iterator &,
				  ScratchData &,
				  CopyData &),
       void (MainClass::*copier) (const CopyData &),
       const ScratchData                    &sample_scratch_data,
       const CopyData                       &sample_copy_data,
       const unsigned int queue_length = 2*multithread_info.n_default_threads,
       const unsigned int chunk_size = 8)
  {
				     // forward to the other function
    run (begin, end,
	 std_cxx1x::bind (worker,
			  std_cxx1x::ref (main_object),
			  _1, _2, _3),
	 std_cxx1x::bind (copier,
			  std_cxx1x::ref (main_object),
			  _1),
	 sample_scratch_data,
	 sample_copy_data,
	 queue_length,
	 chunk_size);
  }

}




DEAL_II_NAMESPACE_CLOSE




//----------------------------   work_stream.h     ---------------------------
// end of #ifndef __deal2__work_stream_h
#endif
//----------------------------   work_stream.h     ---------------------------