/usr/include/anfo/misc_streams.h is in libanfo0-dev 0.98-6.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

#ifndef INCLUDED_MISC_STREAMS_H
#define INCLUDED_MISC_STREAMS_H

#include "stream.h"

#include <deque>
#include <map>

namespace streams {

using namespace std ;

inline int eff_length( const Read& r )
{ return ( r.has_trim_right() ? r.trim_right() : r.sequence().size() ) - r.trim_left() ; }

//! \brief compares hits by smallest genome coordinate
//! Comparison is first done lexically on the subject name, then on the
//! smallest coordinate that's part of the alignment, then on the
//! alignment length.  A particular genome can be selected; if this
//! isn't done, we sort on the best hit and compare the genome name even
//! before the subject name.
class by_genome_coordinate {
	private:
		const vector<string> gs_ ;

		bool compare( const Result &a, const Result &b )
		{
			const Hit *u = hit_to( a ), *v = hit_to( b ) ;
			if( u && !v ) return true ;
			if( !u ) return false ;
			if( u->genome_name() < v->genome_name() ) return true ;
			if( v->genome_name() < u->genome_name() ) return false ;
			return compare( *u, *v, a.read(), b.read(), false ) ;
		}

		bool compare( const Result &a, const Result &b, const string& g )
		{
			const Hit *u = hit_to( a, g ), *v = hit_to( b, g ) ;
			if( u && !v ) return true ;
			if( !u ) return false ;
			return compare( *u, *v, a.read(), b.read(), false ) ;
		}

		bool compare( const Hit& u, const Hit& v, const Read& a, const Read& b, bool def )
		{
			if( u.sequence() < v.sequence() ) return true ;
			if( v.sequence() < u.sequence() ) return false ;
			if( u.start_pos() < v.start_pos() ) return true ;
			if( v.start_pos() < u.start_pos() ) return false ;
			if( u.aln_length() < v.aln_length() ) return true ;
			if( v.aln_length() < u.aln_length() ) return false ;
			return def ;
		}

	public:
		by_genome_coordinate( const string &g ) : gs_( &g, &g+1 ) {}
		by_genome_coordinate( const vector<string> &gs ) : gs_(gs) {}
		by_genome_coordinate() : gs_() {}

		bool operator() ( const Result *a, const Result *b ) {
			if( gs_.empty() ) return compare( *a, *b ) ;
			for( vector<string>::const_iterator i = gs_.begin(), e = gs_.end() ; i != e ; ++i )
			{
				if( compare( *a, *b, *i ) ) return true ;
				if( compare( *b, *a, *i ) ) return false ;
			}
			return false ;
		}

		void tag_header( output::Header& h ) {
			h.clear_is_sorted_by_name() ;
			h.clear_is_sorted_by_coordinate() ;
			if( gs_.empty() ) {
				h.set_is_sorted_by_all_genomes( true ) ;
			}
			else
			{
				h.clear_is_sorted_by_all_genomes() ;
				for( vector<string>::const_iterator i = gs_.begin(), e = gs_.end() ; i != e ; ++i )
					h.add_is_sorted_by_coordinate( *i ) ;
			}
		}

		bool is_sorted( const output::Header& h ) {
			if( gs_.empty() ) return h.is_sorted_by_all_genomes() ;

			if( (int)gs_.size() != h.is_sorted_by_coordinate_size() ) return false ;
			return equal( gs_.begin(), gs_.end(), h.is_sorted_by_coordinate().begin() ) ;
		}

		bool operator()( const Hit& u, const Hit& v, const Read& a, const Read& b )
		{
			if( u.genome_name() < v.genome_name() ) return true ;
			if( v.genome_name() < u.genome_name() ) return false ;
			return compare( u, v, a, b, eff_length( a ) < eff_length( b ) ) ;
		}
} ;
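
// A minimal usage sketch (`results' and `hdr' are hypothetical local
// variables): order a container of results the way SortingStream does
// below, then record the chosen sort order in the header.
//
//   deque< Result* > results ;                 // filled elsewhere
//   output::Header hdr ;
//   by_genome_coordinate comp( "hg18" ) ;      // restrict to one genome
//   sort( results.begin(), results.end(), comp ) ;
//   comp.tag_header( hdr ) ;                   // header now records the order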

struct by_seqid {
	bool operator() ( const Result *a, const Result *b ) {
		assert( a && b ) ;
		return a->read().seqid() < b->read().seqid() ;
	}
	void tag_header( output::Header& h ) {
		h.clear_is_sorted_by_coordinate() ;
		h.set_is_sorted_by_name( true ) ;
	}
	bool is_sorted( const output::Header& h ) {
		return h.is_sorted_by_name() ;
	}
} ;


//! \brief merges sorted streams into a sorted stream
//! What to compare on is read from the input streams' headers.  If any
//! of them is unsorted, we fail.  Otherwise we check that they are all
//! sorted in the same way and merge accordingly.
//!
//! \note All input streams must necessarily be opened at the same time.
//!       In principle, we could merge smaller chunks into temporary
//!       files to avoid hitting the file descriptor limit, but there
//!       hasn't been a practical need to do that yet.
class MergeStream : public StreamBundle
{
	private:
		deque< Result > rs_ ;
		enum { unknown, by_name, by_coordinate } mode_ ;
		vector<string> gs_ ;

	public:
		MergeStream() : mode_( unknown ) {}
		virtual Header fetch_header() ;
		virtual Result fetch_result() ;

		//! \todo totally broken, need to think about how to keep the
		//!       necessary information.
		virtual Object get_summary() const { return False ; }
		virtual string type_name() const { return "MergeStream" ; }
} ;
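
// A minimal sketch of merging two pre-sorted files into one output.
// File names are hypothetical; the open-and-wrap pattern mirrors
// SortingStream::flush_scratch() below, and open() needs <fcntl.h>.
//
//   MergeStream ms ;
//   const char *parts[] = { "part1.anfo", "part2.anfo" } ;
//   for( int i = 0 ; i != 2 ; ++i )
//   {
//       int fd = open( parts[i], O_RDONLY ) ;
//       throw_errno_if_minus1( fd, "opening ", parts[i] ) ;
//       google::protobuf::io::FileInputStream *is =
//           new google::protobuf::io::FileInputStream( fd ) ;
//       is->SetCloseOnDelete( true ) ;
//       ms.add_stream( new UniversalReader( parts[i], is ) ) ;
//   }
//   ChunkedWriter out( 1, 25, "<stdout>" ) ;  // fd 1; level 25 as used below
//   transfer( ms, out ) ;                     // check headers, merge, write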

//! \brief merges multiple streams by taking the best hit
//! If asked for a result, this class takes results from each of the
//! streams in turn, until one sequence has received an entry from each.
//! This sequence is then delivered.  Everything works fine as long as
//! the sequence names arrive in roughly the same order and every name
//! is contained in every input.  Otherwise it still works, but eats memory.
//! This is best used to merge chunks of work done by independent
//! processes where the order of records hasn't been disturbed too much
//! (multithreading and the implied slight shuffle is fine).
class NearSortedJoin : public StreamBundle
{
	private:
		typedef map< string, pair< size_t, Result > > Buffer ;
		Buffer buffer_ ;
		size_t nread_, nwritten_, nstreams_ ;
		deque< StreamHolder >::iterator cur_input_ ;
		Chan progress_ ;

	public:
		NearSortedJoin() : nread_(0), nwritten_(0), nstreams_(0) {}

		virtual Header fetch_header() ;
		virtual Result fetch_result() ;
} ;


//! \brief presents a container as an input stream
//! The container must hold pointers to \c Result; the stream acts as an
//! input stream.  A suitable header and footer are supplied at
//! construction time.
template< typename I > class ContainerStream : public Stream
{
	private:
		I cur_, end_ ;
		unsigned total_, done_ ;
		Chan chan_ ;

	public:
		ContainerStream( const Header &hdr, I begin, I end, const Footer &foot ) 
			: cur_( begin ), end_( end ), total_( std::distance( begin, end ) ), done_(0)
		{
			hdr_ = hdr ;
			foot_ = foot ;
			state_ = cur_ == end_ ? end_of_stream : have_output ;
		}

		virtual Result fetch_result() {
			if( ++done_ % 1024 == 0 )
			{
				stringstream s ;
				s << "(mem) " << done_ << "/" << total_
					<< " (" << (int)(100*done_/total_) << "%)" ;
				chan_( Console::info, s.str() ) ;
			}

			Result r = **cur_ ;
			++cur_ ;
			if( cur_ == end_ ) state_ = end_of_stream ;
			return r ;
		}
		virtual string type_name() const { return "ContainerStream" ; }
} ;
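
// Sketch: expose an in-memory batch as an input stream, the same
// pattern flush_scratch() uses below.  `results', `hdr' and `foot' are
// hypothetical and must outlive the stream.
//
//   deque< Result* > results ;   // sorted and filled elsewhere
//   ContainerStream< deque< Result* >::const_iterator >
//       cs( hdr, results.begin(), results.end(), foot ) ;
//   transfer( cs, out ) ;        // drain into some writer `out'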

extern unsigned SortingStream__ninstances ;

//! \brief stream filter that sorts its input
//! This stream is intended to sort large amounts of data.  To do that,
//! it performs a quick sort on blocks that fit into memory (the maximum
//! size is configured at construction time), writes them out to
//! temporary files, then reads them back in and does a merge sort.  If
//! too many files are open (as counted by \c AnfoReader, again
//! configurable at construction time), some temporary files are merge
//! sorted into a bigger one.
template <class Comp> class SortingStream : public Stream
{
	private:
		typedef deque< Holder< streams::Stream > > MergeableQueue ;
		typedef map< unsigned, MergeableQueue > MergeableQueues ;
		typedef deque< Result* > ScratchSpace ;

		//! Storage to perform quicksort in.
		ScratchSpace scratch_space_ ;

		//! We keep multiple queues of streams ordered and separated by
		//! the number of times they have been merged.  This way we
		//! don't merge the stuff over and over, and instead tend to do
		//! a merge that gives the greatest reduction in number of open
		//! files with least amount of IO.  The key is the number of
		//! times a file has already been merged with others, the value
		//! is just a set of streams.
		MergeableQueues mergeable_queues_ ;
		Holder< Stream > final_stream_ ;

		uint64_t total_scratch_size_ ;

		unsigned max_que_size_ ;
		unsigned num_open_files_ ;
		unsigned max_arr_size_ ;
		Comp comp_ ;

		//! \brief quicksort the scratch area
		//! \internal
		void sort_scratch() {
			if( scratch_space_.size() > 1 )
			{
				std::stringstream s ;
				s << "SortingStream: qsorting " << scratch_space_.size() << " results" ; 
				console.output( Console::notice, s.str() ) ;

				sort( scratch_space_.begin(), scratch_space_.end(), comp_ ) ;

				// after sorting, entries for the same read are
				// adjacent: collapse each run of equal seqids into one
				ScratchSpace::iterator o = scratch_space_.begin() ;
				for( ScratchSpace::const_iterator i = scratch_space_.begin()+1,
						e = scratch_space_.end() ;i != e ; ++i )
				{
					if( (*i)->read().seqid() == (*o)->read().seqid() )
						merge_sensibly( **o, **i ) ;
					else *(++o) = *i ;
				}
				scratch_space_.erase( ++o, scratch_space_.end() ) ;
			}
		}

		void enqueue_stream( streams::StreamHolder, int = 0 ) ;
		void flush_scratch() ;

		virtual ~SortingStream()
		{
			for_each( scratch_space_.begin(), scratch_space_.end(), delete_ptr<Result>() ) ;
			--SortingStream__ninstances ;
		}

	public:
		SortingStream( unsigned as = 256, unsigned qs = 256, Comp comp = Comp() ) :
			final_stream_(), total_scratch_size_(0), max_que_size_(qs),
			num_open_files_(0), max_arr_size_( as ), comp_( comp )
		{ foot_.set_exit_code( 0 ) ; ++SortingStream__ninstances ; }

		virtual void put_header( const Header& h ) { Stream::put_header( h ) ; comp_.tag_header( hdr_ ) ; }
		virtual void put_footer( const Footer& ) ;
		virtual void put_result( const Result& r ) {
			scratch_space_.push_back( new Result( r ) ) ;
			total_scratch_size_ += scratch_space_.back()->SpaceUsed() ;
			if( (total_scratch_size_ >> 20) >= max_arr_size_ ) flush_scratch() ;
		}

		virtual Result fetch_result()
		{
			Result r = final_stream_->fetch_result() ;
			state_ = final_stream_->get_state() ;
			return r ;
		}
		virtual Footer fetch_footer() { merge_sensibly( foot_, final_stream_->fetch_footer() ) ; return foot_ ; }
		virtual Object get_summary() const { return final_stream_->get_summary() ; }
} ;
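
// Sketch of typical use: sort an entire stream by genome coordinate.
// `in' and `out' are hypothetical streams (opened, e.g., as in the
// MergeStream sketch above); transfer() is assumed to pump header,
// results and footer from its first argument into its second, as it is
// used elsewhere in this file.
//
//   SortingStream< by_genome_coordinate > ss( 512 ) ;  // ~512 MiB scratch
//   transfer( in, ss ) ;   // put_footer() triggers the final sort/merge
//   transfer( ss, out ) ;  // drain the sorted results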

template < typename Comp > void SortingStream<Comp>::flush_scratch()
{
	sort_scratch() ;
	string tempname ;
	int fd = mktempfile( &tempname ) ;
	{
		ContainerStream< deque< Result* >::const_iterator >
			sa( hdr_, scratch_space_.begin(), scratch_space_.end(), foot_ ) ;
		ChunkedWriter out( fd, 25, tempname.c_str() ) ;
		console.output( Console::notice, "SortingStream: Writing to tempfile " + tempname ) ;
		transfer( sa, out ) ;
	}
	throw_errno_if_minus1( lseek( fd, 0, SEEK_SET ), "seeking in ", tempname.c_str() ) ;
	google::protobuf::io::FileInputStream* is = new google::protobuf::io::FileInputStream( fd ) ;
	is->SetCloseOnDelete( true ) ;
	enqueue_stream( new UniversalReader( tempname, is ), 1 ) ;

	for_each( scratch_space_.begin(), scratch_space_.end(), delete_ptr<Result>() ) ;
	scratch_space_.clear() ;
	total_scratch_size_ = 0 ;
}

template < typename Comp > void SortingStream<Comp>::enqueue_stream( streams::StreamHolder s, int level ) 
{
	mergeable_queues_[ level ].push_back( s ) ;

	if( ++num_open_files_ > max_que_size_ ) {
		// get the biggest bin, we'll merge everything below that
		unsigned max_bin = 0 ;
		for( MergeableQueues::const_iterator i = mergeable_queues_.begin() ;
				i != mergeable_queues_.end() ; ++i ) 
			if( i->second.size() > mergeable_queues_[max_bin].size() )
				max_bin = i->first ;

		unsigned total_inputs = 0 ;
		for( MergeableQueues::iterator i = mergeable_queues_.begin() ; i->first <= max_bin ; ++i ) 
			total_inputs += i->second.size() ;

		// we must actually make progress, and more than just a
		// single stream must be merged to avoid quadratic behaviour
		// (only important in a weird corner case, in which we simply
		// use one more file descriptor)
		if( total_inputs > 2 ) {
			string fname ;
			int fd = mktempfile( &fname ) ;
			std::stringstream s ;
			s << "SortingStream: Merging bins 0.." << max_bin << " to tempfile " << fname ;
			console.output( Console::notice, s.str() ) ;
			{
				streams::MergeStream ms ;
				for( MergeableQueues::iterator i = mergeable_queues_.begin() ; i->first <= max_bin ; ++i ) 
				{
					num_open_files_ -= i->second.size() ;
					for( size_t j = 0 ; j != i->second.size() ; ++j )
						ms.add_stream( i->second[j] ) ;
					i->second.clear() ;
				}
				ChunkedWriter out( fd, 25, fname.c_str() ) ;
				transfer( ms, out ) ;
			}
			throw_errno_if_minus1( lseek( fd, 0, SEEK_SET ), "seeking in ", fname.c_str() ) ;
			google::protobuf::io::FileInputStream* is = new google::protobuf::io::FileInputStream( fd ) ;
			is->SetCloseOnDelete( true ) ;
			enqueue_stream( new UniversalReader( fname, is ), max_bin + 1 ) ;
		}
	}
}

//! \brief ends the input, initiates sorting
//! Only when the input ends can we completely sort it, so setting the
//! footer switches to output mode.  Here we collect the temporary files
//! we've written and become a \c MergeStream.
template < typename Comp > void SortingStream<Comp>::put_footer( const Footer& f ) 
{
	Stream::put_footer( f ) ;

	bool need_merge = false ;
	for( MergeableQueues::const_iterator i = mergeable_queues_.begin() ;
			!need_merge && i != mergeable_queues_.end() ; ++i )
		if( !i->second.empty() ) need_merge = true ;

	if( need_merge ) 
	{
		// We have to be careful about buffering; if more than one
		// SortingStream is active, we could run out of RAM.  Therefore, if
		// we're alone, we sort and add a stream.  Else we flush to
		// temporary storage.  (Also, if the temporary space is empty, we
		// *always* add the ContainerStream, otherwise we get strange
		// effects if the output turns out to be empty.)
		Holder< MergeStream > m( new MergeStream ) ;
		if( scratch_space_.begin() != scratch_space_.end() && SortingStream__ninstances > 1 )
			flush_scratch() ; 
		else {
			console.output( Console::notice, "SortingStream: final sort" ) ;
			sort_scratch() ;
			m->add_stream( new ContainerStream< deque< Result* >::const_iterator >(
						hdr_, scratch_space_.begin(), scratch_space_.end(), foot_ ) ) ;
		}

		// add any streams that have piled up
		for( MergeableQueues::const_iterator i = mergeable_queues_.begin() ; i != mergeable_queues_.end() ; ++i )
			for( MergeableQueue::const_iterator j = i->second.begin() ; j != i->second.end() ; ++j )
				m->add_stream( *j ) ;
		mergeable_queues_.clear() ;

		if( SortingStream__ninstances == 1 )
		{
			// we're alone.  delegate directly to MergeStream
			console.output( Console::notice, "SortingStream: merging everything to output" ) ;
			final_stream_ = m ;
		}
		else
		{
			// not alone.  We'll conserve file handles by merging everything
			// into a temporary file and opening that.
			string fname ;
			int fd = mktempfile( &fname ) ;
			std::stringstream s ;
			s << "SortingStream: Merging everything to tempfile " << fname ;
			console.output( Console::notice, s.str() ) ;
			ChunkedWriter out( fd, 25, fname.c_str() ) ;
			transfer( *m, out ) ;
			num_open_files_ = 1 ;
			throw_errno_if_minus1( lseek( fd, 0, SEEK_SET ), "seeking in ", fname.c_str() ) ;
			google::protobuf::io::FileInputStream* is = new google::protobuf::io::FileInputStream( fd ) ;
			is->SetCloseOnDelete( true ) ;
			final_stream_ = new UniversalReader( fname, is ) ; 
		}
	}
	// nothing to merge, just the scratch space
	else if( scratch_space_.begin() != scratch_space_.end() && SortingStream__ninstances > 1 )
	{
		// not alone.  We'll conserve memory by writing to a
		// temporary file and opening that.
		flush_scratch() ; 
		console.output( Console::notice, "SortingStream: output comes from single temporary file" ) ;
		final_stream_ = mergeable_queues_[1].front() ;
	}
	else {
		// alone, we'll use a container stream directly
		console.output( Console::notice, "SortingStream: final sort" ) ;
		sort_scratch() ;
		final_stream_ = new ContainerStream< deque< Result* >::const_iterator >(
				hdr_, scratch_space_.begin(), scratch_space_.end(), foot_ ) ;
		console.output( Console::notice, "SortingStream: using scratch space for output" ) ;
	}
	final_stream_->fetch_header() ;
	state_ = final_stream_->get_state() ;
}


class RepairHeaderStream : public Stream
{
	private:
		string editor_ ;

	public:
		RepairHeaderStream( const string &e ) : editor_( e ) {}
		virtual void put_header( const Header& ) ;
} ;

class FanOut : public StreamBundle
{
	public:
		virtual void put_header( const Header& ) ;
		virtual void put_result( const Result& ) ;
		virtual void put_footer( const Footer& ) ;
		virtual Footer fetch_footer() ;
} ;

class Compose : public StreamBundle
{
	public:
		virtual state get_state() ;

		virtual void put_header( const Header&   ) ;
		virtual void put_result( const Result& r ) { streams_.front()->put_result( r ) ; get_state() ; }
		virtual void put_footer( const Footer& f ) { streams_.front()->put_footer( f ) ; get_state() ; }

		virtual Header fetch_header() ;
		virtual Result fetch_result() { return streams_.back()->fetch_result() ; }
		virtual Footer fetch_footer() { return streams_.back()->fetch_footer() ; }
		virtual string type_name() const { return "Compose" ; }
} ;

class StatStream : public Stream
{
	private:
		unsigned total_, mapped_, mapped_u_, different_ ;
		uint64_t bases_, bases_gc_, bases_m_, bases_gc_m_ ;
		uint64_t bases_squared_, bases_m_squared_ ; 

	public:
		StatStream()
			: total_(0), mapped_(0), mapped_u_(0), different_(0)
		    , bases_(0), bases_gc_(0), bases_m_(0), bases_gc_m_(0)
		    , bases_squared_(0), bases_m_squared_(0) {}

		virtual void put_result( const Result& ) ;
		virtual Object get_summary() const ;
} ;

//! \brief calculates divergence
//! Divergence is calculated by the Green Triangulation method.  Two
//! genomes are needed (primary and secondary) and alignments to both of
//! them.  Differences are counted (all equal, either sequence
//! different, all different), then error corrected (math stolen from
//! dropin_AHA.pl) and turned into divergence of the last common
//! ancestor, expressed as fraction of total divergence between primary
//! and secondary genome.
class DivergenceStream : public Stream
{
	private:
		string primary_genome_, secondary_genome_ ;
		int chop_ ;
		bool ancient_ ;
		int64_t b1, b2, b3, b4, b5 ;

	public:
		DivergenceStream( const string& primary, const string& secondary, int chop )
			: primary_genome_( primary ), secondary_genome_( secondary ), chop_( chop )
			, ancient_( false ), b1(0), b2(0), b3(0), b4(0), b5(0) {}

		virtual void put_header( const Header& ) ;
		virtual void put_result( const Result& ) ;
		virtual Object get_summary() const ;
} ;

class MismatchStats : public Stream
{
	private:
		int mat_[4][4] ;

	public:
		MismatchStats() { memset( mat_, 0, sizeof( mat_ ) ) ; }
		virtual void put_result( const Result& ) ;
		virtual Object get_summary() const ;
} ;

//! \brief checks for hits to homologous regions
//! This filter reads two UCSC Chain files, breaks them down into
//! blocks, discards overlapping blocks (so the mapping becomes 1:1),
//! then discards blocks from the first chain that wouldn't map back
//! according to the second chain.
//! 
//! The filter proper verifies that the alignment to the first genome is
//! mostly covered by blocks, then maps them and verifies that they
//! cover the alignment to the second genome.  A flag is set if either
//! alignment is missing or either test fails.
//!
//! \todo I swear, one of those days I'll implement a symbol table for
//!       those repeated chromosome names.
class AgreesWithChain : public Filter
{
	private:
		string left_genome_, right_genome_ ;

		struct Entry ;
		typedef map< unsigned, Entry > Chains ;	// =^= left_start
		typedef map< string, Chains > Map1 ;	// =^= left_chr

		// Chains have a hierarchical structure: below any chain, there
		// can be a collection of more.  We have one such top-level
		// collection per chromosome.
		struct Entry {
			unsigned left_end ;
			unsigned right_start ;
			unsigned right_end : 31 ;
			unsigned strand : 1 ;
			string right_chr ;
			Chains nested ;
		} ;

		Map1 map_ ;

		static Chains::iterator find_any_most_specific_overlap( 
				unsigned start, unsigned end, Chains *chains ) ;
	public:
		//! \brief constructs chain filter
		//! \param l primary ("left") genome, e.g. hg18
		//! \param r secondary ("right") genome, e.g. pt2
		//! \param s chain from \c l to \c r (this means \c l is target,
		//!          \c r is query), in the form of an open stream
		//!          together with its name
		AgreesWithChain( const string& l, const string& r, const pair<istream*,string>& s ) ;
		virtual bool xform( Result& ) ;
} ;

class RegionFilter : public HitFilter
{
	private:
		typedef std::map< unsigned, unsigned > Regions ;		// key: end(!), value: start
		typedef std::map< std::string, Regions > Regions2 ;		// key: chromosome
		typedef std::map< std::string, Regions2 > Regions3 ; 	// key: filename

		static Regions3 all_regions ;

		Regions2 *my_regions ;

	public:
		RegionFilter( const pair< istream*, string >& ) ;

		//! \brief looks for a region overlapping the current alignment
		//! This will only work correctly for non-overlapping
		//! annotations.  Deal with it.
		bool inside( const Hit& h )
		{
			unsigned x = h.start_pos() ;
			const Regions &r = (*my_regions)[ h.sequence() ] ;
			Regions::const_iterator i = r.lower_bound( x ) ;
			// we now have the leftmost region whose end is at or to the
			// right of our start.  if no such region exists, nothing overlaps.
			if( i == r.end() ) return false ;
			// check whether that region actually overlaps: it does if
			// it starts before our alignment ends.
			return i->second <= x + abs(h.aln_length()) && i->first >= x ;
		}
} ;

class InsideRegion : public RegionFilter
{
	public:
		InsideRegion( const pair< istream*, string > &p ) : RegionFilter( p ) {}
		virtual bool keep( const Hit& h ) { return inside( h ) ; }
} ;
class OutsideRegion : public RegionFilter
{
	public:
		OutsideRegion( const pair< istream*, string > &p ) : RegionFilter( p ) {}
		virtual bool keep( const Hit& h ) { return !inside( h ) ; }
} ;
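
// Sketch: restrict hits to annotated regions.  The filter takes its
// region file as a (stream, name) pair; `regions.txt' is hypothetical,
// and std::ifstream needs <fstream>.
//
//   std::ifstream rf( "regions.txt" ) ;
//   InsideRegion filt( std::make_pair( (std::istream*)&rf,
//                                      std::string( "regions.txt" ) ) ) ;
//   // plug `filt' into a Compose, or drive it with transfer()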

class Sanitizer : public Filter
{
	public:
		virtual void put_header( const Header& ) ;
		virtual bool xform( Result& ) ;
} ;

} // namespace

#endif