This file is indexed.

/usr/share/perl5/XML/Filter/Sort.pm is in libxml-filter-sort-perl 1.01-3.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
package XML::Filter::Sort;

use strict;
use Carp;

require XML::SAX::Base;


##############################################################################
#                     G L O B A L   V A R I A B L E S
##############################################################################

# Package variables holding the module version and the inheritance list
use vars qw($VERSION @ISA);

$VERSION = '1.01';

# All SAX event handling is inherited from XML::SAX::Base
@ISA = qw(XML::SAX::Base);

# Buffer manager classes selected by new(): the first is used by default, the
# second when the 'TempDir' option is set (unless 'BufferManagerClass' was
# supplied by the caller)
use constant DEFAULT_BUFFER_MANAGER_CLASS => 'XML::Filter::Sort::BufferMgr';
use constant DISK_BUFFER_MANAGER_CLASS    => 'XML::Filter::Sort::DiskBufferMgr';


##############################################################################
#                             M E T H O D S
##############################################################################

##############################################################################
# Constructor: new()
#
# Creates the filter via the inherited constructor, then validates and
# normalises the options:
#  - 'Record' is mandatory
#  - a buffer manager class is chosen (the disk one if 'TempDir' was given,
#    the default one otherwise, unless 'BufferManagerClass' names one) and
#    its module is loaded
#  - 'Keys' ends up as a list of [ path, comparator, order ] lists
#  - 'MaxMem' (digits with optional K or M suffix) becomes a plain number
#  - the options to be handed to the buffer manager are collected in
#    'BufferOpts'
#

sub new {
  my $proto = shift;

  my $class = ref($proto) || $proto;
  my $self  = $class->SUPER::new(@_);

  unless($self->{Record}) {
    croak "You must set the 'Record' option";
  }


  # Choose the buffering class (caller's choice wins) and load its module

  unless($self->{BufferManagerClass}) {
    if($self->{TempDir}) {
      $self->{BufferManagerClass} = DISK_BUFFER_MANAGER_CLASS;
    }
    else {
      $self->{BufferManagerClass} = DEFAULT_BUFFER_MANAGER_CLASS;
    }
  }
  (my $mod_path = $self->{BufferManagerClass} . '.pm') =~ s{::}{/}g;
  require $mod_path;


  # A scalar 'Keys' value is parsed into a list of lists: one key per line
  # (or semicolon separated), fields separated by whitespace or commas

  $self->{Keys} ||= '.';
  if(!ref($self->{Keys})) {
    my @parsed;
    foreach my $spec (split(/[\r\n;]/, $self->{Keys})) {
      next unless($spec =~ /\S/);
      (my $fields = $spec) =~ tr/,/ /;
      push @parsed, [ $fields =~ /(\S+)/g ];
    }
    $self->{Keys} = \@parsed;
  }


  # Canonicalise the comparator ('alpha', 'num' or a reference, which is left
  # untouched) and the order ('asc' or 'desc') of every key

  foreach my $key (@{$self->{Keys}}) {
    ref($key) or croak "Keys must be a list of lists";

    my($comparator, $order) = @{$key}[1, 2];
    if(!$comparator) {
      $key->[1] = 'alpha';
    }
    elsif(!ref($comparator)) {
      $key->[1] = ($comparator =~ /^n/i) ? 'num' : 'alpha';
    }
    $key->[2] = ($order && $order =~ /^d/i) ? 'desc' : 'asc';
  }


  # Let the buffer manager class precompile its key matching routines, if it
  # offers to do so

  my $manager_class = $self->{BufferManagerClass};
  if($manager_class->can('compile_matches')) {
    my @match_subs = $manager_class->compile_matches($self->{Keys});
    $self->{_match_subs} = [ @match_subs ];
  }


  # Convert 'MaxMem' to a number, applying any K or M multiplier

  if($self->{MaxMem}) {
    my($digits, $unit) = uc($self->{MaxMem}) =~ /^\s*(\d+)(K|M)?$/
      or croak "Illegal value for 'MaxMem': $self->{MaxMem}";

    my %multiplier = (K => 1024, M => 1024 * 1024);
    $self->{MaxMem} = $digits;
    $self->{MaxMem} *= $multiplier{$unit} if($unit);
  }


  # Options to be passed to buffers/buffer managers (both spellings of the
  # key space normalisation option are honoured)

  my $normalise = $self->{NormaliseKeySpace} || $self->{NormalizeKeySpace};
  $self->{BufferOpts} = {
    Keys              => [ @{$self->{Keys}} ],
    _match_subs       => $self->{_match_subs},
    IgnoreCase        => $self->{IgnoreCase},
    NormaliseKeySpace => $normalise,
    KeyFilterSub      => $self->{KeyFilterSub},
    TempDir           => $self->{TempDir},
    MaxMem            => $self->{MaxMem},
  };

  bless($self, $class);
  return($self);
}


##############################################################################
# Method: start_document()
#
# Initialise handler structures and propagate event.
#
# The 'Record' path is broken into steps at each '/', except for slashes which
# appear inside a '{namespace-uri}' prefix, so that a path such as
# '{http://www.w3.org/1999/xhtml}li' is treated as a single step.  A path
# which starts with '/' is flagged as an absolute match.
#

sub start_document {
  my $self = shift;


  # Track path to current element

  $self->{path_name} = [];
  $self->{path_ns}   = [];
  $self->{prefixes}  = [];
  $self->{depth}     = 0;


  # Initialise pattern matching for record elements.  A slash which is
  # followed by a '}' with no other brace in between lies inside a namespace
  # URI, so the lookahead stops it being treated as a path separator.

  my @parts = split(/\/(?![^{}]*\})/, $self->{Record});
  if(!@parts  or  $parts[0] eq '') {
    $self->{abs_match} = 1;
    shift @parts;
  }
  else {
    $self->{abs_match} = 0;
  }


  # Separate each step into its element name and optional namespace URI (the
  # URI stays undefined when no '{...}' prefix was given)

  $self->{rec_path_name} = [ ];
  $self->{rec_path_ns}   = [ ];
  foreach (@parts) {
    if(/^(?:\{(.*?)\})?(.*)$/) {
      push @{$self->{rec_path_name}}, $2;
      push @{$self->{rec_path_ns}},   $1;
    }
  }
  $self->{required_depth} = @parts;

  $self->SUPER::start_document(@_);
}


##############################################################################
# Method: start_element()
#
# Routes the event either into a record buffer or on to the downstream
# handler, opening a new buffer whenever the element is recognised as a
# record.  Pending character data (contiguous character events are merged by
# characters()) is always sent on first, which is how each 'record' element
# takes its leading whitespace with it into its buffer.
#

sub start_element {
  my($self, $element) = @_;


  # In passthru mode the event goes directly to the downstream handler

  if($self->{passthru}) {
    return $self->start_prefixed_element($element);
  }


  # Extend the path to the current element (used for recognising records)

  my $ns_uri = $element->{NamespaceURI};
  $ns_uri = '' unless(defined($ns_uri));

  push @{$self->{path_name}}, $element->{LocalName};
  push @{$self->{path_ns}},   $ns_uri;
  $self->{depth}++;


  # Inside an open record, everything goes into that record's buffer

  if($self->{buffer}) {
    $self->{record_depth}++;
    $self->send_characters();
    $self->{buffer}->start_element($element);
    return;
  }


  # Pending non-whitespace text ends the current run of buffered records

  if($self->{buffered_text}) {
    $self->flush_buffers();
    $self->send_characters();
  }


  # Not a record: send buffered data and this event to the downstream handler

  unless($self->match_record()) {
    $self->flush_buffers();
    $self->send_characters();
    return $self->start_prefixed_element($element);
  }


  # Start of a record: open a buffer for it (creating the buffer manager on
  # first use) and hand over the pending text followed by this event

  $self->{record_depth} = 1;

  $self->{buffer_manager} ||= $self->{BufferManagerClass}->new(
    %{$self->{BufferOpts}}
  );
  $self->{buffer} = $self->{buffer_manager}->new_buffer();

  $self->send_characters();
  $self->{buffer}->start_element($element);
  return;
}


##############################################################################
# Method: end_element()
#
# Counterpart of start_element(): the event goes to the open record buffer if
# there is one (closing that buffer when the record's own end tag arrives),
# otherwise any buffered records and pending text are sent downstream ahead
# of the event itself.
#

sub end_element {
  my($self, $element) = @_;


  # In passthru mode the event goes directly to the downstream handler

  if($self->{passthru}) {
    return $self->end_prefixed_element($element);
  }


  # Step back up the path to the current element

  pop @{$self->{path_name}};
  pop @{$self->{path_ns}};
  $self->{depth}--;


  # No record buffer open: emit complete buffered records, pending text and
  # then this event

  unless($self->{buffer}) {
    $self->flush_buffers();
    $self->send_characters();
    return $self->end_prefixed_element($element);
  }


  # Record buffer open: it receives the event, and is closed once the nesting
  # depth within the record has dropped back to zero

  $self->send_characters();
  $self->{buffer}->end_element($element);

  if(--$self->{record_depth} == 0) {
    $self->{buffer_manager}->close_buffer($self->{buffer});
    delete($self->{buffer});
  }
  return;
}


##############################################################################
# Method: characters()
#
# Character data is not forwarded immediately (except in passthru mode).  It
# is accumulated in 'char_buffer' so that:
# - contiguous character events are merged into one
# - a 'record' element can take its leading whitespace with it
# The 'buffered_text' flag records whether any of the accumulated data is
# non-whitespace.
#

sub characters {
  my($self, $char) = @_;

  if($self->{passthru}) {
    return $self->SUPER::characters($char);
  }

  if(!exists($self->{char_buffer})) {
    @{$self}{qw(char_buffer buffered_text)} = ('', 0);
  }

  my $text = $char->{Data};
  $self->{char_buffer}   .= $text;
  $self->{buffered_text} ||= ($text =~ /\S/ ? 1 : 0);
}


##############################################################################
# Method: ignorable_whitespace()
#
# Drops the event when the 'SkipIgnorableWS' option is set, otherwise treats
# it exactly like a characters() event.
#
# The original author describes this as a quick hack, written without access
# to a parser which generates these events.
#

sub ignorable_whitespace {
  my($self, $char) = @_;

  $self->{SkipIgnorableWS} or $self->characters($char);
}


##############################################################################
# Method: start_prefix_mapping()
# Method: end_prefix_mapping()
#
# Both events are swallowed.  Prefix mappings have to stay in step with the
# start/end_element events, and those may be re-ordered by the sort, so
# start/end_prefixed_element() generate replacement events instead.
#

sub start_prefix_mapping {
  return;
}

sub end_prefix_mapping {
  return;
}


##############################################################################
# Method: start_prefixed_element()
#
# Sends a start_element() event to the downstream handler, preceded by
# start_prefix_mapping() events re-generated from the element's namespace
# declaration attributes.  The mappings are also remembered on the 'prefixes'
# stack for end_prefixed_element().
#

sub start_prefixed_element {
  my($self, $elem) = @_;

  # Collect (prefix, uri) pairs, with a default namespace declaration first
  my @mappings;
  foreach my $attr (values %{$elem->{Attributes}}) {
    my $name   = $attr->{Name};
    my $prefix = $attr->{Prefix};
    if($name  and  $name eq 'xmlns') {
      unshift @mappings, '', $attr->{Value};
    }
    elsif($prefix  and  $prefix eq 'xmlns') {
      push @mappings, $attr->{LocalName}, $attr->{Value};
    }
  }

  # One stack entry per element: a copy of the pairs, or undef if none
  push @{$self->{prefixes}}, (@mappings ? [ @mappings ] : undef);

  for(my $i = 0; $i < @mappings; $i += 2) {
    $self->SUPER::start_prefix_mapping({
      Prefix       => $mappings[$i],
      NamespaceURI => $mappings[$i + 1],
    });
  }

  $self->SUPER::start_element($elem);
}


##############################################################################
# Method: end_prefixed_element()
#
# Sends an end_element() event to the downstream handler, followed by an
# end_prefix_mapping() event for each mapping which start_prefixed_element()
# recorded for the matching start tag.
#

sub end_prefixed_element {
  my($self, $elem) = @_;

  $self->SUPER::end_element($elem);

  # Entry pushed by the matching start tag (undef if it declared nothing)
  my $mappings = pop @{$self->{prefixes}};

  if($mappings) {
    while(my($prefix, $uri) = splice(@$mappings, 0, 2)) {
      $self->SUPER::end_prefix_mapping({
        Prefix       => $prefix,
        NamespaceURI => $uri,
      });
    }
  }

}


##############################################################################
# Method: comment()
#
# A comment inside an open record is stored in that record's buffer.  Anywhere
# else, buffered records and pending text are sent downstream first and the
# comment follows them.
#

sub comment {
  my($self, $comment) = @_;

  if($self->{passthru}) {
    return $self->SUPER::comment($comment);
  }

  unless($self->{buffer}) {
    $self->flush_buffers();
    $self->send_characters();
    return $self->SUPER::comment($comment);
  }

  $self->send_characters();
  $self->{buffer}->comment($comment);
  return;
}


##############################################################################
# Method: processing_instruction()
#
# A PI inside an open record is stored in that record's buffer.  Anywhere
# else, buffered records and pending text are sent downstream first and the
# PI follows them.
#

sub processing_instruction {
  my($self, $pi) = @_;

  if($self->{passthru}) {
    return $self->SUPER::processing_instruction($pi);
  }

  unless($self->{buffer}) {
    $self->flush_buffers();
    $self->send_characters();
    return $self->SUPER::processing_instruction($pi);
  }

  $self->send_characters();
  $self->{buffer}->processing_instruction($pi);
  return;
}


##############################################################################
# Method: send_characters()
#
# Delivers the text accumulated by characters() as a single characters()
# event - to the open record buffer if there is one, to the downstream
# handler otherwise - and then forgets it.  Does nothing if no text is
# pending.
#

sub send_characters {
  my $self = shift;

  exists($self->{char_buffer}) or return;

  my $event = { Data => $self->{char_buffer} };
  if(my $buffer = $self->{buffer}) {
    $buffer->characters($event);
  }
  else {
    $self->SUPER::characters($event);
  }

  delete($self->{char_buffer});
  delete($self->{buffered_text});
}


##############################################################################
# Method: flush_buffers()
#
# If there are any records buffered, sends them to the downstream handler.
#
# The buffer manager's to_sax() method is handed this filter object, and
# 'passthru' is switched on while it runs so that the event methods of this
# class pass events straight on downstream.  Should to_sax() die, 'passthru'
# is still switched off and the buffer manager discarded before the error is
# re-thrown, so the filter is not left stuck in passthru mode.
#

sub flush_buffers {
  my $self    = shift;

  my $manager = $self->{buffer_manager};
  return unless($manager);

  $self->{passthru} = 1;
  my $ok    = eval { $manager->to_sax($self); 1 };
  my $error = $@;                  # saved before anything else can clobber it

  $self->{passthru} = 0;
  delete($self->{buffer_manager});

  die $error unless($ok);
  return;
}


##############################################################################
# Method: match_record()
#
# Compares the path to the current element against the steps of the 'Record'
# path, working backwards from the current element.  An absolute path only
# matches at exactly the required depth; a relative one at that depth or
# deeper.  A namespace URI is only compared for steps which specified one.
#
# Returns 1 on a match and nothing otherwise.
#

sub match_record {
  my $self = shift;

  my $need = $self->{required_depth};
  my $have = $self->{depth};

  if($self->{abs_match}) {
    return if($have != $need);
  }
  elsif($have < $need) {
    return;
  }

  my($names,      $uris)      = @{$self}{qw(path_name     path_ns)};
  my($want_names, $want_uris) = @{$self}{qw(rec_path_name rec_path_ns)};

  for(my $i = -1; $i >= -$need; $i--) {
    return if($names->[$i] ne $want_names->[$i]);
    next unless(defined($want_uris->[$i]));
    return if($uris->[$i] ne $want_uris->[$i]);
  }

  return(1);
}


1;
__END__

=head1 NAME

XML::Filter::Sort - SAX filter for sorting elements in XML

=head1 SYNOPSIS

  use XML::Filter::Sort;
  use XML::SAX::Machines qw( :all );

  my $sorter = XML::Filter::Sort->new(
    Record  => 'person',
    Keys    => [
	         [ 'lastname',  'alpha', 'asc' ],
	         [ 'firstname', 'alpha', 'asc' ],
		 [ '@age',      'num',   'desc']
               ],
  );

  my $filter = Pipeline( $sorter => \*STDOUT );

  $filter->parse_file(\*STDIN);

Or from the command line:

  xmlsort

=head1 DESCRIPTION

This module is a SAX filter for sorting 'records' in XML documents (including
documents larger than available memory).  The C<xmlsort> utility which is
included with this distribution can be used to sort an XML file from the
command line without writing Perl code (see C<perldoc xmlsort>).

=head1 EXAMPLES

These examples assume that you will create an XML::Filter::Sort object and use
it in a SAX::Machines pipeline (as in the synopsis above).  Of course you could
use the object directly by hooking up to a SAX generator and a SAX handler but
such details are omitted from the sample code.

When you create an XML::Filter::Sort object (with the C<new()> method), you
must use the 'Record' option to identify which elements you want sorted.  The
simplest way to do this is to simply use the element name, eg:

  my $sorter = XML::Filter::Sort->new( Record  => 'colour' );

Which could be used to transform this XML:

  <options>
    <colour>red</colour>
    <colour>green</colour>
    <colour>blue</colour>
  </options>

to this:

  <options>
    <colour>blue</colour>
    <colour>green</colour>
    <colour>red</colour>
  </options>

You can define a more specific path to the record by adding a prefix of element
names separated by forward slashes, eg:

  my $sorter = XML::Filter::Sort->new( Record  => 'hair/colour' );

which would only sort <colour> elements contained directly within a <hair>
element (and would therefore leave our sample document above unchanged).  A
path which starts with a slash is an 'absolute' path and must specify all 
intervening elements from the root element to the record elements.

A record element may contain other elements.  The order of the record elements
may be changed by the sorting process but the order of any child elements
within them will not.

The default sort uses the full text of each 'record' element and uses an
alphabetic comparison.  You can use the 'Keys' option to specify a list of
elements within each record whose text content should be used as sort keys.
You can also use this option to specify whether the keys should be compared
alphabetically or numerically and whether the resulting order should be
ascending or descending, eg:

  my $sorter = XML::Filter::Sort->new(
    Record  => 'person',
    Keys    => [
	         [ 'lastname',  'alpha', 'asc'  ],
	         [ 'firstname', 'alpha', 'asc'  ],
	         [ '@age',      'num',   'desc' ],
               ]
  );

Given this record ...

    <person age='35'>
      <firstname>Aardvark</firstname>
      <lastname>Zebedee</lastname>
    </person>

The above code would use 'Zebedee' as the first (primary) sort key, 'Aardvark'
as the second sort key and the number 35 as the third sort key.  In this case,
records with the same first and last name would be sorted from oldest to
youngest.

As with the 'record' path, it is possible to specify a path to the sort key
elements (or attributes).  To make a path relative to the record element
itself, use './' at the start of the path.

=head1 OPTIONS

=over 4

=item Record => 'path string'

A simple path string defining which elements should be treated as 'records' to
be sorted (see L<"PATH SYNTAX">).  Elements which do not match this path will
not be altered by the filter.  Elements which do match this path will be
re-ordered depending on their contents and the value of the Keys option.

When a record element is re-ordered, it takes its leading whitespace with it.

Only lists of contiguous record elements will be sorted.  A list of records
which has a 'foreign body' (a non-record element, non-whitespace text, a
comment or a processing instruction) between two elements will be treated as
two separate lists and each will be sorted in isolation of the other.

=item Keys => [ [ 'path string', comparator, order ], ... ]

=item Keys => 'delimited string'

This option specifies which parts of the records should be used as sort keys.
The first form uses a list-of-lists syntax.  Each key is defined using a list
of three elements:

=over 4

=item 1

The 'path string' defines the path to an element or an attribute whose text
contents should be used as the value of the sort key (see L<"PATH SYNTAX">).

=item 2

The 'comparator' defines how these values should be compared.  This can be the
string 'alpha' for alphabetic, the string 'num' for numeric or a reference to a
subroutine taking two parameters and returning -1, 0 or 1 (similar to the
standard Perl sort function but without the $a, $b magic).

This item is optional and defaults to 'alpha'.

=item 3

The 'order' should be 'asc' for ascending or 'desc' for descending and if
omitted, defaults to 'asc'.

=back

You may prefer to define the Keys using a delimited string rather than a
list of lists.  Keys in the string should be separated by either newlines or
semicolons and the components of a key should be separated by whitespace or
commas.  It is not possible to define a subroutine reference comparator using
the string syntax.

=item IgnoreCase => 1

Enabling this option will make sort comparisons case-insensitive (rather than
the default case-sensitive).

=item NormaliseKeySpace => 1

The sort key values for each record will be the text content of the child
elements specified using the Keys option (above).  If you enable this option,
leading and trailing whitespace will be stripped from the keys and each
internal run of spaces will be collapsed to a single space.  The default 
value for this option is off for efficiency.

Note: The contents of the record are not affected by this setting - merely
the copy of the data that is used in the sort comparisons.

=item KeyFilterSub => coderef

You can also supply your own custom 'fix-ups' by passing this option a
reference to a subroutine.  The subroutine will be called once for each record
and will be passed a list of the key values for the record.  The routine must
return the same number of elements each time it is called, but this may be less
than the number of values passed to it.  You might use this option to combine
multiple key values into one (eg: using sprintf).

Note: You can enable both the NormaliseKeySpace and the KeyFilterSub options -
space normalisation will occur first.

=item TempDir => 'directory path'

This option serves two purposes: it enables disk buffering rather than the
default memory buffering and it allows you to specify where on disk the data
should be buffered.  Disk buffering will be slower than memory buffering, so
don't ask for it if you don't need it.  For more details, see
L<"IMPLEMENTATION">.

Note: It is safe to specify the same temporary directory path for multiple
instances since each will create a uniquely named subdirectory (and clean it
up afterwards).

=item MaxMem => bytes

The disk buffering mode actually sorts chunks of records in memory before
saving them to disk.  The default chunk size is 10 megabytes.  You can use this
option to specify an alternative chunk size (in bytes) which is more attuned to
your available resources (more is better).  A suffix of 'K' or 'M' is
recognised as kilobytes or megabytes respectively.

If you have not enabled disk buffering (using 'TempDir'), the MaxMem option has
no effect.  Attempting to sort a large document using only memory buffering
may result in Perl dying with an 'out of memory' error.

=item SkipIgnorableWS

If your SAX parser can do validation and generates ignorable_whitespace()
events, you can enable this option to discard these events.  If you leave this
option at its default value (implying you want the whitespace), the events
will be translated to characters() events.

=back

=head1 PATH SYNTAX

A simple element path syntax is used in two places:

=over 4

=item 1

with the 'Record' option to define which elements should be sorted

=item 2

with the 'Keys' option to define which parts of each record should be used
as sort keys.

=back

In each case you can use just an element name, or a list of element names
separated by forward slashes.  eg:

  Record => 'ul/li',
  Keys   => 'name'

If a 'Record' path begins with a '/' then it will be anchored at the document
root.  If a 'Keys' path begins with './' then it is anchored at the current
record element.  Unanchored paths can match at any level.

A 'Keys' path can include an attribute name prefixed with an '@' symbol, eg:

  Keys   => './@href'

Each element or attribute name can include a namespace URI prefix in curly
braces, eg:

  Record => '{http://www.w3.org/1999/xhtml}li'

If you do not include a namespace prefix, all elements with the specified
name will be matched, regardless of any namespace URI association they might
have.

If you include an empty namespace prefix (eg: C<'{}li'>) then only records
which do not have a namespace association will be matched.

=head1 IMPLEMENTATION

In order to arrange records into sorted order, this module uses buffering.  It
does not need to buffer the whole document, but for any sequence of records
within a document, all records must be buffered.  Unless you specify otherwise,
the records will be buffered in memory.  The memory requirements are similar to
DOM implementations - 10 to 50 times the character count of the source XML.  If
your documents are so large that you would not process them with a DOM parser
then you should enable disk buffering.

If you enable disk buffering, sequences of records will be assembled into
'chunks' of approximately 10 megabytes (this value is configurable).  Each
chunk will be sorted and saved to disk.  At the end of the record sequence, all
the sorted chunks will be merged and written out as SAX events.

The memory buffering mode represents each record as a
B<XML::Filter::Sort::Buffer> object and uses B<XML::Filter::Sort::BufferMgr>
objects to manage the buffers.  For details of the internals, see L<XML::Filter::Sort::BufferMgr>.

The disk buffering mode represents each record as a
B<XML::Filter::Sort::DiskBuffer> object and uses
B<XML::Filter::Sort::DiskBufferMgr> objects to manage the buffers.  For details
of the internals, see L<XML::Filter::Sort::DiskBufferMgr>.


=head1 BUGS

ignorable_whitespace() events shouldn't be translated to normal characters()
events - perhaps in a later release they won't be.

=head1 SEE ALSO

B<XML::Filter::Sort> requires L<XML::SAX::Base> and plays nicely with
L<XML::SAX::Machines>.


=head1 COPYRIGHT 

Copyright 2002-2005 Grant McLean E<lt>grantm@cpan.orgE<gt>

This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself. 

=cut