This file is indexed.

/usr/lib/ocaml/pxp-engine/pxp_reader.mli is in libpxp-ocaml-dev 1.2.4-1build1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
(* $Id: pxp_reader.mli 751 2009-05-14 13:56:13Z gerd $
 * ----------------------------------------------------------------------
 * PXP: The polymorphic XML parser for Objective Caml.
 * Copyright by Gerd Stolpmann. See LICENSE for details.
 *)

(** Resolving identifiers and associating resources *)

(** Purpose of this module: The [Pxp_reader] module allows you to exactly
 * specify how external identifiers ([SYSTEM] or [PUBLIC]) are mapped to
 * files or channels. This is normally only necessary for advanced
 * configurations, as the built-in functions {!Pxp_types.from_file}, 
 * {!Pxp_types.from_channel}, and {!Pxp_types.from_string} often suffice.
 *
 * There are two ways to use this module. First, you can compose the
 * desired behaviour by combining several predefined resolver objects
 * or functions. See the example section at the end of the file.
 * Second, you can inherit from the classes (or define a resolver class
 * from scratch). I hope this is seldom necessary as this way is much
 * more complicated; however it allows you to implement any required magic.
 *)


open Pxp_core_types.I;;
(** {fixpxpcoretypes true} *) (* Set back to false at the end of the file *)


(** {2 Types and exceptions} *)

exception Not_competent;;
  (** Raised by the [open_in] and [open_rid] methods of a resolver if the
   * object does not know how to handle the passed ID. Combining resolvers
   * such as [combine] react to this exception by trying the next
   * resolver.
   *)

exception Not_resolvable of exn;;
  (** Indicates that the resolver was competent, but there was an error
   * while resolving the external ID. The argument is the exception that
   * explains the reason.
   * [Not_resolvable(Not_found)] serves as indicator for an unknown reason.
   *
   * In contrast to [Not_competent], this exception stops the resolution
   * procedure, i.e. no other resolver is tried (see the description of
   * [resolve_to_url_obj_channel]).
   *)


(* One must only use either [lsrc_lexbuf], or [lsrc_unicode_lexbuf] ! *)
type lexer_source =
    { lsrc_lexbuf : Lexing.lexbuf Lazy.t;
      lsrc_unicode_lexbuf : Netulex.ULB.unicode_lexbuf Lazy.t;
    }
    (** The two alternative ways of lexing the input into tokens. The parser
     * chooses one of them: either the [Lexing.lexbuf] in [lsrc_lexbuf], or
     * the [Netulex.ULB.unicode_lexbuf] in [lsrc_unicode_lexbuf]. Both
     * fields are lazy values.
     *
     * For a given [lexer_source], only one of the two fields must be used,
     * never both.
     *)


(** {3 The [resolver] class type} 
 *
 * The class type [resolver] is the official type of all "resolvers".
 * Resolvers take file names (or better, external identifiers) and
 * return lexbufs, scanning the file for tokens. Resolvers may be
 * cloned, and clones can interpret relative file names relative to
 * their creator.
 *
 * {b Example of cloning:}
 *
 * Given resolver [r] reads from [file:/dir/f1.xml] this text:
 *
 * {[ <tag>some XML text &e; </tag> ]}
 *
 * The task is to switch to a resolver for reading from the entity
 * [e] (which is referenced by [&e;]), and to switch back to the original
 * resolver when the parser is done with [e]. Let us assume that [e]
 * has the [SYSTEM] ID [subdir/f2.xml]. Our approach is to first create
 * a clone of the original resolver so that we can do the switch to [e]
 * in a copy. That means switching back is easy: We give up the cloned
 * resolver, and continue with the original, unmodified resolver. 
 * This gives us the freedom to modify the clone in order to switch
 * to [e]. We do this by changing the input file:
 *
 * - Step 1: [let r' = ]<create clone of [r]>
 * - Step 2: <direct [r'] to open the file [subdir/f2.xml]>
 *
 * [r'] must still know the directory of the file [r] is reading, otherwise
 * it would not be able to resolve [subdir/f2.xml], which expands to
 * [file:/dir/subdir/f2.xml].
 *
 * Actually, this example can be coded as:
 *
 * {[
 * let r = new resolve_as_file () in
 * let lbuf = r # open_in (System "file:/dir/f1.xml") in
 * ... read from lbuf ...
 * let r' = r # clone in
 * let lbuf' = r' # open_in (System "subdir/f2.xml") in
 * ... read from lbuf' ...
 * r' # close_in;
 * ... read from lbuf ...
 * r # close_in;
 * ]}
 *)

class type resolver =
  object
    (** A resolver can open an input source, and returns this source as
     * [Lexing.lexbuf] (and as its advanced version, 
     * [Netulex.ULB.unicode_lexbuf]).
     *
     * After creating a resolver, one must invoke the two methods
     * [init_rep_encoding] and [init_warner] to set the internal encoding of
     * strings and the warner object, respectively. This is normally
     * already done by the parsing core.
     * It is not necessary to invoke these two methods for a fresh
     * clone.
     *
     * It is possible that the character encoding of the source and the
     * internal encoding of the parser are different. To cope with this,
     * one of the tasks of the resolver is to recode the characters of
     * the input source into the internal character encoding.
     *
     * Note that there are several ways of determining the encoding of the
     * input: (1) It is possible that the transport protocol (e.g. HTTP)
     * transmits the encoding, and (2) it is possible to inspect the beginning
     * of the file, and to analyze:
     * - (2.1) The first two bytes indicate whether UTF-16 is used
     * - (2.2) Otherwise, one can assume that an ASCII-compatible character
     *         set is used. It is now possible to read the XML declaration
     *         [<?xml ... encoding="xyz" ...?>]. The encoding found here is
     *         to be used.
     * - (2.3) If the XML declaration is missing, the encoding is UTF-8.
     *
     * The resolver needs only to distinguish between cases (1), (2.1),
     * and the rest.
     * The details of analyzing whether (2.2) or (2.3) applies are programmed
     * elsewhere, and the resolver will be told the result (see below).
     *
     * A resolver is like a file: it must be opened before one can work
     * with it, and it should be closed after all operations on it have been
     * done. The method [open_rid] is called with the resolver ID as argument
     * and it must return the lexbuf reading from the external resource.
     * (There is also the old method [open_in] that expects an [ext_id] as
     * argument. It is less powerful and should not be used any longer.)
     * The method [close_in] does not require an argument.
     *
     * It is allowed to re-open a resolver after it has been closed. It is
     * forbidden to open a resolver again while it is open.
     * It is allowed to close a resolver several times: If [close_in] is
     * invoked while the resolver is already closed, nothing happens.
     *
     * The method [open_rid] may raise [Not_competent] to indicate that this
     * resolver is not able to open this type of ID.
     *
     * If [open_rid] gets a [PUBLIC] ID, it can be assumed that the string
     * is already normalized (concerning whitespace).
     *
     * The method [change_encoding] is called from the parser after the
     * analysis of case (2) has been done; the argument is either the
     * string name of the encoding, or the empty string to indicate
     * that no XML declaration was found. It is guaranteed that
     * [change_encoding] is invoked after only a few tokens of the
     * file. The resolver should react as follows:
     * - If case (1) applies:   Ignore the encoding passed to [change_encoding].
     * - If case (2.1) applies: The encoding passed to [change_encoding] must
     *                          be compatible with UTF-16. This should be
     *                          checked, and violations should be reported.
     * - Else:                  If the passed encoding is "", assume UTF-8.
     *                          Otherwise, assume the passed encoding.
     *
     * The following rule helps synchronizing the lexbuf with the encoding:
     * If the resolver has been opened, but [change_encoding] has not yet
     * been invoked, the lexbuf contains at most one character (which may
     * be represented by multiple bytes); i.e. the lexbuf is created by
     * [Lexing.from_function], and the function puts only one character into
     * the buffer at once.
     * After [change_encoding] has been invoked, there is no longer a limit
     * on the lexbuf size.
     *
     * The reason for this rule is that you know exactly the character where
     * the encoding changes to the encoding passed by [change_encoding].
     *
     * The method [clone] may be invoked for open or closed resolvers.
     * Basically, [clone] returns a new resolver which is always closed.
     * If the original resolver is already closed, the clone is simply a clone.
     * If the original resolver is open at the moment of cloning, this applies:
     * If the clone is later opened for a relative system ID (i.e. relative
     * URL), the clone must interpret this ID relative to the ID of the
     * original resolver.
     *)
    method init_rep_encoding : rep_encoding -> unit
      (** Sets the internal encoding of strings. Must be invoked after
       * creating a resolver (this is normally already done by the parsing
       * core); it is not necessary for a fresh clone.
       *)

    method init_warner : symbolic_warnings option -> collect_warnings -> unit
      (** Sets the warner object. Must be invoked after creating a resolver
       * (this is normally already done by the parsing core); it is not
       * necessary for a fresh clone.
       *)

    method rep_encoding : rep_encoding
      (** Return the representation encoding, as set by [init_rep_encoding] *)

    method open_in : ext_id -> lexer_source
      (** This is the old method to open a resolver. It is superseded by
       * [open_rid], is less powerful, and should not be used any longer.
       * This method may raise [Not_competent] if the object does not know
       * how to handle this [ext_id].
       *)

    method open_rid : resolver_id -> lexer_source
      (** This is the new method to open a resolver. It takes a resolver ID
       * instead of an [ext_id] but works in the same way. In particular,
       * it may raise [Not_competent] if the object is not able to open
       * this type of ID. A resolver must not be opened again while it is
       * open.
       *)

    method close_in : unit
      (** Closes the resolver. If the resolver is already closed, nothing
       * happens. A closed resolver may be opened again.
       *)

    method change_encoding : string -> unit
      (** Changes the external encoding. The argument is the name of the
       * encoding, or the empty string if no XML declaration was found.
       * See above for details.
       *)


    (** Every resolver can be cloned. The clone does not inherit the connection
     * with the external object, i.e. it is initially closed.
     *)
    method clone : resolver

    method active_id : resolver_id
      (** Returns the actually used resolver ID. This is the ID passed to
       * [open_rid] where unused components have been set to None. The
       * resolver ID returned by [active_id] plays an important role when
       * expanding relative URLs.
       *)
  end
;;


(* The next classes are resolvers for concrete input sources. *)

(* CHANGES IN PXP 1.2:
 *
 * All resolve_read_* classes are now deprecated. The new classes 
 * resolve_to_* are based on the Netchannels classes as a generalization of
 * input streams.
 *
 * Examples: To read from an in_channel, use:
 *
 *   let obj_channel = new Netchannels.input_channel in_channel in
 *   new Pxp_reader.resolve_to_this_obj_channel obj_channel
 *
 * To read from a string, use:
 *
 *   let obj_channel = new Netchannels.input_string string in
 *   new Pxp_reader.resolve_to_this_obj_channel obj_channel
 *
 * Furthermore, the new classes use the resolver_id record as generalized
 * names for entities. This solves most problems with relative URLs.
 *
 * The "Anonymous" ID: In previous versions of PXP, a resolver bound to
 * the Anonymous ID matched the Anonymous ID. This is no longer true.
 * The algebra has been changed such that Anonymous never matches, not 
 * even itself.
 * 
 *   Example: The new resolver
 *     let r = new resolve_to_this_obj_channel ~id:Anonymous ch 
 *   will never accept any ID. In contrast to this, the old, and now
 *   deprecated resolver
 *     let r' = new resolve_read_this_channel ~id:Anonymous ch
 *   accepted the ID Anonymous in previous versions of PXP.
 *
 * The rationale behind this change is that Anonymous acts now like 
 * an "empty set", and not like a concrete element. You can use Private
 * to create as many concrete elements as you want, so there is actually
 * no need for the old behaviour of Anonymous.
 *
 * Note that even the resolver classes provided for backwards compatibility
 * implement this change (to limit the confusion). This means that you
 * might have to change your application to use Private instead of 
 * Anonymous.
 * 
 *)

type accepted_id =
    Netchannels.in_obj_channel * encoding option * resolver_id option
  (** When a resolver accepts an ID, this triple specifies how to proceed:
   * - The [in_obj_channel] is the channel to read data from.
   * - The encoding option may enforce a certain character encoding. If
   *   [None] is passed, the standard autodetection of the encoding is
   *   performed.
   * - The [resolver_id] option may detail the ID (this ID will be returned
   *   by [active_id]). If [None] is passed, the original ID is taken
   *   unchanged.
   *)


(** {2 Base resolvers} *)

class resolve_to_this_obj_channel :
  ?id:ext_id ->
  ?rid:resolver_id ->
  ?fixenc:encoding ->
  ?close:(Netchannels.in_obj_channel -> unit) ->
 Netchannels.in_obj_channel -> 
   resolver;;
  (** Reads from the passed [in_obj_channel]. If the [id] or [rid] arguments
   * are passed to the object, the created resolver accepts only
   * these IDs (all mentioned private, system, or public IDs). Otherwise,
   * i.e. if no such argument is passed, all IDs are accepted, even
   * [Anonymous].
   *
   * This resolver can only be used once (because the [in_obj_channel]
   * can only be used once). If it is opened a second time (either
   * in the base object or a clone), it will raise [Not_competent].
   *
   * If you pass the [fixenc] argument, the encoding of the channel is
   * set to the passed value, regardless of any auto-recognition or
   * any XML declaration.
   *
   * When the resolver is closed, the function passed by the [close]
   * argument is called. By default, the channel is closed
   * (i.e. the default is: [close:(fun ch -> ch # close_in())]).
   *)

class resolve_to_any_obj_channel :
  ?close:(Netchannels.in_obj_channel -> unit) ->
  channel_of_id:(resolver_id -> accepted_id) ->
  unit ->
  resolver
  (** This resolver calls the function [channel_of_id] to open a new channel
   * for the passed [resolver_id]. This function must either return the
   * [accepted_id] (channel, optional encoding, optional detailed ID), or
   * it must fail with [Not_competent].
   *
   * When the resolver is closed, the function passed by the [close]
   * argument is called. By default, the channel is closed
   * (i.e. the default is: [close:(fun ch -> ch # close_in())]).
   *)

class resolve_to_url_obj_channel : 
  ?close:(Netchannels.in_obj_channel -> unit) ->
  url_of_id:(resolver_id -> Neturl.url) ->
  base_url_of_id:(resolver_id -> Neturl.url) ->
  channel_of_url:(resolver_id -> Neturl.url -> accepted_id) ->
  unit ->
    resolver
  (** When this resolver gets an ID to read from, it calls the function
   * [url_of_id] to get the corresponding URL (such IDs are normally 
   * system IDs, but it is also possible to map other kinds of IDs to URLs).
   * This URL may be a relative URL; however, a URL scheme must be used
   * which contains a path. The resolver converts the URL to an absolute 
   * URL if necessary.
   *
   * To do so, the resolver calls [base_url_of_id] to get the URL the relative
   * URL must be interpreted relative to. Usually, this function returns
   * the [rid_system_base] as URL. This URL must be absolute.
   *
   * The third function, [channel_of_url], is fed with the absolute URL
   * as input. This function opens the resource to read from, and returns
   * the [accepted_id] like [resolve_to_any_obj_channel] does. The resolver ID 
   * passed to [channel_of_url] contains the string representation of the
   * absolute URL as system ID.
   *
   * All functions, [url_of_id], [base_url_of_id], and [channel_of_url], can
   * raise [Not_competent] to indicate that the object is not able to read
   * from the specified resource. However, there is a difference:
   * - A [Not_competent] from [url_of_id] or [base_url_of_id] is left as is.
   *   So only these two functions decide which URLs are accepted by the
   *   resolver and which are not, and in the latter case, other resolvers
   *   can be tried.
   * - A [Not_competent] from [channel_of_url] is converted to
   *   [Not_resolvable]. In this case the whole resolution procedure will
   *   stop, and no other resolver will be tried.
   *
   * When the resolver is closed, the function passed by the [close]
   * argument is called. By default, the channel is closed
   * (i.e. the default is: [close:(fun ch -> ch # close_in())]).
   *)


class resolve_as_file :
  ?file_prefix:[ `Not_recognized | `Allowed | `Required ] ->
  ?host_prefix:[ `Not_recognized | `Allowed | `Required ] ->
  ?system_encoding:encoding ->
  ?map_private_id:  (private_id -> Neturl.url) ->
  ?open_private_id: (private_id -> in_channel * encoding option) ->
  ?base_url_defaults_to_cwd: bool ->
  ?not_resolvable_if_not_found:bool ->
  unit ->
  resolver;;
  (** Reads from the local file system. [file] URLs are interpreted as
   * file names of the local file system, and the referenced files are opened.
   *
   * The full form of a file URL is: [file://host/path], where
   * [host] specifies the host system where the file identified by [path]
   * resides. [host=""] or [host="localhost"] are accepted; other values
   * will raise [Not_competent]. The standard for file URLs is
   * defined in RFC 1738.
   *
   * Option [file_prefix]: Specifies how the [file:] prefix of file names
   * is handled:
   * - [`Not_recognized]: The prefix is not recognized.
   * - [`Allowed]:        The prefix is allowed but not required (the default).
   * - [`Required]:       The prefix is required.
   *
   * Option [host_prefix]: Specifies how the [//host] phrase of file names
   * is handled:
   * - [`Not_recognized]: The phrase is not recognized.
   * - [`Allowed]:        The phrase is allowed but not required (the default).
   * - [`Required]:       The phrase is required.
   *
   * Option [system_encoding]: Specifies the encoding of file names of
   * the local file system. Default: UTF-8.
   *
   * Options [map_private_id] and [open_private_id]: These options are
   * deprecated and no longer described here.
   *
   * Option [base_url_defaults_to_cwd]: If true, relative URLs
   * are interpreted relative to the current working directory at the time
   * the class is instantiated, but only if there is no parent URL, i.e.
   * [rid_system_base=None]. If false (the default), such URLs cannot be
   * resolved.
   * In general, it is better to set this option to false, and to
   * initialize [rid_system_base] properly.
   *
   * Option [not_resolvable_if_not_found]: If true (the default), 
   * "File not found" errors stop the resolution process. If false,
   * "File not found" is treated as [Not_competent], so that other
   * resolvers can still be tried.
   *)

  (* Options [map_private_id] and [open_private_id]: 
   * These must always be
   * used together. They specify an exceptional behaviour in case a private
   * ID is to be opened. map_private_id maps the private ID to an URL
   * (or raises Not_competent). However, instead of opening the URL 
   * the function open_private_id is called to get an in_channel to read
   * from and to get the character encoding. The URL is taken into account
   * when subsequently relative SYSTEM IDs must be resolved.
   *)

val make_file_url :
  ?system_encoding:encoding ->
  ?enc:encoding ->
  string ->
    Neturl.url
(** This is a convenience function to create a file URL (for localhost).
 * The argument is the file name, encoded in the character set [enc].
 * Relative file names are automatically converted to absolute names
 * by prepending [Sys.getcwd()] to the passed file name.
 *
 * [system_encoding]: Specifies the encoding of file names of
 *     the local file system. Default: UTF-8. (This argument is
 *     necessary to interpret [Sys.getcwd()] correctly.)
 *
 * [enc]: The encoding of the passed string. Defaults to [`Enc_utf8].
 *
 * Note: To get a string representation of the URL, apply
 * [Neturl.string_of_url] to the result.
 *)



(** {2 Catalog resolvers} *)

class lookup_id :
  (ext_id * resolver) list ->    (* catalog *)
    resolver
  (** The general catalog class. The list (catalog) argument specifies pairs
   * [(xid,r)] mapping external IDs [xid] to subresolvers [r]. The
   * subresolver is invoked if an entity with the corresponding [xid] is
   * to be opened.
   *
   * Note: [SYSTEM] IDs are simply compared literally by this class,
   * without making relative IDs absolute. See [norm_system_id] below for
   * how to improve this.
   *)


class lookup_id_as_file :
  ?fixenc:encoding ->
  (ext_id * string) list ->      (* catalog *)
    resolver
  (** The list (catalog) argument specifies pairs [(xid,file)] mapping
   * external IDs [xid] to files. The file is read if an entity with the
   * corresponding [xid] is to be opened.
   *
   * Note: [SYSTEM] IDs are simply compared literally by this class,
   * without making relative IDs absolute. See [norm_system_id] below for
   * how to improve this.
   *
   * [fixenc]: Overrides the encoding of the file contents. By default, the
   *     standard rule is applied to find out the encoding of the file.
   *)


class lookup_id_as_string :
  ?fixenc:encoding ->
  (ext_id * string) list ->      (* catalog *)
    resolver
  (** The list (catalog) argument specifies pairs [(xid,s)] mapping external
   * IDs [xid] to strings [s]. The string is read if an entity with the
   * corresponding [xid] is to be opened.
   *
   * Note: [SYSTEM] IDs are simply compared literally by this class,
   * without making relative IDs absolute. See [norm_system_id] below for
   * how to improve this.
   *
   * [fixenc]: Presumably overrides the encoding of the strings, as
   *     documented for the other [lookup_*_as_string] classes (not stated
   *     explicitly for this class).
   *)


class lookup_public_id :
  (string * resolver) list ->    (* catalog *)
  resolver
  (** This is the generic builder for [PUBLIC] ID catalog resolvers: The
   * list (catalog) argument specifies pairs [(pubid, r)] mapping [PUBLIC]
   * identifiers to subresolvers.
   *
   * The subresolver is invoked if an entity with the corresponding [PUBLIC]
   * ID is to be opened.
   *)



class lookup_public_id_as_file :
  ?fixenc:encoding ->
  (string * string) list ->     (* catalog *)
    resolver
  (** Makes a resolver for [PUBLIC] identifiers. The list (catalog) argument
   * specifies pairs [(pubid, filename)] mapping [PUBLIC] identifiers to
   * filenames. The filenames must already be encoded in the character set
   * the system uses for filenames.
   *
   * There is a restriction of this catalog class:
   * After a [PUBLIC] entity has been opened, it is not possible to refer
   * to sub entities by relative [SYSTEM] names, even if the [PUBLIC]
   * name is accompanied by a resolvable system name like in
   * [Public(pubid,sysid)]. Workaround: Use [lookup_id] instead, and
   * put the complete [Public(pubid,sysid)] IDs into the catalog.
   *
   * [fixenc]: Overrides the encoding of the file contents. By default, the
   *     standard rule is applied to find out the encoding of the file.
   *)


class lookup_public_id_as_string :
  ?fixenc:encoding ->
  (string * string) list ->    (* catalog *)
    resolver;;
  (** Makes a resolver for [PUBLIC] identifiers. The list (catalog) argument
   * specifies pairs [(pubid, text)] mapping [PUBLIC] identifiers to XML
   * text (which must begin with [<?xml ...?>]).
   *
   * The same restriction as for [lookup_public_id_as_file] applies: after
   * a [PUBLIC] entity has been opened, sub entities cannot be referred to
   * by relative [SYSTEM] names.
   *
   * [fixenc]: Overrides the encoding of the strings.
   *)


class lookup_system_id :
  (string * resolver) list ->    (* catalog *)
    resolver
  (** This is the generic builder for URL-based catalog resolvers: The
   * list (catalog) argument specifies pairs [(url, r)] mapping URLs to
   * subresolvers.
   * The subresolver is invoked if an entity with the corresponding URL
   * is to be opened.
   *
   * Important note: Two URLs are considered as equal if they are
   * equal in their string representation. (This may not be what you want
   * and may cause trouble... However, I currently do not know how to
   * implement a "semantic" comparison logic.)
   *
   * Note: [SYSTEM] IDs are simply compared literally, without making
   * relative IDs absolute. See [norm_system_id] below for how to improve
   * this.
   *)


class lookup_system_id_as_file :
  ?fixenc:encoding ->
  (string * string) list ->     (* catalog *)
    resolver
  (** Looks up resolvers for URL identifiers: The list (catalog) argument
   * specifies pairs [(url, filename)] mapping URLs to filenames. The
   * filenames must already be encoded in the character set the system uses
   * for filenames.
   *
   * Note: URLs are simply compared literally, without making
   * relative IDs absolute. See [norm_system_id] below for how to improve
   * this.
   *
   * [fixenc]: Overrides the encoding of the file contents. By default, the
   *     standard rule is applied to find out the encoding of the file.
   *)


class lookup_system_id_as_string :
  ?fixenc:encoding ->
  (string * string) list ->     (* catalog *)
    resolver
  (** Looks up resolvers for URL identifiers: The list (catalog) argument
   * specifies pairs [(url, text)] mapping URLs to XML text (which must
   * begin with [<?xml ...?>]).
   *
   * Note: URLs are simply compared literally, without making
   * relative IDs absolute. See [norm_system_id] below for how to improve
   * this.
   *
   * [fixenc]: Overrides the encoding of the strings.
   *)


(** {2 System ID normalization} *)

class norm_system_id : resolver -> resolver
  (** Normalizes URLs, and forwards the open request to the
   * passed resolver. (Non-URL IDs are forwarded unchanged to the
   * subresolver.)
   *
   * Normalization includes:
   * - Relative URLs are made absolute. If this fails, the problematic
   *   relative URL will be rejected.
   * - [..] and [.] and [//] in the middle of URLs are removed 
   * - Escaping of reserved characters is normalized (percent encoding
   *   like %40)
   *
   * Normalization is recommended for catalogs, e.g.
   * {[
   * new norm_system_id
   *   (new lookup_system_id_as_file
   *      [ "http://h/p1", ...;
   *        "http://h/p2", ...;
   *      ])
   * ]}
   * First, the catalog now even works if the URL is written in an
   * unusual way, e.g. [http://h/p1/../p2], or [http://h/p%31]. 
   * Second, relative URLs can be used. For instance, the document
   * referred to as [http://h/p1] can now refer to the other document
   * as [p2].
   *)


(** {2 ID rewriting} *)

class rewrite_system_id :
        ?forward_unmatching_urls:bool ->
	(string * string) list ->
	resolver ->
	  resolver
  (** Rewrites the URLs according to the list of pairs. The left
   * component is the pattern, the right component is the substitute.
   * For example,
   *
   * {[
   * new rewrite_system_id
   *       [ "http://host/foo/", "file:///dir/" ]
   *       r
   * ]}
   *
   * rewrites all URLs beginning with [http://host/foo/] to [file:///dir/],
   * e.g. [http://host/foo/x] becomes [file:///dir/x].
   *
   * If the pattern ends with a slash (as in the example), a prefix match
   * is performed, i.e. the whole directory hierarchy is rewritten.
   * If the pattern does not end with a slash, an exact match is performed,
   * i.e. only a single URL is rewritten.
   *
   * The class normalizes URLs as [norm_system_id] does, before the match
   * is tried.
   *
   * By default, URLs that do not match any pattern are rejected
   * ([Not_competent]).
   *
   * The rewritten URL is only visible within the passed subresolver.
   * If the opened entity accesses other entities by relative URLs,
   * these will be resolved relative to the original URL as it was before
   * rewriting it. This gives some protection against unwanted accesses.
   * For example, if you map [http://host/contents] to [file:///data/contents],
   * it will not be possible to access files outside this directory,
   * even if tricks are used like opening [../../etc/passwd] relative to
   * [http://host/contents].  Of course, this protection works only if
   * the resolver opening the file is a subresolver of [rewrite_system_id].
   *
   * Another application of this class is to use the identity as rewriting
   * rule. This resolver
   * 
   * {[
   * new rewrite_system_id
   *       [ "file:///data/", "file:///data/" ]
   *       ( new resolve_as_file() )
   * ]}
   *
   * has the effect that only files under [/data] can be accessed, and
   * other files such as [/etc/passwd] cannot.
   *
   * Option [forward_unmatching_urls]: If true, URLs that do not match any
   *   pattern are forwarded to the inner resolver. These URLs are not
   *   rewritten. {b Note that the mentioned access restrictions do not
   *   work anymore if this option is turned on.}
   *)


(** {2 Resolver construction} *)

(** Selects the order in which the class [combine] tries the public and
 * the system identifier of an entity that has both names.
 *)
type combination_mode =
    Public_before_system    (** Try public identifiers first *)
  | System_before_public    (** Try system identifiers first *)
;;


class combine : 
	?mode:combination_mode ->
	resolver list -> 
	  resolver;;
  (** Combines several resolver objects. If a concrete entity with an
   * [ext_id] is to be opened, the combined resolver tries the contained
   * resolvers in turn until a resolver accepts opening the entity
   * (i.e. until a resolver does not raise [Not_competent] on [open_rid]).
   *
   * If the entity to open has several names, e.g. a public name and
   * a system name, these names are tried in parallel by default.
   * For backward compatibility, the optional [mode] argument allows one
   * to specify a different order:
   *
   * - [Public_before_system]:
   *   Try first to open as public identifier, and if that fails,
   *   fall back to the system identifier
   * - [System_before_public]:
   *   Try first to open as system identifier, and if that fails,
   *   fall back to the public identifier
   *
   * Clones: If the [clone] method is invoked on the combined resolver
   * while it is closed, all contained resolvers are cloned and the
   * combination is repeated on the clones. If the [clone] method is
   * invoked while the resolver is open, only the active subresolver is
   * cloned (i.e. the resolver that accepted the ID in the first place).
   *)


(**/**)

(* ====================================================================== *)

(* TODO: The following examples recommend deprecated classes. *)

(* EXAMPLES OF RESOLVERS:
 *
 * let r1 = new resolve_as_file ()
 *   - r1 can open all local files
 *
 * let r2 = new resolve_read_this_channel
 *            ~id:(System "file:/dir/f.xml")
 *            (open_in "/dir/f.xml")
 *   - r2 can only read /dir/f.xml of the local file system. If this file
 *     contains references to other files, r2 will fail.
 *     Note that the channel is automatically closed after XML parsing
 *     is done.
 *
 * let r3 = new combine [ r2; r1 ]
 *   - r3 reads /dir/f.xml of the local file system by calling r2, and all
 *     other files by calling r1. However, inner references within 
 *     /dir/f.xml still fail.
 *
 * let pid = Pxp_types.allocate_private_id() in
 * let r4 = new resolve_read_this_channel 
 *                ~id:(Private pid) 
 *                (open_in "/dir/f.xml")
 *   - r4 can only read from a so-called private ID. These are opaque
 *     identifiers that can be mapped to channels and files as needed.
 *     They do not have a textual representation, and they cannot be
 *     referred to from XML text.
 *
 * ----------------------------------------------------------------------
 * 
 * Now a bigger example. The task is to:
 *  - resolve the PUBLIC IDs P and Q to some files;
 *  - resolve the SYSTEM ID "http://r/s.dtd" to another file;
 *  - resolve all file SYSTEM IDs
 *  - start parsing with "f.xml" in the current directory
 *
 * let r =
 *   new combine 
 *     [ lookup_public_id_as_file 
 *         [ "P", "file_for_p";   "Q", "file_for_q" ];
 *       lookup_system_id_as_file
 *         [ "http://r/s.dtd", "file_for_this_dtd" ];
 *       new resolve_as_file()
 *     ]
 * in
 * (* The recommended way to create the start_id from file names: *)
 * let start_url =
 *   make_file_url "f.xml" in
 * let start_id = 
 *   System (Neturl.string_of_url start_url) in
 * let source = ExtID(start_id, r) in
 * parse_document_entity ... source ...
 *
 * ----------------------------------------------------------------------
 *
 * A variation:
 *
 *  - resolve the PUBLIC IDs P and Q to some files;
 *  - resolve the SYSTEM ID "http://r/s.dtd" to another file;
 *  - do not resolve any file URL
 *  - start parsing with "f.xml" in the current directory
 *
 * let start_id = Private (allocate_private_id()) in
 * let r =
 *   new combine 
 *     [ lookup_public_id_as_file 
 *         [ "P", "file_for_p";   "Q", "file_for_q" ];
 *       lookup_system_id_as_file
 *         [ "http://r/s.dtd", "file_for_this_dtd" ];
 *       new resolve_read_any_channel
 *         ~channel_of_id: (fun xid ->
 *            if xid = start_id then 
 *              open_in_bin "f.xml", None  (* you may want to catch Sys_error *)
 *            else raise Not_competent)
 *         ();
 *     ]
 * in
 * let source = ExtID(start_id, r) in
 * parse_document_entity ... source ...
 *
 * ----------------------------------------------------------------------
 *
 * Three further examples can be found in the source of Pxp_yacc (file
 * pxp_yacc.m2y): the implementations of from_file, from_channel, and
 * from_string are also applications of the Pxp_reader objects.
 *)

(**********************************************************************)
(* DEPRECATED CLASSES                                                 *)
(**********************************************************************)

class resolve_read_this_channel :
  ?id:ext_id -> ?fixenc:encoding -> ?close:(in_channel -> unit) ->
  in_channel -> resolver;;

  (* THIS CLASS IS DEPRECATED! USE resolve_to_this_obj_channel INSTEAD!
   *)

  (* resolve_read_this_channel ?id ?fixenc ?close ch:
   * Reads from the passed channel ch (it may even be a pipe).
   *
   * ~id: If this argument is passed to the object, the created resolver
   *   accepts only this ID (except Anonymous). Otherwise all IDs are
   *   accepted, even Anonymous.
   * ~fixenc: If passed, the encoding of the channel is set to the passed
   *   value, regardless of any auto-recognition or any XML declaration.
   * ~close: The function that is called when the resolver is closed.
   *   By default, the channel is closed (i.e. the default is:
   *   ~close:close_in).
   *
   * Once the resolver has been cloned, it does not accept any ID. This
   * means that this resolver cannot handle inner references to external
   * entities. Note that you can combine this resolver with another resolver
   * that can handle inner references (such as resolve_as_file); see
   * class 'combine' above.
   *)


class resolve_read_any_channel :
  ?close:(in_channel -> unit) ->
  channel_of_id:(ext_id -> (in_channel * encoding option)) ->
  unit ->
  resolver;;

  (* THIS CLASS IS DEPRECATED! USE resolve_to_any_obj_channel INSTEAD!
   *
   * Note: The function ~channel_of_id may be called several times to find
   * out the right ext_id from the current resolver_id. The first result
   * is taken that is not Not_competent.
   *)

  (* resolve_read_any_channel ~channel_of_id ():
   * This resolver calls the function ~channel_of_id to open a new channel
   * for the passed ext_id. This function must either return the channel
   * and the encoding, or it must fail with Not_competent.
   *
   * The function must return None as encoding if the default mechanism to
   * recognize the encoding should be used. It must return Some e if it is
   * already known that the encoding of the channel is e.
   *
   * ~close: The function that is called when the resolver is closed.
   *   By default, the channel is closed (i.e. the default is:
   *   ~close:close_in).
   *)

class resolve_read_url_channel :
  ?base_url:Neturl.url ->
  ?close:(in_channel -> unit) ->
  url_of_id:(ext_id -> Neturl.url) ->
  channel_of_url:(ext_id -> Neturl.url -> (in_channel * encoding option)) ->
  unit ->
    resolver;;

  (* THIS CLASS IS DEPRECATED! USE resolve_to_url_obj_channel INSTEAD!
   *
   * Note: The function ~url_of_id may be called several times to find
   * out the right ext_id from the current resolver_id. The first result
   * is taken that is not Not_competent.
   *
   * Note: The optional argument ~base_url is ignored. The class always
   * uses the rid_system_base string to interpret relative URLs.
   *)

  (* resolve_read_url_channel ~url_of_id ~channel_of_url ():
   *
   * When this resolver gets an ID to read from, it calls the function
   * ~url_of_id to get the corresponding URL. This URL may be a relative
   * URL; however, a URL scheme must be used which contains a path.
   * The resolver converts the URL to an absolute URL if necessary.
   * The second function, ~channel_of_url, is fed with the absolute URL
   * as input. This function opens the resource to read from, and returns
   * the channel and the encoding of the resource.
   * NOTE(review): ~channel_of_url also receives an ext_id as its first
   * argument; presumably this is the ID being opened - confirm against
   * the implementation.
   *
   * Both functions, ~url_of_id and ~channel_of_url, can raise
   * Not_competent to indicate that the object is not able to read from
   * the specified resource. However, there is a difference: A Not_competent
   * from ~url_of_id is left as it is, but a Not_competent from ~channel_of_url
   * is converted to Not_resolvable. So only ~url_of_id decides which URLs
   * are accepted by the resolver and which are not.
   *
   * The function ~channel_of_url must return None as encoding if the default
   * mechanism to recognize the encoding should be used. It must return
   * Some e if it is already known that the encoding of the channel is e.
   *
   * When the resolver is closed, the function passed by the ~close
   * argument is called. By default, the channel is closed
   * (i.e. the default is: ~close:close_in).
   *
   * [The following does not apply to the current implementation, only to
   * former ones:]
   * Objects of this class contain a base URL relative to which relative
   * URLs are interpreted. When creating a new object, you can specify
   * the base URL by passing it as ~base_url argument. When an existing
   * object is cloned, the base URL of the clone is the URL of the original
   * object.
   *
   * Note that the term "base URL" has a strict definition in RFC 1808.
   *)


class resolve_read_this_string :
  ?id:ext_id -> ?fixenc:encoding -> string -> resolver;;

  (* THIS CLASS IS DEPRECATED! USE resolve_to_this_obj_channel INSTEAD!
   *)

  (* resolve_read_this_string ?id ?fixenc s:
   * Reads from the passed string s.
   *
   * ~id: If this argument is passed to the object, the created resolver
   *   accepts only this ID (except Anonymous). Otherwise all IDs are
   *   accepted, even Anonymous.
   * ~fixenc: If passed, the encoding of the string is set to the passed
   *   value, regardless of any auto-recognition or any XML declaration.
   *
   * Once the resolver has been cloned, it does not accept any ID. This
   * means that this resolver cannot handle inner references to external
   * entities. Note that you can combine this resolver with another resolver
   * that can handle inner references (such as resolve_as_file); see
   * class 'combine' above.
   *)


class resolve_read_any_string :
  string_of_id:(ext_id -> (string * encoding option)) -> unit -> resolver;;

  (* THIS CLASS IS DEPRECATED! USE resolve_to_any_obj_channel INSTEAD!
   *)

  (* resolve_read_any_string ~string_of_id ():
   * This resolver calls the function ~string_of_id to get the string for
   * the passed ext_id. This function must either return the string and
   * the encoding, or it must fail with Not_competent.
   *
   * The function must return None as encoding if the default mechanism to
   * recognize the encoding should be used. It must return Some e if it is
   * already known that the encoding of the string is e.
   *)

val lookup_public_id_as_file :
  ?fixenc:encoding ->
  (string * string) list ->     (* catalog *)
    resolver;;
  (* Function counterpart of the class lookup_public_id_as_file: returns
   * the same resolver as the class, so that it can be created without
   * the new keyword. The catalog is a list of pairs; in the examples of
   * this file each pair maps a PUBLIC ID to the name of a file. See the
   * equally named class for the exact meaning of the catalog and of
   * ~fixenc.
   *)

val lookup_public_id_as_string :
  ?fixenc:encoding ->
  (string * string) list ->     (* catalog *)
    resolver;;
  (* Function counterpart of the class lookup_public_id_as_string: returns
   * the same resolver as the class, so that it can be created without
   * the new keyword. The catalog is a list of pairs; presumably each pair
   * maps a PUBLIC ID to a string holding the entity text (TODO confirm
   * against the class). See the equally named class for the exact
   * meaning of the catalog and of ~fixenc.
   *)

val lookup_system_id_as_file :
  ?fixenc:encoding ->
  (string * string) list ->     (* catalog *)
    resolver;;
  (* Function counterpart of the class lookup_system_id_as_file: returns
   * the same resolver as the class, so that it can be created without
   * the new keyword. The catalog is a list of pairs; in the examples of
   * this file each pair maps a SYSTEM ID to the name of a file. See the
   * equally named class for the exact meaning of the catalog and of
   * ~fixenc.
   *)

val lookup_system_id_as_string :
  ?fixenc:encoding ->
  (string * string) list ->     (* catalog *)
    resolver;;
  (* Function counterpart of the class lookup_system_id_as_string: returns
   * the same resolver as the class, so that it can be created without
   * the new keyword. The catalog is a list of pairs; presumably each pair
   * maps a SYSTEM ID to a string holding the entity text (TODO confirm
   * against the class). See the equally named class for the exact
   * meaning of the catalog and of ~fixenc.
   *)



(** {fixpxpcoretypes false} *)


(**/**)

(* Takes a boolean flag; judging by the name it switches a debug mode of
 * this module on or off. NOTE(review): what the debug mode changes
 * (presumably additional diagnostic output) cannot be seen from this
 * interface - confirm in the implementation before relying on it.
 *)
val set_debug_mode : bool -> unit