This file is indexed.

/usr/lib/ocaml/netstring/mimestring.mli is in libocamlnet-ocaml-dev 3.7.3-4.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

   1
   2
   3
   4
   5
   6
   7
   8
   9
  10
  11
  12
  13
  14
  15
  16
  17
  18
  19
  20
  21
  22
  23
  24
  25
  26
  27
  28
  29
  30
  31
  32
  33
  34
  35
  36
  37
  38
  39
  40
  41
  42
  43
  44
  45
  46
  47
  48
  49
  50
  51
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
(* $Id: mimestring.mli 1644 2011-07-20 21:18:56Z gerd $
 * ----------------------------------------------------------------------
 *
 *)

(** Low-level functions to parse and print mail and MIME messages 
 *
 * [Mimestring] contains a lot of functions to scan and print strings
 * formatted as MIME messages. For a higher-level view on this topic,
 * see the [Netmime] module.
 *
 * {b Contents}
 * - {!Mimestring.lines}
 * - {!Mimestring.headers}
 * - {!Mimestring.structured_values}
 * - {!Mimestring.parsers_for_structured_values}
 * - {!Mimestring.printers_for_structured_values}
 * - {!Mimestring.scanning_mime}
 * - {!Mimestring.helpers_mime}
 * 
 *)

(** {1:lines Splitting a string into lines} *)

(** These functions are all CR/LF-aware, i.e. lines can be terminated
    by either LF or CR/LF.
 *)

val find_line_end : string -> int -> int -> int
  (** [find_line_end s pos len]: Searches the next line end (CR/LF or
      only LF), and returns the position. The search starts at position
      [pos], and covers the next [len] bytes. Raises [Not_found]
      if there is no line end.
   *)

val find_line_start : string -> int -> int -> int
  (** [find_line_start s pos len]: Searches the next start, and returns its
      position. The line start is the position after the next line end
      (CR/LF or only LF). The search starts at position
      [pos], and covers the next [len] bytes. Raises [Not_found]
      if there is no line end.
   *)

val find_double_line_start : string -> int -> int -> int
  (** [find_double_line_start s pos len]: Searches two adjacent line ends
      (each may be a CR/LF combination or a single LF), and returns the
      position after the second line end.  The search starts at position
      [pos], and covers the next [len] bytes. Raises [Not_found]
      if the mentioned pattern is not found.
   *)

val skip_line_ends : string -> int -> int -> int
  (** [skip_line_ends s pos len]: Skips over adjacent line ends (terminated
      by CR/LF or plain LF), and returns the position after the last
      line end. The search starts at position
      [pos], and covers the next [len] bytes. Note that this function
      cannot raise [Not_found].
   *)

val fold_lines_p : ('a -> int -> int -> int -> bool -> 'a) -> 
                 'a -> string -> int -> int -> 'a
  (** [fold_lines_p f acc0 s pos len]: Splits the substring of [s] 
      from [pos]
      to [pos+len] into lines, and folds over these lines like 
      [List.fold_left]. The function [f] is called as
      [f acc p0 p1 p2 is_last] where [acc] is the current accumulator
      (initialized with [acc0]), and
      - [p0] is the start position of the line in [s]
      - [p1] is the position of the line terminator in [s]
      - [p2] is the position after the line terminator in [s]
      - [is_last] is true if this is the last line in the iteration

      The lines can be terminated with CR/LF or LF. For the last line
      the terminator is optional ([p1=p2] is possible).

      The function is tail-recursive.
   *)

val fold_lines : ('a -> string -> 'a) -> 'a -> string -> int -> int -> 'a
  (** [fold_lines f acc0 s pos len]: Splits the substring of [s] 
      from [pos]
      to [pos+len] into lines, and folds over these lines like 
      [List.fold_left]. The function [f] is called as
      [f acc line] where [acc] is the current accumulator
      (initialized with [acc0]), and [line] is the current line
      w/o terminator.

      The lines can be terminated with CR/LF or LF.

      The function is tail-recursive.

      Example: Get the lines as list:
      {[
         List.rev(fold_lines (fun l acc -> acc::l) [] s pos len)
      ]}

   *)

val iter_lines : (string -> unit) -> string -> int -> int -> unit
  (** [iter_lines f s pos len]: Splits the substring of [s] 
      from [pos]
      to [pos+len] into lines, and calls [f line] for each
      line.

      The lines can be terminated with CR/LF or LF.
   *)

val skip_whitespace_left : string -> int -> int -> int
  (** [skip_whitespace_left s pos len]: Returns the smallest
      [p] with [p >= pos && p < pos+len] so that [s.[p]] is not
      a whitesapce character (space, TAB, CR, LF), and 
      [s.[q]] is a whitespace character for all [q<p].
      If this is not possible [Not_found] will be raised.
   *)

val skip_whitespace_right : string -> int -> int -> int
  (** [skip_whitespace_right s pos len]: Returns the biggest
      [p] with [p >= pos && p < pos+len] so that [s.[p]] is not
      a whitesapce character (space, TAB, CR, LF), and
      [s.[q]] is a whitespace character for all [q>p].
      If this is not possible [Not_found] will be raised.
   *)


(* *********************************************************************)
(* Collection of auxiliary functions to parse MIME headers             *)
(* *********************************************************************)

(* See also the module Netmime for high-level MIME functions *)

(** {1:headers Parsing and Printing Mail Headers} *)

(**
  {b The Format of Mail Messages}

  Messages
  consist of a header and a body; the first empty line separates both
  parts. The header contains lines "{i param-name}[:] {i param-value}" where
  the param-name must begin on column 0 of the line, and the "[:]"
  separates the name and the value. So the format is roughly:
  
  {[
   param1-name: param1-value
   ...
   paramN-name: paramN-value
   _
   body ]}
  
   (Where "_" denotes an empty line.)
  
   {b Details}
   
   Note that parameter values are restricted; you cannot represent
   arbitrary strings. The following problems can arise:
    - Values cannot begin with whitespace characters, because there
      may be an arbitrary number of whitespaces between the "[:]" and the
      value.
    - Values (and names of parameters, too) must only be formed of
      7 bit ASCII characters. (If this is not enough, the MIME standard
      knows the extension RFC 2047 that allows that header values may
      be composed of arbitrary characters of arbitrary character sets.
      See below how to decode such characters in values returned by
      this function.)
    - Header values may be broken into several lines. Continuation
      lines must begin with whitespace characters. This means that values
      must not contain line breaks as semantic part of the value.
      And it may mean that {i one} whitespace character is not distinguishable
      from {i several} whitespace characters.
    - Header lines must not be longer than 78 characters (soft limit) or
      998 characters (hard limit). Values that
      would result into longer lines must be broken into several lines.
      This means that you cannot represent strings that contain too few
      whitespace characters.
      (Note: The soft limit is to avoid that user agents have problems
      with long lines. The hard limit means that transfer agents sometimes
      do not transfer longer lines correctly.)
    - Some old gateways pad the lines with spaces at the end of the lines.
   
    This implementation of a mail scanner tolerates a number of
    deviations from the standard: long lines are not rejected; 8 bit
    values are generally accepted; lines may be ended only with LF instead of
    CRLF.
   
    {b Compatibility}
   
    These functions can parse all mail headers that conform to RFC 822 or
    RFC 2822.
   
    But there may be still problems, as RFC 822 allows some crazy
    representations that are actually not used in practice.
    In particular, RFC 822 allows it to use backslashes to "indicate"
    that a CRLF sequence is semantically meant as line break. As this
    function normally deletes CRLFs, it is not possible to recognize such
    indicators in the result of the function.
 *)

val fold_header :
       ?downcase:bool ->              (* default: false *)
       ?unfold:bool ->                (* default: false *)
       ?strip:bool ->                 (* default: false *)
       ('a -> string -> string -> 'a) ->
       'a -> string -> int -> int ->
         'a
  (** [fold_header f acc0 s pos len]:
      Parses a MIME header in the string [s] from [pos] to exactly
      [pos+len]. The MIME header must be terminated by an empty line.

      A folding operation is done over the header values while
      the lines are extracted from the string, very much like
      [List.fold_left]. For each header [(n,v)] where [n] is the
      name and [v] is the value, the function [f] is called as
      [f acc n v].

      If the header cannot be parsed, a [Failure] is raised.

      Certain transformations may be applied (default: no
      transformations):
      - If [downcase] is set, the header names are converted to
        lowercase characters
      - If [unfold] is set, the line terminators are not included
        in the resulting values. This covers both the end of line
        characters at the very end of a header and the end of line
        characters introduced by continuation lines.
      - If [strip] is set, preceding and trailing white space is
        removed from the value (including line terminators at the
        very end of the value)
   *)

val list_header :
       ?downcase:bool ->              (* default: false *)
       ?unfold:bool ->                (* default: false *)
       ?strip:bool ->                 (* default: false *)
       string -> int -> int -> (string * string) list
  (** [list_header s pos len]: Returns the headers as list of pairs
      [(name,value)].

      For the meaning of the arguments see [fold_header] above.
   *)


val scan_header : 
       ?downcase:bool ->              (* default: true *)
       ?unfold:bool ->                (* default: true *)
       ?strip:bool ->                 (* default: false *)
       string -> start_pos:int -> end_pos:int -> 
         ((string * string) list * int)
    (** [let params, header_end_pos = scan_header s start_pos end_pos]:
   
        {b Deprecated.} 
  
      Scans the mail header that begins at position [start_pos] in the string 
      [s] and that must end somewhere before position [end_pos]. It is intended
      that in [end_pos] the character position following the end of the body of
      the MIME message is passed.

     Returns the parameters of the header as [(name,value)] pairs (in
     [params]), and in [header_end_pos] the position of the character following
     directly after the header (i.e. after the blank line separating
     the header from the body).

     - If [downcase], header names are converted to lowercase characters
     - The arguments [unfold] and [strip] have a slightly different meaning as
       for the new function [fold_header] above. In particular, whitespace
       is already stripped off the returned values if {b any} of [unfold] or
       [strip] are enabled. (This is for backward compatibility.)

     Also, this function is different because [downcase] and [unfold] are
     enabled by default, and only [strip] is not enabled.
     *)

val read_header : 
      ?downcase:bool ->
      ?unfold:bool -> 
      ?strip:bool ->
      Netstream.in_obj_stream -> 
	(string * string) list
    (** This function expects that the current position of the passed
     * [in_obj_stream] is the first byte of the header. The function scans the
     * header and returns it. After that, the stream position is after
     * the header and the terminating empty line (i.e. at the beginning of
     * the message body).
     *
     * The options [downcase], [unfold], and [strip] have the same meaning
     * as in [scan_header].
     *
     * {b Example}
     *
     * To read the mail message "[file.txt]":
     *
     * {[
     * let ch = new Netchannels.input_channel (open_in "file.txt") in
     * let stream = new Netstream.input_stream ch in
     * let header = read_header stream in
     * stream#close_in()  (* no need to close ch *)
     * ]}
     *)

val write_header :
      ?soft_eol:string ->                          (* default: "\r\n" *)
      ?eol:string ->                               (* default: "\r\n" *)
      Netchannels.out_obj_channel ->
      (string * string) list ->
	unit
    (** This function writes the header to the passed [out_obj_channel]. The
     * empty line following the header is also written.
     *
     * Exact output format: 
     * {ul
     * {- The header is not folded, i.e. no additional CRLF sequences
     *    are inserted into the header to avoid long header lines.
     *    In order to produce correct headers, the necessary CRLF bytes
     *    must already exist in the field values. (You can use the
     *    function [write_value] below for this.)}
     * {- However, this function helps getting some details right. First,
     *    whitespace at the beginning of field values is suppressed.
     *
     *    {b Example:}
     *
     *    [write_header ch ["x","Field value"; "y","   Other value"]] outputs:
     * {[ x: Field value\r\n
     * y: Other value\r\n
     * \r\n]}}
     * {- The end-of-line sequences LF, and CRLF, followed by
     *    whitespace are replaced by the passed [soft_eol] string. If the
     *    necessary space or tab character following the eol is missing, an
     *    additional space character will be inserted.
     *
     *    {b Example:}
     *
     *    [write_header ch ["x","Field\nvalue"; "y","Other\r\n\tvalue"]] outputs:
     * {[ x: Field\r\n
     *  value
     * y: Other\r\n
     * \tvalue]}}
     * {- Empty lines (and lines only consisting of whitespace) are suppressed
     *    if they occur inside the header.
     *
     *    {b Example:}
     *
     *    [write_header ch ["x","Field\n\nvalue"]] outputs:
     * {[ x: Field\r\n
     *  value]}}
     * {- Whitespace at the end of a header field is suppressed. One field
     *    is separated from the next field by printing [eol] once.}
     * }
     *
     * These rules ensure that the printed header will be well-formed with
     * two exceptions:
     * - Long lines (> 72 characters) are neither folded nor rejected
     * - True 8 bit characters are neither properly encoded nor rejected
     *
     * These two problems cannot be addressed without taking the syntax
     * of the header fields into account. See below how to create
     * proper header fields from [s_token] lists.
     *)

(* *********************************************************************)

(** {1:structured_values Parsing Structured Values} *)

(** The following types and functions allow it to build scanners for
 * structured mail and MIME values in a highly configurable way.
 *
 * {b Structured Values}
 *
 * RFC 822 (together with some other RFCs) defines lexical rules
 * how formal mail header values should be divided up into tokens. Formal
 * mail headers are those headers that are formed according to some
 * grammar, e.g. mail addresses or MIME types.
 *
 *    Some of the characters separate phrases of the value; these are
 * the "special" characters. For example, '\@' is normally a special
 * character for mail addresses, because it separates the user name
 * from the domain name (as in [user\@domain]). RFC 822 defines a fixed set
 * of special
 * characters, but other RFCs use different sets. Because of this,
 * the following functions allow it to configure the set of special characters.
 *
 *    Every sequence of characters may be embraced by double quotes,
 * which means that the sequence is meant as literal data item;
 * special characters are not recognized inside a quoted string. You may
 * use the backslash to insert any character (including double quotes)
 * verbatim into the quoted string (e.g. "He said: \"Give it to me!\"").
 * The sequence of a backslash character and another character is called
 * a quoted pair.
 *
 *    Structured values may contain comments. The beginning of a comment
 * is indicated by '(', and the end by ')'. Comments may be nested.
 * Comments may contain quoted pairs. A
 * comment counts as if a space character were written instead of it.
 *
 *    Control characters are the ASCII characters 0 to 31, and 127.
 * RFC 822 demands that mail headers are 7 bit ASCII strings. Because
 * of this, this module also counts the characters 128 to 255 as
 * control characters.
 *
 *    Domain literals are strings embraced by '\[' and '\]'; such literals
 * may contain quoted pairs. Today, domain literals are used to specify
 * IP addresses (rare), e.g. [user\@[192.168.0.44]].
 *
 *    Every character sequence not falling in one of the above categories
 * is an atom (a sequence of non-special and non-control characters).
 * When recognized, atoms may be encoded in a character set different than
 * US-ASCII; such atoms are called encoded words (see RFC 2047).
 *
 * {b Scanning Using the Extended Interface}
 *
 * In order to scan a string containing a structured value, you must first
 * create a [mime_scanner] using the function [create_mime_scanner].
 * The scanner contains the reference to the scanned string, and a 
 * specification how the string is to be scanned. The specification
 * consists of the lists [specials] and [scan_options].
 *
 * The character list [specials] specifies the set of special characters.
 * These are the characters that are not regarded as part of atoms, 
 * because they work as delimiters that separate atoms (like [@] in the
 * above example). In addition to this, when '"', '(', and '\[' are
 * seen as regular characters not delimiting quoted string, comments, and
 * domain literals, respectively, these characters must also be added
 * to [specials]. In detail, these rules apply:
 *
 * {ul
 * {- {b Spaces:}
 *    - If [' '] {i in} [specials]: A space character is returned as [Special ' '].
 *       Note that there may also be an effect on how comments are returned
 *       (see below).
 *    - If [' '] {i not in} [specials]: Spaces are not returned, although
 *      they still delimit atoms.
 *
 *   }
 * {- {b Tabs, CRs, LFs:}
 *    - If ['\t'] {i in} [specials]: A tab character is returned as 
 *      [Special '\t'].
 *    - If ['\t'] {i not in} [specials]: Tabs are not returned, although
 *      they still delimit atoms.
 *    - If ['\r'] {i in} [specials]: A CR character is returned as 
 *      [Special '\r'].
 *    - If ['\r'] {i not in} [specials]: CRs are not returned, although
 *      they still delimit atoms.
 *    - If ['\n'] {i in} [specials]: A LF character is returned as
 *      [Special '\n'].
 *    - If ['\n'] {i not in} [specials]: LFs are not returned, although
 *      they still delimit atoms.
 *
 *   }
 * {- {b Comments:}
 *    {ul
 *    {- If ['('] {i in} [specials]: Comments are not recognized. The 
 *       character '(' is returned as [Special '('].}
 *    {- If ['('] {i not in} [specials]: Comments are recognized. How comments
 *       are returned, depends on the following:
 *       + If [Return_comments] {i in} [scan_options]: Outer comments are
 *         returned as [Comment] (note that inner comments are recognized but
 *         are not returned as tokens)
 *       + If otherwise [' '] {i in} [specials]: Outer comments are returned as
 *         [Special ' ']
 *       + Otherwise: Comments are recognized but not returned at all.
 *
 *       }
 *    }
 *  }
 * {- {b Quoted strings:}
 *    - If ['"'] {i in} [specials]: Quoted strings are not recognized, and
 *      double quotes are returned as [Special '"'].
 *    - If ['"'] {i not in} [specials]: Quoted strings are returned as
 *      [QString] tokens.
 *
 *   }
 * {- {b Domain literals:}
 *    {ul
 *    {- If '\[' {i in} [specials]: Domain literals are not recognized, and
 *       left brackets are returned as [Special] '\['.}
 *    {- If '\[' {i not in} [specials]: Domain literals are returned as
 *       [DomainLiteral] tokens.}
 *    }
 *   }
 * }
 *
 * If recognized, quoted strings are returned as [QString s], where
 * [s] is the string without the embracing quotes, and with already
 * decoded quoted pairs.
 *
 * Control characters [c] are returned as [Control c].
 *
 * If recognized, comments may either be returned as spaces (in the case
 * you are not interested in the contents of comments), or as [Comment] tokens.
 * The contents of comments are not further scanned; you must start a
 * subscanner to analyze comments as structured values.
 *
 * If recognized, domain literals are returned as [DomainLiteral s], where
 * [s] is the literal without brackets, and with decoded quoted pairs.
 *
 * Atoms are returned as [Atom s] where [s] is a longest sequence of
 * atomic characters (all characters which are neither special nor control
 * characters nor delimiters for substructures). If the option
 * [Recognize_encoded_words] is on, atoms which look like encoded words
 * are returned as [EncodedWord] tokens. (Important note: Neither '?' nor
 * '=' must be special in order to enable this functionality.)
 *
 * After the [mime_scanner] has been created, you can scan the tokens by
 * invoking [scan_token] which returns one token at a time, or by invoking
 * [scan_token_list] which returns all following tokens.
 *
 * There are two token types: [s_token] is the base type and is intended to
 * be used for pattern matching. [s_extended_token] is a wrapper that 
 * additionally contains information where the token occurs.
 *
 * {b Scanning Using the Simple Interface}
 *
 * Instead of creating a [mime_scanner] and calling the scan functions,
 * you may also invoke [scan_structured_value]. This function returns the
 * list of tokens directly; however, it is restricted to [s_token].
 * 
 * {b Examples}
 *
 * - Simple address: {[
 * scan_structured_value "user\@domain.com" [ '\@'; '.' ] []
 *   = [ Atom "user"; Special '\@'; Atom "domain"; Special '.'; Atom "com" ]
 * ]}
 * - Spaces are not returned: {[
 * scan_structured_value "user \@ domain . com" [ '\@'; '.' ] []
 *   = [ Atom "user"; Special '\@'; Atom "domain"; Special '.'; Atom "com" ]
 * ]}
 * - Comments are not returned: {[
 * scan_structured_value "user(Do you know him?)\@domain.com" [ '\@'; '.' ] []
 *   = [ Atom "user"; Special '\@'; Atom "domain"; Special '.'; Atom "com" ]
 * ]}
 * - Comments are indicated if requested: {[
 * scan_structured_value "user(Do you know him?)\@domain.com" [ '\@'; '.' ] 
 *     [ Return_comments ]
 *   = [ Atom "user"; Comment; Special '\@'; Atom "domain"; Special '.'; 
 *       Atom "com" ]
 * ]}
 * - Spaces are returned if special: {[
 * scan_structured_value "user (Do you know him?) \@ domain . com" 
 *     [ '\@'; '.'; ' ' ] []
 *   = [ Atom "user"; Special ' '; Special ' '; Special ' '; Special '\@'; 
 *       Special ' '; Atom "domain";
 *       Special ' '; Special '.'; Special ' '; Atom "com" ]
 * ]}
 * - Both spaces and comments are requested: {[
 * scan_structured_value "user (Do you know him?) \@ domain . com" 
 *     [ '\@'; '.'; ' ' ] [ Return_comments ]
 *   = [ Atom "user"; Special ' '; Comment; Special ' '; Special '\@'; 
 *       Special ' '; Atom "domain";
 *       Special ' '; Special '.'; Special ' '; Atom "com" ]
 * ]}
 * - Another case: {[
 * scan_structured_value "user \@ domain . com" [ '\@'; '.'; ' ' ] []
 *   = [ Atom "user"; Special ' '; Special '\@'; Special ' '; Atom "domain";
 *       Special ' '; Special '.'; Special ' '; Atom "com" ]
 * ]}
 * - '(' is special: {[
 * scan_structured_value "user(Do you know him?)\@domain.com" ['\@'; '.'; '(']
 *     []
 *   = [ Atom "user"; Special '('; Atom "Do"; Atom "you"; Atom "know";
 *       Atom "him?)"; Special '\@'; Atom "domain"; Special '.'; Atom "com" ]
 * ]}
 * - Quoted strings: {[
 * scan_structured_value "\"My.name\"\@domain.com" [ '\@'; '.' ] []
 *   = [ QString "My.name"; Special '\@'; Atom "domain"; Special '.';
 *       Atom "com" ]
 * ]}
 * - Encoded words are not returned: {[
 * scan_structured_value "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" 
 *     [ ] [ ] 
 *   = [ Atom "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" ]
 * ]}
 * - Encoded words are returned if requested: {[
 * scan_structured_value "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" 
 *     [ ] [ Recognize_encoded_words ] 
 *   = [ EncodedWord(("ISO-8859-1",""), "Q", "Keld_J=F8rn_Simonsen") ]
 * ]}
 *)



type s_token =
    Atom of string
  | EncodedWord of ((string * string) * string * string)
      (** Args: [((charset,lang),encoding,encoded_word)] *)
  | QString of string
  | Control of char
  | Special of char
  | DomainLiteral of string
  | Comment
  | End
      (** *)
(** A token may be one of:
 * - [QString s]: The quoted string [s], i.e a string between double
 *   quotes. Quoted pairs are already decoded in [s].
 * - [Control c]: The control character [c] (0-31, 127, 128-255)
 * - [Special c]: The special character [c], i.e. a character from 
 *   the [specials] list
 * - [DomainLiteral s]: The bracketed string [s], i.e. a string between
 *   brackets.  Quoted pairs are already decoded in [s].
 * - [Comment]: A string between parentheses. This kind of token is only
 *   generated when the option [Return_comments] is in effect.
 * - [EncodedWord((charset,lang),encoding,encoded_word)]: An RFC-2047 style
 *   encoded word: [charset] is the name of the character set; [lang] is
 *   the language specifier (from RFC 2231) or ""; [encoding] is either
 *   "Q" or "B"; and [encoded_word] is the word encoded in [charset] and
 *   [encoding]. This kind of token is only generated when the option
 *   [Recognize_encoded_words] is in effect (if not, [Atom] is generated
 *   instead).
 * - [Atom s]: A string which is neither quoted not bracketed nor 
 *   written in RFC 2047 notation, and which is not a control or special
 *   character, i.e. the "rest"
 * - [End]: The end of the string
 *)

type s_option =
    No_backslash_escaping
      (** Do not handle backslashes in quoted string and comments as escape
       * characters; backslashes are handled as normal characters.
       * For example: The wrong qstring ["C:\dir\file"] will be returned as
       * [QString "C:\dir\file"] when this option is in effect, and not as
       * [QString "C:dirfile"] as by default. 
       * -- This is a common error in many MIME implementations.
       *)
  | Return_comments
      (** Comments are returned as token [Comment] (unless '(' is included
       * in the list of special characters, in which case comments are
       * not recognized at all).
       * You may get the exact location of the comment by applying
       * [get_pos] and [get_length] to the extended token.
       *)
  | Recognize_encoded_words
      (** Enables that encoded words are recognized and returned as
       * [EncodedWord] instead of [Atom].
       *)



type s_extended_token
  (** An opaque type containing the information of [s_token] plus:
   * - where the token occurs
   * - RFC-2047 access functions
   *)

val get_token : s_extended_token -> s_token
    (** Return the [s_token] within the [s_extended_token] *)

val get_decoded_word : s_extended_token -> string
val get_charset : s_extended_token -> string
    (** Return the decoded word (the contents of the word after decoding the
     * "Q" or "B" representation), and the character set of the decoded word
     * (uppercase).
     *
     * These functions not only work for [EncodedWord]. The function
     * [get_decoded_word] returns for the other kinds of token:
     * - [Atom]: Returns the atom without decoding it
     * - [QString]: Returns the characters inside the double quotes, and
     *   ensures that any quoted pairs are decoded
     * - [Control]: Returns the one-character string
     * - [Special]: Returns the one-character string
     * - [DomainLiteral]: Returns the characters inside the brackets, and
     *   ensures that any quoted pairs are decoded
     * - [Comment]: Returns [""]
     *
     * The function [get_charset] returns ["US-ASCII"] for them.
     *)

val get_language : s_extended_token -> string
    (** Returns the language if the token is an [EncodedWord], and [""] for
     * all other tokens.
     *)

val get_pos : s_extended_token -> int
    (** Return the byte position where the token starts in the string 
     * (the first byte has position 0)
     *)

val get_line : s_extended_token -> int
    (** Return the line number where the token starts (numbering begins
     * usually with 1) 
     *)

val get_column : s_extended_token -> int
    (** Return the column of the line where the token starts (first column
     * is number 0)
     *)

val get_length : s_extended_token -> int
    (** Return the length of the token in bytes *)

val separates_adjacent_encoded_words : s_extended_token -> bool
    (** True iff the current token is white space (i.e. [Special ' '], 
     * [Special '\t'], [Special '\r'] or [Special '\n']) and the last
     * non-white space token was [EncodedWord] and the next non-white
     * space token will be [EncodedWord].
     *
     * The background of this function is that white space between
     * encoded words does not have a meaning, and must be ignored
     * by any application interpreting encoded words.
     *)


type mime_scanner
    (** The opaque type of a scanner for structured values *)

val create_mime_scanner : 
      specials:char list -> 
      scan_options:s_option list -> 
      ?pos:int ->
      ?line:int ->
      ?column:int ->
      string -> 
        mime_scanner
    (** Creates a new [mime_scanner] scanning the passed string.
     *
     * @param specials The list of characters recognized as special characters.
     * @param scan_options The list of global options modifying the behaviour
     *   of the scanner
     * @param pos The position of the byte where the scanner starts in the
     *   passed string. Defaults to 0.
     * @param line The line number of this first byte. Defaults to 1.
     * @param column The column number of this first byte. Default to 0.
     *)

(** Note for [create_mime_scanner]:
 *
 * The optional parameters [pos], [line], [column] are intentionally placed after
 * [scan_options] and before the string argument, so you can specify
 * scanners by partially applying arguments to [create_mime_scanner]
 * which are not yet connected with a particular string:
 * {[
 * let my_scanner_spec = create_mime_scanner my_specials my_options in
 * ...
 * let my_scanner = my_scanner_spec my_string in 
 * ...]}
 *)

val get_pos_of_scanner : mime_scanner -> int
val get_line_of_scanner : mime_scanner -> int
val get_column_of_scanner : mime_scanner -> int
    (** Return the current position, line, and column of a [mime_scanner].
     * The primary purpose of these functions is to simplify switching
     * from one [mime_scanner] to another within a string:
     *
     * {[
     * let scanner1 = create_mime_scanner ... s in
     * ... now scanning some tokens from s using scanner1 ...
     * let scanner2 = create_mime_scanner ... 
     *                  ?pos:(get_pos_of_scanner scanner1)
     *                  ?line:(get_line_of_scanner scanner1)
     *                  ?column:(get_column_of_scanner scanner1)
     *                  s in
     * ... scanning more tokens from s using scanner2 ... ]}
     *
     * {b Restriction:} These functions are not available if the option
     * [Recognize_encoded_words] is on. The reason is that this option
     * enables look-ahead scanning; please use the location of the last
     * scanned token instead.
     *
     * Note: To improve the performance of switching, it is recommended to
     * create scanner specs in advance (see the example [my_scanner_spec]
     * above).
     *)

val scan_token : mime_scanner -> (s_extended_token * s_token)
    (** Returns the next token, or [End] if there is no more token. The 
     * token is returned both as extended and as normal token.
     *)

val scan_token_list : mime_scanner -> (s_extended_token * s_token) list
    (** Returns all following tokens as a list (excluding [End]) *)

val scan_structured_value : string -> char list -> s_option list -> s_token list
    (** This function is included for backwards compatibility, and for all
     * cases not requiring extended tokens.
     *
     * It scans the passed string according to the list of special characters
     * and the list of options, and returns the list of all tokens.
     *)

val specials_rfc822 : char list
val specials_rfc2045 : char list
    (** The sets of special characters defined by the RFCs 822 and 2045.
     *)


(* *********************************************************************)

(* Widely used scanners: *)

(** {1:parsers_for_structured_values Parsing Certain Forms of Structured Values} *)

val scan_encoded_text_value : string -> s_extended_token list
    (** Scans a "text" value. The returned token list contains only
     * [Special], [Atom] and [EncodedWord] tokens. 
     * Spaces, TABs, CRs, LFs are returned (as [Special]) unless
     * they occur between adjacent encoded words in which case
     * they are suppressed. The characters '(', '\[', and '"' are also
     * returned as [Special] tokens, and are not interpreted as delimiters.
     *
     * For instance, this function can be used to scan the "Subject"
     * field of mail messages.
     *)


val scan_value_with_parameters : string -> s_option list ->
                                   (string * (string * string) list)
    (** [let name, params = scan_value_with_parameters s options]:
     * Scans values with annotations like
     *    [name ; p1=v1 ; p2=v2 ; ...]
     * For example, MIME types like "text/plain;charset=ISO-8859-1" can
     * be parsed.
     *
     * The values may or may not be quoted. The characters ";", "=", and
     * even "," are only accepted as part of values when they are quoted.
     * On sytax errors, the function fails.
     *
     * RFC 2231: This function supports some features of this RFC:
     * Continued parameter values are concatenated. For example:
     * 
     * {[
     * Content-Type: message/external-body; access-type=URL;
     *    URL*0="ftp://";
     *    URL*1="cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar" ]}
     *
     * This is returned as:
     * {["message/external-body", 
     *   [ ("access-type", "URL");
     *     ("URL", "ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar") ]
      ) ]}
     *
     * However, encoded parameter values are not handled specially. The
     * parameter
     *   [title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A]
     * would be returned as
     *   [("title*", "us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A")].
     * Use [scan_values_with_parameters_ep] instead (see below).
     *
     * Raises [Failure] on syntax errors.
     *)

type s_param
  (** The type of encoded parameters (RFC 2231) *)

val param_value : s_param -> string
val param_charset : s_param -> string
val param_language : s_param -> string
  (** Return the decoded value of the parameter, the charset (uppercase),
   * and the language.
   * If the charset is not available, [""] will be returned. 
   * If the language is not available, [""] will be returned. 
   *)

val mk_param : ?charset:string -> ?language:string -> string -> s_param
  (** Creates a parameter from a value (in decoded form). The parameter
   * may have a charset and a language.
   *)

val print_s_param : Format.formatter -> s_param -> unit
  (** Prints a parameter to the formatter (as toploop printer) *)


val scan_value_with_parameters_ep : string -> s_option list ->
                                       (string * (string * s_param) list)
    (** [let name, params = scan_value_with_parameters_ep s options]:
     * This version of the scanner copes with encoded parameters according
     * to RFC 2231.
     * Note: "ep" means "encoded parameters".
     *
     * Example:
     *   [doc.html;title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A]
     *
     * The parameter [title] would be returned as:
     * - name is ["title"]
     * - value is ["This is ***fun***"]
     * - charset is ["US-ASCII"]
     * - language is ["en-us"]
     *
     * Raises [Failure] on syntax errors.
     *)

val scan_mime_type : string -> s_option list ->
                       (string * (string * string) list)
    (** [let name, params = scan_mime_type s options]:
     * Scans MIME types like
     *    [text/plain; charset=iso-8859-1]
     * The name of the type and the names of the parameters are converted
     * to lower case.
     *
     * Raises [Failure] on syntax errors.
     *)

val scan_mime_type_ep : string -> s_option list ->
                         (string * (string * s_param) list)
    (** [let name, params = scan_mime_type_ep s options]:
     * This version copes with RFC-2231-encoded parameters.
     *
     * Raises [Failure] on syntax errors.
     *)

val split_mime_type : string -> (string * string)
    (** [let (main_type, sub_type) = split_mime_type content_type]:
     * Splits the MIME type into main and sub type, for example
     * [ split_mime_type "text/plain" = ("text", "plain") ].
     * The returned strings are always lowercase.
     *
     * Raises [Failure] on syntax errors.
     *)

(* *********************************************************************)

(* Write s_token list *)

(** {1:printers_for_structured_values Printing Structured Values} *)

exception Line_too_long
  (** Raised when the hard limit of the line length is exceeded *)

val write_value : 
      ?maxlen1:int ->                   (* Default: no max length *)
      ?maxlen:int ->                    (* Default: no max length *)
      ?hardmaxlen1:int ->               (* Default: no hard max length *)
      ?hardmaxlen:int ->                (* Default: no hard max length *)
      ?fold_qstring:bool ->             (* Default: true *)
      ?fold_literal:bool ->             (* Default: true *)
      ?unused:int ref ->                (* Default: do not store this value *)
      ?hardunused:int ref ->            (* Default: do not store this value *)
      Netchannels.out_obj_channel ->
      s_token list ->
        unit
    (** Writes the list of [s_token] to the [out_obj_channel]. The value
     * is optionally folded into several lines while writing, but this
     * is off by default. To enable folding, pass {b both} [maxlen1] and
     * [maxlen]:
     * The [maxlen1] parameter specifies the length of the first line
     * to write, the [maxlen] parameter specifies the length of the
     * other lines.
     *
     * If enabled, folding tries to ensure that the value is written
     * in several lines that are not longer as specified by 
     * [maxlen1] and [maxlen]. The value is split into lines by inserting
     * "folding space" at certain locations (which is usually a linefeed
     * followed by a space character, see below). The following
     * table specifies between which tokens folding may happen:
     *
     * {[
     *               +=========================================================+
     * 1st   \   2nd | Atom | QString | DLiteral | EncWord | Special | Spec ' '|
     * ==============+======+=========+==========+=========+=========+=========+
     *          Atom | FS   |  FS     |   FS     |   FS    |    -    |    F    |
     *       QString | FS   |  FS     |   FS     |   FS    |    -    |    F    |
     * DomainLiteral | FS   |  FS     |   FS     |   FS    |    -    |    F    |
     *   EncodedWord | FS   |  FS     |   FS     |   FS    |    -    |    F    |
     *       Special | -    |  -      |   -      |   -     |    -    |    F    |
     *   Special ' ' | -    |  -      |   -      |   -     |    -    |    -    |
     * ==============+======+=========+==========+=========+=========+=========+
     *]}
     *
     * The table shows between which two types of tokens a space or a folding
     * space is inserted:
     * - [FS]: folding space
     * - [F]:  linefeed without extra space
     * - [-]:  nothing can be inserted here
     * 
     * Folding space is ["\n "], i.e. only LF, not CRLF is used as end-of-line
     * character. The function [write_header] will convert these LF to CRLF
     * if needed.
     *
     * [Special '\t'] is handled like [Special ' ']. Control characters are just
     * printed, without folding. Comments, however, are substituted by 
     * either space or folding space. The token [End] is ignored.
     *
     * Furthermore, folding may also happen within tokens:
     * - [Atom], [Control], and [Special] are never split up into parts.
     *   They are simply printed.
     * - [EncodedWord]s, however, are reformatted. This especially means:
     *   adjacent encoded words are first concatenated if possible
     *   (same character set, same encoding, same language), and then
     *   split up into several pieces with optimally chosen lengths.
     *   {b Note:} Because this function gets [s_token] as input and not
     *   [s_extended_token], it is not known whether [Special ' '] tokens
     *   (or other whitespace) between adjacent EncodedWords must be
     *   ignored. Because of this, [write_value] only reformats adjacent encoded 
     *   words when there is not any whitespace between them.
     * - [QString] may be split up in a special way unless [fold_qstring]
     *   is set to [false]. For example, ["One Two  Three"] may be split up into
     *   three lines ["One\n Two\n \ Three"]. Because some header fields
     *   explicitly forbid folding of quoted strings, it is possible to
     *   set [~fold_qstring:false] (it is [true] by default).
     *   {b Note:} Software should not rely on that the different types of
     *   whitespace (especially space and TAB) remain intact at the
     *   beginning of a line. Furthermore, it may also happen that 
     *   additional whitespace is added at the end of a line by the
     *   transport layer.
     * - [DomainLiteral]: These are handled like [QString]. The parameter
     *   [~fold_literal:false] turns folding off if it must be prevented,
     *   it is [true] by default.
     * - [Comment]: Comments are effectively omitted! Instead of [Comment],
     *   a space or folding space is printed. However, you can output comments
     *   by passing sequences like [ Special "("; ...; Special ")" ].
     *
     * It is possible to get the actual number of characters back that
     * can still be printed into the last line without making the line
     * too long. Pass an [int ref] as [unused] to get this value (it may
     * be negative!). Pass an
     * [int ref] as [hardunused] to get the number of characters that may
     * be printed until the hard limit is exceeded.
     *
     * The function normally does not fail when a line becomes too long,
     * i.e. it exceeds [maxlen1] or [maxlen].
     * However, it is possible to specify a hard maximum length
     * ([hardmaxlen1] and [hardmaxlen]). If these are exceeded, the function
     * will raise [Line_too_long].
     *
     * For electronic mail, a [maxlen] of 78 and a [hardmaxlen] of 998 is
     * recommended.
     *
     * {b Known Problems:} 
     * - The reformatter for EncodedWords takes into
     *   account that multi-byte characters must not be split up. However,
     *   this works only when the multi-byte character set is known
     *   to [Netconversion]. You can assume that UTF-8 and UTF-16 always
     *   work. If the character set is not known the reformatter may
     *   split the string at wrong positions.
     * - The reformatter for EncodedWords may parse the token, and if
     *   this fails, you will get the exception [Malformed_code].
     *   This is only done in some special cases, however.
     * - The function prints spaces between adjacent atoms. Although
     *   this is allowed in principal, other MIME implementations might fail when
     *   there are spaces at unexpected locations. Workaround: If
     *   no spaces are desired, concatenate adjacent atoms before
     *   passing them to this function.
     *
     * {b Further Tips:}
     * - Pass ~maxlen1:0 and ~maxlen:0 to get shortest lines
     * - Use the reformatter for encoded words! It works well. For
     *   example, to output a long sentence, just wrap it into
     *   {b one} [EncodedWord]. The reformatter takes care to
     *   fold the word into several lines.
     *)

val param_tokens : ?maxlen:int -> (string*s_param) list -> s_token list
    (** Formats a parameter list. For example, 
     * [[ "a", "b"; "c", "d" ]] is transformed to the token sequence
     * corresponding to [; a=b; c=d].
     * If [maxlen] is specified, it is ensured that the individual
     * parameter (e.g. ["a=b;"]) is not longer than [maxlen-1], such that
     * it will fit into a line with maximum length [maxlen].
     * By default, no maximum length is guaranteed.
     * If [maxlen] is passed, or if a parameter specifies a character
     * set or language, the encoding of RFC 2231 will be applied. If these
     * conditions are not met, the parameters will be encoded traditionally.
     *)

val split_uri : string -> s_token list
    (** Splits a long URI according to the algorithm of RFC 2017.
     * The input string must only contain 7 bit characters, and
     * must be, if necessary, already be URL-encoded.
     *)


(* *********************************************************************)

(* Scanners for MIME bodies *)

(** {1:scanning_mime Scanning MIME Messages} *)

val scan_multipart_body : string -> start_pos:int -> end_pos:int -> 
                            boundary:string ->
                            ((string * string) list * string) list
(** [let [params1, value1; params2, value2; ...]
 *   = scan_multipart_body s start_pos end_pos boundary]:
 *
 * Scans the string [s] that is the body of a multipart message.
 * The multipart message begins at position [start_pos] in [s], and 
 * [end_pos] is the position
 * of the character following the message. In [boundary] the boundary string
 * must be passed (this is the "boundary" parameter of the multipart
 * MIME type, e.g. [multipart/mixed;boundary="some string"] ).
 *
 *     The return value is the list of the parts, where each part
 * is returned as pair [(params, value)]. The left component [params]
 * is the list of name/value pairs of the header of the part. The
 * right component is the raw content of the part, i.e. if the part
 * is encoded ("content-transfer-encoding"), the content is returned
 * in the encoded representation. The caller is responsible for decoding
 * the content.
 *
 *     The material before the first boundary and after the last
 * boundary is not returned.
 *
 * {b Multipart Messages}
 *
 * The MIME standard defines a way to group several message parts to
 * a larger message (for E-Mails this technique is known as "attaching"
 * files to messages); these are the so-called multipart messages.
 * Such messages are recognized by the major type string "multipart",
 * e.g. [multipart/mixed] or [multipart/form-data]. Multipart types MUST
 * have a [boundary] parameter because boundaries are essential for the
 * representation.
 *
 *    Multipart messages have a format like (where "_" denotes empty lines):
 * {[
 * ...Header...
 * Content-type: multipart/xyz; boundary="abc"
 * ...Header...
 * _
 * Body begins here ("prologue")
 * --abc
 * ...Header part 1...
 * _
 * ...Body part 1...
 * --abc
 * ...Header part 2...
 * _
 * ...Body part 2
 * --abc
 * ...
 * --abc--
 * Epilogue ]}
 *
 * The parts are separated by boundary lines which begin with "--" and
 * the string passed as boundary parameter. (Note that there may follow
 * arbitrary text on boundary lines after "--abc".) The boundary is
 * chosen such that it does not occur as prefix of any line of the
 * inner parts of the message.
 *
 *     The parts are again MIME messages, with header and body. Note
 * that it is explicitely allowed that the parts are even multipart
 * messages.
 *
 *     The texts before the first boundary and after the last boundary
 * are ignored.
 *
 *     Note that multipart messages as a whole MUST NOT be encoded.
 * Only the PARTS of the messages may be encoded (if they are not
 * multipart messages themselves).
 *
 * Please read RFC 2046 if want to know the gory details of this
 * brain-dead format.
 *)

val scan_multipart_body_and_decode : string -> start_pos:int -> end_pos:int -> 
                                        boundary:string ->
                                        ((string * string) list * string) list
  (** Same as [scan_multipart_body], but decodes the bodies of the parts
   * if they are encoded using the methods "base64" or "quoted printable".
   * Fails, if an unknown encoding is used.
   *)

val scan_multipart_body_from_netstream
    : Netstream.in_obj_stream ->
      boundary:string ->
      create:((string * string) list -> 'a) ->
      add:('a -> Netstream.in_obj_stream -> int -> int -> unit) ->
      stop:('a -> unit) ->
      unit
    (** [scan_multipart_body_from_netstream s boundary create add stop]:
     *
     * Reads the MIME message from the netstream [s] block by block. The
     * parts are delimited by the [boundary].
     *
     * Once a new part is detected and begins, the function [create] is
     * called with the MIME header as argument. The result [p] of this function
     * may be of any type.
     *
     * For every chunk of the part that is being read, the function [add]
     * is invoked: [add p s k n].
     *
     * Here, [p] is the value returned by the [create] invocation for the
     * current part. [s] is the netstream. The current window of [s] contains
     * the read chunk completely; the chunk begins at position [k] of the
     * window (relative to the beginning of the window) and has a length
     * of [n] bytes.
     *
     * When the part has been fully read, the function [stop] is
     * called with [p] as argument.
     *
     * That means, for every part the following is executed:
     * - [let p = create h]
     * - [add p s k1 n1]
     * - [add p s k2 n2]
     * - ...
     * - [add p s kN nN]
     * - [stop p]
     *
     * {b Important Precondition:}
     * - The block size of the netstream [s] must be at least
     *   [String.length boundary + 4]
     *
     * {b Exceptions:}
     * - Exceptions can happen because of ill-formed input, and within
     *   the callbacks of the functions [create], [add], [stop].
     * - If the exception happens while part [p] is being read, and the
     *   [create] function has already been called (successfully), the
     *   [stop] function is also called (you have the chance to close files).
     *   The exception is re-raised after [stop] returns.
     *)

val read_multipart_body : 
      (Netstream.in_obj_stream -> 'a) -> string -> Netstream.in_obj_stream -> 
         'a list
    (** This is the "next generation" multipart message parser. It is 
     * called as follows:
     *
     *   [let parts = read_multipart_body f boundary s]
     *
     * As precondition, the current position of the stream [s] must be at
     * the beginning of the message body. The string [boundary] must
     * be the message boundary (without "--"). The function [f] is called
     * for every message part, and the resulting list [parts] is the
     * concatentation of the values returned by [f]. 
     *
     * The stream passed to [f] is a substream of [s] that begins at the
     * first byte of the header of the message part. The function [f]
     * can read data from the substream as necessary. The substream
     * terminates at the end of the message part. This means that [f] can simply
     * read the data of the substream from the beginning to the end. It is
     * not necessary that [f] reads the substream until EOF, however.
     *
     * After all parts have been read, the trailing material of stream [s] 
     * is skipped until EOF of [s] is reached.
     *)

(* *********************************************************************)

(* Tools to create multipart messages *)

(** {1:helpers_mime Helpers for MIME Messages} *)

val create_boundary : ?random:string list -> ?nr:int -> unit -> string
  (** Creates a boundary string that can be used to separate multipart
   * messages.
   * The string is 63 characters long and has the following "features":
   * - Most of the string consists of the minus character yielding
   *   a clear optical effect
   * - The string contains "=__". This sequence cannot be obtained
   *   by the quoted-printable encoding, so you need not to care whether
   *   strings encoded as quoted-printable contain the boundary.
   * - The string contains "<&>;" which is illegal in HTML, XML, and
   *   SGML.
   * - The string does not contain double quotes or backslashes,
   *   so you can safely put double quotes around it in the MIME header.
   * - The string contains [nr], so you can safely distinguish between
   *   several boundaries occurring in the same MIME body if you 
   *   assign different [nr].
   * - The string contains a hash value composed of the first
   *   256 bytes of all strings passed as [random], and influenced
   *   by the current GC state.
   *)


(* THREAD-SAFETY:
 * The functions are thread-safe as long as the threads do not share
 * values.
 *)