This file is indexed.

/usr/lib/ocaml/netstring/netulex.mli is in libocamlnet-ocaml-dev 4.0.4-1build3.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
(* $Id: netulex.mli 1003 2006-09-24 15:17:15Z gerd $
 * ----------------------------------------------------------------------
 * PXP: The polymorphic XML parser for Objective Caml.
 * Copyright by Gerd Stolpmann. See LICENSE for details.
 *)

(** Support module for Alain Frisch's [ulex] lexer generator
 *
 * The sub module [ULB] is a Unicode-based lexing buffer that
 * reads encoded strings and makes them available to the lexer
 * as both Unicode arrays and UTF-8 strings.
 *
 * The sub module [Ulexing] is a replacement for the module
 * in [ulex] with the same name. It uses [ULB] to represent
 * the main lexing buffer. It is much faster than the original
 * [Ulexing] implementation when the scanned text is UTF-8
 * encoded and [Ulexing.utf8_lexeme] is frequently called to
 * get the lexeme strings. Furthermore, it can process input
 * data of all encodings available to [Netconversion]. It is,
 * however, no drop-in replacement as it has a different
 * signature.
 *
 * To enable this version of [Ulexing], simply put an
 * [open Netulex] before using the [ulex] lexers.
 *
 * Note that the tutorial has been moved to {!Netulex_tut}.
 *)

(** {1:modules Modules} *)


module ULB : sig
  (** This module provides the [unicode_lexbuf] record with
   * access functions. In this record, the data is available
   * in two forms: As an array of Unicode code points
   * [ulb_chars], and as string of encoded chars [ulb_rawbuf].
   * Both buffers are synchronised by [ulb_chars_pos]. This
   * array stores where every character of [ulb_chars] can be
   * found in [ulb_rawbuf].
   *)

  type unicode_lexbuf =
      private
	{ mutable ulb_encoding : Netconversion.encoding;
                (** The character encoding of [ulb_rawbuf] *)
	  mutable ulb_encoding_start : int;
	        (** The first character position to which [ulb_encoding]
		 * applies (the encoding of earlier positions is
		 * lost)
		 *)
	  mutable ulb_rawbuf : string;
                (** The encoded string to analyse *)
	  mutable ulb_rawbuf_len : int;
                (** The filled part of [ulb_rawbuf] *)
	  mutable ulb_rawbuf_end : int;
                (** The analysed part of [ulb_rawbuf]. We have always
	         * [ulb_rawbuf_end <= ulb_rawbuf_len]. The analysed part
	         * may be shorter than the filled part because there is
	         * not enough space in [ulb_chars], or because the filled
	         * part ends with an incomplete multi-byte character
	         *)
	  mutable ulb_rawbuf_const : bool;
	        (** Whether [ulb_rawbuf] is considered as a constant. If
		 * [true], it is never blitted.
		 *)
	  mutable ulb_chars : int array;
                (** The analysed part of [ulb_rawbuf] as array of Unicode
	         * code points. Only the positions 0 to [ulb_chars_len-1]
	         * of the array are filled.
	         *)
	  mutable ulb_chars_pos : int array;
                (** For every analysed character this array stores the
   	         * byte position where the character begins in [ulb_rawbuf].
	         * In addition, the array contains at [ulb_chars_len] the
	         * value of [ulb_rawbuf_end].
		 *
	         * This array is one element longer than [ulb_chars].
	         *)
	  mutable ulb_chars_len : int;
                (** The filled part of [ulb_chars] *)
	  mutable ulb_eof : bool;
                (** Whether EOF has been seen *)
	  mutable ulb_refill : string -> int -> int -> int;
	        (** The refill function *)
	  mutable ulb_enc_change_hook : unicode_lexbuf -> unit;
	        (** This function is called when the encoding changes *)
	  mutable ulb_cursor : Netconversion.cursor;
	        (** Internally used by the implementation *)
	}

  val from_function : 
    ?raw_size:int -> 
    ?char_size:int -> 
    ?enc_change_hook:(unicode_lexbuf -> unit) ->
    refill:(string -> int -> int -> int) ->
    Netconversion.encoding -> 
      unicode_lexbuf
  (** Creates a [unicode_lexbuf] to analyse strings of the 
   * passed [encoding] coming from the [refill] function.
   *
   * @param raw_size The initial size for [ulb_rawbuf]. Defaults to 512
   * @param char_size The initial size for [ulb_chars]. Defaults to 256
   * @param enc_change_hook This function is called when the encoding
   *   is changed, either by this module, or by the user calling
   *   [set_encoding].
   * @param refill This function is called with arguments [ulb_rawbuf],
   *   [ulb_rawbuf_len], and [l], where 
   *   [l = String.length ulb_rawbuf - ulb_rawbuf_len] is the free
   *   space in the buffer. The function should fill new bytes into
   *   this substring, and return the number of added bytes. The
   *   return value 0 signals EOF.
   *)

  val from_in_obj_channel :
    ?raw_size:int -> 
    ?char_size:int -> 
    ?enc_change_hook:(unicode_lexbuf -> unit) ->
    Netconversion.encoding -> 
    Netchannels.in_obj_channel ->
      unicode_lexbuf
  (** Creates a [unicode_lexbuf] to analyse strings of the 
   * passed [encoding] coming from the object channel.
   *
   * @param raw_size The initial size for [ulb_rawbuf]. Defaults to 512
   * @param char_size The initial size for [ulb_chars]. Defaults to 256
   * @param enc_change_hook This function is called when the encoding
   *   is changed, either by this module, or by the user calling
   *   [set_encoding].
   *)

  val from_string :
        ?enc_change_hook:(unicode_lexbuf -> unit) ->
        Netconversion.encoding -> string -> unicode_lexbuf
  (** Creates a [unicode_lexbuf] analysing the passed string encoded in
   * the passed encoding. This function copies the input string.
   *
   * @param enc_change_hook This function is called when the encoding
   *   is changed, either by this module, or by the user calling
   *   [set_encoding]
   *)

  val from_string_inplace :
        ?enc_change_hook:(unicode_lexbuf -> unit) ->
        Netconversion.encoding -> string -> unicode_lexbuf
  (** Creates a [unicode_lexbuf] analysing the passed string encoded in
   * the passed encoding. This function does not copy the input string,
   * but uses it directly as [ulb_rawbuf]. The string is not modified by [ULB],
   * but the caller must ensure that other program parts do not
   * modify it either.
   *
   * @param enc_change_hook This function is called when the encoding
   *   is changed, either by this module, or by the user calling
   *   [set_encoding]
   *)

  val delete :
        int -> unicode_lexbuf -> unit
  (** Deletes the number of characters from [unicode_lexbuf].
   *  These characters
   *  are removed from the beginning of the buffer, i.e.
   *  [ulb_chars.(n)] becomes the new first character of the
   *  buffer. All three buffers [ulb_rawbuf], [ulb_chars], and
   *  [ulb_chars_pos] are blitted as necessary.       
   *
   *  When the buffer is already at EOF, the function fails.
   *
   *  For efficiency, it should be tried to call [delete] as seldom as
   *  possible. Its speed is linear to the number of characters to move.
   *)

  val refill :
	unicode_lexbuf ->
	  unit
  (** Tries to add characters to the [unicode_lexbuf] by calling the 
   * [ulb_refill] function. When the buffer is already at EOF, the 
   * exception [End_of_file] is raised, and the buffer is not modified.
   * Otherwise, the [ulb_refill] function is called to
   * add new characters. If necessary, [ulb_rawbuf], [ulb_chars], and
   * [ulb_chars_pos] are enlarged such that it is ensured that either
   * at least one new character is added, or that EOF is found for
   * the first time
   * In the latter case, [ulb_eof] is set to [true] (and the next call
   * of [refill_unicode_lexbuf] will raise [End_of_file]).
   *)

  val set_encoding :
        Netconversion.encoding -> unicode_lexbuf -> unit
   (** Sets the [encoding] to the passed value. This only affects future
    * [refill] calls. The hook [enc_change_hook] is invoked when defined.
    *)

  val close :
        unicode_lexbuf -> unit
  (** Sets [ulb_eof] of the [unicode_lexbuf]. The rest of the buffer
   * is not modified
   *)

  val utf8_sub_string : int -> int -> unicode_lexbuf -> string
    (** The two [int] arguments are the position and length of a sub
     * string of the lexbuf that is returned as UTF8 string. Position
     * and length are given as character multiples, not byte multiples.
     *)

  val utf8_sub_string_length : int -> int -> unicode_lexbuf -> int
    (** Returns [String.length(utf8_sub_string args)]. Tries not to
     * allocate the UTF-8 string.
     *)

end (* module ULB *)


module Ulexing : sig
  (** This is a lexing buffer for [ulex]. *)

  type lexbuf
  exception Error
    (** Lexical error *)

  val from_ulb_lexbuf : ULB.unicode_lexbuf -> lexbuf
    (** Creates a new [lexbuf] from the [unicode_lexbuf]. After that,
     * the [unicode_lexbuf] must no longer be modified.
     *)
  val lexeme_start: lexbuf -> int
    (** The character position of the start of the lexeme *)
  val lexeme_end: lexbuf -> int
    (** The character position of the end of the lexeme *)
  val lexeme_length: lexbuf -> int
    (** The length of the lexeme in characters *)
  val lexeme: lexbuf -> int array
    (** Returns the lexeme as array of Unicode code points *)
  val lexeme_char: lexbuf -> int -> int
    (** Returns the code point of a certain character of the
     * lexeme
     *)
  val sub_lexeme: lexbuf -> int -> int -> int array
    (** Returns a substring of the lexeme as array of Unicode
     * code points. The first [int] is the characater position
     * where to start, the second [int] is the number of 
     * characters.
     *)
  val utf8_lexeme: lexbuf -> string
    (** Returns the lexeme as UTF-8 encoded string *)
  val utf8_sub_lexeme: lexbuf -> int -> int -> string
    (** Returns a substring of the lexeme as UTF-8 encoded
     * string. The first [int] is the characater position
     * where to start, the second [int] is the number of
     * characters.
     *)              
  val utf8_sub_lexeme_length: lexbuf -> int -> int -> int
    (** Same as
     * String.length(utf8_sub_lexeme args), i.e. returns
     * the number of bytes a certain sub lexeme will have
     * when encoded as UTF-8 string.
     *)
    
  (**/**)

  (* "Internal" interface. This must match ulex's ones. *)
  val start: lexbuf -> unit
  val next: lexbuf -> int
  val mark: lexbuf -> int -> unit
  val backtrack: lexbuf -> int
end