This file is indexed.

/usr/lib/mlton/sml/mllpt-lib/ulex-buffer.sml is in mlton-basis 20130715-3.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
(* ulex-buffer.sml
 *
 * COPYRIGHT (c) 2006
 * John Reppy (http://www.cs.uchicago.edu/~jhr)
 * Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
 * All rights reserved.
 *
 * Forward-chained buffers for lexing
 *)

structure ULexBuffer : sig

  (* An immutable cursor into lazily-read input.  Reading from a stream returns
   * a new stream value; the stream that was read from remains usable.
   *)
    type stream

    exception Incomplete	(* raised by getu on an incomplete multi-byte character *)

  (* mkStream (pos, input) creates a stream whose first character is at position
   * pos.  The input function is called on demand to obtain the next chunk of
   * text; it signals the end of the input by returning the empty string.
   *)
    val mkStream : (AntlrStreamPos.pos * (unit -> string)) -> stream
  (* get the next 8-bit character; NONE means end of input *)
    val getc : stream -> (char * stream) option
  (* get the next UTF-8 encoded character as a word; NONE means end of input *)
    val getu : stream -> (word * stream) option
  (* the position of the next character that would be read from the stream *)
    val getpos : stream -> AntlrStreamPos.pos
  (* subtract (new, old) is the text lying between the cursor old and the later
   * cursor new of the same underlying input.
   *)
    val subtract : stream * stream -> Substring.substring
  (* true when no further character can be read from the stream *)
    val eof : stream -> bool
  (* the newline flag recorded in the stream by the read that produced it *)
    val lastWasNL : stream -> bool

  end = struct

    structure W = Word

  (* A stream is a buffer, an offset into that buffer's data, and a flag saying
   * whether the character read just before this point was a newline.  Buffers
   * are chained forward through the memoizing `more' reference, so several
   * streams over the same input share the chunks that have been read so far.
   *)
    datatype stream = S of (buf * int * bool)
    and buf = B of {
      data : string,			(* the text held by this buffer *)
      basePos : AntlrStreamPos.pos,	(* position of the first character of data *)
      more : more ref,			(* memoized successor buffer *)
      input : unit -> string		(* source of further chunks *)
    }
    and more
      = UNKNOWN				(* input has not been asked for the next chunk yet *)
      | YES of buf			(* the next chunk *)
      | NO				(* input reported the end of the input *)

  (* The initial buffer is empty, so the input function is not called until the
   * first read.  The newline flag of a fresh stream is true.
   *)
    fun mkStream (pos, input) =
	  (S (B {data = "", basePos = pos,
		 more = ref UNKNOWN,
		 input = input},
	      0, true))

  (* advance the stream to the next block of input.  The arguments are the
   * fields of the current buffer.  The answer is stored in `more', so input is
   * called at most once per buffer; the result is therefore never UNKNOWN.
   *)
    fun advance (data, input, basePos, more) = (case !more
	   of UNKNOWN => (case input()
		 of "" => (more := NO; NO)
		  | data' => let
		    (* the new buffer starts where the current one ends *)
		      val buf' = B {
			  data = data',
			  basePos = AntlrStreamPos.forward (basePos, String.size data),
			  more = ref UNKNOWN,
			  input = input
			}
		      in
			more := YES buf';
			YES buf'
		      end
		(* end case *))
	    | m => m
	  (* end case *))

  (* get the next character; moves on to the following buffer when the current
   * one is exhausted and returns NONE when there is no following buffer.
   *)
    fun getc (S(buf as B{data, basePos, more, input}, pos, lastWasNL)) =
	  if pos < String.size data
	    then let
	      val c = String.sub (data, pos)
	      in
		SOME (c, S (buf, pos+1, c = #"\n"))
	      end
	    else (case advance(data, input, basePos, more)
	       of NO => NONE
		| YES buf' => getc (S (buf', 0, lastWasNL))
		| UNKNOWN => raise Fail "impossible"
	      (* end case *))

    exception Incomplete

  (* get the next UTF8 character represented as a word.  One- to four-byte
   * encodings are decoded.  Incomplete is raised when the lead byte is not a
   * valid lead byte (a stray continuation byte, or a byte of the form
   * 11111xxx), when a continuation byte does not have the form 10xxxxxx, or
   * when the input ends in the middle of a character.  No check is made for
   * overlong encodings or for code points that are outside the Unicode range.
   *)
    fun getu (S(buf as B{data, basePos, more, input}, pos, _)) =
	  if pos < String.size data
	    then let
	      val c = W.fromInt(Char.ord(String.sub(data, pos)))
	      in
		if (c < 0w128)
		  then SOME(c, S(buf, pos+1, c = 0w10))  (* ord #"\n" = 10 *)
		  else let (* multibyte character *)
		  (* read one continuation byte and append its six payload bits
		   * to the partial code point wc.  An accepted continuation
		   * byte is never a newline, so the stream returned by getc
		   * always carries a false newline flag here.
		   *)
		    fun getContByte (wc, strm) = (case getc strm
			   of NONE => raise Incomplete
			    | SOME(ch, strm') => let
				val b = W.fromInt(Char.ord ch)
				in
				  if (W.andb(0wxc0, b) = 0wx80)
				    then (W.orb(W.<<(wc, 0w6), W.andb(0wx3f, b)), strm')
				    else raise Incomplete
				end
			  (* end case *))
		    val strm = S(buf, pos+1, false)
		    in
		    (* the lead byte determines the length of the sequence; each
		     * test compares the complete prefix so that the lead byte
		     * of a four-byte sequence is not mistaken for the lead
		     * byte of a three-byte sequence.
		     *)
		      if (W.andb(0wxe0, c) = 0wxc0)		(* 110xxxxx: 2 bytes *)
			then SOME(getContByte (W.andb(0wx1f, c), strm))
		      else if (W.andb(0wxf0, c) = 0wxe0)	(* 1110xxxx: 3 bytes *)
			then SOME(getContByte(getContByte(W.andb(0wx0f, c), strm)))
		      else if (W.andb(0wxf8, c) = 0wxf0)	(* 11110xxx: 4 bytes *)
			then SOME(getContByte(getContByte(getContByte(W.andb(0wx07, c), strm))))
			else raise Incomplete
		    end
	      end
	  (* advance buffer *)
	    else (case advance(data, input, basePos, more)
	       of NO => NONE
		| YES buf' => getu (S(buf', 0, false))
		| UNKNOWN => raise Fail "impossible"
	      (* end case *))

  (* the position of a stream is its buffer's base position moved forward by the
   * offset into the buffer.
   *)
    fun getpos (S (B {basePos, ...}, pos, _)) = AntlrStreamPos.forward (basePos, pos)

  (* subtract (new, old) returns the text from old up to new.  When the two
   * streams have the same base position the result is a slice of new's data.
   * Otherwise the remainder of old's buffer is joined with the text from the
   * start of old's successor buffer up to new.  Only buffers that have already
   * been read are followed; the input function is never called.  Fail is
   * raised if the chain starting at old ends without reaching new's buffer.
   *)
    fun subtract (new, old) = let
	  val (S (B {data = ndata, basePos = nbasePos, ...}, npos, _)) = new
	  val (S (B {data = odata, basePos = obasePos, more, ...}, opos, _)) = old
	  in
	    if nbasePos = obasePos
	      then Substring.substring (ndata, opos, npos-opos)
	      else (case !more
		 of YES buf => Substring.full (
		      Substring.concat [
			  Substring.extract (odata, opos, NONE),
			  subtract (new, S (buf, 0, false))
			])
		  | _ => raise Fail "BUG: ULexBuffer.subtract, but buffers are unrelated"
		(* end case *))
	  end

  (* a stream is at the end of the input exactly when getc yields nothing; this
   * may call the input function to find out.
   *)
    fun eof s = not (isSome (getc s))

    fun lastWasNL (S (_, _, lastWasNL)) = lastWasNL

  end