/usr/lib/mlton/sml/mllpt-lib/ulex-buffer.sml is in mlton-basis 20130715-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
(* ulex-buffer.sml
*
* COPYRIGHT (c) 2006
* John Reppy (http://www.cs.uchicago.edu/~jhr)
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
* All rights reserved.
*
* Forward-chained buffers for lexing
*)
structure ULexBuffer : sig

  (* a position in a chain of input blocks that are read on demand *)
    type stream

    exception Incomplete (* raised by getu on an incomplete multi-byte character *)

  (* mkStream (pos, input) creates a stream whose first character is at
   * position pos.  The input function is called each time another block of
   * text is needed; returning the empty string signals the end of the input.
   *)
    val mkStream : (AntlrStreamPos.pos * (unit -> string)) -> stream

  (* get the next character (byte) and the stream that follows it; NONE at
   * the end of the input.
   *)
    val getc : stream -> (char * stream) option

  (* get the next UTF-8 encoded character as a word, and the stream that
   * follows it; NONE at the end of the input.  Raises Incomplete when the
   * lead byte is not a valid UTF-8 lead byte, or when a continuation byte is
   * missing or malformed.
   *)
    val getu : stream -> (word * stream) option

  (* the source position of the next character of the stream *)
    val getpos : stream -> AntlrStreamPos.pos

  (* subtract (new, old) returns the text that lies between old and new.
   * Raises Fail when new cannot be reached from old by following blocks
   * that have already been read.
   *)
    val subtract : stream * stream -> Substring.substring

  (* true when getc would return NONE; like getc, this may read more input *)
    val eof : stream -> bool

  (* true for a fresh stream and for a stream positioned just after "\n" *)
    val lastWasNL : stream -> bool

  end = struct

    structure W = Word

  (* A stream is a buffer, an index into that buffer's data, and a flag that
   * records whether the character just before the index was a newline.
   *)
    datatype stream = S of (buf * int * bool)

  (* A buffer holds one block of input together with the position of its
   * first character.  The more field caches the link to the following block
   * so that every stream sharing this buffer sees the same continuation and
   * the input function is called at most once per block.
   *)
    and buf = B of {
        data : string,
        basePos : AntlrStreamPos.pos,
        more : more ref,
        input : unit -> string
      }

  (* UNKNOWN: input has not been asked yet; YES: the next block; NO: end of input *)
    and more = UNKNOWN | YES of buf | NO

  (* The initial buffer is empty, so the first read always goes through
   * advance; the flag starts out true.
   *)
    fun mkStream (pos, input) = S (
          B {data = "", basePos = pos, more = ref UNKNOWN, input = input},
          0, true)

  (* advance to the buffer that follows the given one, reading a new block of
   * input if the link has not been resolved yet.  Returns NONE at the end of
   * the input.
   *)
    fun advance (B {data, basePos, more, input}) = (case !more
           of YES next => SOME next
            | NO => NONE
            | UNKNOWN => (case input ()
                 of "" => (more := NO; NONE)
                  | data' => let
                      val next = B {
                              data = data',
                              (* the new block starts where the current one ends *)
                              basePos = AntlrStreamPos.forward (basePos, String.size data),
                              more = ref UNKNOWN,
                              input = input
                            }
                      in
                        more := YES next;
                        SOME next
                      end
                (* end case *))
          (* end case *))

    fun getc (S (buf as B {data, ...}, pos, lastWasNL)) =
          if pos < String.size data
            then let
              val c = String.sub (data, pos)
              in
                SOME (c, S (buf, pos+1, c = #"\n"))
              end
            else (case advance buf
               of NONE => NONE
                | SOME buf' => getc (S (buf', 0, lastWasNL))
              (* end case *))

    exception Incomplete

  (* get the next UTF8 character represented as a word.  The lead byte selects
   * the length of the encoding:
   *
   *    0xxxxxxx    one byte
   *    110xxxxx    two bytes
   *    1110xxxx    three bytes
   *    11110xxx    four bytes
   *
   * Any other lead byte raises Incomplete.  No check is made for overlong
   * encodings or for values outside of the Unicode range.
   *)
    fun getu strm = (case getc strm
           of NONE => NONE
            | SOME (ch, rest) => let
                val lead = W.fromInt (Char.ord ch)
              (* shift the six payload bits of the next byte into wc.  The
               * stream produced by getc is reused as is: a valid continuation
               * byte is in the range 0x80-0xBF, so it is never a newline and
               * the newline flag of the resulting stream is always false.
               *)
                fun contByte (wc, s) = (case getc s
                       of NONE => raise Incomplete
                        | SOME (ch', s') => let
                            val b = W.fromInt (Char.ord ch')
                            in
                              if W.andb (0wxc0, b) = 0wx80
                                then (W.orb (W.<< (wc, 0w6), W.andb (0wx3f, b)), s')
                                else raise Incomplete
                            end
                      (* end case *))
                in
                  if lead < 0w128
                    then SOME (lead, rest)
                  else if W.andb (0wxe0, lead) = 0wxc0
                    then SOME (contByte (W.andb (0wx1f, lead), rest))
                  else if W.andb (0wxf0, lead) = 0wxe0
                    then SOME (contByte (contByte (W.andb (0wx0f, lead), rest)))
                  else if W.andb (0wxf8, lead) = 0wxf0
                    then SOME (contByte (contByte (contByte (W.andb (0wx07, lead), rest))))
                  (* a stray continuation byte, or a byte in the range 0xF8-0xFF *)
                    else raise Incomplete
                end
          (* end case *))

    fun getpos (S (B {basePos, ...}, pos, _)) = AntlrStreamPos.forward (basePos, pos)

  (* Walk the buffer chain from old until the buffer of new is reached,
   * collecting the text of each buffer that is passed, and concatenate the
   * pieces once at the end.  Buffers are identified by their base position.
   * The final piece is always cut from the data of new; this also covers the
   * empty initial buffer, which has the same base position as its successor.
   *)
    fun subtract (new, old) = let
          val S (B {data = ndata, basePos = nbasePos, ...}, npos, _) = new
          fun collect (S (B {data, basePos, more, ...}, pos, _), pieces) =
                if nbasePos = basePos
                  then List.rev (Substring.substring (ndata, pos, npos - pos) :: pieces)
                  else (case !more
                     of YES buf => collect (
                          S (buf, 0, false),
                          Substring.extract (data, pos, NONE) :: pieces)
                      | _ => raise Fail "BUG: ULexBuffer.subtract, but buffers are unrelated"
                    (* end case *))
          in
            case collect (old, [])
             of [piece] => piece        (* same buffer: no copying is needed *)
              | pieces => Substring.full (Substring.concat pieces)
            (* end case *)
          end

    fun eof s = not (isSome (getc s))

    fun lastWasNL (S (_, _, lastWasNL)) = lastWasNL

  end
|