This file is indexed.

/usr/share/doc/libsmlnj-smlnj/HTML/html-lex is in libsmlnj-smlnj 110.78-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
(* html-lex
 *
 * COPYRIGHT (c) 1995 AT&T Bell Laboratories.
 * COPYRIGHT (c) 1996 AT&T Research.
 *
 * A scanner for HTML.
 *
 * TODO:
 *    Recognize the DOCTYPE element
 *	<!DOCTYPE HTML PUBLIC "...">
 *    Clean up the scanning of start tags (do we need Err?).
 *    Whitespace in PRE elements should be preserved, but how?
 *)

(* Short alias for the token structure; the rule actions build their tokens
 * through it, e.g. T.PCDATA, T.CHAR_REF and T.ENTITY_REF.
 *)
structure T = Tokens
(* Tag recognizers: the rule actions call Elems.startTag and Elems.endTag to
 * turn the text of a tag into an optional token.
 *)
structure Elems = HTMLElementsFn (
  structure Tokens = Tokens
  structure Err = Err
  structure HTMLAttrs = HTMLAttrs)

(* Type names used by the generated lexer and its parser interface. *)
(* positions are integers; the rules pass line numbers read from yylineno *)
type pos = int
type svalue = T.svalue
(* Extra lexer argument: an error reporter taking a message and two integer
 * positions, paired with an optional string that the rules bind as file and
 * forward to Elems.  NOTE review: presumably the name of the input being
 * scanned; confirm against the caller.
 *)
type arg = (((string * int * int) -> unit) * string option)
type ('a, 'b) token = ('a, 'b) T.token
(* the type of value every rule action must produce *)
type lexresult= (svalue, pos) token

(* a buffer for collecting a string piecewise; pieces are kept newest-first *)
val buffer = ref ([] : string list)

(* push one more piece onto the buffer *)
fun addStr s = (buffer := s :: !buffer)

(* return the buffered pieces joined in the order they were added, and leave
 * the buffer empty for the next use
 *)
fun getStr () = (String.concat(List.rev(! buffer)) before (buffer := []))

(* End-of-input hook for the generated lexer; the lexer argument is ignored.
 * The buffer is state shared by every scan, and it is only emptied by getStr
 * when a start tag is completed.  If the input ends in the middle of a start
 * tag the collected pieces would otherwise survive and be prepended to the
 * first start tag of the next scan, so they are discarded here.
 *)
fun eof _ = (buffer := []; T.EOF(0, 0))

%%

%s COM1 COM2 STAG;

%header (functor HTMLLexFn (
  structure Tokens : HTML_TOKENS
  structure Err : HTML_ERROR
  structure HTMLAttrs : HTML_ATTRS));

%arg (errorFn, file);

%full
%count

alpha=[A-Za-z];
digit=[0-9];
namechar=[-A-Za-z0-9.];
tag=({alpha}{namechar}*);
ws = [\ \t];

%%

<INITIAL>"<"{tag}
	=> ((* start-tag opener: keep its text and gather the rest in STAG *)
	    addStr yytext; YYBEGIN STAG; continue());
<STAG>">"
	=> ((* end of the start tag: hand the whole buffered text to
	     * Elems.startTag; when it yields NONE no token is produced and
	     * scanning simply goes on
	     *)
	    addStr yytext;
	    YYBEGIN INITIAL;
	    case Elems.startTag file (getStr(), !yylineno, !yylineno)
	     of NONE => continue()
	      | (SOME tag) => tag
	    (* end case *));
<STAG>\n
	=> ((* a line break inside a tag is buffered as a single space *)
	    addStr " "; continue());
<STAG>{ws}+
	=> ((* blanks are buffered verbatim *)
	    addStr yytext; continue());
<STAG>{namechar}+
	=> ((* attribute names and unquoted values are buffered verbatim *)
	    addStr yytext; continue());
<STAG>"="
	=> (addStr yytext; continue());
<STAG>"\""[^\"\n]*"\""
	=> ((* double-quoted value; the pattern does not cross a line break *)
	    addStr yytext; continue());
<STAG>"'"[^'\n]*"'"
	=> ((* single-quoted value; the pattern does not cross a line break *)
	    addStr yytext; continue());
<STAG>.
	=> ((* any other character, e.g. an unmatched quote, is buffered as is *)
	    addStr yytext; continue());

<INITIAL>"</"{tag}{ws}*">"
	=> (let
	      (* current line number, used as both positions of the tag *)
	      val line = !yylineno
	    in
	      (* a whole end tag is matched at once, so it goes straight to
	       * Elems.endTag without passing through the string buffer
	       *)
	      case Elems.endTag file (yytext, line, line)
	       of (SOME tok) => tok
	        | NONE => continue()
	      (* end case *)
	    end);

<INITIAL>"<!--"
	=> ((* comment opener: COM1 scans the comment text *)
	    YYBEGIN COM1; continue());
<COM1>"--"
	=> ((* a double dash ends the comment text; COM2 then looks for the
	     * closing angle bracket
	     *)
	    YYBEGIN COM2; continue());
<COM1>\n
	=> ((* comment text, line breaks included, is discarded *)
	    continue());
<COM1>.
	=> (continue());
<COM2>"--"
	=> ((* another double dash starts a further stretch of comment text *)
	    YYBEGIN COM1; continue());
<COM2>">"
	=> ((* comment finished; no token is returned for it *)
	    YYBEGIN INITIAL; continue());
<COM2>\n
	=> ((* line breaks and blanks are allowed before the closing bracket *)
	    continue());
<COM2>{ws}
	=> (continue());
<COM2>.
	=> ((* anything else after the closing double dash is reported through
	     * errorFn, and normal scanning resumes in INITIAL
	     *)
	    errorFn("bad comment syntax", !yylineno, !yylineno+1);
	    YYBEGIN INITIAL;
	    continue());

<INITIAL>"&#"[A-Za-z]+";"
	=> (
(** At some point, we should support &#SPACE; and &#TAB; **)
	    (* until then a named character reference produces no token *)
	    continue());

<INITIAL>"&#"[0-9]+";"
	=> ((* numeric character reference, passed on with its full text *)
	    T.CHAR_REF(yytext, !yylineno, !yylineno));

<INITIAL>"&"{tag}";"
	=> ((* entity reference, passed on with its full text *)
	    T.ENTITY_REF(yytext, !yylineno, !yylineno));

<INITIAL>"\n"
	=> ((* a lone line break or blank directly before the next tag or
	     * reference is skipped; inside a longer run of text the longer
	     * character-data match below wins and the whitespace is kept
	     *)
	    continue());
<INITIAL>{ws}
	=> (continue());

<INITIAL>[^<&]+
	=> ((* Character data ends at an ampersand as well as at an opening
	     * angle bracket.  The longest match wins, so a run that is allowed
	     * to contain ampersands swallows every reference that has text next
	     * to it, and the reference rules above would then fire only for a
	     * reference sitting directly between two tags.
	     * NOTE review: text holding references now arrives as several
	     * adjacent tokens instead of one; confirm the grammar accepts a
	     * sequence of PCDATA, CHAR_REF and ENTITY_REF tokens.
	     *)
	    T.PCDATA(yytext, !yylineno, !yylineno));
<INITIAL>"&"
	=> ((* an ampersand that does not begin a reference is ordinary text;
	     * the reference rules are longer matches and so take precedence
	     *)
	    T.PCDATA(yytext, !yylineno, !yylineno));
<INITIAL>.
	=> ((* the only character that can reach this rule is an opening angle
	     * bracket that starts neither a tag, an end tag nor a comment
	     *)
	    errorFn(concat[
		"bogus character #\"", Char.toString(String.sub(yytext, 0)),
		"\" in PCDATA\n"
	      ], !yylineno, !yylineno+1);
	    continue());