This file is indexed.

/usr/share/racket/collects/net/uri-codec.rkt is in racket-common 6.1-4.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
#lang racket/base

#|
People used to wonder why semicolons were the default.  We then
decided to switch the default back to ampersands --

  http://www.w3.org/TR/html401/appendix/notes.html#h-B.2.2

  We recommend that HTTP server implementors, and in particular, CGI
  implementors support the use of ";" in place of "&" to save authors
  the trouble of escaping "&" characters in this manner.

See more in PR8831.
|#

;;; <uri-codec.rkt> ---- En/Decode URLs and form-urlencoded data
;;; Time-stamp: <03/04/25 10:31:31 noel>
;;;
;;; Copyright (C) 2002 by Noel Welsh.
;;;
;;; This file is part of Net.

;;; Net is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public
;;; License as published by the Free Software Foundation; either
;;; version 2.1 of the License, or (at your option) any later version.

;;; Net is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Lesser General Public License for more details.

;;; You should have received a copy of the GNU Lesser General Public
;;; License along with Net; if not, write to the Free Software
;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
;;; 02110-1301 USA.

;;; Author: Noel Welsh <noelwelsh@yahoo.com>
;;
;; Commentary:

;; The module provides functions to encode and decode strings using
;; the URI encoding rules given in RFC 2396, and to encode and decode
;; name/value pairs using the application/x-www-form-urlencoded
;; mimetype given the in HTML 4.0 specification.  There are minor
;; differences between the two encodings.

;; The URI encoding uses allows a few characters to be represented `as
;; is': a-z, A-Z, 0-9, -, _, ., !, ~, *, ', ( and ).  The remaining
;; characters are encoded as %xx, where xx is the hex representation
;; of the integer value of the character (where the mapping
;; character<->integer is determined by US-ASCII if the integer is
;; <128).

;; The encoding, inline with RFC 2396's recommendation, represents a
;; character as is, if possible.  The decoding allows any characters
;; to be represented by their hex values, and allows characters to be
;; incorrectly represented `as is'.

;; The rules for the application/x-www-form-urlencoded mimetype given
;; in the HTML 4.0 spec are:

;;   1. Control names and values are escaped. Space characters are
;;   replaced by `+', and then reserved characters are escaped as
;;   described in [RFC1738], section 2.2: Non-alphanumeric characters
;;   are replaced by `%HH', a percent sign and two hexadecimal digits
;;   representing the ASCII code of the character. Line breaks are
;;   represented as "CR LF" pairs (i.e., `%0D%0A').

;;   2. The control names/values are listed in the order they appear
;;   in the document. The name is separated from the value by `=' and
;;   name/value pairs are separated from each other by `&'.

;; NB: RFC 2396 supersedes RFC 1738.

;; This differs slightly from the straight encoding in RFC 2396 in
;; that `+' is allowed, and represents a space.  We follow this
;; convention, encoding a space as `+' and decoding `+' as a space.
;; There appear to be some brain-dead decoders on the web, so we also
;; encode `!', `~', `'', `(' and ')' using their hex representation.
;; This is the same choice as made by the Java URLEncoder.

;; Draws inspiration from encode-decode.scm by Kurt Normark and a code
;; sample provided by Eli Barzilay

(require racket/string racket/list)

(provide uri-encode                         uri-decode
         uri-path-segment-encode            uri-path-segment-decode
         uri-userinfo-encode                uri-userinfo-decode
         uri-unreserved-encode              uri-unreserved-decode
         uri-path-segment-unreserved-encode uri-path-segment-unreserved-decode
         form-urlencoded-encode             form-urlencoded-decode
         alist->form-urlencoded             form-urlencoded->alist
         current-alist-separator-mode)

(define (self-map-chars str) (map (λ (ch) (cons ch ch)) (string->list str)))

;; The characters that always map to themselves
(define alphanumeric-mapping
  (self-map-chars
   "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"))

;; Characters that sometimes map to themselves
;; called 'mark' in RFC 3986
(define safe-mapping (self-map-chars "-_.!~*'()"))

;; The strict URI mapping
(define uri-mapping `(,@alphanumeric-mapping ,@safe-mapping))

;; The uri path segment mapping from RFC 3986
(define path-segment-extra-mapping (self-map-chars "@+,=$&:"))
(define uri-path-segment-mapping `(,@uri-mapping ,@path-segment-extra-mapping))

;; from RFC 3986
(define unreserved-mapping `(,@alphanumeric-mapping ,@(self-map-chars "-._~")))

;; The uri path segment mapping from RFC 3986
(define uri-path-segment-unreserved-mapping
  `(,@unreserved-mapping ,@path-segment-extra-mapping))

;; from RFC 3986
(define sub-delims-mapping (self-map-chars "!$&'()*+,;="))

;; The uri userinfo mapping from RFC 3986
(define uri-userinfo-mapping
  `(,@unreserved-mapping ,@sub-delims-mapping ,@(self-map-chars ":")))

;; The form-urlencoded mapping
(define form-urlencoded-mapping
  `(,@(self-map-chars ".-*_") (#\space . #\+) ,@alphanumeric-mapping))

(define (number->hex-string number)
  (define (hex n) (string-ref "0123456789ABCDEF" n))
  (string #\% (hex (quotient number 16)) (hex (modulo number 16))))

(define ascii-size 128)

;; (listof (cons char char)) -> (values (vectorof string) (vectorof int))
(define (make-codec-tables alist)
  (define encoding-table (build-vector ascii-size number->hex-string))
  (define decoding-table (build-vector ascii-size values))
  (for ([orig+enc (in-list alist)])
    (vector-set! encoding-table
                 (char->integer (car orig+enc))
                 (string (cdr orig+enc)))
    (vector-set! decoding-table
                 (char->integer (cdr orig+enc))
                 (char->integer (car orig+enc))))
  (values encoding-table decoding-table))

;; vector string -> string
(define (encode table str)
  ;; First, check for an ASCII string with no conversion needed:
  (if (for/and ([char (in-string str)])
        (define v (char->integer char))
        (and (< v ascii-size)
             (let ([s (vector-ref table v)])
               (and (= 1 (string-length s))
                    (eq? char (string-ref s 0))))))
      str
      (string-append* (for/list ([byte (in-bytes (string->bytes/utf-8 str))])
                        (if (< byte ascii-size)
                            (vector-ref table byte)
                            (number->hex-string byte))))))

;; vector string -> string
(define (decode table str)
  (define max-ascii (integer->char ascii-size))
  ;; internal-decode : list -> listof[byte]
  (define (internal-decode l)
    (if (null? l) '()
        (let* ([c (car l)] [l (cdr l)]
               [hex (and (equal? #\% c) (pair? l) (pair? (cdr l))
                         (string->number (string (car l) (cadr l)) 16))]
               [rest (internal-decode (if hex (cddr l) l))])
          (cond [hex (cons hex rest)]
                [(char<? c max-ascii) (cons (vector-ref table (char->integer c))
                                            rest)]
                ;; This should probably error, but strings to be decoded might
                ;; come from misbehaving sources; maybe it's better to add some
                ;; parameter for a permissive mode; one source of such bad URLs
                ;; is user-defined strings where the string is entered directly
                ;; and not properly encoded -- similar justification to
                ;; browsers accepting unencoded chars in manually entered URLs.
                [else (append (bytes->list (string->bytes/utf-8 (string c)))
                              rest)]))))
  (bytes->string/utf-8 (apply bytes (internal-decode (string->list str)))))

;; Utility for defining codecs
(define-syntax-rule (define-codecs [encoder decoder mapping] ...)
  (begin (define-values [encoder decoder]
           (let-values ([(v:en v:de) (make-codec-tables mapping)])
             (define (encoder str) (encode v:en str))
             (define (decoder str) (decode v:de str))
             (values encoder decoder)))
         ...))

;; All of these are string -> string
(define-codecs
  [uri-encode uri-decode uri-mapping]
  [uri-path-segment-encode uri-path-segment-decode uri-path-segment-mapping]
  [uri-userinfo-encode uri-userinfo-decode uri-userinfo-mapping]
  [uri-unreserved-encode uri-unreserved-decode unreserved-mapping]
  [uri-path-segment-unreserved-encode uri-path-segment-unreserved-decode
   uri-path-segment-unreserved-mapping]
  [form-urlencoded-encode form-urlencoded-decode form-urlencoded-mapping])

;; listof (cons string string) -> string
;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris
;; listof (cons symbol string) -> string
(define (alist->form-urlencoded args)
  (string-join
   (for/list ([arg (in-list args)])
     (define name  (form-urlencoded-encode (symbol->string (car arg))))
     (define value (and (cdr arg) (form-urlencoded-encode (cdr arg))))
     (if value (string-append name "=" value) name))
   (if (memq (current-alist-separator-mode) '(semi semi-or-amp)) ";" "&")))

;; string -> listof (cons string string)
;; http://www.w3.org/TR/html401/appendix/notes.html#ampersands-in-uris
(define (form-urlencoded->alist str)
  (define sep-regexp
    (case (current-alist-separator-mode)
      [(semi) #rx"[;]"]
      [(amp) #rx"[&]"]
      [else #rx"[&;]"]))
  (if (equal? "" str) '()
      (for/list ([keyval (in-list (regexp-split sep-regexp str))])
        ;; m = #f => no "=..." part
        (define m (regexp-match-positions #rx"=" keyval))
        (cons (string->symbol (form-urlencoded-decode
                               (if m (substring keyval 0 (caar m)) keyval)))
              (and m (form-urlencoded-decode (substring keyval (cdar m))))))))

(define current-alist-separator-mode
  (make-parameter 'amp-or-semi
    (lambda (s)
      (unless (memq s '(amp semi amp-or-semi semi-or-amp))
        (raise-type-error 'current-alist-separator-mode
                          "'amp, 'semi, 'amp-or-semi, or 'semi-or-amp"
                          s))
      s)))

;;; uri-codec.rkt ends here