/usr/share/scheme48-1.9/r6rs/bytevector-string.scm is in scheme48 1.9-5.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 | ; Part of Scheme 48 1.9. See file COPYING for notices and license.
; Authors: Harald Glab-Phlak, Mike Sperber
(define (string->utf8 string)
(enc:string->utf-8 string))
; If the bytevector begins with the three-byte sequence #xef #xbb
; #xbf, then those bytes are ignored. (They are conventionally used
; as a signature to indicate UTF-8 encoding. The string->utf8
; procedure does not emit those bytes, but UTF-8 encodings produced by
; other sources may contain them.)
(define (replacement-character)
(integer->char #xfffd))
(define (begins-with-utf8-bom? bv)
(let* ((n (bytevector-length bv)))
(and (<= 3 n)
(= #xef (bytevector-u8-ref bv 0))
(= #xbb (bytevector-u8-ref bv 1))
(= #xbf (bytevector-u8-ref bv 2)))))
(define (utf8->string bv)
(if (begins-with-utf8-bom? bv)
(let ((start 3)
(count (- (bytevector-length bv) 3)))
(enc:utf-8->string-n bv start count (replacement-character)))
(enc:utf-8->string bv (replacement-character))))
(define string->utf16
(opt-lambda (string (endness #f))
(let ((text-codec
(case endness
((#f big) utf-16be-codec)
((little) utf-16le-codec)
(else (endianness-violation 'string->utf16 endness)))))
(enc:string->bytes text-codec string))))
(define (maybe-utf16-bom bytevector n)
(and (<= 2 n)
(let ((b0 (bytevector-u8-ref bytevector 0))
(b1 (bytevector-u8-ref bytevector 1)))
(or (and (= b0 #xfe) (= b1 #xff) (endianness big))
(and (= b0 #xff) (= b1 #xfe) (endianness little))))))
(define utf16->string
(opt-lambda (bytevector endness (endianness-mandatory? #f))
(let ((n (bytevector-length bytevector)))
(call-with-values
(lambda ()
(cond
(endianness-mandatory? (values endness 0))
((maybe-utf16-bom bytevector n)
=> (lambda (endness)
(values endness 2)))
(else (values endness 0))))
(lambda (endness start)
(let ((text-codec (case endness
((big) utf-16be-codec)
((little) utf-16le-codec)
(else
(endianness-violation 'utf16->string endness))))
(conv-len (- n start)))
(if (not (zero? (remainder n 2)))
(assertion-violation 'utf16->string "Bytevector has bad length." bytevector))
(enc:bytes->string-n text-codec bytevector start conv-len (replacement-character))))))))
; There is no utf-32-codec, so we can't use textual i/o for this.
(define string->utf32
(opt-lambda (string (endness #f))
(let ((text-codec (case endness
((#f big) utf-32be-codec)
((little) utf-32le-codec)
(else (endianness-violation 'string->utf32 endness)))))
(enc:string->bytes text-codec string))))
(define (maybe-utf32-bom bytevector n)
(and (<= 4 n)
(let ((b0 (bytevector-u8-ref bytevector 0))
(b1 (bytevector-u8-ref bytevector 1))
(b2 (bytevector-u8-ref bytevector 2))
(b3 (bytevector-u8-ref bytevector 3)))
(or (and (= b0 0) (= b1 0) (= b2 #xfe) (= b3 #xff)
(endianness big))
(and (= b0 #xff) (= b1 #xfe) (= b2 0) (= b3 0)
(endianness little))))))
(define utf32->string
(opt-lambda (bytevector endness (endianness-mandatory? #f))
(let ((n (bytevector-length bytevector)))
(call-with-values
(lambda ()
(cond
(endianness-mandatory? (values endness 0))
((maybe-utf32-bom bytevector n)
=> (lambda (endness)
(values endness 4)))
(else (values endness 0))))
(lambda (endness start)
(let ((text-codec (case endness
((big) utf-32be-codec)
((little) utf-32le-codec)
(else
(endianness-violation 'utf32->string endness))))
(conv-len (- n start)))
(if (not (zero? (remainder n 4)))
(assertion-violation 'utf32->string "Bytevector has bad length." bytevector))
(enc:bytes->string-n text-codec bytevector start conv-len (replacement-character))))))))
(define (endianness-violation who what)
(assertion-violation who "bad endianness" what))
|