/usr/share/guile/site/sxml/unicode.scm is in guile-library 0.2.1-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
;; (sxml unicode) -- rendering unicode to byte strings
;; Copyright (C) 2008 Andy Wingo <wingo at pobox dot com>
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;;
;; Routines for encoding unicode codepoints into byte strings.
;;
;;; Code:
;; Declare the (sxml unicode) module and export unichar->utf-8 from it.
(define-module (sxml unicode)
#:export (unichar->utf-8))
;; Encode the integer code point U as UTF-8.
;;
;; The result is a string holding one character per encoded byte; each
;; character is produced with `integer->char' from the byte's numeric
;; value (so the string has 1 to 4 characters).
;;
;; An error is signalled, with U as the irritant, when U is negative,
;; lies in the surrogate range #xD800..#xDFFF, or is #x110000 or above.
(define (unichar->utf-8 u)
  ;; Leading byte: TAG supplies the length-marker high bits, and the
  ;; payload is the bits of U selected by MASK after shifting by SHIFT
  ;; (a negative SHIFT shifts right).
  (define (lead-byte tag mask shift)
    (integer->char (logior tag (logand mask (ash u shift)))))
  ;; Continuation byte: #b10xxxxxx carrying six bits of U.
  (define (cont-byte shift)
    (integer->char (logior #x80 (logand #x3f (ash u shift)))))
  ;; The three-byte form is shared by both BMP ranges that flank the
  ;; surrogate block.
  (define (three-byte-form)
    (string (lead-byte #xe0 #x0f -12)
            (cont-byte -6)
            (cont-byte 0)))
  (define (reject)
    (error "bad unicode code point" u))
  ;; Clauses are tried in ascending order, so each one only has to test
  ;; the upper bound of its range.
  (cond
   ((< u 0)       (reject))
   ((< u #x80)    (string (integer->char u)))
   ((< u #x800)   (string (lead-byte #xc0 #x1f -6)
                          (cont-byte 0)))
   ((< u #xd800)  (three-byte-form))
   ((< u #xe000)  (reject))             ; UTF-16 surrogates
   ((< u #x10000) (three-byte-form))
   ((< u #x110000) (string (lead-byte #xf0 #x07 -18)
                           (cont-byte -12)
                           (cont-byte -6)
                           (cont-byte 0)))
   (else (reject))))
|