This file is indexed.

/usr/share/ada/adainclude/xmlada/unicode-ces-utf8.ads is in libxmlada5-dev 4.4.2014-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
------------------------------------------------------------------------------
--                     XML/Ada - An XML suite for Ada95                     --
--                                                                          --
--                     Copyright (C) 2001-2014, AdaCore                     --
--                                                                          --
-- This library is free software;  you can redistribute it and/or modify it --
-- under terms of the  GNU General Public License  as published by the Free --
-- Software  Foundation;  either version 3,  or (at your  option) any later --
-- version. This library is distributed in the hope that it will be useful, --
-- but WITHOUT ANY WARRANTY;  without even the implied warranty of MERCHAN- --
-- TABILITY or FITNESS FOR A PARTICULAR PURPOSE.                            --
--                                                                          --
-- You should have received a copy of the GNU General Public License along  --
-- with this program;  see the file COPYING3.  If not, see                  --
-- <http://www.gnu.org/licenses/>.                                          --
--                                                                          --
------------------------------------------------------------------------------

--  This package provides support for Utf8 encoding of characters.
--
--  Characters whose code is less than 128 are encoded as is in the
--  Utf8_String. As a result, such a string is compatible with a standard
--  String whose characters are all standard ASCII (and contains no
--  extended ASCII characters).
--  In that, one of the beauties of UTF-8 (and UTF-16) is that there is no
--  overlap, as opposed to what happens with other encodings. If you search
--  for an ASCII character within a Utf8_String, using the standard string
--  string or array manipulation functions, you will only find that character,
--  and not part of a longer sequence that encodes another character.
--  As a result, all the standard string-manipulation functions will work
--  as is (note however that the 'Length attribute doesn't represent the
--  number of characters in the string, but the number of bytes).
--
--  However, since characters can be encoded on one to six bytes, this means
--  that traversing a string is not as efficient as with other encodings.
--
--  Also, this encoding is not subject to byte-ordering constraints, since this
--  is only a sequence of bytes. It is self-synchronizing, in that you can
--  start anywhere in the string and find a synchronization point easily.

with Unicode.CES.Utf32;
with Unicode.CCS;
with Unchecked_Deallocation;

package Unicode.CES.Utf8 is

   -----------
   -- Types --
   -----------

   subtype Utf8_String is String;
   type Utf8_String_Access is access all Utf8_String;
   --  An UTF8-encoded string.

   -------------------------------------------
   -- Conversion to and from byte sequences --
   -------------------------------------------

   procedure Encode
     (Char   : Unicode_Char;
      Output : in out Byte_Sequence;
      Index  : in out Natural);
   --  Set the byte sequence representing Char in the Utf8 character encoding.
   --  There must remain at least 6 characters in Output if you want to avoid
   --  Constraint_Errors.

   procedure Read
     (Str   : Utf8_String;
      Index : in out Positive;
      Char  : out Unicode_Char);
   --  Return the character starting at location Index in Str, and move Index
   --  to the beginning of the next location
   --  Invalid_Encoding is raised if not valid byte sequence starts at Index.
   --  Incomplete_Encoding is raised if there is not enough characters for
   --  a valid encoding.

   function Width (Char : Unicode_Char) return Natural;
   pragma Inline (Width);
   --  Return the number of bytes occupied by the Utf8 representation of Char

   function Length (Str : Utf8_String) return Natural;
   --  Return the number of characters in Str

   function Utf8_Length (Str : Utf8_String) return Natural renames Length;
   --  Return the number of characters in Str

   function Utf8_Next_Char
      (Str : Utf8_String; Index : Natural) return Natural;
   pragma Inline (Utf8_Next_Char);
   --  Return the location of the next character in Str.
   --  Index must point to the beginning of a character.

   function Utf8_Prev_Char
      (Str : Utf8_String; Index : Natural) return Natural;
   pragma Inline (Utf8_Prev_Char);
   --  Return the start index of the rightmost UTF-8 sequence starting
   --  strictly before Index.
   --  If Index is the start index of an UTF-8 sequence, this returns the
   --  start index of the previous UTF-8 sequence.
   --  If Index falls in the middle of an UTF-8 sequence, this returns the
   --  start index of that sequence.

   procedure Utf8_Get_Char
      (Str : Utf8_String; Index : in out Positive; Char : out Unicode_Char);
   pragma Inline (Utf8_Get_Char);
   --  Similar to read, but sets Char to Unicode_Char'Last in case of
   --  invalid encoding.

   function Utf8_Find_Next_Char
      (Str : Utf8_String; Index : Natural) return Natural;
   pragma Inline (Utf8_Find_Next_Char);
   --  Finds the start of the next UTF8 character in the string after Index.
   --  Index does not have to be at the beginning of a UTF8 character.
   --  If you know you are at the beginning of a UTF8 character, it is more
   --  efficient to use Utf8_Next_Char.

   -------------------------------------------
   -- Conversion to and from Utf32-encoding --
   -------------------------------------------

   function From_Utf32 (Str : Unicode.CES.Utf32.Utf32_LE_String)
      return Utf8_String;
   --  Return a new Utf8-encoded string, from a Utf32-encoded string.

   function To_Utf32 (Str : Utf8_String)
      return Unicode.CES.Utf32.Utf32_LE_String;
   --  Return a new Utf32-encoded string, from a Utf8-encoded string.

   ---------------------------
   -- Byte order conversion --
   ---------------------------

   function To_Unicode_LE
     (Str   : Utf8_String;
      Cs    : Unicode.CCS.Character_Set := Unicode.CCS.Unicode_Character_Set;
      Order : Byte_Order := Default_Byte_Order) return Utf8_String;
   --  Convert str (character set is CS) to a Unicode
   --  little-endian byte-sequence
   --  If Str contains a BOM that indicates an encoding other than Utf8,
   --  Invalid_Encoding is raised.
   --  Order is irrelevant for utf8, but is kept for interface compatibility
   --  with other similar functions.

   function To_CS
     (Str   : Utf8_String;
      Cs    : Unicode.CCS.Character_Set := Unicode.CCS.Unicode_Character_Set;
      Order : Byte_Order := Default_Byte_Order) return Utf8_String;
   --  Convert Str (Unicode) to another character set

   ---------------------
   -- Encoding Scheme --
   ---------------------

   Utf8_Encoding : constant Encoding_Scheme :=
     (BOM    => Utf8_All,
      Read   => Read'Access,
      Width  => Width'Access,
      Encode => Encode_Function'(Encode'Access),
      Length => Length'Access);

   ------------------
   -- Deallocation --
   ------------------

   procedure Free is new Unchecked_Deallocation
     (Utf8_String, Utf8_String_Access);
   --  Free the memory occupied by a utf8-encoded string

end Unicode.CES.Utf8;