This file is indexed.

/usr/include/phonenumbers/utf/unicodetext.h is in libphonenumber6-dev 6.3~svn698-3+b1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
// Copyright (C) 2006 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Author: Jim Meehan

#ifndef UTIL_UTF8_UNICODETEXT_H__
#define UTIL_UTF8_UNICODETEXT_H__

#include <iterator>
#include <string>
#include <utility>
#include "phonenumbers/base/basictypes.h"

namespace i18n {
namespace phonenumbers {

using std::string;
using std::bidirectional_iterator_tag;
using std::pair;

// ***************************** UnicodeText **************************
//
// A UnicodeText object is a container for a sequence of Unicode
// codepoint values. It has default, copy, and assignment constructors.
// Data can be appended to it from another UnicodeText, from
// iterators, or from a single codepoint.
//
// The internal representation of the text is UTF-8. Since UTF-8 is a
// variable-width format, UnicodeText does not provide random access
// to the text, and changes to the text are permitted only at the end.
//
// The UnicodeText class defines a const_iterator. The dereferencing
// operator (*) returns a codepoint (char32). The iterator is a
// bidirectional, read-only iterator. It becomes invalid if the text
// is changed.
//
// There are methods for appending and retrieving UTF-8 data directly.
// The 'utf8_data' method returns a const char* that contains the
// UTF-8-encoded version of the text; 'utf8_length' returns the number
// of bytes in the UTF-8 data. An iterator's 'get' method stores up to
// 4 bytes of UTF-8 data in a char array and returns the number of
// bytes that it stored.
//
// Codepoints are integers in the range [0, 0xD7FF] or [0xE000,
// 0x10FFFF], but UnicodeText has the additional restriction that it
// can contain only those characters that are valid for interchange on
// the Web. This excludes all of the control codes except for carriage
// return, line feed, and horizontal tab.  It also excludes
// non-characters, but codepoints that are in the Private Use regions
// are allowed, as are codepoints that are unassigned. (See the
// Unicode reference for details.) The function UniLib::IsInterchangeValid
// can be used as a test for this property.
//
// UnicodeTexts are safe. Every method that constructs or modifies a
// UnicodeText tests for interchange-validity, and will substitute a
// space for the invalid data. Such cases are reported via
// LOG(WARNING).
//
// MEMORY MANAGEMENT: copy, take ownership, or point to
//
// A UnicodeText is either an "owner", meaning that it owns the memory
// for the data buffer and will free it when the UnicodeText is
// destroyed, or it is an "alias", meaning that it does not.
//
// There are three methods for storing UTF-8 data in a UnicodeText:
//
// CopyUTF8(buffer, len) copies buffer.
//
// TakeOwnershipOfUTF8(buffer, size, capacity) takes ownership of buffer.
//
// PointToUTF8(buffer, size) creates an alias pointing to buffer.
//
// All three methods perform a validity check on the buffer. There are
// private, "unsafe" versions of these functions that bypass the
// validity check. They are used internally and by friend-functions
// that are handling UTF-8 data that has already been validated.
//
// The purpose of an alias is to avoid making an unnecessary copy of a
// UTF-8 buffer while still providing access to the Unicode values
// within that text through iterators or the fast scanners that are
// based on UTF-8 state tables. The lifetime of an alias must not
// exceed the lifetime of the buffer from which it was constructed.
//
// The semantics of an alias might be described as "copy on write or
// repair." The source data is never modified. If push_back() or
// append() is called on an alias, a copy of the data will be created,
// and the UnicodeText will become an owner. If clear() is called on
// an alias, it becomes an (empty) owner.
//
// The copy constructor and the assignment operator produce an owner.
// That is, after direct initialization ("UnicodeText x(y);") or copy
// initialization ("UnicodeText x = y;") x will be an owner, even if y
// was an alias. The assignment operator ("x = y;") also produces an
// owner unless x and y are the same object and y is an alias.
//
// Aliases should be used with care. If the source from which an alias
// was created is freed, or if the contents are changed, while the
// alias is still in use, fatal errors could result. But it can be
// quite useful to have a UnicodeText "window" through which to see a
// UTF-8 buffer without having to pay the price of making a copy.
//
// UTILITIES
//
// The interfaces in util/utf8/public/textutils.h provide higher-level
// utilities for dealing with UnicodeTexts, including routines for
// creating UnicodeTexts (both owners and aliases) from UTF-8 buffers or
// strings, creating strings from UnicodeTexts, normalizing text for
// efficient matching or display, and others.

class UnicodeText {
 public:
  class const_iterator;

  typedef char32 value_type;

  // Constructors. These always produce owners.
  UnicodeText();  // Create an empty text.
  UnicodeText(const UnicodeText& src);  // copy constructor
  // Construct a substring (copies the data).
  UnicodeText(const const_iterator& first, const const_iterator& last);

  // Assignment operator. This copies the data and produces an owner
  // unless this == &src, e.g., "x = x;", which is a no-op.
  UnicodeText& operator=(const UnicodeText& src);

  // x.Copy(y) copies the data from y into x.
  UnicodeText& Copy(const UnicodeText& src);
  inline UnicodeText& assign(const UnicodeText& src) { return Copy(src); }

  // x.PointTo(y) changes x so that it points to y's data.
  // It does not copy y or take ownership of y's data.
  UnicodeText& PointTo(const UnicodeText& src);
  UnicodeText& PointTo(const const_iterator& first,
                       const const_iterator& last);

  ~UnicodeText();

  void clear();  // Clear text.
  bool empty() { return repr_.size_ == 0; }  // Test if text is empty.

  // Add a codepoint to the end of the text.
  // If the codepoint is not interchange-valid, add a space instead
  // and log a warning.
  void push_back(char32 codepoint);

  // Generic appending operation.
  // iterator_traits<ForwardIterator>::value_type must be implicitly
  // convertible to char32. Typical uses of this method might include:
  //     char32 chars[] = {0x1, 0x2, ...};
  //     vector<char32> more_chars = ...;
  //     utext.append(chars, chars+arraysize(chars));
  //     utext.append(more_chars.begin(), more_chars.end());
  template<typename ForwardIterator>
  UnicodeText& append(ForwardIterator first, const ForwardIterator last) {
    while (first != last) { push_back(*first++); }
    return *this;
  }

  // A specialization of the generic append() method.
  UnicodeText& append(const const_iterator& first, const const_iterator& last);

  // An optimization of append(source.begin(), source.end()).
  UnicodeText& append(const UnicodeText& source);

  int size() const;  // the number of Unicode characters (codepoints)

  friend bool operator==(const UnicodeText& lhs, const UnicodeText& rhs);
  friend bool operator!=(const UnicodeText& lhs, const UnicodeText& rhs);

  class const_iterator {
    typedef const_iterator CI;
   public:
    typedef bidirectional_iterator_tag iterator_category;
    typedef char32 value_type;
    typedef ptrdiff_t difference_type;
    typedef void pointer;  // (Not needed.)
    typedef const char32 reference;  // (Needed for const_reverse_iterator)

    // Iterators are default-constructible.
    const_iterator();

    // It's safe to make multiple passes over a UnicodeText.
    const_iterator(const const_iterator& other);
    const_iterator& operator=(const const_iterator& other);

    char32 operator*() const;  // Dereference

    const_iterator& operator++();  // Advance (++iter)
    const_iterator operator++(int) {  // (iter++)
      const_iterator result(*this);
      ++*this;
      return result;
    }

    const_iterator& operator--();  // Retreat (--iter)
    const_iterator operator--(int) {  // (iter--)
      const_iterator result(*this);
      --*this;
      return result;
    }

    // We love relational operators.
    friend bool operator==(const CI& lhs, const CI& rhs) {
      return lhs.it_ == rhs.it_; }
    friend bool operator!=(const CI& lhs, const CI& rhs) {
      return !(lhs == rhs); }
    friend bool operator<(const CI& lhs, const CI& rhs);
    friend bool operator>(const CI& lhs, const CI& rhs) {
      return rhs < lhs; }
    friend bool operator<=(const CI& lhs, const CI& rhs) {
      return !(rhs < lhs); }
    friend bool operator>=(const CI& lhs, const CI& rhs) {
      return !(lhs < rhs); }

    friend difference_type distance(const CI& first, const CI& last);

    // UTF-8-specific methods
    // Store the UTF-8 encoding of the current codepoint into buf,
    // which must be at least 4 bytes long. Return the number of
    // bytes written.
    int get_utf8(char* buf) const;
    // Return the iterator's pointer into the UTF-8 data.
    const char* utf8_data() const { return it_; }

    string DebugString() const;

   private:
    friend class UnicodeText;
    friend class UnicodeTextUtils;
    friend class UTF8StateTableProperty;
    explicit const_iterator(const char* it) : it_(it) {}

    const char* it_;
  };

  const_iterator begin() const;
  const_iterator end() const;

  class const_reverse_iterator : public std::reverse_iterator<const_iterator> {
   public:
    const_reverse_iterator(const_iterator it) :
        std::reverse_iterator<const_iterator>(it) {}
    const char* utf8_data() const {
      const_iterator tmp_it = base();
      return (--tmp_it).utf8_data();
    }
    int get_utf8(char* buf) const {
      const_iterator tmp_it = base();
      return (--tmp_it).get_utf8(buf);
    }
  };
  const_reverse_iterator rbegin() const {
    return const_reverse_iterator(end());
  }
  const_reverse_iterator rend() const {
    return const_reverse_iterator(begin());
  }

  // Substring searching.  Returns the beginning of the first
  // occurrence of "look", or end() if not found.
  const_iterator find(const UnicodeText& look, const_iterator start_pos) const;
  // Equivalent to find(look, begin())
  const_iterator find(const UnicodeText& look) const;

  // Returns whether this contains the character U+FFFD.  This can
  // occur, for example, if the input to Encodings::Decode() had byte
  // sequences that were invalid in the source encoding.
  bool HasReplacementChar() const;

  // UTF-8-specific methods
  //
  // Return the data, length, and capacity of UTF-8-encoded version of
  // the text. Length and capacity are measured in bytes.
  const char* utf8_data() const { return repr_.data_; }
  int utf8_length() const { return repr_.size_; }
  int utf8_capacity() const { return repr_.capacity_; }

  // Return the UTF-8 data as a string.
  static string UTF8Substring(const const_iterator& first,
                              const const_iterator& last);

  // There are three methods for initializing a UnicodeText from UTF-8
  // data. They vary in details of memory management. In all cases,
  // the data is tested for interchange-validity. If it is not
  // interchange-valid, a LOG(WARNING) is issued, and each
  // structurally invalid byte and each interchange-invalid codepoint
  // is replaced with a space.

  // x.CopyUTF8(buf, len) copies buf into x.
  UnicodeText& CopyUTF8(const char* utf8_buffer, int byte_length);

  // x.TakeOwnershipOfUTF8(buf, len, capacity). x takes ownership of
  // buf. buf is not copied.
  UnicodeText& TakeOwnershipOfUTF8(char* utf8_buffer,
                                   int byte_length,
                                   int byte_capacity);

  // x.PointToUTF8(buf,len) changes x so that it points to buf
  // ("becomes an alias"). It does not take ownership or copy buf.
  // If the buffer is not valid, this has the same effect as
  // CopyUTF8(utf8_buffer, byte_length).
  UnicodeText& PointToUTF8(const char* utf8_buffer, int byte_length);

  // Occasionally it is necessary to use functions that operate on the
  // pointer returned by utf8_data(). MakeIterator(p) provides a way
  // to get back to the UnicodeText level. It uses CHECK to ensure
  // that p is a pointer within this object's UTF-8 data, and that it
  // points to the beginning of a character.
  const_iterator MakeIterator(const char* p) const;

  string DebugString() const;

 private:
  friend class const_iterator;
  friend class UnicodeTextUtils;

  class Repr {  // A byte-string.
   public:
    char* data_;
    int size_;
    int capacity_;
    bool ours_;  // Do we own data_?

    Repr() : data_(NULL), size_(0), capacity_(0), ours_(true) {}
    ~Repr() { if (ours_) delete[] data_; }

    void clear();
    void reserve(int capacity);
    void resize(int size);

    void append(const char* bytes, int byte_length);
    void Copy(const char* data, int size);
    void TakeOwnershipOf(char* data, int size, int capacity);
    void PointTo(const char* data, int size);

    string DebugString() const;

   private:
    Repr& operator=(const Repr&);
    Repr(const Repr& other);
  };

  Repr repr_;

  // UTF-8-specific private methods.
  // These routines do not perform a validity check when compiled
  // in opt mode.
  // It is an error to call these methods with UTF-8 data that
  // is not interchange-valid.
  //
  UnicodeText& UnsafeCopyUTF8(const char* utf8_buffer, int byte_length);
  UnicodeText& UnsafeTakeOwnershipOfUTF8(
      char* utf8_buffer, int byte_length, int byte_capacity);
  UnicodeText& UnsafePointToUTF8(const char* utf8_buffer, int byte_length);
  UnicodeText& UnsafeAppendUTF8(const char* utf8_buffer, int byte_length);
  const_iterator UnsafeFind(const UnicodeText& look,
                            const_iterator start_pos) const;
};

bool operator==(const UnicodeText& lhs, const UnicodeText& rhs);

inline bool operator!=(const UnicodeText& lhs, const UnicodeText& rhs) {
  return !(lhs == rhs);
}

// UnicodeTextRange is a pair of iterators, useful for specifying text
// segments. If the iterators are ==, the segment is empty.
typedef pair<UnicodeText::const_iterator,
             UnicodeText::const_iterator> UnicodeTextRange;

inline bool UnicodeTextRangeIsEmpty(const UnicodeTextRange& r) {
  return r.first == r.second;
}


// *************************** Utilities *************************

// A factory function for creating a UnicodeText from a buffer of
// UTF-8 data. The new UnicodeText takes ownership of the buffer. (It
// is an "owner.")
//
// Each byte that is structurally invalid will be replaced with a
// space. Each codepoint that is interchange-invalid will also be
// replaced with a space, even if the codepoint was represented with a
// multibyte sequence in the UTF-8 data.
//
inline UnicodeText MakeUnicodeTextAcceptingOwnership(
    char* utf8_buffer, int byte_length, int byte_capacity) {
  return UnicodeText().TakeOwnershipOfUTF8(
      utf8_buffer, byte_length, byte_capacity);
}

// A factory function for creating a UnicodeText from a buffer of
// UTF-8 data. The new UnicodeText does not take ownership of the
// buffer. (It is an "alias.")
//
inline UnicodeText MakeUnicodeTextWithoutAcceptingOwnership(
    const char* utf8_buffer, int byte_length) {
  return UnicodeText().PointToUTF8(utf8_buffer, byte_length);
}

// Create a UnicodeText from a UTF-8 string or buffer.
//
// If do_copy is true, then a copy of the string is made. The copy is
// owned by the resulting UnicodeText object and will be freed when
// the object is destroyed. This UnicodeText object is referred to
// as an "owner."
//
// If do_copy is false, then no copy is made. The resulting
// UnicodeText object does NOT take ownership of the string; in this
// case, the lifetime of the UnicodeText object must not exceed the
// lifetime of the string. This Unicodetext object is referred to as
// an "alias." This is the same as MakeUnicodeTextWithoutAcceptingOwnership.
//
// If the input string does not contain valid UTF-8, then a copy is
// made (as if do_copy were true) and coerced to valid UTF-8 by
// replacing each invalid byte with a space.
//
inline UnicodeText UTF8ToUnicodeText(const char* utf8_buf, int len,
                                     bool do_copy) {
  UnicodeText t;
  if (do_copy) {
    t.CopyUTF8(utf8_buf, len);
  } else {
    t.PointToUTF8(utf8_buf, len);
  }
  return t;
}

inline UnicodeText UTF8ToUnicodeText(const string& utf_string, bool do_copy) {
  return UTF8ToUnicodeText(utf_string.data(), utf_string.size(), do_copy);
}

inline UnicodeText UTF8ToUnicodeText(const char* utf8_buf, int len) {
  return UTF8ToUnicodeText(utf8_buf, len, true);
}
inline UnicodeText UTF8ToUnicodeText(const string& utf8_string) {
  return UTF8ToUnicodeText(utf8_string, true);
}

// Return a string containing the UTF-8 encoded version of all the
// Unicode characters in t.
inline string UnicodeTextToUTF8(const UnicodeText& t) {
  return string(t.utf8_data(), t.utf8_length());
}

}  // namespace phonenumbers
}  // namespace i18n

#endif  // UTIL_UTF8_UNICODETEXT_H__