This file is indexed.

/usr/include/osmium/utils/stringtable.hpp is in libosmium-dev 0.0~20111213-g7f3500a-1build2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#ifndef OSMIUM_UTILS_STRINGTABLE_HPP
#define OSMIUM_UTILS_STRINGTABLE_HPP

/*

Copyright 2011 Jochen Topf <jochen@topf.org> and others (see README).

This file is part of Osmium (https://github.com/joto/osmium).

Osmium is free software: you can redistribute it and/or modify it under the
terms of the GNU Lesser General Public License or (at your option) the GNU
General Public License as published by the Free Software Foundation, either
version 3 of the Licenses, or (at your option) any later version.

Osmium is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU Lesser General Public License and the GNU
General Public License for more details.

You should have received a copy of the Licenses along with Osmium. If not, see
<http://www.gnu.org/licenses/>.

*/

#include <stdint.h>

#include <algorithm>
#include <iostream>
#include <iterator>
#include <limits>
#include <map>
#include <string>
#include <utility>
#include <vector>

namespace Osmium {

    /**
     * StringTable management for PBF writer
     *
     * All strings are stored as indexes to rows in a StringTable. The StringTable contains
     * one row for each used string, so strings that are used multiple times need to be
     * stored only once. The StringTable is sorted by usage-count, so the most often used
     * string is stored at index 1.
     *
     * Intended usage for one block: call record_string() for every occurrence of a
     * string, call store_stringtable() once, translate the interim ids handed out
     * by record_string() with map_string_id(), then call clear().
     */
    class StringTable {

    public:

        /**
         * type for string IDs (interim and final). Public because it is part of
         * the signatures of record_string() and map_string_id().
         */
        typedef uint16_t string_id_t;

    private:

        /**
         * this is the struct used to build the StringTable. It is stored as
         * the value-part in the strings-map.
         *
         * when a new string is added to the map, its count is set to 0 and
         * the interim_id is set to the current size of the map. This interim_id
         * is then stored into the pbf-objects.
         *
         * before the PrimitiveBlock is serialized, the map is sorted by count
         * and stored into the pbf-StringTable. Afterwards the interim-ids are
         * mapped to the "real" id in the StringTable.
         *
         * this way often used strings get lower ids in the StringTable. As the
         * protobuf-serializer stores numbers in variable bit-lengths, lower
         * IDs means less used space in the resulting file.
         */
        struct string_info {
            /**
             * number of occurrences of this string after the first one
             * (saturates at the maximum of its type instead of wrapping)
             */
            uint16_t count;

            /**
             * an intermediate-id; 0 means "not assigned yet", which is the
             * state of a freshly value-initialized map entry
             */
            string_id_t interim_id;
        };

        /**
         * Ordering used for the sort by usage: deliberately inverted, so that
         * an ascending container holds the highest count first.
         */
        friend bool operator<(const string_info& lhs, const string_info& rhs) {
            return lhs.count > rhs.count;
        }

        /**
         * Interim StringTable, storing all strings that should be written to
         * the StringTable once the block is written to disk.
         */
        typedef std::map<std::string, string_info> string2string_info_t;
        string2string_info_t m_strings;

        /**
         * This vector is used to map the interim IDs to real StringTable IDs after
         * writing all strings to the StringTable. It is indexed by interim ID;
         * slot 0 is unused because interim IDs start at 1.
         */
        typedef std::vector<string_id_t> interim_id2id_t;
        interim_id2id_t m_id2id_map;

        /// number of distinct strings recorded since construction or the last clear()
        int m_size;

    public:

        StringTable() : m_strings(), m_id2id_map(), m_size(0) {
        }

        /**
         * record a string in the interim StringTable if it's missing, otherwise just increase its counter,
         * return the interim-id assigned to the string.
         *
         * The usage counter saturates: once it has reached the largest value a
         * uint16_t can hold it stays there. Letting it wrap around would reset
         * it to 0 and push the most frequently used string to the end of the
         * sorted table.
         *
         * NOTE(review): interim ids are 16 bit as well and nothing here stops
         * more than 65535 distinct strings from being recorded; presumably the
         * caller ends a block long before that - confirm against the writer.
         *
         * @param string the string to record
         * @return interim id of the string (1 for the first distinct string, then 2, ...)
         */
        string_id_t record_string(const std::string& string) {
            // operator[] value-initializes a missing entry, i.e. count == 0 and interim_id == 0
            string_info& info = m_strings[string];
            if (info.interim_id == 0) {
                info.interim_id = ++m_size;
            } else if (info.count < std::numeric_limits<uint16_t>::max()) {
                ++info.count;
            }
            return info.interim_id;
        }

        /**
         * Return a copy of the given pair with its two members swapped.
         */
        template<typename A, typename B>
        static std::pair<B,A> flip_pair(const std::pair<A,B>& p) {
            return std::pair<B,A>(p.second, p.first);
        }

        /**
         * Sort the interim StringTable and store it to the real protobuf StringTable.
         * while storing to the real table, this function fills the id2id_map with
         * pairs, mapping the interim-ids to final and real StringTable ids.
         *
         * Note that the m_strings table is a std::map and as such is sorted lexicographically.
         * When the transformation into the sortedbycount multimap is done, it gets sorted by
         * descending count. The relative order of strings with the same count is
         * whatever the standard library's hinted multimap insertion produces; it
         * is not defined by this class and should not be relied upon.
         *
         * The target type is a template parameter so that this header does not
         * depend on the protobuf-generated header being included first; the
         * type is deduced from the pointer, so passing an OSMPBF::StringTable*
         * works without spelling out the parameter.
         *
         * @tparam TPbfStringTable type offering add_s(const std::string&)
         * @param st table that receives one add_s() call per distinct string,
         *           most frequently used string first; must not be null
         */
        template<typename TPbfStringTable>
        void store_stringtable(TPbfStringTable* st) {
            typedef std::multimap<string_info, std::string> cmap;
            cmap sortedbycount;

            // one slot per interim id plus the unused slot 0
            m_id2id_map.resize(m_size+1);

            // instantiate flip_pair with the map's real key type (const std::string)
            // so that the map entries are not copied into a temporary pair first
            std::transform(m_strings.begin(), m_strings.end(),
                           std::inserter(sortedbycount, sortedbycount.begin()),
                           flip_pair<const std::string, string_info>);

            string_id_t n = 0;
            const cmap::const_iterator end = sortedbycount.end();
            for (cmap::const_iterator it = sortedbycount.begin(); it != end; ++it) {
                // add the string of the current item to the pbf StringTable
                st->add_s(it->second);

                // store the mapping from the interim-id to the real id (real ids start at 1)
                m_id2id_map[it->first.interim_id] = ++n;
            }
        }

        /**
         * Map from an interim ID to a real string ID.
         *
         * Only valid after store_stringtable() and for interim ids returned by
         * record_string() since the last clear(); the lookup is not range-checked.
         */
        string_id_t map_string_id(const string_id_t interim_id) const {
            return m_id2id_map[interim_id];
        }

        /**
         * Clear the stringtable, preparing for the next block.
         *
         * This also drops the interim-id mapping, so that ids of the finished
         * block cannot be translated by accident through stale entries.
         */
        void clear() {
            m_strings.clear();
            m_id2id_map.clear();
            m_size = 0;
        }

    }; // class StringTable

} // namespace Osmium

#endif // OSMIUM_UTILS_STRINGTABLE_HPP