This file is indexed.

/usr/lib/nodejs/iconv-lite/generation/gen-dbcs.js is in node-iconv-lite 0.4.13-2ubuntu1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
var utils = require("./utils"), 
    errTo = require("errto"),
    async = require("async");

async.parallel({
    $big5: utils.getFile.bind(null, "http://encoding.spec.whatwg.org/index-big5.txt"), // Encodings with $ are not saved. They are used to calculate other encs.
    $gbk:  utils.getFile.bind(null, "http://encoding.spec.whatwg.org/index-gb18030.txt"),
    $gbRanges: utils.getFile.bind(null, "http://encoding.spec.whatwg.org/index-gb18030-ranges.txt"),
    $eucKr: utils.getFile.bind(null, "http://encoding.spec.whatwg.org/index-euc-kr.txt"),
    $jis0208: utils.getFile.bind(null, "http://encoding.spec.whatwg.org/index-jis0208.txt"),
    $jis0212: utils.getFile.bind(null, "http://encoding.spec.whatwg.org/index-jis0212.txt"),
    $cp932: utils.getFile.bind(null, "http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT"),
    cp936: utils.getFile.bind(null, "http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT"),
    cp949: utils.getFile.bind(null, "http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT"),
    cp950: utils.getFile.bind(null, "http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT"),
}, errTo(console.log, function(data) {
    // First, parse all files.
    for (var enc in data) {
        var dbcs = {};
        utils.parseText(data[enc]).map(function(a) {
            var dbcsCode = parseInt(a[0]);
            var unicode = parseInt(a[1]);
            if (!isNaN(unicode))
                dbcs[dbcsCode] = unicode;
        });
        data[enc] = dbcs;
    }

    // Calculate difference between big5 and cp950, and write it to a file.
    // See http://encoding.spec.whatwg.org/#big5-encoder
    var big5add = {}
    for (var i = 0x8100; i < 0x10000; i++) { // Lead byte is 0x81 .. 0xFE
        var trail = i & 0xFF;
        if (trail < 0x40 || (0x7E < trail && trail < 0xA1) || trail > 0xFE) continue;
        var lead = i >> 8;
        var offset = (trail < 0x7F) ? 0x40 : 0x62;
        var pointer = (lead - 0x81) * 157 + (trail - offset); 
        var cpChar = data.cp950[i];
        var big5Char = data.$big5[pointer];
        if (big5Char !== undefined && cpChar != big5Char)
            big5add[i] = big5Char;
    }

    // Add char sequences that are not in the index file (as given in http://encoding.spec.whatwg.org/#big5-encoder)
    function toIdx(pointer) { var trail = pointer % 157; var lead = Math.floor(pointer / 157) + 0x81; return (lead << 8) + (trail + (trail < 0x3F ? 0x40 : 0x62))}
    big5add[toIdx(1133)] = [0x00CA, 0x0304];
    big5add[toIdx(1135)] = [0x00CA, 0x030C];
    big5add[toIdx(1164)] = [0x00EA, 0x0304];
    big5add[toIdx(1166)] = [0x00EA, 0x030C];

    utils.writeTable("big5-added", utils.generateTable(big5add));

    // Calculate difference between GB18030 encoding and cp936.
    // See http://encoding.spec.whatwg.org/#gb18030-encoder
    var gbkadd = {}
    for (var i = 0x8100; i < 0x10000; i++) { // Lead byte is 0x81 .. 0xFE
        var trail = i & 0xFF;
        if (trail < 0x40 || trail === 0x7F || trail > 0xFE) continue;
        var lead = i >> 8;
        var offset = (trail < 0x7F) ? 0x40 : 0x41;
        var gbAddr = (lead - 0x81) * 190 + (trail - offset); 
        var cpChar = data.cp936[i];
        var gbChar = data.$gbk[gbAddr];
        if ((cpChar !== undefined) && (cpChar != gbChar))
            console.log("Dont match: ", i.toString(16), gbAddr.toString(16), gbChar, cpChar);

        if (gbChar !== undefined && cpChar != gbChar)
            gbkadd[i] = gbChar;
    }

    utils.writeTable("gbk-added", utils.generateTable(gbkadd));

    // Write GB18030 ranges
    var ranges = { uChars: [], gbChars: [] };
    for (var k in data.$gbRanges) {
        ranges.uChars.push(data.$gbRanges[k]);
        ranges.gbChars.push(+k);
    }
    utils.writeFile("gb18030-ranges", JSON.stringify(ranges));


    // Use http://encoding.spec.whatwg.org/#shift_jis-decoder
    var shiftjis = {};
    for (var i = 0; i <= 0x80; i++)
        shiftjis[i] = i;
    for (var i = 0xA1; i <= 0xDF; i++)
        shiftjis[i] = 0xFF61 + i - 0xA1;

    for (var lead = 0x81; lead < 0xFF; lead++)
        if (lead < 0xA1 || lead > 0xDF)
            for (var byte = 0; byte < 0xFF; byte++) {
                var offset = (byte < 0x7F) ? 0x40 : 0x41;
                var leadOffset = (lead < 0xA0) ? 0x81 : 0xC1;
                if ((0x40 <= byte && byte <= 0x7E) || (0x80 <= byte && byte <= 0xFC)) {
                    var pointer = (lead - leadOffset) * 188 + byte - offset;
                    if (data.$jis0208[pointer])
                        shiftjis[(lead << 8) + byte] = data.$jis0208[pointer];
                    else if (8836 <= pointer && pointer <= 10528)
                        shiftjis[(lead << 8) + byte] = 0xE000 + pointer - 8836; // Interoperable legacy from Windows known as EUDC
                }
            }

    utils.writeTable("shiftjis", utils.generateTable(shiftjis));

    // Fill out EUC-JP table according to http://encoding.spec.whatwg.org/#euc-jp
    var eucJp = {};
    for (var i = 0; i < 0x80; i++)
        eucJp[i] = i;
    for (var i = 0xA1; i <= 0xDF; i++)
        eucJp[(0x8E << 8) + i] = 0xFF61 + i - 0xA1;
    for (var i = 0xA1; i <= 0xFE; i++)
        for (var j = 0xA1; j <= 0xFE; j++) {
            eucJp[               (i << 8) + j] = data.$jis0208[(i - 0xA1) * 94 + (j - 0xA1)];
            eucJp[(0x8F << 16) + (i << 8) + j] = data.$jis0212[(i - 0xA1) * 94 + (j - 0xA1)];
        }
            
    utils.writeTable("eucjp", utils.generateTable(eucJp, 3));


    // Fill out EUC-KR Table and check that it is the same as cp949.
    var eucKr = {};
    for (var i = 0; i < 0x80; i++)
        eucKr[i] = i;
    for (var i = 0x8100; i < 0xFF00; i++) {
        var lead = i >> 8, byte = i & 0xFF, ptr = null, t;
        if (lead <= 0xC6) {
            t = (26+26+126)*(lead-0x81) + byte;
            if (0x41 <= byte && byte <= 0x5A)
                ptr = t - 0x41;
            else if (0x61 <= byte && byte <= 0x7A)
                ptr = t + 26 - 0x61;
            else if (0x81 <= byte && byte <= 0xFE)
                ptr = t + 26 + 26 - 0x81; 
        } else {
            if (0xA1 <= byte && byte <= 0xFE)
                ptr = (26+26+126)*(0xC7-0x81) + (lead-0xC7)*94+(byte-0xA1);
        }
        if (ptr !== null)
            eucKr[i] = data.$eucKr[ptr];

        // Compare with cp949
        if (data.cp949[i] !== eucKr[i])
            console.log("Warning: EUC-KR from Encoding Standard doesn't match with CP949 from Unicode.com: ", i, data.cp949[i], eucKr[i]);
    }


    // Write all plain tables as-is.
    for (var enc in data)
        if (enc[0] != "$") 
            utils.writeTable(enc, utils.generateTable(data[enc]));


    console.log("DBCS encodings regenerated.");
}));