/usr/lib/nodejs/iconv-lite/generation/gen-dbcs.js is in node-iconv-lite 0.4.13-2ubuntu1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
var utils = require("./utils"),
errTo = require("errto"),
async = require("async");
// Download every source mapping file, then build the DBCS tables from them.
//
// Each key names an encoding and each value is `utils.getFile` pre-bound to the
// URL of that encoding's mapping file; the remaining argument(s) are supplied by
// `async.parallel` (presumably a completion callback - confirm against utils.getFile).
// The callback below receives `data`, an object with the same keys, and treats
// each `data[key]` as the raw text of the corresponding file.
//
// Keys starting with "$" are intermediate inputs: they are used to derive other
// tables and are skipped by the final "write all plain tables" step. Keys without
// "$" (cp936, cp949, cp950) are additionally written out as tables of their own.
//
// NOTE(review): `$cp932` is downloaded and parsed, but no later step visible in
// this callback reads it.
async.parallel({
$big5: utils.getFile.bind(null, "http://encoding.spec.whatwg.org/index-big5.txt"), // Encodings with $ are not saved. They are used to calculate other encs.
$gbk: utils.getFile.bind(null, "http://encoding.spec.whatwg.org/index-gb18030.txt"),
$gbRanges: utils.getFile.bind(null, "http://encoding.spec.whatwg.org/index-gb18030-ranges.txt"),
$eucKr: utils.getFile.bind(null, "http://encoding.spec.whatwg.org/index-euc-kr.txt"),
$jis0208: utils.getFile.bind(null, "http://encoding.spec.whatwg.org/index-jis0208.txt"),
$jis0212: utils.getFile.bind(null, "http://encoding.spec.whatwg.org/index-jis0212.txt"),
$cp932: utils.getFile.bind(null, "http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT"),
cp936: utils.getFile.bind(null, "http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT"),
cp949: utils.getFile.bind(null, "http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT"),
cp950: utils.getFile.bind(null, "http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT"),
// `errTo` wraps the success handler; presumably any error is routed to
// console.log instead of reaching the handler - confirm against the errto package.
}, errTo(console.log, function(data) {
// First, parse all files.
// Every downloaded text is replaced in `data` by a map from the numeric value of
// its first column to the numeric value of its second column. Rows whose second
// column is not a number are dropped.
for (var name in data) {
    var rows = utils.parseText(data[name]);
    var table = {};
    rows.map(function(row) {
        // No radix on purpose: the source files mix decimal pointers with
        // "0x"-prefixed hex values, and parseInt auto-detects the prefix.
        var unicode = parseInt(row[1]);
        if (isNaN(unicode))
            return;
        table[parseInt(row[0])] = unicode;
    });
    data[name] = table;
}
// Collect every two-byte code whose Big5 index entry differs from (or is missing in) cp950.
// The pointer arithmetic follows http://encoding.spec.whatwg.org/#big5-encoder
var big5add = {};
for (var hi = 0x81; hi <= 0xFF; hi++) { // Lead byte is 0x81 .. 0xFE (0xFF never hits the index)
    for (var lo = 0x40; lo <= 0xFE; lo++) {
        // Valid trail bytes are 0x40..0x7E and 0xA1..0xFE.
        if (lo > 0x7E && lo < 0xA1)
            continue;

        var idx = (hi - 0x81) * 157 + lo - (lo < 0x7F ? 0x40 : 0x62);
        var fromIndex = data.$big5[idx];
        if (fromIndex === undefined)
            continue;

        var code = (hi << 8) | lo;
        if (data.cp950[code] != fromIndex)
            big5add[code] = fromIndex;
    }
}
// Add char sequences that are not in the index file (as given in http://encoding.spec.whatwg.org/#big5-encoder)
function toIdx(pointer) { var trail = pointer % 157; var lead = Math.floor(pointer / 157) + 0x81; return (lead << 8) + (trail + (trail < 0x3F ? 0x40 : 0x62))}
// Each entry is [pointer, first code point, second code point]: these pointers
// map to a pair of code points instead of a single one.
var big5Sequences = [
    [1133, 0x00CA, 0x0304],
    [1135, 0x00CA, 0x030C],
    [1164, 0x00EA, 0x0304],
    [1166, 0x00EA, 0x030C]
];
for (var s = 0; s < big5Sequences.length; s++)
    big5add[toIdx(big5Sequences[s][0])] = big5Sequences[s].slice(1);

utils.writeTable("big5-added", utils.generateTable(big5add));
// Collect every two-byte code whose GB18030 index entry differs from (or is missing in) cp936,
// logging the codes that cp936 defines with a different value.
// The pointer arithmetic follows http://encoding.spec.whatwg.org/#gb18030-encoder
var gbkadd = {};
for (var hi = 0x81; hi <= 0xFF; hi++) { // Lead byte is 0x81 .. 0xFE (0xFF never hits the index)
    for (var lo = 0x40; lo <= 0xFE; lo++) {
        // Valid trail bytes are 0x40..0xFE except 0x7F.
        if (lo === 0x7F)
            continue;

        var code = (hi << 8) | lo;
        var gbAddr = (hi - 0x81) * 190 + lo - (lo < 0x7F ? 0x40 : 0x41);
        var cpChar = data.cp936[code];
        var gbChar = data.$gbk[gbAddr];

        // Nothing to report or record when both sources agree (or both are missing).
        if (cpChar == gbChar)
            continue;

        if (cpChar !== undefined)
            console.log("Dont match: ", code.toString(16), gbAddr.toString(16), gbChar, cpChar);
        if (gbChar !== undefined)
            gbkadd[code] = gbChar;
    }
}
utils.writeTable("gbk-added", utils.generateTable(gbkadd));
// Write GB18030 ranges as two parallel arrays: uChars[n] is the value stored in
// the parsed ranges file under key gbChars[n].
var gbRangeKeys = Object.keys(data.$gbRanges);
var ranges = {
    uChars: gbRangeKeys.map(function(key) { return data.$gbRanges[key]; }),
    gbChars: gbRangeKeys.map(function(key) { return +key; })
};
utils.writeFile("gb18030-ranges", JSON.stringify(ranges));
// Build the Shift_JIS table following http://encoding.spec.whatwg.org/#shift_jis-decoder
var shiftjis = {};
var single;

// Single bytes 0x00..0x80 map to themselves.
for (single = 0x00; single <= 0x80; single++)
    shiftjis[single] = single;

// Single bytes 0xA1..0xDF map to U+FF61 onwards.
for (single = 0xA1; single <= 0xDF; single++)
    shiftjis[single] = single - 0xA1 + 0xFF61;

// Two-byte codes: lead bytes 0x81..0xFE, excluding the single-byte range 0xA1..0xDF.
for (var hi = 0x81; hi <= 0xFE; hi++) {
    if (hi >= 0xA1 && hi <= 0xDF)
        continue;

    var hiBase = (hi < 0xA0) ? 0x81 : 0xC1;

    // Trail bytes are 0x40..0x7E and 0x80..0xFC.
    for (var lo = 0x40; lo <= 0xFC; lo++) {
        if (lo === 0x7F)
            continue;

        var idx = (hi - hiBase) * 188 + lo - (lo < 0x7F ? 0x40 : 0x41);
        var code = (hi << 8) + lo;
        var mapped = data.$jis0208[idx];

        if (mapped)
            shiftjis[code] = mapped;
        else if (idx >= 8836 && idx <= 10528)
            shiftjis[code] = 0xE000 + idx - 8836; // Interoperable legacy from Windows known as EUDC
    }
}
utils.writeTable("shiftjis", utils.generateTable(shiftjis));
// Fill out EUC-JP table according to http://encoding.spec.whatwg.org/#euc-jp
var eucJp = {};
var b;

// Bytes below 0x80 map to themselves.
for (b = 0; b < 0x80; b++)
    eucJp[b] = b;

// 0x8E followed by 0xA1..0xDF maps to U+FF61 onwards.
for (b = 0xA1; b <= 0xDF; b++)
    eucJp[0x8E00 + b] = 0xFF61 + (b - 0xA1);

// 94x94 grid: two-byte codes read the jis0208 index, and the same two bytes
// prefixed with 0x8F read the jis0212 index. Missing index entries are still
// assigned (as undefined), exactly as the index lookup returns them.
for (var row = 0xA1; row <= 0xFE; row++) {
    for (var cell = 0xA1; cell <= 0xFE; cell++) {
        var gridPos = (row - 0xA1) * 94 + (cell - 0xA1);
        var twoByte = (row << 8) + cell;
        eucJp[twoByte] = data.$jis0208[gridPos];
        eucJp[(0x8F << 16) + twoByte] = data.$jis0212[gridPos];
    }
}
utils.writeTable("eucjp", utils.generateTable(eucJp, 3));
// Fill out EUC-KR Table and check that it is the same as cp949.
// The table built here is only compared against cp949; this step does not write it out.
var eucKr = {};
for (var ascii = 0; ascii < 0x80; ascii++)
    eucKr[ascii] = ascii;

// Returns the euc-kr index pointer for a lead/trail byte pair, or null when the
// pair has no pointer.
var eucKrPointer = function(leadByte, trailByte) {
    if (leadByte > 0xC6) {
        // From lead 0xC7 on, each lead byte has 94 pointers (trail 0xA1..0xFE).
        if (trailByte < 0xA1 || trailByte > 0xFE)
            return null;
        return (26+26+126)*(0xC7-0x81) + (leadByte-0xC7)*94 + (trailByte-0xA1);
    }

    // Up to lead 0xC6, each lead byte has 26 + 26 + 126 pointers, split over three trail ranges.
    var rowBase = (26+26+126)*(leadByte-0x81) + trailByte;
    if (0x41 <= trailByte && trailByte <= 0x5A)
        return rowBase - 0x41;
    if (0x61 <= trailByte && trailByte <= 0x7A)
        return rowBase + 26 - 0x61;
    if (0x81 <= trailByte && trailByte <= 0xFE)
        return rowBase + 26 + 26 - 0x81;
    return null;
};

for (var code = 0x8100; code < 0xFF00; code++) {
    var ptr = eucKrPointer(code >> 8, code & 0xFF);
    if (ptr !== null)
        eucKr[code] = data.$eucKr[ptr];

    // Compare with cp949
    if (data.cp949[code] !== eucKr[code])
        console.log("Warning: EUC-KR from Encoding Standard doesn't match with CP949 from Unicode.com: ", code, data.cp949[code], eucKr[code]);
}
// Write all plain tables as-is.
for (var enc in data)
if (enc[0] != "$")
utils.writeTable(enc, utils.generateTable(data[enc]));
console.log("DBCS encodings regenerated.");
}));
|