# /etc/w3c/charset.cfg is in w3c-markup-validator 1.3+dfsg-4.
# This file is owned by root:root, with mode 0o644.
# The actual contents of the file can be viewed below.
#
# list of accepted/preferred character encodings
#
# Syntax:
#
# charset/encoding = ? result
#
# Note: charsets and results are normally lowercase (a result may instead
# use the exact name Perl's Encode knows, e.g. MacRoman); actions are uppercase
#
# ? indicates the action to take:
# 1: OK, character encoding supported
# A: OK, character encoding supported; use Encode::Alias to alias it to result
# X: frequent error, e.g. a name starting with x-; ask user to replace it with result
# ERR: a charset we refuse, per some policy. Reason stated after ERR
# e.g.:
# utf-8 = 1
# some-alias = A perl-Encode-name
# odd-alias = X good-alias
# bad_charset = ERR explain reason
utf-8 = 1
utf-16 = 1
utf-16be = 1
utf-16le = 1
iso-8859-1 = 1
iso-8859-2 = 1
iso-8859-3 = 1
iso-8859-4 = 1
iso-8859-5 = 1
iso-8859-6 = 1
# implicit bidi, but character encoding is the same
iso-8859-6-i = A iso-8859-6
iso-8859-7 = 1
iso-8859-8 = 1
# implicit bidi, but character encoding is the same
iso-8859-8-i = A iso-8859-8
iso-8859-9 = 1
iso-8859-10 = 1
iso-8859-11 = 1
# iso-8859-12 doesn't exist (yet?)
iso-8859-13 = 1
iso-8859-14 = 1
iso-8859-15 = 1
iso-8859-16 = 1
us-ascii = 1
iso-2022-jp = 1
shift_jis = 1
euc-jp = 1
gb2312 = 1
big5 = 1
big5-hkscs = 1
iso-2022-kr = 1
euc-kr = 1
gb18030 = 1
# 0xA0 is U+00A0 in ISO-8859-11 but undefined in tis-620
# other than that the character encodings are equivalent
tis-620 = A iso-8859-11
koi8-r = 1
koi8-u = 1
iso-ir-111 = 1
windows-1250 = 1
windows-1251 = 1
windows-1252 = 1
windows-1253 = 1
windows-1254 = 1
windows-1255 = 1
windows-1256 = 1
windows-1257 = 1
# windows-1258 = 1
# Encode::Byte does not know the name 'macintosh', only 'MacRoman'
macintosh = A MacRoman
ks_c_5601-1987 = 1
# Encode only knows the longhand version of 'ksc_5601'
ksc_5601 = A KS_C_5601-1987
x-mac-roman = X macintosh
x-sjis = X shift_jis
iso8859-1 = X iso-8859-1
ascii = X us-ascii
8859_1 = X iso-8859-1
# this one is registered with IANA, but it is better to use only windows-1252
iso-8859-1-Windows-3.1-Latin-1 = X windows-1252