This file is indexed.

/etc/w3c/charset.cfg is in w3c-markup-validator 1.3+dfsg-4.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#
# list of accepted/preferred character encodings
#
# Syntax:
#
# charset/encoding      = ? result
#
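# Note: charsets are lowercase and actions are uppercase; results are
# lowercase too, except where action A names a Perl Encode encoding that
# is spelled in mixed case (e.g. MacRoman)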
#
# ? indicates the action to take:
# 1: OK, charset supported
# A: OK, charset supported, Encode::Alias it to result
# X: frequent error, e.g. starting with x-; ask user to replace with result
# ERR: a charset we refuse, per some policy. Reason stated after ERR

# e.g.:
# utf-8         = 1
# some-alias    = A perl-Encode-name
# odd-alias     = X good-alias
# bad_charset   = ERR explain reason

# Charsets accepted under their own name (action 1), together with
# aliases (action A) that are mapped onto another encoding name.

# Unicode encodings
utf-8                           = 1
utf-16                          = 1
utf-16be                        = 1
utf-16le                        = 1
# ISO-8859 family
iso-8859-1                      = 1
iso-8859-2                      = 1
iso-8859-3                      = 1
iso-8859-4                      = 1
iso-8859-5                      = 1
iso-8859-6                      = 1
# implicit bidi, but the character encoding is the same as iso-8859-6
iso-8859-6-i                    = A iso-8859-6
iso-8859-7                      = 1
iso-8859-8                      = 1
# implicit bidi, but the character encoding is the same as iso-8859-8
iso-8859-8-i                    = A iso-8859-8
iso-8859-9                      = 1
iso-8859-10                     = 1
iso-8859-11                     = 1
# iso-8859-12 doesn't exist (yet?)
iso-8859-13                     = 1
iso-8859-14                     = 1
iso-8859-15                     = 1
iso-8859-16                     = 1
us-ascii                        = 1
iso-2022-jp                     = 1
shift_jis                       = 1
euc-jp                          = 1
gb2312                          = 1
big5                            = 1
big5-hkscs                      = 1
iso-2022-kr                     = 1
euc-kr                          = 1
gb18030                         = 1
# 0xA0 is U+00A0 in iso-8859-11 but undefined in tis-620;
# other than that the two character encodings are equivalent,
# so tis-620 is aliased to iso-8859-11
tis-620                         = A iso-8859-11
koi8-r                          = 1
koi8-u                          = 1
iso-ir-111                      = 1
# windows-125x code pages
windows-1250                    = 1
windows-1251                    = 1
windows-1252                    = 1
windows-1253                    = 1
windows-1254                    = 1
windows-1255                    = 1
windows-1256                    = 1
windows-1257                    = 1
# NOTE(review): windows-1258 is disabled below and no reason is recorded
# here -- confirm whether it should stay off
# windows-1258                  = 1
# Encode::Byte does not know 'macintosh', only 'MacRoman'
macintosh                       = A MacRoman
ks_c_5601-1987                  = 1
# Encode only knows the longhand version of 'ksc_5601'
ksc_5601                        = A KS_C_5601-1987

# Frequently seen wrong or non-preferred labels (action X): the user is
# asked to replace the label with the name on the right-hand side.
x-mac-roman                     = X macintosh
x-sjis                          = X shift_jis
iso8859-1                       = X iso-8859-1
ascii                           = X us-ascii
8859_1                          = X iso-8859-1
# this one is in IANA, but better use only windows-1252
# NOTE(review): this key contains uppercase letters although the syntax
# note at the top of the file says charsets are lowercase -- confirm the
# lookup is case-insensitive, otherwise this entry may never match
iso-8859-1-Windows-3.1-Latin-1  = X windows-1252