/usr/lib/ruby/1.8/mechanize/util.rb is in libwww-mechanize-ruby1.8 1.0.0-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
require 'cgi'
class Mechanize
  # Stateless helpers used by Mechanize: query-string building, charset
  # conversion and detection, and HTML character-reference decoding.
  # Every method is defined on the singleton (Mechanize::Util.xxx).
  class Util
    # Maps NKF constant names to the charset names returned by
    # detect_charset (and, through it, handed to Iconv). Frozen so the
    # shared table cannot be mutated by accident.
    CODE_DIC = {
      :JIS   => "ISO-2022-JP",
      :EUC   => "EUC-JP",
      :SJIS  => "SHIFT_JIS",
      :UTF8  => "UTF-8",
      :UTF16 => "UTF-16",
      :UTF32 => "UTF-32"
    }.freeze

    class << self
      # Builds a URL query string ("k1=v1&k2=v2") from +parameters+.
      #
      # parameters:: anything responding to +map+ that yields key/value
      #              pairs (e.g. a Hash or an Array of two-element Arrays).
      # enc::        accepted for interface compatibility; not used here.
      #
      # Keys and values are converted with +to_s+ and CGI-escaped. Pairs
      # whose key is nil (or false) are left out of the result.
      def build_query_string(parameters, enc=nil)
        parameters.map { |k, v|
          # WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
          next unless k
          [CGI.escape(k.to_s), CGI.escape(v.to_s)].join("=")
        }.compact.join('&')
      end

      # Converts +s+ from the charset +code+ to UTF-8 using Iconv.
      #
      # The conversion only happens when Mechanize.html_parser is
      # Nokogiri::HTML; with any other parser +s+ is returned as is.
      # When +code+ is nil the charset is guessed with detect_charset.
      # Returns nil when +s+ is nil. Errors raised by Iconv are not
      # rescued here and propagate to the caller.
      def to_native_charset(s, code=nil)
        return s unless Mechanize.html_parser == Nokogiri::HTML
        return unless s
        code ||= detect_charset(s)
        Iconv.iconv("UTF-8", code, s).join("")
      end

      # Converts +s+ from UTF-8 to the charset +code+ using Iconv.
      #
      # Returns +s+ unchanged when +s+ or +code+ is nil, when
      # Mechanize.html_parser is not Nokogiri::HTML, or when Iconv
      # rejects the encoding (Iconv::InvalidEncoding).
      def from_native_charset(s, code)
        return s unless s && code
        return s unless Mechanize.html_parser == Nokogiri::HTML
        begin
          Iconv.iconv(code.to_s, "UTF-8", s).join("")
        rescue Iconv::InvalidEncoding
          s
        end
      end

      # Replaces named (&name;) and decimal numeric (&#38;) character
      # references in +s+ with the corresponding UTF-8 characters.
      #
      # Names are looked up in the NamedCharacters table of the current
      # Mechanize.html_parser. References that are unknown, or whose code
      # point cannot be packed, are left in the text untouched.
      # Hexadecimal references (&#x26;) are not matched by the pattern and
      # therefore pass through unchanged. Returns +s+ itself when it is nil.
      def html_unescape(s)
        return s unless s
        s.gsub(/&(\w+|#[0-9]+);/) { |match|
          ref = $1
          # ref[0, 1] (not ref[0]) so the check also works on Ruby 1.8,
          # where String#[] with a single index returns an Integer.
          number = if ref[0, 1] == "#"
                     ref[1..-1].to_i
                   else
                     Mechanize.html_parser::NamedCharacters[ref]
                   end
          if number
            begin
              [number].pack('U')
            rescue StandardError
              # Best effort: keep the original text when the value cannot
              # be encoded (e.g. out of range).
              match
            end
          else
            match
          end
        }
      end

      # Guesses the charset name of +src+ with NKF.guess and returns it as
      # a String, falling back to "ISO-8859-1" when no name is obtained.
      # A nil +src+ is replaced by a minimal HTML document before guessing.
      #
      # NOTE(review): presumably NKF.guess returns an Encoding on
      # Ruby >= 1.9 and one of the NKF integer constants on 1.8 - confirm.
      def detect_charset(src)
        tmp = NKF.guess(src || "<html></html>")
        if RUBY_VERSION >= "1.9.0"
          enc = tmp.to_s.upcase
          # An empty name is truthy in Ruby and would bypass the fallback
          # below, so treat it as "no guess".
          enc = nil if enc.empty?
        else
          const_name = NKF.constants.find { |c| NKF.const_get(c) == tmp }
          # Guard against no constant matching the guess; calling a method
          # on nil here would raise NoMethodError.
          enc = const_name ? CODE_DIC[const_name.to_sym] : nil
        end
        enc || "ISO-8859-1"
      end
    end
  end
end
|