/usr/lib/ruby/vendor_ruby/mechanize/util.rb is in ruby-mechanize 2.7.2-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | require 'cgi'
require 'nkf'
class Mechanize::Util
# Maps NKF encoding constants to charset names. detect_charset consults this
# table when NKF.guess returns an Integer. Frozen so the shared lookup table
# cannot be modified by accident.
CODE_DIC = {
  NKF::JIS   => "ISO-2022-JP",
  NKF::EUC   => "EUC-JP",
  NKF::SJIS  => "SHIFT_JIS",
  NKF::UTF8  => "UTF-8",
  NKF::UTF16 => "UTF-16",
  NKF::UTF32 => "UTF-32",
}.freeze

# true if RUBY_VERSION is 1.9.0 or later
NEW_RUBY_ENCODING = RUBY_VERSION >= '1.9.0'

# Exception classes rescued (via splat) when a charset conversion fails:
# EncodingError when NEW_RUBY_ENCODING is true, the Iconv errors otherwise.
# Frozen because the list is only ever read.
ENCODING_ERRORS = if NEW_RUBY_ENCODING
                    [EncodingError]
                  else
                    [Iconv::InvalidEncoding, Iconv::IllegalSequence]
                  end.freeze
# Default mime type data for Page::Image#mime_type.
#
# This constant is the very same object as WEBrick::HTTPUtils::DefaultMimeTypes
# (an alias, not a copy), so replacing its contents in place also changes
# WEBrick's table.
#
# You can use another Apache-compatible mimetab:
# mimetab = WEBrick::HTTPUtils.load_mime_types('/etc/mime.types')
# Mechanize::Util::DefaultMimeTypes.replace(mimetab)
#
# NOTE(review): WEBrick is not required in the visible part of this file;
# presumably it is loaded elsewhere before this line runs -- confirm.
DefaultMimeTypes = WEBrick::HTTPUtils::DefaultMimeTypes
# Serializes +parameters+ (anything that yields key/value pairs, such as a
# Hash or an Array of pairs) into a "key=value&key=value" string.
#
# Keys and values are converted with #to_s and escaped with CGI.escape.
# Pairs whose key is nil or false are dropped. +enc+ is accepted but unused.
def self.build_query_string(parameters, enc = nil)
  pairs = []
  parameters.each do |key, value|
    next unless key
    # WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
    pairs << [CGI.escape(key.to_s), CGI.escape(value.to_s)].join("=")
  end
  pairs.join('&')
end
# Transcodes string +s+ into the charset +code+ by delegating to encode_to.
# (Despite the method name, the conversion performed here goes *to* +code+.)
#
# Returns +s+ unchanged when +s+ or +code+ is nil/false, or when
# Mechanize.html_parser is not Nokogiri::HTML.
#
# If the conversion raises one of ENCODING_ERRORS, the failure is reported
# through +log+.debug (when a log is given). The original +s+ is then returned
# if +ignore_encoding_error+ is true; otherwise the error is re-raised.
def self.from_native_charset(s, code, ignore_encoding_error = false, log = nil)
  return s if !s || !code
  return s if Mechanize.html_parser != Nokogiri::HTML

  begin
    encode_to(code, s)
  rescue *ENCODING_ERRORS => ex
    if log
      log.debug("from_native_charset: #{ex.class}: form encoding: #{code.inspect} string: #{s}")
    end
    raise unless ignore_encoding_error
    s
  end
end
# Conversion helper behind Util.from_native_charset: returns +str+ converted
# to +encoding+.
def self.encode_to(encoding, str)
  # Ruby >= 1.9: String#encode does the transcoding.
  return str.encode(encoding) if NEW_RUBY_ENCODING

  # Older Rubies: go through Iconv, treating +str+ as UTF-8.
  Iconv.conv(encoding.to_s, "UTF-8", str)
end
# encode_to is only the inner helper of from_native_charset, so it is made
# private to the class rather than exposed as part of its public methods.
private_class_method :encode_to
# Replaces HTML character references in +s+ with the characters they denote.
#
# Handles three forms:
# * named references (&amp;), looked up in the current HTML parser's
#   NamedCharacters table;
# * decimal numeric references (&#65;);
# * hexadecimal numeric references (&#x41; or &#X41;).
#
# A reference is left untouched when its name is unknown or when its code
# point cannot be packed as a UTF-8 character. Returns +s+ itself when it is
# nil or false.
def self.html_unescape(s)
  return s unless s
  s.gsub(/&(\w+|#[0-9]+|#[xX][0-9a-fA-F]+);/) { |match|
    # Copy the capture first: the regexps in the case below overwrite $1.
    reference = $1
    number = case reference
             when /\A#[xX](.+)\z/ then $1.to_i(16)
             when /\A#(.+)\z/ then $1.to_i
             else Mechanize.html_parser::NamedCharacters[reference]
             end
    # Best effort: if pack rejects the value, keep the original text.
    number ? ([number].pack('U') rescue match) : match
  }
end
# Guesses the charset of +src+ with NKF.guess and returns its name as a
# String, falling back to "ISO-8859-1" when +src+ is nil/false or no name
# can be derived from the guess.
def self.detect_charset(src)
  guess = src && NKF.guess(src)
  charset =
    if guess.is_a?(Integer)
      # Ruby <= 1.8: NKF.guess returns an integer code
      CODE_DIC[guess]
    elsif guess
      # Ruby >= 1.9: use the upcased string form of the guess
      guess.to_s.upcase
    end
  charset || "ISO-8859-1"
end
# Escapes +str+ with the memoized URI parser.
#
# The parser is URI::Parser.new, or the URI module itself when URI::Parser
# raises NameError. It is created on first use and kept in @parser.
# +unsafe+ selects what gets escaped; when omitted, the default comes from
# URI::UNSAFE (URI module case) or from the parser's regexp[:UNSAFE].
def self.uri_escape str, unsafe = nil
  unless @parser
    @parser = begin
      URI::Parser.new
    rescue NameError
      URI
    end
  end

  # The default is looked up only when the caller supplied none.
  unsafe ||= (URI == @parser ? URI::UNSAFE : @parser.regexp[:UNSAFE])

  @parser.escape(str, unsafe)
end
# Reverses percent-escaping in +str+ using the memoized URI parser.
#
# The parser is URI::Parser.new, or the URI module itself when URI::Parser
# raises NameError. It is created on first use and kept in @parser.
def self.uri_unescape str
  unless @parser
    @parser = begin
      URI::Parser.new
    rescue NameError
      URI
    end
  end

  @parser.unescape(str)
end
end
|