/usr/share/php/ApiGen/Charset/CharsetDetector.php is in php-apigen 4.1.2-1ubuntu2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
<?php
/**
* This file is part of the ApiGen (http://apigen.org)
*
* For the full copyright and license information, please view
* the file license.md that was distributed with this source code.
*/
namespace ApiGen\Charset;
use ApiGen\Charset\Configuration\CharsetOptionsResolver;
class CharsetDetector
{

    /**
     * Candidate charsets handed to mb_detect_encoding().
     *
     * @var string[]
     */
    private $charsets = [];

    /**
     * Detection results remembered per file path.
     *
     * @var array { filePath => fileEncoding }
     */
    private $detectedFileEncodings = [];

    /**
     * Turns the raw charset list given to setCharsets() into the list used for detection.
     *
     * @var CharsetOptionsResolver
     */
    private $charsetOptionsResolver;


    /**
     * Stores the resolver and starts with UTF-8 as the only candidate charset.
     */
    public function __construct(CharsetOptionsResolver $charsetOptionsResolver)
    {
        $this->charsets = [Encoding::UTF_8];
        $this->charsetOptionsResolver = $charsetOptionsResolver;
    }


    /**
     * Replaces the candidate charsets with whatever the resolver returns for the given list.
     */
    public function setCharsets(array $charsets)
    {
        $resolvedCharsets = $this->charsetOptionsResolver->resolve($charsets);
        $this->charsets = $resolvedCharsets;
    }


    /**
     * Returns the encoding detected for the file at the given path.
     *
     * The file is read and analysed only when no result is remembered for the path yet;
     * otherwise the remembered result is returned.
     *
     * @param string $filePath
     * @return string
     */
    public function detectForFilePath($filePath)
    {
        if ( ! isset($this->detectedFileEncodings[$filePath])) {
            $fileContent = file_get_contents($filePath);
            $this->detectedFileEncodings[$filePath] = $this->detectForContent($fileContent);
        }

        return $this->detectedFileEncodings[$filePath];
    }


    /**
     * Detects the encoding of the given content among the configured charsets.
     *
     * @param string $fileContent
     * @return string
     */
    private function detectForContent($fileContent)
    {
        $guessedEncoding = mb_detect_encoding($fileContent, $this->charsets);

        // mb_detect_encoding cannot handle WINDOWS-1250 and reports ISO-8859-1 instead,
        // so an ISO-8859-1 result gets a second look before it is trusted.
        return $this->isWindows1250($guessedEncoding, $fileContent)
            ? Encoding::WIN_1250
            : $guessedEncoding;
    }


    /**
     * Tells whether content reported as ISO-8859-1 should be treated as WINDOWS-1250.
     *
     * @param string $fileEncoding
     * @param string $fileContent
     * @return bool
     */
    private function isWindows1250($fileEncoding, $fileContent)
    {
        // Only an ISO-8859-1 verdict can be a misreported WINDOWS-1250.
        if ($fileEncoding !== Encoding::ISO_8859_1) {
            return false;
        }

        // Any byte in 0x7F-0x9F, or the byte 0xBC, is taken as the WINDOWS-1250 marker.
        return (bool) preg_match('~[\x7F-\x9F\xBC]~', $fileContent);
    }

}
|