/usr/share/octave/packages/io-2.4.10/utf82unicode.m is in octave-io 2.4.10-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 | ## Copyright (C) 2017 Markus Mützel
##
## This program is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.
## -*- texinfo -*-
## @deftypefn {} {[@var{ustr}, @var{error_flag}] =} utf82unicode (@var{istr})
## Convert UTF-8 encoded strings @var{istr} to (1-byte) Unicode @var{ustr}.
##
## UTF-8 characters with more than 2 bytes are dropped since Octave does not
## support characters >255.
## If an error occurred, @var{error_flag} is set to true.
## @end deftypefn
## Author: Markus Mützel <markus.muetzel@gmx.de>
## Created: 2016-10-12
function [ustr, error_flag] = utf82unicode (istr="")
  ## Decode the UTF-8 byte sequence ISTR into single-byte code points.
  ##
  ## Inputs:
  ##   istr        char or numeric array of UTF-8 bytes (converted to uint8).
  ## Outputs:
  ##   ustr        uint8 array with one element per decoded code point <= 255.
  ##   error_flag  true if any byte sequence was dropped (invalid, truncated,
  ##               or encoding a code point that does not fit in one byte).

  error_flag = false;
  istr = uint8 (istr);
  nbytes = numel (istr);
  ustr = uint8 ([]);
  ibyte = 1;

  ## Checking the bound up front lets empty input fall straight through
  ## instead of indexing past the end of ISTR.
  while (ibyte <= nbytes)
    cur = istr(ibyte);
    if (cur < 128)
      ## 0xxxxxxx: single byte character, copy verbatim.
      ustr(end+1) = cur;
      ibyte += 1;
    elseif (cur >= 192 && cur < 224)
      ## 110xxxxx: start of a double-byte character.
      ## The continuation byte must exist and look like 10xxxxxx.
      if (ibyte < nbytes && bitand (istr(ibyte+1), 192) == 128)
        ## Decode in double precision: uint8 arithmetic saturates at 255 and
        ## would silently turn every code point >= 256 into 255.
        code = double (bitand (cur, 31)) * 64 ...
               + double (bitand (istr(ibyte+1), 63));
        if (code <= 255)
          ustr(end+1) = code;
        else
          ## Valid UTF-8, but the code point does not fit in one byte.
          error_flag = true;
        endif
      else
        ## Truncated sequence or invalid continuation byte.
        error_flag = true;
      endif
      ## Both bytes are consumed even when the sequence was invalid.
      ibyte += 2;
    elseif (cur >= 224)
      ## 111xxxxx: lead byte of a sequence with more than 2 bytes.
      ## Drop this character (Octave does not support chars > 255).
      error_flag = true;
      ## The number of leading one bits tells how many bytes to drop.
      ndrop = find (bitget (cur, 8:-1:1) == 0, 1, "first") - 1;
      if (isempty (ndrop))
        ## 0xFF has no zero bit, so FIND returns empty.  Skip just this byte;
        ## adding an empty value to IBYTE would stall the loop forever.
        ndrop = 1;
      endif
      ibyte += ndrop;
    else
      ## 10xxxxxx: continuation byte without a preceding start byte.
      error_flag = true;
      ibyte += 1;
    endif
  endwhile
|