/usr/lib/kubuntu-l10n/libexec/msgsplit is in pkg-kde-tools 0.15.20~ubuntu4.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#! /usr/bin/env python
import sys, string, codecs, os
# TODO: currently the 78 chars are *without* the quotes, while for Gettext it is *with* the quotes
# FIXME: it seems possible to get lines bigger than 80 characters.
# Width threshold used by splitit(): a message stays on its keyword line only
# while keyword plus text are shorter than this, and a longer text is broken
# once the scan position passes this many characters.
max_length = 78

# Opening tags in front of which splitit() starts a new output line
# (unless the tag is the very first thing on the current line).
wrap_before = [
    # headings
    '<h1>', '<h2>', '<h3>', '<h4>', '<h5>', '<h6>',
    # paragraphs and line breaks
    '<p>', '<br>', '<br/>',
    # lists
    '<ol>', '<ul>', '<li>',
    # tables
    '<table>', '<th>', '<tr>', '<td>',
    # remaining block-level elements
    '<center>', '<blockquote>', '<pre>', '<hr>', '<hr/>',
]
### TODO: try to support any charset, not only UTF-8 (so that it can be used outside KDE)
def _write_quoted(outfile, text):
    """Write *text* to *outfile* as one double-quoted line, UTF-8 encoded."""
    outfile.write((u'"%s"\n' % text).encode('utf-8'))


def _is_newline_escape(message, index):
    """Tell whether message[index] is the ``n`` completing a newline escape.

    The ``n`` only ends a backslash-n escape when an odd number of
    backslashes precedes it; an even run is made of escaped backslashes
    followed by a plain letter ``n``.
    """
    run_start = index
    while run_start > 0 and message[run_start - 1] == u'\\':
        run_start -= 1
    return message[index] == u'n' and (index - run_start) % 2 == 1


def splitit( start, message, outfile ):
    """Write one PO field to *outfile*, wrapped into double-quoted lines.

    start   -- keyword prefix including its trailing space (for example
               "msgid "); when empty, only the quoted lines are written
    message -- the unwrapped message text, still escaped, without the
               surrounding quotes
    outfile -- object with a write() method; everything handed to it is a
               UTF-8 encoded byte string

    A message that fits beside its keyword (shorter than max_length together
    with the keyword) and contains no backslash-n sequence is written on a
    single line.  Otherwise the keyword is followed by an empty string and the
    text is spread over continuation lines, broken
      * after every newline escape,
      * in front of any tag listed in wrap_before,
      * and, once the scan passes max_length characters, after the last '>'
        (if beyond column 50), else after the last space, else after the last
        comma, else at the current position (never right after a backslash).

    NOTE: max_length counts the text without its quotes and the break happens
    only after the limit has been passed, so an output line can end up a few
    characters wider than max_length.
    """
    if len(start):
        if len(message) + len(start) < max_length and \
           message.find(u'\\n') == -1:
            outfile.write((u'%s"%s"\n' % (start, message)).encode('utf-8'))
            return
        # Multi-line form: the keyword line carries an empty string.
        outfile.write((u'%s""\n' % start).encode('utf-8'))

    index = 0
    # Position of the most recent candidate break characters on the current
    # output line; 0 doubles as "none seen yet".
    last_brace = 0
    last_space = 0
    last_comma = 0
    while index < len(message):
        char = message[index]
        # Number of leading characters to flush as one line; 0 = no break here.
        split_at = 0
        if _is_newline_escape(message, index):
            split_at = index + 1
        elif char == u'>':
            last_brace = index
        elif char == u' ':
            last_space = index
        elif char == u',':
            last_comma = index
        elif char == u'<' and index > 0:
            for tag in wrap_before:
                if message.startswith(tag, index):
                    split_at = index
                    break

        if not split_at and index > max_length:
            if last_brace > 50:
                index = last_brace
                # Keep the spaces that follow the tag on the same line.
                while index < len(message) - 1 and message[index + 1] == u' ':
                    index += 1
            elif last_space != 0:
                index = last_space
            elif last_comma != 0:
                index = last_comma
            else:
                # No natural break point: cut here, but do not end the line
                # on a backslash, which would separate it from the character
                # it escapes.
                while index > 0 and message[index] == u'\\':
                    index -= 1
            split_at = index + 1

        if not split_at:
            index += 1
            continue

        _write_quoted(outfile, message[:split_at])
        message = message[split_at:]
        index = 0
        last_brace = 0
        last_space = 0
        last_comma = 0

    if len(message):
        _write_quoted(outfile, message)
# Choose the mode used to open the input files.
if 0x02030000 <= sys.hexversion < 0x03000000:
    # Python 2.3 up to 2.7: request "Universal Newline Support" explicitly.
    open_type = "rU"
else:
    # Python older than 2.3 has no "U" flag.  Python 3 translates newlines in
    # plain text mode by default, deprecates "U" and rejects it from 3.11 on.
    open_type = "r"
### TODO: even in the case of a parse error, the script could try to process the next file(s) instead of exiting.
# Re-wrap every PO file named on the command line.  Each file is rewritten
# into "<name>.new", which replaces the original once it is complete.  On a
# parse error a message is printed and the script exits with status 1; the
# partial ".new" file is left in place and the original stays untouched.
for po_path in sys.argv[1:]:
    orig_file = open(po_path, open_type)
    # Nested try/finally (not "with") so the handles are released on every
    # exit path while the script keeps parsing on old Python 2 releases.
    try:
        new_file = open(po_path + ".new", 'w')
        try:
            last = ''    # unwrapped text of the field being collected
            start = ''   # its keyword prefix, e.g. "msgid " or "msgstr[0] "
            index = 0    # 1-based number of the line just read
            while 1: # python 2.1 has no True ;)
                raw = orig_file.readline()
                index += 1
                if not raw:
                    break
                # Comments and blank lines end the pending field and are
                # copied through verbatim.  A line holding only whitespace
                # counts as blank instead of crashing the parser.
                if raw[0] == '#' or not raw.strip():
                    splitit(start, last, new_file)
                    start = ''
                    last = ''
                    new_file.write(raw)
                    continue
                try:
                    line = unicode(raw, 'utf-8').strip()
                except UnicodeError:
                    # Carrying on with undecoded bytes would corrupt the
                    # field and fail later inside splitit(); stop cleanly.
                    sys.stdout.write("%s invalid UTF-8 in line %d\n"
                                     % (po_path, index))
                    sys.exit(1)
                if line[:1] == '"' and line[-1:] == '"':
                    # Continuation line: append its content to the field.
                    last += line[1:-1]
                    continue
                # new message
                splitit(start, last, new_file)
                prefix_len = 0
                for keyword in ("msgid ", "msgstr ", "msgctxt ",
                                "msgid_plural "):
                    if line.startswith(keyword):
                        prefix_len = len(keyword)
                        break
                if not prefix_len and line.startswith("msgstr["):
                    # Slices instead of single indexes: a truncated line
                    # yields a short string rather than an IndexError.
                    if line[8:10] == "] ":
                        # For most languages, there will be only one digit
                        if not line[7].isdigit():
                            sys.stdout.write(
                                "%s not-a-digit error for mgstr[] in line %d\n"
                                % (po_path, index))
                            sys.exit(1)
                        prefix_len = 10
                    else:
                        posdigit = 7 # The first digit is at position 7
                        while line[posdigit:posdigit + 1].isdigit():
                            posdigit += 1
                        if posdigit == 7 or line[posdigit:posdigit + 2] != "] ":
                            sys.stdout.write(
                                "%s parse error after msgstr[ in line %d\n"
                                % (po_path, index))
                            sys.exit(1)
                        prefix_len = posdigit + 2 # skip ] and the space
                value = line[prefix_len:].lstrip()
                # The keyword must be known and followed by a quoted string.
                if not prefix_len or len(value) < 2 \
                   or value[0] != '"' or value[-1] != '"':
                    # Written in two steps so the byte-string path is never
                    # mixed with the decoded line in one format operation.
                    sys.stdout.write("%s parsing error in line %d "
                                     % (po_path, index))
                    sys.stdout.write(line + u"\n")
                    sys.exit(1)
                start = line[:prefix_len]
                last = value[1:-1]
            # Flush the field that was still pending at end of file.
            splitit(start, last, new_file)
        finally:
            new_file.close()
    finally:
        orig_file.close()
    os.rename(po_path + ".new", po_path)
# kate: space-indent off; indent-width 8; replace-tabs off;