/usr/lib/python2.7/dist-packages/lxml/isoschematron/__init__.py is in python-lxml 4.2.1-1ubuntu0.1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
"""The ``lxml.isoschematron`` package implements ISO Schematron support on top
of the pure-xslt 'skeleton' implementation.
"""
import sys
import os.path
from lxml import etree as _etree # due to validator __init__ signature
# Python 2/3 compatibility shims (same approach as lxml.html): make the
# Python 2 names ``unicode`` and ``basestring`` available on Python 3.
try:
    unicode
except NameError:
    unicode = str  # Python 3: the name does not exist, fall back to str

try:
    basestring
except NameError:
    basestring = str  # Python 3: the name does not exist, fall back to str
# Public API of the package.
__all__ = [
    'extract_xsd',
    'extract_rng',
    'iso_dsdl_include',
    'iso_abstract_expand',
    'iso_svrl_for_xslt1',
    'svrl_validation_errors',
    'schematron_schema_valid',
    'stylesheet_params',
    'Schematron',
]
# XML namespaces used throughout this module.
# FIXME: Maybe lxml should provide a dedicated place for common namespace
# FIXME: definitions?
XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema"
RELAXNG_NS = "http://relaxng.org/ns/structure/1.0"
SCHEMATRON_NS = "http://purl.oclc.org/dsdl/schematron"
SVRL_NS = "http://purl.oclc.org/dsdl/svrl"

# Namespace-qualified ('{uri}local') root element tags of the schema
# languages that are recognised by their root element.
_schematron_root = '{%s}schema' % SCHEMATRON_NS
_xml_schema_root = '{%s}schema' % XML_SCHEMA_NS

# Directory holding the stylesheets and grammars shipped next to this file.
_resources_dir = os.path.join(os.path.dirname(__file__), 'resources')
# The iso-schematron skeleton implementation steps, each one a compiled XSL
# transformation loaded from the 'xsl' folder of the resources directory.
def _load_xslt(*path_parts):
    """Parse the stylesheet at ``<resources>/xsl/<path_parts>`` and compile it."""
    stylesheet_path = os.path.join(_resources_dir, 'xsl', *path_parts)
    return _etree.XSLT(_etree.parse(stylesheet_path))


extract_xsd = _load_xslt('XSD2Schtrn.xsl')
extract_rng = _load_xslt('RNG2Schtrn.xsl')
iso_dsdl_include = _load_xslt(
    'iso-schematron-xslt1', 'iso_dsdl_include.xsl')
iso_abstract_expand = _load_xslt(
    'iso-schematron-xslt1', 'iso_abstract_expand.xsl')
iso_svrl_for_xslt1 = _load_xslt(
    'iso-schematron-xslt1', 'iso_svrl_for_xslt1.xsl')

# SVRL result accessor: selects every failed assertion of a report.
svrl_validation_errors = _etree.XPath(
    '//svrl:failed-assert',
    namespaces={'svrl': SVRL_NS},
)

# RelaxNG validator used to check schematron schemas themselves.
_schematron_rng_path = os.path.join(_resources_dir, 'rng', 'iso-schematron.rng')
schematron_schema_valid = _etree.RelaxNG(_etree.parse(_schematron_rng_path))
def stylesheet_params(**kwargs):
    """Build a dictionary of XSLT stylesheet parameters from keyword args.

    XSL stylesheet parameters have to be XPath expressions, for example:

    * string expressions, like "'5'"
    * simple (number) expressions, like "5"
    * valid XPath expressions, like "/a/b/text()"

    Each keyword argument is converted according to its Python type:

    * ``None`` is rejected with a ``TypeError``.
    * Strings are wrapped with ``XSLT.strparam()``.
    * ``XPath`` objects are passed through unchanged.
    * Any other value is converted to its (unicode) string form.
    """
    params = {}
    for name, value in kwargs.items():
        if value is None:
            raise TypeError('None not allowed as a stylesheet parameter')
        if isinstance(value, basestring):
            params[name] = _etree.XSLT.strparam(value)
        elif isinstance(value, _etree.XPath):
            params[name] = value
        else:
            params[name] = unicode(value)
    return params
# helper function for use in Schematron __init__
def _stylesheet_param_dict(paramsDict, kwargsDict):
    """Merge two parameter mappings into one stylesheet parameter dict.

    Starts from a copy of ``paramsDict`` (the caller's mapping is never
    modified), lets every ``kwargsDict`` entry whose value is not None
    override it, and returns the result converted by ``stylesheet_params()``.
    """
    merged = dict(paramsDict)  # copy: never touch a (default) argument
    merged.update(
        (name, value) for name, value in kwargsDict.items()
        if value is not None)  # None means "not given", so no override
    return stylesheet_params(**merged)
class Schematron(_etree._Validator):
    """An ISO Schematron validator.

    Pass a root Element or an ElementTree to turn it into a validator.
    Alternatively, pass a filename as keyword argument 'file' to parse from
    the file system.

    Schematron is a less well known, but very powerful schema language.
    The main idea is to use the capabilities of XPath to put restrictions on
    the structure and the content of XML documents.

    The standard behaviour is to fail on ``failed-assert`` findings only
    (``ASSERTS_ONLY``).  To change this, you can either pass a report filter
    function to the ``error_finder`` parameter (e.g. ``ASSERTS_AND_REPORTS``
    or a custom ``XPath`` object), or subclass isoschematron.Schematron for
    complete control of the validation process.

    Built on the Schematron language 'reference' skeleton pure-xslt
    implementation, the validator is created as an XSLT 1.0 stylesheet using
    these steps:

     0) (Extract from XML Schema or RelaxNG schema)
     1) Process inclusions
     2) Process abstract patterns
     3) Compile the schematron schema to XSLT

    The ``include`` and ``expand`` keyword arguments can be used to switch off
    steps 1) and 2).
    To set parameters for steps 1), 2) and 3) hand parameter dictionaries to the
    keyword arguments ``include_params``, ``expand_params`` or
    ``compile_params``.
    For convenience, the compile-step parameter ``phase`` is also exposed as a
    keyword argument ``phase``.  This takes precedence if the parameter is also
    given in the parameter dictionary.

    If ``store_schematron`` is set to True, the (included-and-expanded)
    schematron document tree is stored and available through the ``schematron``
    property.
    If ``store_xslt`` is set to True, the validation XSLT document tree will be
    stored and can be retrieved through the ``validator_xslt`` property.
    With ``store_report`` set to True (default: False), the resulting validation
    report document gets stored and can be accessed as the ``validation_report``
    property.

    Here is a usage example::

      >>> from lxml import etree
      >>> from lxml.isoschematron import Schematron

      >>> schematron = Schematron(etree.XML('''
      ... <schema xmlns="http://purl.oclc.org/dsdl/schematron" >
      ...   <pattern id="id_only_attribute">
      ...     <title>id is the only permitted attribute name</title>
      ...     <rule context="*">
      ...       <report test="@*[not(name()='id')]">Attribute
      ...         <name path="@*[not(name()='id')]"/> is forbidden<name/>
      ...       </report>
      ...     </rule>
      ...   </pattern>
      ... </schema>'''),
      ... error_finder=Schematron.ASSERTS_AND_REPORTS)

      >>> xml = etree.XML('''
      ... <AAA name="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC color="ccc"/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      False

      >>> xml = etree.XML('''
      ... <AAA id="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      True
    """

    # libxml2 error categorization for validation errors
    _domain = _etree.ErrorDomains.SCHEMATRONV
    _level = _etree.ErrorLevels.ERROR
    _error_type = _etree.ErrorTypes.SCHEMATRONV_ASSERT

    # convenience definitions for common behaviours
    ASSERTS_ONLY = svrl_validation_errors  # Default
    ASSERTS_AND_REPORTS = _etree.XPath(
        '//svrl:failed-assert | //svrl:successful-report',
        namespaces={'svrl': SVRL_NS})

    def _extract(self, element):
        """Extract embedded schematron schema from non-schematron host schema.

        This method will only be called by __init__ if the given schema document
        is not a schematron schema by itself.
        Must return a schematron schema document tree or None.

        A root element that is neither an XML Schema root nor in the RelaxNG
        namespace (including a root without any namespace) yields None.
        """
        schematron = None
        if element.tag == _xml_schema_root:
            schematron = self._extract_xsd(element)
        # Use .get(): a root element without a namespace has prefix None and
        # no matching nsmap entry; indexing would raise KeyError instead of
        # letting __init__ report a SchematronParseError.
        elif element.nsmap.get(element.prefix) == RELAXNG_NS:
            # RelaxNG does not have a single unique root element
            schematron = self._extract_rng(element)
        return schematron

    # customization points
    # etree.XSLT objects that provide the extract, include, expand, compile
    # steps
    _extract_xsd = extract_xsd
    _extract_rng = extract_rng
    _include = iso_dsdl_include
    _expand = iso_abstract_expand
    _compile = iso_svrl_for_xslt1

    # etree.xpath object that determines input document validity when applied to
    # the svrl result report; must return a list of result elements (empty if
    # valid)
    _validation_errors = ASSERTS_ONLY

    def __init__(self, etree=None, file=None, include=True, expand=True,
                 include_params={}, expand_params={}, compile_params={},
                 store_schematron=False, store_xslt=False, store_report=False,
                 phase=None, error_finder=ASSERTS_ONLY):
        """Build the validating XSLT from a schema tree or schema file.

        The dictionary defaults are only unpacked or copied here, never
        modified.

        Raises ``SchematronParseError`` if obtaining the root element fails,
        if the document is neither a schematron schema nor
        schematron-extractable, or if the resulting schematron schema is
        invalid.  Raises ``ValueError`` if no root element is available.
        """
        super(Schematron, self).__init__()

        self._store_report = store_report
        self._schematron = None
        self._validator_xslt = None
        self._validation_report = None
        # Only override the class-level finder when one was explicitly
        # requested, so that subclasses redefining _validation_errors keep
        # their own default.
        if error_finder is not self.ASSERTS_ONLY:
            self._validation_errors = error_finder

        # parse schema document, may be a schematron schema or an XML Schema or
        # a RelaxNG schema with embedded schematron rules
        root = None
        try:
            if etree is not None:
                if _etree.iselement(etree):
                    root = etree
                else:
                    root = etree.getroot()
            elif file is not None:
                root = _etree.parse(file).getroot()
        except Exception:
            raise _etree.SchematronParseError(
                "No tree or file given: %s" % sys.exc_info()[1])
        if root is None:
            raise ValueError("Empty tree")
        if root.tag == _schematron_root:
            schematron = root
        else:
            schematron = self._extract(root)
        if schematron is None:
            raise _etree.SchematronParseError(
                "Document is not a schematron schema or schematron-extractable")

        # perform the iso-schematron skeleton implementation steps to get a
        # validating xslt
        if include:
            schematron = self._include(schematron, **include_params)
        if expand:
            schematron = self._expand(schematron, **expand_params)
        if not schematron_schema_valid(schematron):
            raise _etree.SchematronParseError(
                "invalid schematron schema: %s" %
                schematron_schema_valid.error_log)
        if store_schematron:
            self._schematron = schematron

        # add new compile keyword args here if exposing them
        compile_kwargs = {'phase': phase}
        compile_params = _stylesheet_param_dict(compile_params, compile_kwargs)
        validator_xslt = self._compile(schematron, **compile_params)
        if store_xslt:
            self._validator_xslt = validator_xslt
        self._validator = _etree.XSLT(validator_xslt)

    def __call__(self, etree):
        """Validate doc using Schematron.

        Returns true if document is valid, false if not.
        Every finding of the error finder is appended to the error log.
        """
        self._clear_error_log()
        result = self._validator(etree)
        if self._store_report:
            self._validation_report = result
        errors = self._validation_errors(result)
        if errors:
            # The document URL serves as file name in the log entries.
            if _etree.iselement(etree):
                fname = etree.getroottree().docinfo.URL or '<file>'
            else:
                fname = etree.docinfo.URL or '<file>'
            for error in errors:
                # Does svrl report the line number, anywhere? Don't think so.
                self._append_log_message(
                    domain=self._domain, type=self._error_type,
                    level=self._level, line=0,
                    message=_etree.tostring(error, encoding='unicode'),
                    filename=fname)
            return False
        return True

    @property
    def schematron(self):
        """ISO-schematron schema document (None if object has been initialized
        with store_schematron=False).
        """
        return self._schematron

    @property
    def validator_xslt(self):
        """ISO-schematron skeleton implementation XSLT validator document (None
        if object has been initialized with store_xslt=False).
        """
        return self._validator_xslt

    @property
    def validation_report(self):
        """ISO-schematron validation result report (None if result-storing has
        been turned off).
        """
        return self._validation_report
|