1#!/usr/bin/env python3
2
3"""Validate XML document against general XML rules and optionally a RelaxNG schema.
4
This is a fully standalone file which does not use any CHART files. It looks for schemas in the
6same directory as the script. This is because it's needed as part of the initial build process
7when the normal launcher may not be functional.
8"""
9
10import os
11import sys
12import logging
13import subprocess
14from argparse import ArgumentParser
15from pathlib import Path
16
17from lxml import etree
18
# Directory containing this script; schemas.xml and the *.rnc schema files are
# looked up relative to it.
SCHEMA_DIR = Path(__file__).parent
# Name of the external executable invoked to convert RelaxNG Compact schemas.
CONVERTER_TOOL = 'trang'

# Root logger; main() configures it through logging.basicConfig().
logger = logging.getLogger()
23
24
class ConverterToolError(Exception):
    """Signal that the schema converter tool is missing or could not be run."""
28
29
def validate(xml_filename, force_relaxng=None, force_xsd=None):
    """Validate an XML file.

    Basic syntax checking (balanced tags etc.) is always performed because the
    document is parsed on load.

    Schema selection:

    * If `force_relaxng` is given it must name a ``.rng`` file and the
      document is validated against it.
    * Otherwise, if `force_xsd` is given, the document is validated against
      that XSD file.
    * Otherwise the RelaxNG Compact schema registered for the document's root
      element is looked up with `document_rnc_name`, converted to RelaxNG by
      `rnc_name_to_rng_filename`, and used for validation.

    The XSD location declared inside the document is only checked for
    presence (an error is logged when it is missing or does not end in
    ``.xsd``); it is not loaded.

    Args:
        xml_filename: path of the XML document to check.
        force_relaxng: optional filename of a RelaxNG (``.rng``) schema.
        force_xsd: optional filename of an XSD schema.

    Returns:
        True if the document satisfies the selected schema, False otherwise.
        Each schema violation is reported through the module logger.

    Raises:
        RuntimeError: if `force_relaxng` does not end in ``.rng``, or if
            neither a RelaxNG nor an XSD schema could be selected.

    Errors raised by lxml while parsing the document or loading a schema, and
    errors raised by the schema conversion helpers, are not caught here.
    """
    logger.debug('Loading {path}'.format(path=xml_filename))

    # Basic validation is performed upon load. str() avoids relying on lxml
    # accepting pathlib.Path objects.
    document = etree.parse(str(xml_filename))
    # etree.parse() returns an ElementTree; `tag` and `attrib`, which the
    # lookup helpers read, only exist on the root element.
    root_elem = document.getroot()

    # Look both up once; the RNC lookup parses the schema index each time.
    rnc_name = document_rnc_name(root_elem)
    declared_xsd = document_xsd_filename(root_elem)

    # make sure the document has:
    # xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    # xsi:noNamespaceSchemaLocation="http://chart/schemas/*.xsd">
    if (rnc_name is None or
            declared_xsd is None or
            not str(declared_xsd).endswith('.xsd')):
        logger.error('XSD schema not set in {path}'.format(path=xml_filename))

    relaxng_filename = None
    xsd_filename = None

    if force_relaxng is not None:
        # the user specified a Relax NG schema manually
        if not str(force_relaxng).endswith('.rng'):
            raise RuntimeError('Can only validate against RelaxNG schema with extension "rng"')

        relaxng_filename = force_relaxng

    if force_xsd is not None:
        xsd_filename = force_xsd

    if relaxng_filename is None and xsd_filename is None:
        # no particular schema requested so try to find one
        if rnc_name is not None:
            logger.debug(
                'Document requires RelaxNG compact schema {schema}'.format(schema=rnc_name))

            # A RelaxNG Compact schema was identified.
            # Find (or generate) the corresponding RelaxNG file.
            relaxng_filename = rnc_name_to_rng_filename(rnc_name)

        else:
            logger.info('No RelaxNG schema identified')

    if relaxng_filename is not None:
        # a RelaxNG schema takes precedence over an XSD schema
        logger.info('Validating {xml} against RelaxNG schema {rng}'.format(
            xml=xml_filename, rng=relaxng_filename))
        validator = etree.RelaxNG(file=str(relaxng_filename))

    elif xsd_filename is not None:
        validator = etree.XMLSchema(etree.parse(str(xsd_filename)))

    else:
        raise RuntimeError('Neither RalaxNG nor XSD schema were found')

    if validator.validate(document):
        return True

    # Report every problem the validator recorded, for either schema kind.
    for entry in validator.error_log:  # pylint:disable=E1133
        # Plain errors are printed without a prefix; other severities are
        # tagged with their level name.
        level = '' if entry.level_name == 'ERROR' else entry.level_name + ' '
        logger.error('{filename}:{line}:{column} {level}{message}'.format(
            filename=entry.filename,
            column=entry.column,
            line=entry.line,
            level=level,
            message=entry.message))

    return False
108
109
def document_xsd_filename(root_elem):
    """Return the XSD location declared on an XML document's root element.

    Args:
        root_elem: the document's root element, or a parsed tree (any object
            offering a ``getroot()`` method).

    Returns:
        The value of the ``xsi:noNamespaceSchemaLocation`` attribute as a
        string, or None when the attribute is absent.
    """
    get_root = getattr(root_elem, 'getroot', None)
    if callable(get_root):
        # Callers may pass the tree returned by etree.parse(); a tree has no
        # `attrib`, so step down to its root element first.
        root_elem = get_root()

    return root_elem.attrib.get(
        '{http://www.w3.org/2001/XMLSchema-instance}noNamespaceSchemaLocation')
116
117
def document_rnc_name(root_elem):
    """Return the RNC schema registered for an XML document, or None.

    The root element's local name is looked up in the ``schemas.xml``
    locating-rules index stored in `SCHEMA_DIR`: a ``documentElement`` rule
    whose ``localName`` matches yields a type id, and the ``typeId`` rule with
    that id yields the schema URI.

    Args:
        root_elem: the document's root element, or a parsed tree (any object
            offering a ``getroot()`` method).

    Returns:
        A `Path` built from the schema URI, or None unless exactly one
        ``documentElement`` rule and exactly one ``typeId`` rule match.
    """
    get_root = getattr(root_elem, 'getroot', None)
    if callable(get_root):
        # Callers may pass the tree returned by etree.parse(); a tree has no
        # `tag`, so step down to its root element first.
        root_elem = get_root()

    # lxml reports namespaced tags as '{uri}local'; the index rules are keyed
    # on the local name only.
    local_name = root_elem.tag.rpartition('}')[2]

    index = etree.parse(str(SCHEMA_DIR.joinpath('schemas.xml')))
    # Bind the prefix for these queries only instead of registering it
    # process-wide.
    namespaces = {'lr': 'http://thaiopensource.com/ns/locating-rules/1.0'}

    # XPath variables avoid splicing values into the expression text.
    typeids = index.xpath(
        '//lr:documentElement[@localName=$local_name]/@typeId',
        namespaces=namespaces,
        local_name=local_name)
    if len(typeids) != 1:
        return None

    uris = index.xpath(
        '//lr:typeId[@id=$type_id]/@uri',
        namespaces=namespaces,
        type_id=str(typeids[0]))
    if len(uris) != 1:
        return None

    return Path(str(uris[0]))
137
138
def rnc_name_to_rng_filename(rnc_name):
    """Locate the RNC schema `rnc_name` and return its RNG counterpart.

    `rnc_name` is tried first as given (relative or absolute path) and then
    relative to the schemas directory. The located file is handed to
    `convert_rnc_to_rng`, whose result is returned.

    Raises RuntimeError if the RNC file exists in neither place.
    """
    # a local (relative or absolute) filename wins
    if rnc_name.exists():
        return convert_rnc_to_rng(rnc_name)

    # otherwise fall back to a file in the schemas directory
    in_schema_dir = SCHEMA_DIR.joinpath(rnc_name)
    if in_schema_dir.exists():
        return convert_rnc_to_rng(in_schema_dir)

    raise RuntimeError('Could not locate RNC {n}'.format(n=rnc_name))
154
155
def rnc_name_to_xsd_filename(rnc_name):
    """Convert `rnc_name` into the name of the associated XSD file.

    `rnc_name` (a `Path`) is tried first as given (relative or absolute) and
    then relative to the schemas directory. The located file is handed to
    `convert_rnc_to_xsd`, whose result is returned.

    Raises:
        RuntimeError: if the RNC file exists in neither place.
    """
    if rnc_name.exists():
        # it is a local (relative or absolute) filename
        return convert_rnc_to_xsd(rnc_name)

    rnc_filename = SCHEMA_DIR.joinpath(rnc_name)
    if rnc_filename.exists():
        # it is a file in the schemas directory
        return convert_rnc_to_xsd(rnc_filename)

    # format() rather than '+': rnc_name is a Path, and str + Path raises
    # TypeError, which would mask the intended error.
    raise RuntimeError('Could not locate RNC {n}'.format(n=rnc_name))
171
172
def convert_rnc_to_rng(rnc_filename, rng_filename=None, force=False):
    """Convert RelaxNG Compact file `rnc_filename` into RelaxNG file `rng_filename`.

    Conversion is skipped if the RNG file exists and has a later timestamp
    than the RNC file, unless `force` is set.

    Args:
        rnc_filename: path of the RelaxNG Compact source file.
        rng_filename: path of the RelaxNG file to write; defaults to
            `rnc_filename` with its extension replaced by ``.rng``.
        force: convert even if the RNG file looks up to date.

    Returns:
        The path of the RNG file as a `Path`.

    Raises:
        ConverterToolError: if the converter executable cannot be started or
            exits with a non-zero status.
    """
    rnc_filename = Path(rnc_filename)
    if rng_filename is None:
        rng_filename = rnc_filename.with_suffix('.rng')
    else:
        rng_filename = Path(rng_filename)

    if (not force and
            rng_filename.exists() and
            rng_filename.stat().st_mtime > rnc_filename.stat().st_mtime):
        # the existing RNG file is newer than its source; reuse it
        return rng_filename

    # Plain strings keep the argument list usable by subprocess on every
    # platform and joinable for the log message below.
    command = [CONVERTER_TOOL, '-I', 'rnc', '-O', 'rng', str(rnc_filename), str(rng_filename)]
    logger.info('Converting {inp} to {out}'.format(inp=rnc_filename, out=rng_filename))
    try:
        subprocess.check_call(command, stderr=subprocess.STDOUT)

    except subprocess.CalledProcessError as e:
        # check_call() does not capture output, so only the exit status is
        # available here; the tool's own messages went to our stdout.
        logger.error('Could not run "{cmd}"'.format(cmd=' '.join(command)))
        logger.error(str(e))
        raise ConverterToolError(str(e)) from e

    except FileNotFoundError as e:
        raise ConverterToolError(
            'Converter tool "{tool}" was not found'.format(tool=CONVERTER_TOOL)) from e

    except OSError as e:
        logger.error('Could not run "{tool}" executable for schema conversion'.format(
            tool=CONVERTER_TOOL))
        # Do not return a filename as if the conversion had succeeded.
        raise ConverterToolError(str(e)) from e

    return rng_filename
214
215
def convert_rnc_to_xsd(rnc_filename, xsd_filename=None, force=False):
    """Convert RelaxNG Compact file `rnc_filename` into XSD Schema file `xsd_filename`.

    Conversion is skipped if the XSD file exists and has a later timestamp
    than the RNC file, unless `force` is set. After a conversion, two lines
    emitted by the converter are stripped from the XSD file.

    Args:
        rnc_filename: path of the RelaxNG Compact source file.
        xsd_filename: path of the XSD file to write; defaults to
            `rnc_filename` with its extension replaced by ``.xsd``.
        force: convert even if the XSD file looks up to date.

    Returns:
        The path of the XSD file as a `Path`, whether or not a conversion was
        needed.

    Raises:
        ConverterToolError: if the converter executable is not found.
        subprocess.CalledProcessError: if the converter exits with a non-zero
            status.
    """
    rnc_filename = Path(rnc_filename)
    if xsd_filename is None:
        xsd_filename = rnc_filename.with_suffix('.xsd')
    else:
        xsd_filename = Path(xsd_filename)

    if (not force and
            xsd_filename.exists() and
            xsd_filename.stat().st_mtime > rnc_filename.stat().st_mtime):
        # the existing XSD file is newer than its source; reuse it
        return xsd_filename

    logger.info('Converting {src} to {dst}'.format(src=rnc_filename, dst=xsd_filename))
    try:
        subprocess.check_call(
            [CONVERTER_TOOL, '-I', 'rnc', '-O', 'xsd', str(rnc_filename), str(xsd_filename)],
            stderr=subprocess.STDOUT)

    except subprocess.CalledProcessError as e:
        # check_call() leaves `output` as None; guard so the original error
        # is re-raised rather than an AttributeError.
        if e.output is not None:
            for line in e.output.split('\n'):
                logger.error(line)

        else:
            logger.error(str(e))

        raise

    except FileNotFoundError as e:
        raise ConverterToolError(
            'Converter tool "{tool}" was not found'.format(tool=CONVERTER_TOOL)) from e

    _strip_trang_xsi_lines(xsd_filename)
    return xsd_filename


def _strip_trang_xsi_lines(xsd_filename):
    """Rewrite `xsd_filename` without the two xsi lines that XMLSpy doesn't like."""
    unwanted = (
        '<xs:import namespace="http://www.w3.org/2001/XMLSchema-instance" '
        'schemaLocation="xsi.xsd"/>',
        '<xs:attribute ref="xsi:noNamespaceSchemaLocation" use="required"/>',
    )
    temp_filename = Path(str(xsd_filename) + '.tmp')
    with xsd_filename.open('r') as xsd_file, temp_filename.open('w') as temp_file:
        for line in xsd_file:
            if any(marker in line for marker in unwanted):
                continue

            temp_file.write(line)

    # replace() overwrites an existing target on every platform, whereas
    # rename() fails on Windows when the target exists.
    temp_filename.replace(xsd_filename)
262
263
def convert_all_schemas():
    """Bring the RNG and XSD files of every RNC schema in the schemas directory up to date."""
    for rnc_source in SCHEMA_DIR.glob('*.rnc'):
        # RNG first, then XSD, for each source file
        for convert in (convert_rnc_to_rng, convert_rnc_to_xsd):
            convert(rnc_source)
269
270
def main():
    """Command line entry point.

    Parses the command line and performs the first requested action, in this
    order of precedence: ``--validate``, ``--rnc-to-rng``, ``--rnc-to-xsd``,
    ``--build-all``. The process exits after that action; if none was
    requested the parser reports an error.
    """
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    # The module docstring is the help description; the first positional
    # argument of ArgumentParser is `prog`, so pass it by keyword.
    parser = ArgumentParser(description=__doc__)
    parser.add_argument('--validate',
                        nargs='+',
                        help='Validate XML file')
    parser.add_argument('--relaxng',
                        help='Validate against specific Relax NG schema')
    parser.add_argument('--xsd',
                        help='Force validation against a specific XSD file')
    parser.add_argument('--rnc-to-rng',
                        nargs='+',
                        help='Convert named Relax-NG compact file to Relax-NG')
    parser.add_argument('--rnc-to-xsd',
                        nargs='+',
                        help='Convert named Relax-NG compact file to XSD')
    parser.add_argument('--build-all',
                        action='store_true',
                        help='Convert all Relax-NG compact files in CHART schemas dir to '
                        'Relag-NG and XSD')
    parser.add_argument('--verbose',
                        action='store_true')
    args = parser.parse_args()

    if not args.verbose:
        # logging was set up at DEBUG above; quieten it unless asked not to
        logging.getLogger().setLevel('INFO')

    if args.validate is not None:
        for xml_filename in args.validate:
            validate(Path(xml_filename), force_relaxng=args.relaxng, force_xsd=args.xsd)

        logger.info('All done')
        parser.exit()

    if args.rnc_to_rng is not None:
        # explicit conversion requests always regenerate the output
        for rnc_filename in args.rnc_to_rng:
            convert_rnc_to_rng(Path(rnc_filename), force=True)

        parser.exit()

    if args.rnc_to_xsd is not None:
        for rnc_name in args.rnc_to_xsd:
            convert_rnc_to_xsd(Path(rnc_name), force=True)

        parser.exit()

    if args.build_all:
        convert_all_schemas()
        parser.exit()

    parser.error('No actions specified')
323
324
# Run the command line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()