1#!/usr/bin/env python3
  2
  3"""Validate XML document against general XML rules and optionally a RelaxNG schema.
  4
This is a fully standalone file which does not use any CHART files. It looks for schemas in the
  6same directory as the script. This is because it's needed as part of the initial build process
  7when the normal launcher may not be functional.
  8"""
  9
 10import os
 11import sys
 12import logging
 13import subprocess
 14from argparse import ArgumentParser
 15from pathlib import Path
 16
 17from lxml import etree
 18
# Directory containing this script; the schemas.xml index and schema files are looked up here.
SCHEMA_DIR = Path(__file__).parent
# Name of the external executable invoked to convert RelaxNG Compact schemas.
CONVERTER_TOOL = 'trang'

# The root logger (no name is passed to getLogger).
logger = logging.getLogger()
 23
 24
 25class ConverterToolError(Exception):
 26    """Raised if there is a problem or missing schema convert tool."""
 27    pass
 28
 29
 30def validate(xml_filename, force_relaxng=None, force_xsd=None):
 31    """Validate an XML file.
 32
 33    Basic syntax checking (balanced tags etc.) is always performed.
 34    If an appropriate RelaxNG schema can be identified (or generated from a RelaxNG Compact
 35    file) in the CHART `schemas` directory the file will be validated against this.
 36    Otherwise if an XSD schema is given in the file, that will be used for validation.
 37    If `force_relaxng` or `force_xsd` are set to the full filename of a schema file validation
 38    is done against them instead of the default schema.
 39    """
 40    logger.debug('Loading {path}'.format(path=xml_filename))
 41
 42    # basic validation is perform upon load
 43    root_elem = etree.parse(xml_filename)
 44
 45    # make sure the document has:
 46    # xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 47    # xsi:noNamespaceSchemaLocation="http://chart/schemas/*.xsd">
 48    if (document_rnc_name(root_elem) is None or
 49        document_xsd_filename(root_elem) is None or
 50        document_xsd_filename(root_elem).suffix != '.xsd'):
 51        logger.error('XSD schema not set in {path}'.format(path=xml_filename))
 52
 53    relaxng_filename = None
 54    xsd_filename = None
 55
 56    if force_relaxng is not None:
 57        # see if the user specified a Relax NG schema manually
 58        relaxng_filename = force_relaxng
 59        if not relaxng_filename.endswith('.rng'):
 60            raise RuntimeError('Can only validate against RelaxNG schema with extension "rng"')
 61
 62    if force_xsd is not None:
 63        xsd_filename = force_xsd
 64
 65    if relaxng_filename is None and xsd_filename is None:
 66        # no particular schema requested so try to find one
 67        relaxngc_name = document_rnc_name(root_elem)
 68        if relaxngc_name is not None:
 69            logger.debug(
 70                'Document requires RelaxNG compact schema {schema}'.format(schema=relaxngc_name))
 71
 72            # A RelaxNG Compact schema was identified.
 73            # Find the corresponding RelaxNG file.
 74            relaxng_filename = rnc_name_to_rng_filename(relaxngc_name)
 75
 76        else:
 77            logger.info('No RelaxNG schema identified')
 78
 79    if relaxng_filename is not None:
 80        # apply Relax NG schema if found earlier
 81
 82        logger.info('Validating {xml} against RelaxNG schema {rng}'.format(
 83                xml=xml_filename, rng=relaxng_filename))
 84        validator = etree.RelaxNG(file=relaxng_filename)
 85        # relaxng_parser.assertValid(root_elem)
 86        if not validator.validate(root_elem):
 87            for e in validator.error_log:  # pylint:disable=E1133
 88                if e.level_name == 'ERROR':
 89                    fn = logger.error
 90                    level = ''
 91                else:
 92                    fn = logger.error
 93                    level = e.level_name + ' '
 94
 95                fn('{filename}:{line}:{column} {level}{message}'.format(
 96                    filename=e.filename,
 97                    column=e.column,
 98                    line=e.line,
 99                    level=level,
100                    message=e.message))
101
102    elif xsd_filename is not None:
103        xsd = etree.XMLSchema(etree.parse(xsd_filename))
104        xsd.validate(root_elem)
105
106    else:
107        raise RuntimeError('Neither RalaxNG nor XSD schema were found')
108
109
def document_xsd_filename(root_elem):
    """Given an XML root element return the filename of its XSD file.

    The name is read from the ``xsi:noNamespaceSchemaLocation`` attribute.

    Args:
        root_elem: The document's root element. An object offering ``getroot()`` (such as a
            parsed element tree) is also accepted and its root element is used.

    Returns:
        A `Path` built from the attribute value, or None if the attribute is absent.
        NOTE(review): if the attribute holds a URL, `Path` collapses the ``//`` after the
        scheme, so use the result for its name/suffix rather than as a URL.
    """
    if hasattr(root_elem, 'getroot'):
        # we were handed a whole tree; the attribute lives on its root element
        root_elem = root_elem.getroot()

    filename = root_elem.attrib.get(
        '{http://www.w3.org/2001/XMLSchema-instance}noNamespaceSchemaLocation')
    if filename is None:
        return None

    return Path(str(filename))
116
117
def document_rnc_name(root_elem):
    """Given an XML root element return the name of its RNC file.

    The lookup uses the ``schemas.xml`` locating-rules index in `SCHEMA_DIR`: the root
    element's local name is matched against ``documentElement/@localName`` to obtain a type
    id, which is then matched against ``typeId/@id`` to obtain the schema URI.

    Args:
        root_elem: The document's root element. An object offering ``getroot()`` (such as a
            parsed element tree) is also accepted and its root element is used.

    Returns:
        A `Path` built from the schema URI, or None unless exactly one type id and exactly
        one URI are found.
    """
    if hasattr(root_elem, 'getroot'):
        # we were handed a whole tree; the tag lives on its root element
        root_elem = root_elem.getroot()

    # Pass the prefix mapping per query instead of registering it globally in lxml.
    namespaces = {'lr': 'http://thaiopensource.com/ns/locating-rules/1.0'}
    index = etree.parse(str(SCHEMA_DIR.joinpath('schemas.xml')))

    # Strip any "{namespace}" part of the tag: the index stores local names only.
    local_name = etree.QName(root_elem).localname

    # XPath variables avoid quoting problems from building the expression by concatenation.
    typeids = index.xpath(
        '//lr:documentElement[@localName=$name]/@typeId',
        namespaces=namespaces,
        name=local_name)
    if len(typeids) != 1:
        return None

    uris = index.xpath(
        '//lr:typeId[@id=$typ]/@uri',
        namespaces=namespaces,
        typ=str(typeids[0]))
    if len(uris) != 1:
        return None

    return Path(str(uris[0]))
137
138
def rnc_name_to_rng_filename(rnc_name):
    """Return the RNG file that corresponds to the RNC schema `rnc_name`.

    `rnc_name` is tried first as given (a relative or absolute local filename) and then
    relative to the schemas directory. The first location that exists is handed to
    `convert_rnc_to_rng`, which creates the RNG file if required.

    Raises RuntimeError if the RNC file cannot be found in either place.
    """
    search_order = (
        rnc_name,                       # a local (relative or absolute) filename
        SCHEMA_DIR.joinpath(rnc_name),  # a file in the schemas directory
    )
    for candidate in search_order:
        if candidate.exists():
            return convert_rnc_to_rng(candidate)

    raise RuntimeError('Could not locate RNC {n}'.format(n=rnc_name))
154
155
def rnc_name_to_xsd_filename(rnc_name):
    """Convert `rnc_name` into the name of the associated XSD file.

    `rnc_name` (a `Path`) is tried first as given and then relative to the schemas
    directory; the first location that exists is passed to `convert_rnc_to_xsd`.

    The XSD file will be created if not present.

    Raises:
        RuntimeError: If the RNC file cannot be found in either location.
    """
    if rnc_name.exists():
        # it is a local (relative or absolute) filename
        return convert_rnc_to_xsd(rnc_name)

    rnc_filename = SCHEMA_DIR.joinpath(rnc_name)
    if rnc_filename.exists():
        # it is a file in the schemas directory
        return convert_rnc_to_xsd(rnc_filename)

    # format() rather than "+": rnc_name is a Path and cannot be concatenated to a str
    raise RuntimeError('Could not locate RNC {n}'.format(n=rnc_name))
171
172
def convert_rnc_to_rng(rnc_filename, rng_filename=None, force=False):
    """Convert RelaxNG Compact file `rnc_filename` into RelaxNG file `rng_filename`.

    Conversion is not done if RNG file has later timestamp than RNC file.

    Args:
        rnc_filename: Path (or string) of the RelaxNG Compact input file.
        rng_filename: Path (or string) of the output file. Defaults to `rnc_filename` with
            its extension replaced by ``.rng``.
        force: Convert even if the existing RNG file is newer than the RNC file.

    Returns:
        The `Path` of the RNG file.

    Raises:
        ConverterToolError: If the converter tool cannot be started or exits with an error.
    """
    rnc_filename = Path(rnc_filename)
    if rng_filename is None:
        rng_filename = rnc_filename.with_suffix('.rng')
    else:
        rng_filename = Path(rng_filename)

    if (not force and
            rng_filename.exists() and
            rng_filename.stat().st_mtime > rnc_filename.stat().st_mtime):
        # the existing RNG file is newer than its source, reuse it
        return rng_filename

    command = [CONVERTER_TOOL, '-I', 'rnc', '-O', 'rng', str(rnc_filename), str(rng_filename)]
    logger.info('Converting {inp} to {out}'.format(inp=rnc_filename, out=rng_filename))
    try:
        subprocess.check_call(command, stderr=subprocess.STDOUT)

    except subprocess.CalledProcessError as e:
        # check_call does not capture the tool's output (it is written straight to our own
        # stdout), so only the exception summary is available for logging
        logger.error('Could not run "{cmd}"'.format(cmd=' '.join(command)))
        logger.error(str(e))
        raise ConverterToolError(str(e)) from e

    except OSError as e:
        # Includes FileNotFoundError (tool not installed). Do not carry on and return a
        # file name that may be missing or stale.
        message = 'Could not run "{tool}" executable for schema conversion'.format(
            tool=CONVERTER_TOOL)
        logger.error(message)
        raise ConverterToolError(message) from e

    return rng_filename
214
215
def convert_rnc_to_xsd(rnc_filename, xsd_filename=None, force=False):
    """Convert RelaxNG Compact file `rnc_filename` into XSD Schema file `xsd_filename`.

    Conversion is not done if XSD file has later timestamp than RNC file.

    After conversion two lines emitted by the converter (the ``xsi.xsd`` import and the
    required ``xsi:noNamespaceSchemaLocation`` attribute) are stripped from the XSD file.

    Args:
        rnc_filename: Path (or string) of the RelaxNG Compact input file.
        xsd_filename: Path (or string) of the output file. Defaults to `rnc_filename` with
            its extension replaced by ``.xsd``.
        force: Convert even if the existing XSD file is newer than the RNC file.

    Returns:
        The `Path` of the XSD file, whether it was regenerated or already up to date.

    Raises:
        ConverterToolError: If the converter tool executable cannot be found.
        subprocess.CalledProcessError: If the converter tool exits with an error.
    """
    rnc_filename = Path(rnc_filename)
    if xsd_filename is None:
        xsd_filename = rnc_filename.with_suffix('.xsd')
    else:
        xsd_filename = Path(xsd_filename)

    if (not force and
            xsd_filename.exists() and
            xsd_filename.stat().st_mtime > rnc_filename.stat().st_mtime):
        # already up to date; still return the name, as convert_rnc_to_rng does
        return xsd_filename

    logger.info('Converting {src} to {dst}'.format(src=rnc_filename, dst=xsd_filename))
    command = [CONVERTER_TOOL, '-I', 'rnc', '-O', 'xsd', str(rnc_filename), str(xsd_filename)]
    try:
        subprocess.check_call(command, stderr=subprocess.STDOUT)

    except subprocess.CalledProcessError as e:
        # check_call does not capture output, so e.output is None; log the summary instead
        logger.error(str(e))
        raise

    except FileNotFoundError as e:
        raise ConverterToolError(
            'Could not run "{tool}" executable for schema conversion'.format(
                tool=CONVERTER_TOOL)) from e

    # trang inserts 2 lines which XMLSpy doesn't like
    unwanted = (
        b'<xs:import namespace="http://www.w3.org/2001/XMLSchema-instance" '
        b'schemaLocation="xsi.xsd"/>',
        b'<xs:attribute ref="xsi:noNamespaceSchemaLocation" use="required"/>',
    )
    temp_filename = Path(str(xsd_filename) + '.tmp')
    # Filter as bytes so the result does not depend on the locale's default encoding, and
    # use context managers so both files are closed even if copying fails.
    with xsd_filename.open('rb') as xsd_file, temp_filename.open('wb') as temp_file:
        for line in xsd_file:
            if any(marker in line for marker in unwanted):
                continue

            temp_file.write(line)

    # replace() overwrites an existing target on every platform, rename() does not
    temp_filename.replace(xsd_filename)
    return xsd_filename
262
263
def convert_all_schemas():
    """Bring the RNG and XSD files of every RNC schema in the schemas directory up to date."""
    converters = (convert_rnc_to_rng, convert_rnc_to_xsd)
    for rnc_filename in SCHEMA_DIR.glob('*.rnc'):
        for convert in converters:
            convert(rnc_filename)
269
270
def main():
    """Command line entry point.

    Exactly one action is carried out, chosen in this order: ``--validate``,
    ``--rnc-to-rng``, ``--rnc-to-xsd``, ``--build-all``. The process exits once the action
    has completed; if no action was requested a usage error is reported.
    """
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    # The module docstring is the description; passed positionally it would become `prog`.
    parser = ArgumentParser(description=__doc__)
    parser.add_argument('--validate',
                        nargs='+',
                        help='Validate XML file')
    parser.add_argument('--relaxng',
                        help='Validate against specific Relax NG schema')
    parser.add_argument('--xsd',
                        help='Force validation against a specific XSD file')
    parser.add_argument('--rnc-to-rng',
                        nargs='+',
                        help='Convert named Relax-NG compact file to Relax-NG')
    parser.add_argument('--rnc-to-xsd',
                        nargs='+',
                        help='Convert named Relax-NG compact file to XSD')
    parser.add_argument('--build-all',
                        action='store_true',
                        help='Convert all Relax-NG compact files in CHART schemas dir to '
                        'Relax-NG and XSD')
    parser.add_argument('--verbose',
                        action='store_true')
    args = parser.parse_args()

    if not args.verbose:
        # debug output is only wanted with --verbose
        logging.getLogger().setLevel('INFO')

    if args.validate is not None:
        for xml_filename in args.validate:
            validate(Path(xml_filename), force_relaxng=args.relaxng, force_xsd=args.xsd)

        logger.info('All done')
        parser.exit()

    if args.rnc_to_rng is not None:
        # explicit conversion requests always regenerate the output
        for rnc_filename in args.rnc_to_rng:
            convert_rnc_to_rng(Path(rnc_filename), force=True)

        parser.exit()

    if args.rnc_to_xsd is not None:
        for rnc_name in args.rnc_to_xsd:
            convert_rnc_to_xsd(Path(rnc_name), force=True)

        parser.exit()

    if args.build_all:
        convert_all_schemas()
        parser.exit()

    parser.error('No actions specified')
323
324
# Run the command line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()