1#!/usr/bin/env python3
   2
   3"""Utility functions for reading and writing XML files."""
   4
   5import os
   6import re
   7import sys
   8import logging
   9import subprocess
  10from datetime import datetime
  11from datetime import timedelta
  12
  13from lxml import etree
  14
  15# to allow client code to use 'from chart.common.xml import Element'
  16from lxml.etree import Element    # (unused import) pylint: disable=W0611,E0611
  17from lxml.etree import SubElement  # (unused import) pylint: disable=W0611,E0611
  18from lxml.etree import _Element
  19
  20from chart.common.path import Path
  21from chart.common.resource import Resource
  22
# Module-wide logger. NOTE: getLogger() called without a name returns the root logger.
logger = logging.getLogger()

# Filename extension of XML files, including the leading dot
EXT_XML = '.xml'
  26
  27
  28class XMLException(BaseException):
  29    """Base class to XML specific exceptions."""
  30
  31    pass
  32
  33
  34class XMLSyntaxError(XMLException):
  35    """Found a basic parsing error in an XML file."""
  36
  37    def __init__(self, filename, message, line=None, column=None):
  38        super(XMLSyntaxError, self).__init__()
  39        self.filename = filename
  40        self.message = message
  41        self.line = line
  42        self.column = column
  43
  44    def __str__(self):
  45        return '{filename}:{line}:{column}: {message}'.format(
  46            filename=self.filename,
  47            line=self.line,
  48            column=self.column,
  49            message=self.message,
  50        )
  51
  52
  53class XMLElementError(XMLException):
  54    """Generic problem passing XML element."""
  55
  56    pass
  57
  58
  59class XMLElementNotFound(XMLElementError):
  60    """Missing XML element."""
  61
  62    pass
  63
  64
  65class XMLElement:
  66    """XML file handling class, based on lxml and enforcing CHART project standards.
  67
  68    Goals:
  69
  70    - Lightweight
  71    - Similar interface to lxml
  72    - Do not support attributes except for setting schema
  73    - (maybe in future) Explicitly block mixed mode nodes
  74    - (maybe in future) Enforce node naming standards
  75            (and even enforce use of keywords for tag names?)
  76    - Use project standards for encoding datatypes: datetime, timedelta, bool, hexcolour
  77    - Operate closely with Trait class
  78    """
  79
  80    @staticmethod
  81    def qname(namespace, tag):
  82        """Configure a link between a namespace and a tag globally."""
  83        if namespace is None:
  84            return etree.QName(tag).text
  85
  86        return etree.QName(namespace, tag).text
  87
  88    def __init__(
  89        self,
  90        elem: _Element = None,
  91        tag: str = None,
  92        text: str = None,
  93        filename: Path = None,
  94        xinclude: bool = False,
  95        from_text: str = None,
  96    ):
  97        """Constructor.
  98
  99        Args:
 100            `elem`: Construct a node from an lxml Element object
 101            `tag`: Create a new node with given <tag> name
 102            `text`: Insert given string as the body of the new element
 103            `filename`: Load and parse XML from file
 104            `xinclude`: If reading from file, apply xinclude instructions as a post-processing step
 105            `from_text`: Construct a node by parsing a string containing raw XML
 106
 107        Raises:
 108                ValueError: Cannot interpret any input
 109        """
 110        if elem is not None:
 111            assert isinstance(elem, _Element)
 112            self.elem = elem
 113
 114        elif tag is not None:
 115            self.elem = Element(tag)
 116
 117        elif filename is not None:
 118            self.elem = load_xml(filename, xinclude=xinclude)
 119
 120        elif from_text is not None:
 121            self.elem = etree.fromstring(from_text)
 122
 123        else:
 124            raise ValueError(
 125                'Must construct XMLElement with an lxml Element object, '
 126                'node tag name, or filename'
 127            )
 128
 129        if text is not None:
 130            from chart.common.traits import to_str
 131            self.elem.text = to_str(text)
 132
 133    def add(self, tag=None, text=None, node=None):
 134        """Create or add a new child node to ourselves.
 135
 136        If `tag` is non-None, use that as the tag name.
 137
 138        If `text` is non-None, set the text content of the new node. Requires `tag`.
 139
 140        If `node` is an XMLElement or lxml Element/SubElement, add that as the child.
 141        """
 142        if node is not None and isinstance(node, XMLElement):
 143            self.elem.append(node.elem)
 144            return node
 145
 146        res = XMLElement(tag=tag, text=text)
 147        self.elem.append(res.elem)
 148        return res
 149
 150    def set(self, tag=None, text=None):
 151        """Modify our text (if tag is None) or change an existing child element value."""
 152        if tag is None:
 153            from chart.common.traits import to_str
 154            self.elem.text = to_str(text)
 155
 156        else:
 157            child_elem = self.find(tag)
 158            child_elem.set(text=text)
 159
 160    def find(self, childname=None):
 161        """Return child element matching name, or None if not found.
 162
 163        Raise an XMLElementError if multiple cases found.
 164        """
 165        childs = self.elem.findall(childname)
 166        # if child is None:
 167        # return None
 168        if len(childs) == 0:
 169            return None
 170
 171        if len(childs) > 1:
 172            raise XMLElementError(
 173                'Found multiple matches for {name}'.format(name=childname)
 174            )
 175
 176        return XMLElement(elem=childs[0])
 177
 178    def findall(self, childname=None):
 179        """Yield all child elements with matching name."""
 180        if childname is None:
 181            # Iterate through child elements (not text or comments nodes)
 182            for child in self.elem.iterchildren(etree.Element):
 183                # print(etree.Comment, type(etree.Comment))
 184                # if isinstance(child, etree.Comment):
 185                # pass
 186                # print('findall iter return',child,'type',type(child))
 187                yield XMLElement(elem=child)
 188
 189        else:
 190            for child in self.elem.findall(childname):
 191                yield XMLElement(elem=child)
 192
 193    def set_tag(self, new_tag):
 194        """Change our element tag name."""
 195        self.elem.tag = new_tag
 196
 197    def get_tag(self):
 198        """Read our element tag name."""
 199        return self.elem.tag
 200
 201    tag = property(get_tag, set_tag)
 202
 203    @property
 204    def text(self):
 205        return self.elem.text
 206
 207    def parse_str(self, name, default=type, expand_settings=None):
 208        """Read text of child node `name`, or `default` if not present."""
 209        # slow method. raises an error if there are multiple matching nodes, which could be useful
 210        # for validation
 211        # return parsechildstr(self.elem, name, default=default, expand_settings=expand_settings)
 212        node = self.elem.find(name)
 213        if node is None:
 214            if default is type:
 215                raise XMLElementNotFound(
 216                    f'Cannot find expected element "{name}" in file {self.elem.base} '
 217                    f'in line {self.elem.sourceline}'
 218                )
 219
 220            return default
 221
 222        result = node.text    # .strip()
 223
 224        if expand_settings:
 225            from chart.project import settings
 226            result = result.format(settings=settings)
 227
 228        return result
 229
 230    def parse_int(self, name, default=type):
 231        """Read integer value of child node `name`."""
 232        # result = self.elem.xpath(name + '/text()')  # dont use, slower than find().text
 233        # return parsechildint(self.elem, name, default=default)
 234        node = self.elem.find(name)
 235        if node is None:
 236            if default is type:
 237                raise XMLElementNotFound('Cannot find {n}'.format(n=name))
 238
 239            return default
 240
 241        # try:
 242        return int(node.text)
 243        # except
 244
 245    def parse_ints(self, name):
 246        """Return a list of integer contents of all `name` childs of ourselves."""
 247        res = []
 248        for child_elem in self.elem.findall(name):
 249            res.append(int(child_elem.text.strip()))
 250
 251        return res
 252
 253    def parse_float(self, name, default=type):
 254        """Read integer value of child node `name`."""
 255        return parsechildfloat(self.elem, name, default=default)
 256
 257    def parse_floats(self, name):
 258        """Read integer value of child node `name`."""
 259        return parsechildfloats(self.elem, name)
 260
 261    def parse_numeric(self, name, default=type):
 262        """Return numerical value of `name`.
 263
 264        If it looks like an integer, return an int.
 265        If it seems to be base encoded like '0x10', translate it.
 266        If it looks like a floating point value, return a float.
 267        """
 268        node = self.elem.find(name)
 269        if node is None:
 270            if default is type:
 271                raise XMLElementNotFound('Cannot find {n}'.format(n=name))
 272
 273            return default
 274
 275        text = node.text
 276
 277        if '.' in text:
 278            return float(text)
 279
 280        return int(text, 0)
 281
 282    def parse_bool(self, name, default=type):
 283        """Return content of our child `name` element as a boolean.
 284
 285        If not present then `default` is returned if specified, otherwise XMLElementNotFound is
 286        raised.
 287        """
 288        return parsechildbool(self.elem, name, default)
 289
 290    def parse_timedelta(self, name, default=type):
 291        """Return content of our child `name` element as a timedelta in iso9660 format.
 292
 293        If not present then `default` is returned if specified, otherwise XMLElementNotFound is
 294        raised.
 295        """
 296        return parsechildtimedelta(self.elem, name, default)
 297
 298    def parse_datetime(self, name, default=type):
 299        """Return content of our child `name` element as a datetime in iso9660 format.
 300
 301        A few special keywords are allowed.
 302
 303        If not present then `default` is returned if specified, otherwise XMLElementNotFound is
 304        raised.
 305        """
 306        return parsechilddatetime(self.elem, name, default)
 307
 308    def parse_strs(self, name):
 309        """Return a list of the content of all child nodes called `name`."""
 310        return parsechildstrs(self.elem, name)
 311
 312    def write(self, output, pretty_print=True, report=False):
 313        """Convert ourselves to text.
 314
 315        If `output` is a handle, write to that.
 316        Otherwise assume it's a filename and write there.
 317        """
 318        # optionally, set schema via automatic detection
 319        if output is sys.stdout:
 320            output.write(xml_to_str(self.elem, pretty_print=True))
 321
 322        else:
 323            write_xml(self.elem, output, pretty_print=pretty_print, report=report)
 324
 325    def filename(self):
 326        """Return original filename if exists."""
 327        return Path(self.elem.getroottree().docinfo.URL)
 328
 329    def parent(self):
 330        """Return our parent node."""
 331        result = self.elem.getparent()
 332        if result is None:
 333            return None
 334
 335        return XMLElement(result)
 336
 337    def set_schema(self, schema=None):
 338        """Change the XML schema for the document."""
 339        assert schema is not None, 'Schema auto detection not implemented'
 340        self.elem.set(
 341            '{http://www.w3.org/2001/XMLSchema-instance}noNamespaceSchemaLocation',
 342            'http://chart/schemas/{schema}.xsd'.format(schema=schema),
 343        )
 344
 345    # def xpathraw(self, path):
 346    # """Compatibility function to quickly get some older code working.
 347
 348    # Should be returning XMLElement instances instead.
 349
 350    # Or avoid xpath completely because it's very rarely actually used properly."""
 351    # return self.elem.xpath(path)
 352
 353    schema = property(None, set_schema)
 354
 355    def __repr__(self):
 356        if self.elem is not None:
 357            return f'XmlElement("{self.elem.tag}", {len(self.elem.getchildren())} children)'
 358        else:
 359            return f'XmlElement()'
 360
 361    def to_str(self) -> str:
 362        """Render ourselves as a nice string."""
 363        return xml_to_str(self.elem, pretty_print=True)
 364
 365
 366def load_xml(path_or_buffer, xinclude=False):
 367    """Wrap function to use ElementTree to read XML `filename` and return the root node.
 368
 369    `path` should be a Path object or a file buffer.
 370    If `xinclude` is True then any xinclude expressions will be expanded.
 371    """
 372    # this function should be rewritten as separate buffer, Path and Resource loaders
 373    # the logic is silly
 374
 375    # if isinstance(path_or_buffer, Resource):
 376    # return
 377
 378    if isinstance(path_or_buffer, Path):
 379        # save the original object so we can call is_file() on it if it was a Path
 380        # object and lxml threw an exception
 381        orig_path_or_buffer = path_or_buffer
 382        path_or_buffer = str(path_or_buffer)
 383
 384    else:
 385        orig_path_or_buffer = path_or_buffer
 386
 387    try:
 388        if isinstance(path_or_buffer, Resource):
 389            buf = path_or_buffer.read()
 390            if len(buf) == 0:
 391                raise XMLSyntaxError(
 392                    filename=path_or_buffer, message='Zero length file'
 393                )
 394
 395            tree = etree.fromstring(buf)
 396
 397        elif isinstance(path_or_buffer, Path):
 398            tree = etree.parse(str(path_or_buffer))
 399
 400        else:
 401            tree = etree.parse(path_or_buffer)
 402
 403    except etree.XMLSyntaxError as e:
 404        # f = str(e)
 405        if isinstance(path_or_buffer, str):
 406            # if we were called with a filename, parse the syntax error returned by
 407            # lxml into our own XMLSyntaxError class for reporting / analysis
 408            # by client code
 409            parts = str(e).split(',')
 410            if len(parts) == 3:
 411                msg = parts[0]
 412                line = parts[1].strip().partition(' ')[2]
 413                column = parts[2].strip().partition(' ')[2]
 414                raise XMLSyntaxError(
 415                    filename=path_or_buffer, message=msg, line=line, column=column
 416                )
 417
 418            raise XMLSyntaxError(filename=path_or_buffer, message=e)
 419
 420        raise XMLSyntaxError(filename=None, message=e)
 421
 422    except IOError as e:
 423        if hasattr(orig_path_or_buffer, 'is_file'):
 424            if not orig_path_or_buffer.is_file():
 425                # lxml returns a really weird error if the file is not found
 426                raise FileNotFoundError('{path} does not exist'.format(path=orig_path_or_buffer))
 427
 428            raise
 429
 430        raise IOError('Cannot read XML file {p}'.format(p=path_or_buffer))
 431
 432    if xinclude:
 433        tree.xinclude()
 434
 435    if isinstance(path_or_buffer, Resource):
 436        return tree
 437
 438    return tree.getroot()
 439
 440
 441def xml_filename(elem):
 442    """Given on element, return the filename it came from."""
 443    return Path(elem.getroottree().docinfo.URL)
 444
 445
 446def parse_xml(instr):
 447    """Return an ElementTree from a string."""
 448    try:
 449        return etree.fromstring(instr)
 450    except ValueError as e:
 451        raise ValueError('Cannot parse {instr} ({e})'.format(instr=instr, e=e))
 452
 453
 454def write_xml(root, filename, pretty_print=False, report=False):
 455    """Write XML tree using ElementTree with optional pretty print using xmllint."""
 456    try:
 457        etree.ElementTree(root).write(
 458            str(filename), pretty_print=pretty_print, encoding='utf-8'
 459        )
 460    except PermissionError:
 461        raise IOError('No write permission for {path}'.format(path=filename))
 462    except etree.SerialisationError:
 463        raise IOError('Could not write to {path}'.format(path=filename))
 464
 465    if pretty_print:
 466        os.environ['XMLLINT_INDENT'] = '\t'
 467        subprocess.call(('xmllint', '--format', str(filename), '-o', str(filename)))
 468
 469    if report:
 470        logging.info(
 471            'Wrote {sz} to {fn}'.format(sz=filename.stat().st_size, fn=filename)
 472        )
 473
 474
 475def to_html(elem):
 476    """Convert XML `elem` to colourised HTML."""
 477    # Colourise our XML element
 478    from pygments import highlight
 479
 480    # (for some reason pylint cannot find the HtmlFormatter class)
 481    from pygments.formatters import HtmlFormatter  # pylint: disable=E0611
 482    from pygments.lexers import get_lexer_by_name
 483
 484    if elem is None:
 485        return 'None'
 486
 487    if sys.version_info.major == 2:
 488        plain_elem = etree.tostring(elem, pretty_print=True, encoding='utf-8').replace(
 489            ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"', ''
 490        )
 491
 492    else:
 493        plain_elem = etree.tostring(elem, pretty_print=True, encoding=str).replace(
 494            ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"', ''
 495        )
 496
 497    html_elem = highlight(
 498        '\t' + plain_elem, get_lexer_by_name('xml'), HtmlFormatter(noclasses=True)
 499    )
 500
 501    return html_elem
 502
 503
 504def xml_to_str(elem, one_liner=False, pretty_print=False, xmlcharref=False):
 505    """Convert `elem` to a string.
 506
 507    If `one_liner` is set, output will be a compact single line representation.
 508    If `pretty_print` is set we attempt to format the output nicely
 509    (doesn't currently work with Manifest files where we have added history items).
 510    If `xmlcharref` is set, non-ascii characters will be converted to HTML glyphs.
 511    """
 512    if one_liner:
 513        result = etree.tostring(elem, encoding='utf-8')
 514        # result = etree.tostring(elem, encoding=str)
 515        # remove pointless namespace declaration
 516        result = result.replace(
 517            ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"', ''
 518        )
 519        # remove newlines and leading whitespace
 520        result = ''.join(line.strip() for line in result)
 521
 522    elif xmlcharref:
 523        if sys.version_info.major == 2:
 524            # fails with "ORA-00932: inconsistent datatypes: expected - got BINARY"
 525            # under python3
 526            result = etree.tostring(elem, encoding='utf-8').encode(
 527                'utf-8', 'xmlcharrefreplace'
 528            )
 529
 530        else:
 531            # works on python3
 532            # not sure if the lack of xmlcharrefreplace actually causes a problem -
 533            # it may stop display of events containing non-ascii chars from displaying
 534            result = etree.tostring(elem, encoding=str)
 535
 536    else:
 537        result = etree.tostring(
 538            elem, pretty_print=pretty_print, encoding='utf-8'
 539        ).decode('utf-8')
 540        # return etree.tostring(elem, pretty_print=pretty_print, encoding=str)
 541
 542    return result
 543
 544
 545# def write_xml(root, filename, pretty_print=False):
 546#    from lxml import etree
 547#    from xml.dom import minidom
 548#    if pretty_print:
 549#        rough_string = etree.ElementTree.tostring(root, 'utf-8')
 550#        reparsed = minidom.parseString(rough_string)
 551#        return reparsed.toprettyxml()
 552
 553#    else:
 554#        etree.ElementTree(root).write(filename)
 555
 556
 557def datetime_to_xml(dt, include_ms=False, include_us=False):
 558    """Convert a Python datetime to an ISO 8601 format timestamp.
 559
 560    With optional milliseconds or microseconds.
 561    """
 562    if include_ms:
 563        return dt.strftime('%Y-%m-%dT%H:%M:%S.') + '{s:03}'.format(
 564            s=dt.microsecond // 1000
 565        )
 566
 567    if include_us:
 568        return dt.strftime('%Y-%m-%dT%H:%M:%S.') + '{s:06}'.format(s=dt.microsecond)
 569
 570    return dt.strftime('%Y-%m-%dT%H:%M:%SZ')
 571
 572
 573def date_to_xml(d):
 574    """Convert a Python date-like object to an ISO 8601 format date."""
 575    return d.strftime('%Y-%m-%d')
 576
 577
 578def xml_to_datetime(t):
 579    """Convert ISO8660 format time strings datetime objects.
 580
 581    Possible formats are:
 582    * YYYY-mm-dd
 583    * YYYY-mm-ddTHH:MM:SS
 584    * YYYY-mm-ddTHH:MM:SS.mmm
 585    * YYYY-ddd
 586    * YYYY-dddTHH:MM:SS
 587    * YYYY-dddTHH:MM:SS.mmm
 588
 589    Where YYYY is year, mm is month, dd is day of month, HH is hour, MM is minute, SS is second,
 590    mmm is millisecond and ddd is day of year. All values must be zero padded to the number of
 591    characters used above.
 592
 593    >>> xml_to_datetime('2010-03-04T06:41:00')
 594    datetime.datetime(2010, 3, 4, 6, 41)
 595
 596    >>> xml_to_datetime('2010-099')
 597    datetime.datetime(2010, 4, 9, 0, 0)
 598
 599    >>> xml_to_datetime('2013-300T12:00')
 600    datetime.datetime(2013, 10, 27, 12, 0)
 601    """
 602    if t == 'now':
 603        return datetime.utcnow()
 604
 605    if t == 'today':
 606        date = datetime.utcnow().date()
 607        return datetime(date.year, date.month, date.day)
 608
 609    if t == 'yesterday':
 610        date = datetime.utcnow().date() - timedelta(days=1)
 611        return datetime(date.year, date.month, date.day)
 612
 613    if t == 'tomorrow':
 614        date = datetime.utcnow().date() + timedelta(days=1)
 615        return datetime(date.year, date.month, date.day)
 616
 617    # pylint: disable=E1101
 618    if xml_to_datetime.matcher is None:
 619        xml_to_datetime.matcher = re.compile(  # (unused var) pylint: disable=W0612
 620            r'(?P<year>\d{4})-'
 621            r'(((?P<month>\d{2})-(?P<day>\d{2}))|'
 622            r'(?P<yday>\d{3}))'
 623            r'(T'
 624            r'(?P<hour>\d{2}):'
 625            r'(?P<min>\d{2})'
 626            r'(:'
 627            r'(?P<sec>\d{2})'
 628            r'(\.(?P<ms>\d{1,6}))?'
 629            r')?'
 630            ')?'
 631        )
 632
 633    match_obj = xml_to_datetime.matcher.search(t)
 634    if match_obj is None:
 635        raise ValueError('Cannot convert XML time "{s}" to datetime'.format(s=t))
 636
 637    # print('groups', match_obj.groups())
 638    # print('ms', match_obj.group('ms'))
 639
 640    year = int(match_obj.group('year'))
 641
 642    if match_obj.group('yday') is None:
 643        # 2013-12-01
 644        month = int(match_obj.group('month'))
 645        day = int(match_obj.group('day'))
 646        date = datetime(year, month, day)
 647
 648    else:
 649        # 2013-300
 650        doy = int(match_obj.group('yday'))
 651        date = datetime(year, 1, 1) + timedelta(days=doy - 1)
 652
 653    hour = match_obj.group('hour')
 654    if hour is None:
 655        return date
 656
 657    hour = int(hour)
 658    minute = int(match_obj.group('min'))
 659
 660    sec = match_obj.group('sec')
 661    if sec is None:
 662        sec = 0
 663
 664    else:
 665        sec = int(sec)
 666
 667    ms = match_obj.group('ms')
 668    if ms is None:
 669        us = 0
 670
 671    else:
 672        # Convert fractional seconds part to count of microseconds, taking into account
 673        # the number of significant digits the user passed in
 674        us = int(1e6 * int(ms) / pow(10, len(ms)))
 675
 676    return date.replace(hour=hour, minute=minute, second=sec, microsecond=us)
 677
 678    # else:
 679    #    print(match_obj.groups())
 680    #    # User supplied the day of year "2013-300..."
 681    #    if match_obj.group('hour') is None:
 682    #        # Only date
 683    #        return datetime.strptime(t, '%Y-%j')
 684
 685    #    elif match_obj.group('ms') is None:
 686    #        # Date + time to seconds
 687    #        print(1)
 688    #        a = datetime.strptime(t, '%Y-%jT%H:%M:%S')
 689    #        print(2)
 690    #        return datetime.strptime(t, '%Y-%jT%H:%M:%S')
 691
 692    #    else:
 693    #        # Date + time to milliseconds
 694    #        return datetime.strptime(t[:17], '%Y-%jT%H:%M:%S').replace(
 695    #            microsecond=int(match_obj.group('ms')) * 1000)
 696
 697
 698xml_to_datetime.matcher = None
 699
 700
 701def timedelta_to_xml(td):
 702    """Convert a Python timedelta to an ISO formatted duration string.
 703
 704    >>> timedelta_to_xml(timedelta(days=7, minutes=3, seconds=5, microseconds=12345))
 705    'P7DT3M5.012345S'
 706
 707    >>> timedelta_to_xml(timedelta(hours=3, minutes=20))
 708    'PT3H20M'
 709    """
 710    # logger.debug('raw days {d} seconds {s} microseconds {u}'.format(
 711    # d=td.days, s=td.seconds, u=td.seconds))
 712    days = td.days
 713    seconds = td.seconds % 60
 714    minutes = (td.seconds // 60) % 60
 715    hours = td.seconds // 3600
 716    us = td.microseconds
 717    # logger.debug('interpret ' + str(days) + ' ' + str(seconds) + ' ' +
 718    # str(minutes) + ' ' + str(hours) + ' ' + str(us))
 719    if td.seconds == 0 and td.microseconds == 0:
 720        # exact number of days
 721        if days == -1 or days == 0:
 722            return 'PT0S'
 723
 724        s = ''
 725
 726    else:
 727        if us != 0:
 728            s = '{s:.6f}S'.format(s=seconds + us / 1000000.0)
 729
 730        elif seconds != 0:
 731            s = '{s}S'.format(s=seconds)
 732
 733        else:
 734            s = ''
 735
 736    # allows negative ISO 8601 format
 737    return (
 738        'P'
 739        + ((str(days) + 'D') if days != 0 else '')
 740        + ('T' if td.seconds > 0 or td.microseconds > 0 else '')
 741        + (str(hours) + 'H' if hours > 0 else '')
 742        + (str(minutes) + 'M' if minutes > 0 else '')
 743        + s
 744    )
 745
 746
 747def xml_to_timedelta(t):
 748    """Convert XML period (PnYnMnDTnHnMnS) to datetime.timedelta."""
 749    if t == '0':
 750        return timedelta()
 751
 752    if xml_to_timedelta.matcher is None:
 753        xml_to_timedelta.matcher = re.compile(    # (unused var) pylint: disable=W0612
 754            r'^(?P<minus>-)?P((?P<day>\d+)D)?(T((?P<hour>\d+)H)?'
 755            r'((?P<minute>\d+)M)?((?P<sec>[0-9.]+)S)?)?$'
 756        )
 757
 758    res = xml_to_timedelta.matcher.search(t)
 759    if res is None:
 760        raise ValueError('Could not convert {t} to time interval'.format(t=t))
 761
 762    res_dict = res.groupdict()
 763    # valid = False
 764
 765    # for k, v in res_dict.iteritems():
 766    #    if v is None:
 767    #        res_dict[k] = 0
 768    #    else:
 769    #        valid = True
 770
 771    # if valid == False:
 772    #    raise ValueError("Could not parse timedelta '"+t+"'")
 773
 774    if res_dict['day'] is None:
 775        day = 0
 776
 777    else:
 778        day = int(res_dict['day'])
 779
 780    if res_dict['hour'] is None:
 781        hour = 0
 782
 783    else:
 784        hour = int(res_dict['hour'])
 785
 786    if res_dict['minute'] is None:
 787        minute = 0
 788
 789    else:
 790        minute = int(res_dict['minute'])
 791
 792    ssec = res_dict.get('sec')
 793    if ssec is None:
 794        # no seconds specified
 795        sec = 0
 796        us = 0
 797
 798    else:
 799        # look for fractional seconds
 800        parts = ssec.split('.')
 801        if len(parts) == 1:
 802            # no, just whole seconds
 803            sec = int(ssec)
 804            us = 0
 805
 806        else:
 807            # partial seconds present
 808            sec = int(parts[0])
 809            us = float('.' + parts[1]) * 1000000
 810            # if parts[0][0] == '-':
 811            # us = -us
 812
 813    seconds = sec + minute * 60 + hour * 3600 + day * 86400
 814    res = timedelta(seconds=seconds, microseconds=us)
 815    if res_dict['minus'] is not None:
 816        res = -res
 817
 818    return res
 819
 820
 821xml_to_timedelta.matcher = None
 822
 823
 824class ParseException(Exception):
 825    """XML parsing exception."""
 826
 827    def __init__(self, mess):
 828        super(ParseException, self).__init__()
 829        self.mess = mess
 830
 831    def __str__(self):
 832        return repr(self.mess)
 833
 834
 835class ParseExceptionOneExpected(ParseException):
 836    """XML parsing exception where one child is expected."""
 837
 838    def __init__(self, node, cc, name):
 839        super().__init__(f'{node} has {cc} child nodes of type "{name}", expecting 1')
 840
 841
 842class ParseExceptionNoChild(ParseException):
 843    """XML parsing exception where one child is expected."""
 844
 845    def __init__(self, node, name):
 846        super().__init__(f'{node} has no child nodes of required type "{name}"')
 847
 848
 849def xml_to_date(s):
 850    """Convert a string containing either an ISO format date.
 851
 852    Input is either without time) or a date in format YYYY-MMM to a datetime object.
 853    """
 854    if not isinstance(s, str):
 855        raise ParseException('Input to xml_to_date must be a string')
 856
 857    if len(s) == 10:  # 2009-04-01
 858        return datetime.strptime(s, '%Y-%m-%d')
 859
 860    if len(s) == 8:  # 2009-100
 861        return datetime.strptime(s, '%Y-%j')
 862
 863    raise ParseException(
 864        'Format of date should be either YYYY-MM-DD (year/month/day of month) '
 865        'or YYYY-DDD (year/day of year'
 866    )
 867
 868
 869def describe_node(node):
 870    """Pretty print an ElementTree node."""
 871    return 'Element {tag} at {file}:{line}'.format(
 872        tag=node.tag, file=node.getroottree().docinfo.URL, line=node.sourceline
 873    )
 874
 875
# Unique marker object used to indicate a result has not yet been found
INVALID = object()

# Unique marker object used to indicate we have no default value
# (so that None can still be passed as a real default)
NO_DEFAULT = object()
 881
 882
 883def parsestr(elem):
 884    """Return the test content of `elem` as a unicode object.
 885
 886    Leading and trailing whitespaces are removed and a string is always returned
 887    even if empty.
 888    """
 889    raw = elem.text
 890    if raw is None:
 891        # `name` exists but contains no text
 892        result = ''
 893
 894    else:
 895        result = str(raw).strip()
 896
 897    return result
 898
 899
 900def parsechildstr(
 901    parent,
 902    name,
 903    default=NO_DEFAULT,
 904    expand_env=False,
 905    expand_settings=False,
 906    expand_user=False,
 907):
 908    """Return the text content of a child `name` of XML node `parent`.
 909
 910    Allows optional default `value`.
 911
 912    An exception is raised if `name` does not exist and no default is specified.
 913    An exception is also raised if there are multiple child nodes called `name`.
 914    If `expand_env` is True then references to $ENVIRONMENT_VARIABLES will be expanded.
 915    If `expand_settings` is True then references to {settings.VARIABLE} will be expanded.
 916    If `expand_user` is True then references to ~username will be expanded to that
 917    user's home directory.
 918
 919    TBD: see if we can use lxml findtext for speed.
 920    """
 921    result = INVALID
 922    for child in parent.iterchildren(name):
 923        if result is not INVALID:
 924            raise ParseException(
 925                '{parent} has multiple child nodes of type "{name}", '
 926                'expecting 1'.format(parent=describe_node(parent), name=name)
 927            )
 928
 929        raw = child.text
 930        if raw is None:
 931            # `name` exists but contains no text
 932            result = ''
 933
 934        else:
 935            result = str(raw).strip()
 936            if expand_env:
 937                result = os.path.expandvars(result)
 938
 939            if expand_settings:
 940                # careful import settings from here as settings.py import xml.py indirectly
 941                from chart.project import settings
 942
 943                result = result.format(settings=settings)
 944
 945            if expand_user:
 946                result = os.path.expanduser(result)
 947
 948    if result is INVALID:
 949        if default == NO_DEFAULT:
 950            raise ParseException(
 951                '{parent} has no child nodes of required name "{name}"'.format(
 952                    parent=describe_node(parent), name=name
 953                )
 954            )
 955
 956        return default
 957
 958    return result
 959
 960
 961def parsechildint(parent, name, default=type):
 962    """Return the text content if child `name` of parent node `parent`, converted to an integer.
 963
 964    If `child` is not present, or if present more than once, or if the content is not
 965    an integer, ParseException is raised unless `default` is given.
 966    """
 967    if parent is None:
 968        raise Exception('parsechildint called with null parent')
 969
 970    childs = parent.findall(name)
 971    if len(childs) > 1:
 972        raise ParseException(
 973            '{parent} has {childs} child nodes of type {type}, expecting 1"'.format(
 974                parent=describe_node(parent), childs=len(childs), type=name
 975            )
 976        )
 977
 978    if len(childs) == 0:
 979        if default == type:
 980            raise ParseException(
 981                '{parent} has no child nodes of required type {name}'.format(
 982                    parent=describe_node(parent), name=name
 983                )
 984            )
 985
 986        return default
 987
 988    s = childs[0].text
 989    try:
 990        return int(s)
 991    except ValueError:
 992        raise ParseException(
 993            '{node} is not an integer found {s}'.format(
 994                node=describe_node(childs[0]), s=s
 995            )
 996        )
 997
 998
 999def parsechildfloat(parent, name, default=type, allow_eval=False):
1000    """Return the text content if child `name` of parent node `parent`, converted to a float.
1001
1002    If `child` is not present, or if present more than once, or if the content is not
1003    a float, ParseException is raised unless `default` is given.
1004    """
1005    childs = parent.findall(name)
1006    if len(childs) > 1:
1007        raise ParseException(
1008            '{parent} has {count} child nodes of type {name}, expecting 1'.format(
1009                count=len(childs), parent=describe_node(parent), name=len(childs)
1010            )
1011        )
1012
1013    if len(childs) == 0:
1014        if default == type:
1015            raise ParseException(
1016                '{parent} has no child nodes of required type {name}'.format(
1017                    parent=describe_node(parent), name=name
1018                )
1019            )
1020
1021        return default
1022
1023    s = childs[0].text
1024
1025    if s is None:
1026        if default != type:
1027            return default
1028
1029        raise ParseException(
1030            '{node}: Cannot interpret empty node {name} as a float'.format(
1031                node=describe_node(parent), name=name
1032            )
1033        )
1034
1035    try:
1036        return float(s)
1037    except ValueError:
1038        if allow_eval:
1039            try:
1040                return float(eval(s))  # (use of eval) pylint: disable=W0123
1041            except Exception:
1042                raise ParseException(
1043                    '{node} could not be evaluated (found {s})'.format(
1044                        node=describe_node(childs[0]), s=s
1045                    )
1046                )
1047
1048        raise ParseException(
1049            '{node} is not numeric (found {s})'.format(
1050                node=describe_node(childs[0]), s=s
1051            )
1052        )
1053
1054
def parsechilddatetime(parent, name, default=type):
    """Return the text content of child `name` of node `parent` as a Python datetime.

    The text is converted with `xml_to_datetime`.
    `default` uses the builtin `type` as its "no default given" marker.

    If the child is missing or is an empty element then `default` is returned, or
    ParseExceptionNoChild is raised if no `default` was given.
    ParseExceptionOneExpected is raised if the child is present more than once.
    ParseException is raised if the text cannot be converted.
    """
    childs = parent.findall(name)
    if len(childs) > 1:
        raise ParseExceptionOneExpected(
            node=describe_node(parent), cc=len(childs), name=name
        )

    # a missing child and an empty child are handled identically
    s = childs[0].text if childs else None
    if s is None:
        # Identity test so a default with a permissive __eq__ is never mistaken
        # for the "no default" marker.
        if default is type:
            raise ParseExceptionNoChild(node=describe_node(parent), name=name)

        return default

    try:
        return xml_to_datetime(s)
    except Exception:
        # any conversion failure is reported as a parse error naming the node
        raise ParseException(
            '{node} is not a valid XML date (found {s})'.format(
                node=describe_node(childs[0]), s=s
            )
        )
1091
1092
def parsechildtimedelta(parent, name, default=type):
    """Return the text content of child `name` of node `parent` as a Python timedelta.

    The text is converted with `xml_to_timedelta`.
    `default` uses the builtin `type` as its "no default given" marker.

    If the child is missing or is an empty element then `default` is returned, or
    ParseException is raised if no `default` was given.
    ParseException is always raised if the child is present more than once or if
    `xml_to_timedelta` rejects the text with ValueError.
    """
    childs = parent.findall(name)
    if len(childs) > 1:
        raise ParseException(
            '{parent} has {actual} child nodes of type {type}, expecting 1'.format(
                parent=describe_node(parent), actual=len(childs), type=name
            )
        )

    if not childs:
        if default is type:
            raise ParseException(
                '{parent} has no child nodes of required type {name}'.format(
                    parent=describe_node(parent), name=name
                )
            )

        return default

    s = childs[0].text
    if s is None:
        # Empty element: do not hand None to the converter, behave like the
        # sibling parsechild* functions instead.
        if default is type:
            raise ParseException(
                '{node} does not contain a valid XML duration (found "{act}")'.format(
                    node=describe_node(childs[0]), act=s
                )
            )

        return default

    try:
        return xml_to_timedelta(s)
    except ValueError:
        raise ParseException(
            '{node} does not contain a valid XML duration (found "{act}")'.format(
                node=describe_node(childs[0]), act=s
            )
        )
1128
1129
def parsechildbool(parent, name, default=type, env_expand=False):
    """Return the text content of child `name` of node `parent`, converted to a boolean.

    `default` uses the builtin `type` as its "no default given" marker.

    If the child is missing or is an empty element then `default` is returned, or
    ParseExceptionNoChild is raised if no `default` was given.
    ParseExceptionOneExpected is raised if the child is present more than once.
    ParseException is raised if the text is not a recognised boolean.

    If `env_expand` is set, $ENVIRONMENT_VARIABLES in the text are expanded first.
    Whitespace around the text is ignored.
    Boolean false is returned if the 1st letter of the node text is F, f or 0.
    Boolean true is returned if the 1st letter of the node text is T, t or 1.
    """
    childs = parent.findall(name)
    if len(childs) > 1:
        raise ParseExceptionOneExpected(
            node=describe_node(parent), cc=len(childs), name=name
        )

    # a missing child and an empty child are handled identically
    s = childs[0].text if childs else None
    if s is None:
        if default is type:
            raise ParseExceptionNoChild(node=describe_node(parent), name=name)

        return default

    if env_expand:
        s = os.path.expandvars(s)

    # Pretty-printed XML may surround the value with whitespace or newlines.
    flag = s.strip()
    if flag.startswith(('f', 'F', '0')):
        return False

    if flag.startswith(('t', 'T', '1')):
        return True

    raise ParseException(
        '{node} is not a recognised boolean (found {s})'.format(
            node=describe_node(childs[0]), s=s
        )
    )
1171
1172
def parsechildstringlist(parent, name, childname, default=NO_DEFAULT):
    """Return the stripped text of every `childname` element inside child `name` of `parent`.

    Empty `childname` elements contribute an empty string.

    If `parent` has no child called `name` then `default` is returned, or
    ParseException is raised if no `default` was given.
    ParseExceptionOneExpected is raised if `name` is present more than once.

    >>> parsechildstringlist(parse_xml('<a><b><c>e</c><c>f</c></b></a>'), 'b', 'c')
    ['e', 'f']
    """
    childs = parent.findall(name)
    if not childs:
        if default is NO_DEFAULT:
            raise ParseException(
                '{node} has no children of required type {name}'.format(
                    node=describe_node(parent), name=name
                )
            )

        return default

    if len(childs) > 1:
        raise ParseExceptionOneExpected(
            node=describe_node(parent), cc=len(childs), name=name
        )

    # `text` is None for an empty element; treat that as an empty string instead
    # of failing with AttributeError
    return [(child.text or '').strip() for child in childs[0].findall(childname)]
1200
1201
def parsechildstrs(parent_elem,
                   name,
                   default=[],    # (dangerous default) pylint: disable=W0102
                   expand_settings=False):
    """Return the stripped text of every child `name` of `parent_elem` as a list.

    Empty child elements contribute an empty string.

    If there are no children called `name` then `default` is returned as given,
    without any expansion. A list default is copied so the caller never shares
    the default object. Passing the builtin `type` as `default` means "no
    default" and gives an empty list.

    If `expand_settings` is True then references to {settings.VARIABLE} in each
    string will be expanded using `str.format`.

    >>> parsechildstrs(parse_xml('<a><b>e</b><b>f</b></a>'), 'b')
    ['e', 'f']
    """
    # `text` is None for an empty element; treat that as an empty string
    res = [(child_elem.text or '').strip() for child_elem in parent_elem.findall(name)]

    if not res:
        if default is type:
            return []

        # hand out a copy of list defaults so the shared default is never mutated
        return list(default) if isinstance(default, list) else default

    if expand_settings:
        # imported here because settings.py imports this module indirectly
        from chart.project import settings
        res = [r.format(settings=settings) for r in res]

    return res
1223
1224
def parsechildfloats(parent_elem, name):
    """Return the text of every child `name` of `parent_elem` converted to float.

    The values are returned as a list, in the order `findall` yields the children.

    >>> node = parse_xml('<a><b>1</b><b>2</b></a>')
    >>> parsechildfloats(node, 'b')
    [1.0, 2.0]
    """
    return [float(match.text.strip()) for match in parent_elem.findall(name)]
1237
1238
def is_xml_comment(elem):
    """Return True if `elem` is an XML comment node rather than a regular element."""
    comment_class = etree._Comment
    return isinstance(elem, comment_class)
1242
1243
def mixed_decoder(err):
    """Allow binary containing latin-1 symbols to be decoded with the utf-8 codec.

    This has the shape of a codec error handler (see `codecs.register_error`):
    `err` is the UnicodeDecodeError describing the undecodable bytes and the
    result is a tuple of (replacement text, position to resume decoding at).

    Only the bytes listed in REPLACEMENT are translated. For anything else the
    original error `err` is re-raised, so unexpected bytes still fail decoding
    with a meaningful UnicodeDecodeError.
    """
    REPLACEMENT = {
        b'\xb0': '\u00b0',    # degrees symbol
        b'\xb1': '\u00b1',    # plusminus symbol
    }

    bad = err.object[err.start : err.end]

    replacement = None
    if isinstance(bad, (bytes, bytearray)):
        # bytes() makes a bytearray slice hashable for the lookup
        replacement = REPLACEMENT.get(bytes(bad))

    if replacement is None:
        # A handler must return a (str, int) tuple or raise; there is no sensible
        # replacement here so report the original decoding problem.
        raise err

    return replacement, err.end
1261
1262
def main():
    """Command line entry point.

    Small conversion tool for checking XML time formats:

    --time / -t       parse an XML datetime and show it with its day of year
    --timedelta / --dt  parse an XML duration and show it with its components
    --minus           negate the --timedelta value (ignored without --timedelta)
    """
    # imported locally so the module can be used without the argument parser
    from chart.common.args import ArgumentParser

    parser = ArgumentParser()
    parser.add_argument('--time', '-t', type=xml_to_datetime)
    # parser.add_argument('--timedelta', '--dt', type=ArgumentParser.timedelta)
    # kept as raw text so that --minus can prefix a sign before conversion
    parser.add_argument('--timedelta', '--dt')
    parser.add_argument('--minus', action='store_true')

    args = parser.parse_args()

    # --minus on its own would otherwise fail on '-' + None
    if args.minus and args.timedelta:
        args.timedelta = '-' + args.timedelta

    if args.time:
        print('Datetime    : ', args.time)
        print('Day of year : ', args.time.timetuple().tm_yday)

    if args.timedelta:
        td = xml_to_timedelta(args.timedelta)
        print('Timedelta   : ', td)
        print(td.days, td.seconds, td.microseconds)
1286
1287
# run the command line tool only when executed as a script, not when imported
if __name__ == '__main__':
    main()