1#!/usr/bin/env python3
2
3"""Utility functions for reading and writing XML files."""
4
5import os
6import re
7import sys
8import logging
9import subprocess
10from datetime import datetime
11from datetime import timedelta
12
13from lxml import etree
14
15# to allow client code to use 'from chart.common.xml import Element'
16from lxml.etree import Element # (unused import) pylint: disable=W0611,E0611
17from lxml.etree import SubElement # (unused import) pylint: disable=W0611,E0611
18from lxml.etree import _Element
19
20from chart.common.path import Path
21from chart.common.resource import Resource
22
# Module-level logger. getLogger() called without a name returns the root logger.
logger = logging.getLogger()

# Filename extension of XML files, including the leading dot
EXT_XML = '.xml'
26
27
class XMLException(Exception):
    """Base class for XML specific exceptions.

    Derives from Exception rather than BaseException so that generic
    ``except Exception`` handlers see XML failures like any other application error.
    BaseException is meant for interpreter-level events such as KeyboardInterrupt.
    """
32
33
class XMLSyntaxError(XMLException):
    """Raised for a basic parsing error found in an XML file.

    The location of the problem is kept in the `filename`, `line` and `column`
    attributes and the description in `message`. str() renders them as
    ``filename:line:column: message``.
    """

    def __init__(self, filename, message, line=None, column=None):
        super().__init__()
        self.filename, self.message = filename, message
        self.line, self.column = line, column

    def __str__(self):
        template = '{filename}:{line}:{column}: {message}'
        return template.format(
            filename=self.filename,
            line=self.line,
            column=self.column,
            message=self.message,
        )
51
52
class XMLElementError(XMLException):
    """Generic problem parsing an XML element."""
57
58
class XMLElementNotFound(XMLElementError):
    """A required XML element is missing."""
63
64
class XMLElement:
    """XML file handling class, based on lxml and enforcing CHART project standards.

    Goals:

    - Lightweight
    - Similar interface to lxml
    - Do not support attributes except for setting schema
    - (maybe in future) Explicitly block mixed mode nodes
    - (maybe in future) Enforce node naming standards
      (and even enforce use of keywords for tag names?)
    - Use project standards for encoding datatypes: datetime, timedelta, bool, hexcolour
    - Operate closely with Trait class

    The wrapped lxml element is available as the `elem` attribute.

    Several `parse_*` methods take ``default=type``: the built-in `type` is used as a
    sentinel meaning "no default supplied", so that None can be passed as a real default.
    """

    @staticmethod
    def qname(namespace, tag):
        """Return the text of an lxml QName built from `namespace` and `tag`.

        If `namespace` is None the QName is built from `tag` alone.
        """
        if namespace is None:
            return etree.QName(tag).text

        return etree.QName(namespace, tag).text

    def __init__(
        self,
        elem: _Element = None,
        tag: str = None,
        text: str = None,
        filename: Path = None,
        xinclude: bool = False,
        from_text: str = None,
    ):
        """Constructor.

        The first of `elem`, `tag`, `filename` and `from_text` that is not None decides
        how the node is built; the remaining sources are ignored.

        Args:
            `elem`: Construct a node from an lxml Element object
            `tag`: Create a new node with given <tag> name
            `text`: Insert given string as the body of the new element
            `filename`: Load and parse XML from file
            `xinclude`: If reading from file, apply xinclude instructions as a post-processing step
            `from_text`: Construct a node by parsing a string containing raw XML

        Raises:
            ValueError: Cannot interpret any input
        """
        if elem is not None:
            assert isinstance(elem, _Element)
            self.elem = elem

        elif tag is not None:
            self.elem = Element(tag)

        elif filename is not None:
            self.elem = load_xml(filename, xinclude=xinclude)

        elif from_text is not None:
            self.elem = etree.fromstring(from_text)

        else:
            raise ValueError(
                'Must construct XMLElement with an lxml Element object, '
                'node tag name, or filename'
            )

        if text is not None:
            # imported here rather than at module level
            # NOTE(review): presumably to avoid a circular import - confirm
            from chart.common.traits import to_str
            self.elem.text = to_str(text)

    def add(self, tag=None, text=None, node=None):
        """Create or add a new child node to ourselves.

        If `node` is an XMLElement or an lxml element, append it as our child; `tag`
        and `text` are not used in that case.

        Otherwise create a new child called `tag`, with `text` as its content if
        non-None.

        Returns the XMLElement of the child (`node` itself if it already was one).
        """
        if isinstance(node, XMLElement):
            self.elem.append(node.elem)
            return node

        if isinstance(node, _Element):
            # raw lxml nodes are appended as they are and wrapped for the caller
            self.elem.append(node)
            return XMLElement(elem=node)

        res = XMLElement(tag=tag, text=text)
        self.elem.append(res.elem)
        return res

    def set(self, tag=None, text=None):
        """Modify our text (if tag is None) or change an existing child element value.

        Raises:
            XMLElementNotFound: `tag` was given but we have no such child
            XMLElementError: `tag` matches more than one child
        """
        if tag is None:
            from chart.common.traits import to_str
            self.elem.text = to_str(text)
            return

        child_elem = self.find(tag)
        if child_elem is None:
            # find() reports a missing child as None; fail with a meaningful error
            # instead of an AttributeError on None
            raise XMLElementNotFound('Cannot find {n}'.format(n=tag))

        child_elem.set(text=text)

    def find(self, childname=None):
        """Return child element matching name, or None if not found.

        Raise an XMLElementError if multiple cases found.
        """
        childs = self.elem.findall(childname)
        if len(childs) == 0:
            return None

        if len(childs) > 1:
            raise XMLElementError(
                'Found multiple matches for {name}'.format(name=childname)
            )

        return XMLElement(elem=childs[0])

    def findall(self, childname=None):
        """Yield all child elements with matching name.

        If `childname` is None every child element is yielded.
        """
        if childname is None:
            # Iterate through child elements (not text or comments nodes)
            for child in self.elem.iterchildren(etree.Element):
                yield XMLElement(elem=child)

        else:
            for child in self.elem.findall(childname):
                yield XMLElement(elem=child)

    def set_tag(self, new_tag):
        """Change our element tag name."""
        self.elem.tag = new_tag

    def get_tag(self):
        """Read our element tag name."""
        return self.elem.tag

    tag = property(get_tag, set_tag)

    @property
    def text(self):
        """Raw text of our element, as stored by lxml (None if there is none)."""
        return self.elem.text

    def parse_str(self, name, default=type, expand_settings=None):
        """Read text of child node `name`, or `default` if not present.

        Only the first child called `name` is examined and its text is returned
        unstripped. If `expand_settings` is true the text is passed through
        str.format() with the project settings available as ``{settings...}``.

        Raises:
            XMLElementNotFound: No such child and no `default` given
        """
        # The slower parsechildstr() also rejects duplicated nodes, which could be
        # useful for validation
        node = self.elem.find(name)
        if node is None:
            if default is type:
                raise XMLElementNotFound(
                    f'Cannot find expected element "{name}" in file {self.elem.base} '
                    f'in line {self.elem.sourceline}'
                )

            return default

        result = node.text

        if expand_settings:
            from chart.project import settings
            result = result.format(settings=settings)

        return result

    def parse_int(self, name, default=type):
        """Read integer value of child node `name`.

        Raises:
            XMLElementNotFound: No such child and no `default` given
        """
        # find().text is used as it is faster than an xpath 'name/text()' lookup
        node = self.elem.find(name)
        if node is None:
            if default is type:
                raise XMLElementNotFound('Cannot find {n}'.format(n=name))

            return default

        return int(node.text)

    def parse_ints(self, name):
        """Return a list of integer contents of all `name` childs of ourselves."""
        return [int(child_elem.text.strip()) for child_elem in self.elem.findall(name)]

    def parse_float(self, name, default=type):
        """Read floating point value of child node `name` (see parsechildfloat)."""
        return parsechildfloat(self.elem, name, default=default)

    def parse_floats(self, name):
        """Read the values of all child nodes `name` via parsechildfloats."""
        return parsechildfloats(self.elem, name)

    def parse_numeric(self, name, default=type):
        """Return numerical value of `name`.

        If the text contains a '.', return a float.
        Otherwise return an int, honouring base prefixes such as '0x10'.

        Raises:
            XMLElementNotFound: No such child and no `default` given
        """
        node = self.elem.find(name)
        if node is None:
            if default is type:
                raise XMLElementNotFound('Cannot find {n}'.format(n=name))

            return default

        text = node.text

        if '.' in text:
            return float(text)

        # base 0 lets int() pick the base from the literal's prefix
        return int(text, 0)

    def parse_bool(self, name, default=type):
        """Return content of our child `name` element as a boolean.

        Delegates to parsechildbool: if the child is not present then `default` is
        returned if specified, otherwise a ParseException is raised.
        """
        return parsechildbool(self.elem, name, default)

    def parse_timedelta(self, name, default=type):
        """Return content of our child `name` element as a timedelta in ISO 8601 format.

        Delegates to parsechildtimedelta: if the child is not present then `default` is
        returned if specified, otherwise a ParseException is raised.
        """
        return parsechildtimedelta(self.elem, name, default)

    def parse_datetime(self, name, default=type):
        """Return content of our child `name` element as a datetime in ISO 8601 format.

        A few special keywords are allowed (see xml_to_datetime).

        Delegates to parsechilddatetime: if the child is not present then `default` is
        returned if specified, otherwise a ParseException is raised.
        """
        return parsechilddatetime(self.elem, name, default)

    def parse_strs(self, name):
        """Return a list of the content of all child nodes called `name`."""
        return parsechildstrs(self.elem, name)

    def write(self, output, pretty_print=True, report=False):
        """Convert ourselves to text.

        If `output` is sys.stdout, write the text to it.
        Otherwise assume it's a filename and write there using write_xml().
        """
        # optionally, set schema via automatic detection
        if output is sys.stdout:
            output.write(xml_to_str(self.elem, pretty_print=pretty_print))

        else:
            write_xml(self.elem, output, pretty_print=pretty_print, report=report)

    def filename(self):
        """Return the URL lxml recorded for our document, wrapped as a Path."""
        return Path(self.elem.getroottree().docinfo.URL)

    def parent(self):
        """Return our parent node, or None if we have no parent."""
        result = self.elem.getparent()
        if result is None:
            return None

        return XMLElement(result)

    def set_schema(self, schema=None):
        """Change the XML schema for the document.

        Sets the xsi:noNamespaceSchemaLocation attribute of our element to the
        project schema URL for `schema`.
        """
        assert schema is not None, 'Schema auto detection not implemented'
        self.elem.set(
            '{http://www.w3.org/2001/XMLSchema-instance}noNamespaceSchemaLocation',
            'http://chart/schemas/{schema}.xsd'.format(schema=schema),
        )

    # def xpathraw(self, path):
    #     """Compatibility function to quickly get some older code working.

    #     Should be returning XMLElement instances instead.

    #     Or avoid xpath completely because it's very rarely actually used properly."""
    #     return self.elem.xpath(path)

    # write-only property
    schema = property(None, set_schema)

    def __repr__(self):
        if self.elem is not None:
            # len() of an lxml element is its number of direct children
            return f'XmlElement("{self.elem.tag}", {len(self.elem)} children)'

        return 'XmlElement()'

    def to_str(self) -> str:
        """Render ourselves as a nice string."""
        return xml_to_str(self.elem, pretty_print=True)
364
365
def load_xml(path_or_buffer, xinclude=False):
    """Read XML with lxml and return the root node.

    `path_or_buffer` may be a Path object, a Resource object, or anything lxml's
    parse() accepts directly (for example a filename string or a file buffer).
    If `xinclude` is True then any xinclude expressions will be expanded.

    Raises:
        XMLSyntaxError: The content is not well formed, or a Resource was empty.
            When reading from a path, line and column are extracted from the lxml
            message if it has the expected three comma separated parts.
        FileNotFoundError: A Path was given which is not an existing file
        IOError: Any other failure to read the input
    """
    # this function should be rewritten as separate buffer, Path and Resource loaders

    # Keep the caller's object: lxml is handed a plain string for Path inputs, but
    # the original is needed to call is_file() if reading fails
    orig_path_or_buffer = path_or_buffer
    if isinstance(path_or_buffer, Path):
        path_or_buffer = str(path_or_buffer)

    is_resource = isinstance(path_or_buffer, Resource)

    try:
        if is_resource:
            buf = path_or_buffer.read()
            if len(buf) == 0:
                raise XMLSyntaxError(
                    filename=path_or_buffer, message='Zero length file'
                )

            # fromstring() gives the root element directly, not an ElementTree
            tree = etree.fromstring(buf)

        else:
            tree = etree.parse(path_or_buffer)

    except etree.XMLSyntaxError as e:
        if not isinstance(path_or_buffer, str):
            raise XMLSyntaxError(filename=None, message=e) from e

        # if we were called with a filename, parse the syntax error returned by
        # lxml into our own XMLSyntaxError class for reporting / analysis
        # by client code
        parts = str(e).split(',')
        if len(parts) == 3:
            msg = parts[0]
            line = parts[1].strip().partition(' ')[2]
            column = parts[2].strip().partition(' ')[2]
            raise XMLSyntaxError(
                filename=path_or_buffer, message=msg, line=line, column=column
            ) from e

        raise XMLSyntaxError(filename=path_or_buffer, message=e) from e

    except IOError as e:
        if hasattr(orig_path_or_buffer, 'is_file'):
            if not orig_path_or_buffer.is_file():
                # lxml returns a really weird error if the file is not found
                raise FileNotFoundError(
                    '{path} does not exist'.format(path=orig_path_or_buffer)
                ) from e

            raise

        raise IOError('Cannot read XML file {p}'.format(p=path_or_buffer)) from e

    if xinclude:
        if is_resource:
            # elements have no xinclude() method; go through the owning tree
            tree.getroottree().xinclude()

        else:
            tree.xinclude()

    if is_resource:
        return tree

    return tree.getroot()
439
440
def xml_filename(elem):
    """Return, as a Path, the URL lxml recorded for the document containing `elem`."""
    document = elem.getroottree()
    return Path(document.docinfo.URL)
444
445
def parse_xml(instr):
    """Parse the XML text `instr` and return its root element.

    Raises:
        ValueError: The parser rejected the input with a ValueError; it is re-raised
            with the offending input in the message and the original error as cause.
    """
    try:
        return etree.fromstring(instr)
    except ValueError as e:
        raise ValueError(
            'Cannot parse {instr} ({e})'.format(instr=instr, e=e)
        ) from e
452
453
def write_xml(root, filename, pretty_print=False, report=False):
    """Write the XML tree rooted at `root` to `filename` as UTF-8.

    Args:
        `root`: lxml element to use as the document root
        `filename`: Destination; str() of it is passed to lxml and xmllint, so both
            plain strings and path objects are accepted
        `pretty_print`: Ask lxml to indent the output, then reformat the written file
            in place with the external `xmllint` tool using tab indentation
        `report`: Log the size of the written file at INFO level

    Raises:
        IOError: No write permission, or lxml could not serialise to the file
    """
    target = str(filename)

    try:
        etree.ElementTree(root).write(
            target, pretty_print=pretty_print, encoding='utf-8'
        )
    except PermissionError as e:
        raise IOError('No write permission for {path}'.format(path=filename)) from e
    except etree.SerialisationError as e:
        raise IOError('Could not write to {path}'.format(path=filename)) from e

    if pretty_print:
        # xmllint takes its indentation string from this environment variable.
        # NOTE: this changes the environment of the whole process, not just xmllint.
        os.environ['XMLLINT_INDENT'] = '\t'
        subprocess.call(('xmllint', '--format', target, '-o', target))

    if report:
        # os.path.getsize works for plain string filenames as well as path objects
        logging.info(
            'Wrote {sz} to {fn}'.format(sz=os.path.getsize(target), fn=filename)
        )
473
474
def to_html(elem):
    """Convert XML `elem` to colourised HTML.

    Returns the string 'None' if `elem` is None. Otherwise the element is pretty
    printed, the xsi namespace declaration is removed, and the text is highlighted
    by pygments using inline styles.
    """
    if elem is None:
        return 'None'

    # pygments is imported only once we know there is something to render
    from pygments import highlight

    # (for some reason pylint cannot find the HtmlFormatter class)
    from pygments.formatters import HtmlFormatter  # pylint: disable=E0611
    from pygments.lexers import get_lexer_by_name

    # encoding=str makes lxml return text instead of bytes
    plain_elem = etree.tostring(elem, pretty_print=True, encoding=str).replace(
        ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"', ''
    )

    return highlight(
        '\t' + plain_elem, get_lexer_by_name('xml'), HtmlFormatter(noclasses=True)
    )
502
503
def xml_to_str(elem, one_liner=False, pretty_print=False, xmlcharref=False):
    """Convert `elem` to a string.

    If `one_liner` is set, output will be a compact single line representation:
    the xsi namespace declaration is dropped, every line is stripped of surrounding
    whitespace and the lines are joined together.
    If `pretty_print` is set we attempt to format the output nicely
    (doesn't currently work with Manifest files where we have added history items).
    If `xmlcharref` is set the element is serialised as text.
    NOTE: non-ascii characters are currently NOT converted to character references
    in that mode; it is not known whether anything depends on the conversion.

    `one_liner` takes precedence over `xmlcharref`, and `pretty_print` is only
    honoured when neither of them is set.
    """
    if one_liner:
        # encoding=str makes lxml return text, so the str operations below are valid
        text = etree.tostring(elem, encoding=str)
        # remove pointless namespace declaration
        text = text.replace(
            ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"', ''
        )
        # remove newlines and the whitespace around each line
        return ''.join(line.strip() for line in text.splitlines())

    if xmlcharref:
        return etree.tostring(elem, encoding=str)

    return etree.tostring(elem, pretty_print=pretty_print, encoding='utf-8').decode(
        'utf-8'
    )
543
544
545# def write_xml(root, filename, pretty_print=False):
546# from lxml import etree
547# from xml.dom import minidom
548# if pretty_print:
549# rough_string = etree.ElementTree.tostring(root, 'utf-8')
550# reparsed = minidom.parseString(rough_string)
551# return reparsed.toprettyxml()
552
553# else:
554# etree.ElementTree(root).write(filename)
555
556
def datetime_to_xml(dt, include_ms=False, include_us=False):
    """Format a Python datetime as an ISO 8601 timestamp.

    Without options the result has whole seconds and a trailing 'Z'.
    With `include_ms` a 3 digit millisecond fraction is appended instead, and with
    `include_us` a 6 digit microsecond fraction; neither fractional form carries
    the 'Z' suffix. `include_ms` wins if both are set.
    """
    if not (include_ms or include_us):
        return dt.strftime('%Y-%m-%dT%H:%M:%SZ')

    whole_seconds = dt.strftime('%Y-%m-%dT%H:%M:%S.')
    if include_ms:
        fraction = '{s:03}'.format(s=dt.microsecond // 1000)
    else:
        fraction = '{s:06}'.format(s=dt.microsecond)

    return whole_seconds + fraction
571
572
def date_to_xml(d):
    """Format a date-like object (anything with strftime) as an ISO 8601 date."""
    iso_date_layout = '%Y-%m-%d'
    return d.strftime(iso_date_layout)
576
577
def xml_to_datetime(t):
    """Convert an ISO 8601 style time string to a datetime object.

    Possible formats are:
    * YYYY-mm-dd
    * YYYY-mm-ddTHH:MM
    * YYYY-mm-ddTHH:MM:SS
    * YYYY-mm-ddTHH:MM:SS.f (1 to 6 fractional digits)
    * the same four using YYYY-ddd in place of YYYY-mm-dd

    Where YYYY is year, mm is month, dd is day of month, HH is hour, MM is minute, SS is
    second and ddd is day of year. All values must be zero padded to the number of
    characters used above.

    The keywords 'now', 'today', 'yesterday' and 'tomorrow' are also accepted and are
    evaluated against the current UTC time; the three day keywords give midnight.

    The pattern is searched for anywhere in `t`, so surrounding text (such as a
    trailing 'Z') is ignored. ValueError is raised if no timestamp is found.

    >>> xml_to_datetime('2010-03-04T06:41:00')
    datetime.datetime(2010, 3, 4, 6, 41)

    >>> xml_to_datetime('2010-099')
    datetime.datetime(2010, 4, 9, 0, 0)

    >>> xml_to_datetime('2013-300T12:00')
    datetime.datetime(2013, 10, 27, 12, 0)
    """
    if t == 'now':
        return datetime.utcnow()

    # day keywords, as an offset in days from today
    for keyword, day_offset in (('today', 0), ('yesterday', -1), ('tomorrow', 1)):
        if t == keyword:
            day = datetime.utcnow().date() + timedelta(days=day_offset)
            return datetime(day.year, day.month, day.day)

    # The pattern is compiled on first use and cached on the function object
    # pylint: disable=E1101
    pattern = xml_to_datetime.matcher
    if pattern is None:
        pattern = xml_to_datetime.matcher = re.compile(
            r'(?P<year>\d{4})-'
            r'(((?P<month>\d{2})-(?P<day>\d{2}))|'
            r'(?P<yday>\d{3}))'
            r'(T'
            r'(?P<hour>\d{2}):'
            r'(?P<min>\d{2})'
            r'(:'
            r'(?P<sec>\d{2})'
            r'(\.(?P<ms>\d{1,6}))?'
            r')?'
            ')?'
        )

    found = pattern.search(t)
    if found is None:
        raise ValueError('Cannot convert XML time "{s}" to datetime'.format(s=t))

    fields = found.groupdict()
    year = int(fields['year'])

    if fields['yday'] is not None:
        # 2013-300: count days from the 1st of January
        result = datetime(year, 1, 1) + timedelta(days=int(fields['yday']) - 1)

    else:
        # 2013-12-01
        result = datetime(year, int(fields['month']), int(fields['day']))

    if fields['hour'] is None:
        # date only
        return result

    second = 0 if fields['sec'] is None else int(fields['sec'])

    fraction = fields['ms']
    if fraction is None:
        microsecond = 0

    else:
        # Scale the fractional seconds to microseconds, taking into account the
        # number of significant digits the user passed in
        microsecond = int(1e6 * int(fraction) / pow(10, len(fraction)))

    return result.replace(
        hour=int(fields['hour']),
        minute=int(fields['min']),
        second=second,
        microsecond=microsecond,
    )


xml_to_datetime.matcher = None
699
700
def timedelta_to_xml(td):
    """Convert a Python timedelta to an ISO 8601 formatted duration string.

    Only the D, H, M and S designators are used. A zero duration is written as
    'PT0S'. Negative durations are written as a leading minus sign followed by the
    magnitude, which is the form `xml_to_timedelta` reads back.

    >>> timedelta_to_xml(timedelta(days=7, minutes=3, seconds=5, microseconds=12345))
    'P7DT3M5.012345S'

    >>> timedelta_to_xml(timedelta(hours=3, minutes=20))
    'PT3H20M'

    >>> timedelta_to_xml(timedelta(hours=-1))
    '-PT1H'
    """
    if td < timedelta(0):
        # timedelta stores negative values as a negative day count plus a positive
        # seconds/microseconds remainder (-1 hour is days=-1, seconds=82800), so
        # format the magnitude and prefix the sign
        return '-' + timedelta_to_xml(-td)

    hours, remainder = divmod(td.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    us = td.microseconds

    if td.days == 0 and td.seconds == 0 and us == 0:
        return 'PT0S'

    parts = ['P']
    if td.days != 0:
        parts.append('{d}D'.format(d=td.days))

    if td.seconds > 0 or us > 0:
        # the time designator is only present if there is a time component
        parts.append('T')
        if hours > 0:
            parts.append('{h}H'.format(h=hours))

        if minutes > 0:
            parts.append('{m}M'.format(m=minutes))

        if us != 0:
            # fractional seconds always carry all six microsecond digits
            parts.append('{s}.{u:06d}S'.format(s=seconds, u=us))

        elif seconds != 0:
            parts.append('{s}S'.format(s=seconds))

    return ''.join(parts)
745
746
def xml_to_timedelta(t):
    """Convert an XML duration to a datetime.timedelta.

    Accepted input is the literal '0', or [-]PnDTnHnMnS where every component is
    optional and only the seconds may have a fractional part. Year and month
    components are not accepted.

    Raises ValueError if `t` does not have that form.
    """
    if t == '0':
        return timedelta()

    # The pattern is compiled on first use and cached on the function object
    pattern = xml_to_timedelta.matcher
    if pattern is None:
        pattern = xml_to_timedelta.matcher = re.compile(
            r'^(?P<minus>-)?P((?P<day>\d+)D)?(T((?P<hour>\d+)H)?'
            r'((?P<minute>\d+)M)?((?P<sec>[0-9.]+)S)?)?$'
        )

    found = pattern.search(t)
    if found is None:
        raise ValueError('Could not convert {t} to time interval'.format(t=t))

    fields = found.groupdict()

    # absent components count as zero
    day, hour, minute = (
        0 if fields[key] is None else int(fields[key])
        for key in ('day', 'hour', 'minute')
    )

    sec = 0
    us = 0
    sec_text = fields.get('sec')
    if sec_text is not None:
        pieces = sec_text.split('.')
        sec = int(pieces[0])
        if len(pieces) > 1:
            # fractional seconds present, convert them to microseconds
            us = float('.' + pieces[1]) * 1000000

    total_seconds = sec + minute * 60 + hour * 3600 + day * 86400
    duration = timedelta(seconds=total_seconds, microseconds=us)

    if fields['minus'] is None:
        return duration

    return -duration


xml_to_timedelta.matcher = None
822
823
class ParseException(Exception):
    """XML parsing exception.

    The message is kept in the `mess` attribute; str() returns its repr().
    """

    def __init__(self, mess):
        # Hand the message to Exception so `args` is populated; the default
        # copy/pickle support of exceptions rebuilds the object from `args`
        super().__init__(mess)
        self.mess = mess

    def __str__(self):
        return repr(self.mess)
833
834
class ParseExceptionOneExpected(ParseException):
    """XML parsing exception where exactly one child is expected.

    `node` describes the parent, `cc` is the number of children actually found and
    `name` is the child tag that was looked up.
    """

    def __init__(self, node, cc, name):
        message = f'{node} has {cc} child nodes of type "{name}", expecting 1'
        super().__init__(message)
840
841
class ParseExceptionNoChild(ParseException):
    """XML parsing exception where a required child is missing.

    `node` describes the parent and `name` is the child tag that was looked up.
    """

    def __init__(self, node, name):
        message = f'{node} has no child nodes of required type "{name}"'
        super().__init__(message)
847
848
def xml_to_date(s):
    """Convert a date-only string to a datetime object.

    The format is chosen purely from the length of `s`: 10 characters are read as
    YYYY-MM-DD and 8 characters as YYYY-DDD (day of year).

    ParseException is raised if `s` is not a str or has any other length.
    """
    if not isinstance(s, str):
        raise ParseException('Input to xml_to_date must be a string')

    layouts = {
        10: '%Y-%m-%d',  # 2009-04-01
        8: '%Y-%j',  # 2009-100
    }
    layout = layouts.get(len(s))
    if layout is None:
        raise ParseException(
            'Format of date should be either YYYY-MM-DD (year/month/day of month) '
            'or YYYY-DDD (year/day of year'
        )

    return datetime.strptime(s, layout)
867
868
def describe_node(node):
    """Describe an lxml element for error messages.

    The result names the element's tag, the URL of the document it belongs to and
    the source line lxml recorded for it.
    """
    tag = node.tag
    source = node.getroottree().docinfo.URL
    line = node.sourceline
    return 'Element {tag} at {file}:{line}'.format(tag=tag, file=source, line=line)
874
875
# Sentinel used to indicate a result has not yet been found.
# A unique object() so it can never be confused with real parsed content.
INVALID = object()

# Sentinel indicating the caller supplied no default value
# (so that None remains usable as a real default).
NO_DEFAULT = object()
881
882
def parsestr(elem):
    """Return the text content of `elem` as a str.

    Leading and trailing whitespace is removed. An element without text gives an
    empty string, so a str is always returned.
    """
    content = elem.text
    return '' if content is None else str(content).strip()
898
899
def parsechildstr(
    parent,
    name,
    default=NO_DEFAULT,
    expand_env=False,
    expand_settings=False,
    expand_user=False,
):
    """Return the text content of a child `name` of XML node `parent`.

    The text is stripped of surrounding whitespace; a child without text gives ''.
    If there is no such child, `default` is returned when one was given.

    If `expand_env` is True then references to $ENVIRONMENT_VARIABLES will be expanded.
    If `expand_settings` is True then references to {settings.VARIABLE} will be expanded.
    If `expand_user` is True then references to ~username will be expanded to that
    user's home directory.
    The expansions are applied in that order, and only to non-empty children.

    Raises:
        ParseException: `name` does not exist and no default is specified, or there
            are multiple child nodes called `name`.

    TBD: see if we can use lxml findtext for speed.
    """
    result = INVALID
    for child in parent.iterchildren(name):
        if result is not INVALID:
            # a previous iteration already produced a value
            raise ParseException(
                '{parent} has multiple child nodes of type "{name}", '
                'expecting 1'.format(parent=describe_node(parent), name=name)
            )

        raw = child.text
        if raw is None:
            # `name` exists but contains no text
            result = ''
            continue

        result = str(raw).strip()
        if expand_env:
            result = os.path.expandvars(result)

        if expand_settings:
            # careful import settings from here as settings.py import xml.py indirectly
            from chart.project import settings

            result = result.format(settings=settings)

        if expand_user:
            result = os.path.expanduser(result)

    if result is not INVALID:
        return result

    # sentinels are compared by identity so that any object, whatever its
    # __eq__ does, can be used as a default
    if default is NO_DEFAULT:
        raise ParseException(
            '{parent} has no child nodes of required name "{name}"'.format(
                parent=describe_node(parent), name=name
            )
        )

    return default
959
960
def parsechildint(parent, name, default=type):
    """Return the text content of child `name` of node `parent`, converted to an integer.

    The built-in `type` is the "no default" sentinel for `default`.
    If the child is missing, or present but empty, `default` is returned when given.

    Raises:
        ParseException: The child is present more than once, is missing or empty
            with no default given, or its content is not an integer.
        Exception: `parent` is None
    """
    if parent is None:
        raise Exception('parsechildint called with null parent')

    childs = parent.findall(name)
    if len(childs) > 1:
        raise ParseException(
            '{parent} has {childs} child nodes of type {type}, expecting 1"'.format(
                parent=describe_node(parent), childs=len(childs), type=name
            )
        )

    if len(childs) == 0:
        # the sentinel is compared by identity so any object can be a default
        if default is type:
            raise ParseException(
                '{parent} has no child nodes of required type {name}'.format(
                    parent=describe_node(parent), name=name
                )
            )

        return default

    s = childs[0].text
    if s is None and default is not type:
        # empty element: treated like a missing one, as parsechildfloat does
        return default

    try:
        return int(s)
    except (TypeError, ValueError) as err:
        # TypeError covers an empty element (text is None) with no default
        raise ParseException(
            '{node} is not an integer found {s}'.format(
                node=describe_node(childs[0]), s=s
            )
        ) from err
997
998
def parsechildfloat(parent, name, default=type, allow_eval=False):
    """Return the text content of child `name` of node `parent`, converted to a float.

    The built-in `type` is the "no default" sentinel for `default`.
    If the child is missing, or present but empty, `default` is returned when given.

    If `allow_eval` is True, content that float() rejects is passed to eval() and the
    result converted to a float.
    WARNING: eval() executes arbitrary Python, so `allow_eval` must only be used for
    XML from a trusted source.

    Raises:
        ParseException: The child is present more than once, is missing or empty
            with no default given, or its content cannot be converted.
    """
    childs = parent.findall(name)
    if len(childs) > 1:
        raise ParseException(
            '{parent} has {count} child nodes of type {name}, expecting 1'.format(
                count=len(childs), parent=describe_node(parent), name=name
            )
        )

    if len(childs) == 0:
        # the sentinel is compared by identity so any object can be a default
        if default is type:
            raise ParseException(
                '{parent} has no child nodes of required type {name}'.format(
                    parent=describe_node(parent), name=name
                )
            )

        return default

    s = childs[0].text

    if s is None:
        if default is not type:
            return default

        raise ParseException(
            '{node}: Cannot interpret empty node {name} as a float'.format(
                node=describe_node(parent), name=name
            )
        )

    try:
        return float(s)
    except ValueError as err:
        if not allow_eval:
            raise ParseException(
                '{node} is not numeric (found {s})'.format(
                    node=describe_node(childs[0]), s=s
                )
            ) from err

    # Only reached when float() failed and the caller opted in to eval()
    try:
        return float(eval(s))  # (use of eval) pylint: disable=W0123
    except Exception as err:
        raise ParseException(
            '{node} could not be evaluated (found {s})'.format(
                node=describe_node(childs[0]), s=s
            )
        ) from err
1053
1054
def parsechilddatetime(parent, name, default=type):
    """Return the text content of child `name` of node `parent` as a Python datetime.

    The built-in `type` is the "no default" sentinel for `default`.
    If the child is missing, or present but empty, `default` is returned when given.
    The conversion is done by xml_to_datetime.

    Raises:
        ParseExceptionOneExpected: The child is present more than once
        ParseExceptionNoChild: The child is missing or empty and no default was given
        ParseException: The content could not be converted to a datetime
    """
    childs = parent.findall(name)
    if len(childs) > 1:
        raise ParseExceptionOneExpected(
            node=describe_node(parent), cc=len(childs), name=name
        )

    # a missing child and an empty child are handled identically
    s = childs[0].text if childs else None
    if s is None:
        # the sentinel is compared by identity so any object can be a default
        if default is type:
            raise ParseExceptionNoChild(node=describe_node(parent), name=name)

        return default

    try:
        return xml_to_datetime(s)
    except Exception as err:
        # deliberately broad: any conversion failure is reported as a parse error,
        # with the original exception kept as the cause
        raise ParseException(
            '{node} is not a valid XML date (found {s})'.format(
                node=describe_node(childs[0]), s=s
            )
        ) from err
1091
1092
def parsechildtimedelta(parent, name, default=type):
    """Return the text content of child `name` of node `parent` as a Python timedelta.

    The built-in `type` is the "no default" sentinel for `default`.
    If the child is missing, or present but empty, `default` is returned when given.
    The conversion is done by xml_to_timedelta.

    Raises:
        ParseException: The child is present more than once, is missing or empty
            with no default given, or its content is not a valid duration.
    """
    childs = parent.findall(name)
    if len(childs) > 1:
        raise ParseException(
            '{parent} has {actual} child nodes of type {type}, expecting 1'.format(
                parent=describe_node(parent), actual=len(childs), type=name
            )
        )

    if len(childs) == 0:
        # the sentinel is compared by identity so any object can be a default
        if default is type:
            raise ParseException(
                '{parent} has no child nodes of required type {name}'.format(
                    parent=describe_node(parent), name=name
                )
            )

        return default

    s = childs[0].text
    if s is None and default is not type:
        # empty element: fall back to the default like the other parsechild helpers
        return default

    try:
        if s is None:
            # empty element and no default: report it as an invalid duration
            raise ValueError('empty element')

        return xml_to_timedelta(s)
    except ValueError as err:
        raise ParseException(
            '{node} does not contain a valid XML duration (found "{act}")'.format(
                node=describe_node(childs[0]), act=s
            )
        ) from err
1128
1129
def parsechildbool(parent, name, default=type, env_expand=False):
    """Read the single child `name` of `parent` and interpret its text as a boolean.

    Only the first character of the text is examined:

    - `f`, `F` or `0` gives False
    - `t`, `T` or `1` gives True

    If `env_expand` is True the text is first passed through `os.path.expandvars`.

    `default` is returned when the child is absent or has no text content. When
    `default` is left at its sentinel value (`type`) ParseExceptionNoChild is raised
    in those cases instead.

    ParseExceptionOneExpected is raised if `name` occurs more than once, and
    ParseException if the text is not a recognised boolean.
    """
    matches = parent.findall(name)
    found = len(matches)
    if found > 1:
        raise ParseExceptionOneExpected(
            node=describe_node(parent), cc=found, name=name
        )

    # A missing element and an element without any text are handled alike
    text = matches[0].text if found == 1 else None
    if text is None:
        if default == type:
            raise ParseExceptionNoChild(node=describe_node(parent), name=name)

        return default

    if env_expand is True:
        text = os.path.expandvars(text)

    # slicing gives '' for an empty string, which matches neither set
    initial = text[:1]
    if initial in ('f', 'F', '0'):
        return False

    if initial in ('t', 'T', '1'):
        return True

    raise ParseException(
        '{node} is not a recognised boolean (found {s})'.format(
            node=describe_node(matches[0]), s=text
        )
    )
1171
1172
def parsechildstringlist(parent, name, childname, default=NO_DEFAULT):
    """Return a list of child strings.

    Locates the single child `name` of `parent` and returns the stripped text of
    each of its `childname` children. An empty `childname` element contributes
    an empty string.

    If `name` is absent, `default` is returned, or ParseException is raised when no
    `default` was given. ParseExceptionOneExpected is raised if `name` occurs more
    than once.

    >>> parsechildstringlist(parse_xml('<a><b><c>e</c><c>f</c></b></a>'), 'b', 'c')
    ['e', 'f']
    """
    childs = parent.findall(name)
    if len(childs) == 0:
        if default is NO_DEFAULT:
            raise ParseException(
                '{node} has no children of required type {name}'.format(
                    node=describe_node(parent), name=name
                )
            )

        return default

    if len(childs) != 1:
        raise ParseExceptionOneExpected(
            node=describe_node(parent), cc=len(childs), name=name
        )

    # `.text` is None for an empty element, so guard before stripping
    return [(child.text or '').strip() for child in childs[0].findall(childname)]
1200
1201
def parsechildstrs(parent_elem,
                   name,
                   default=[],  # (dangerous default) pylint: disable=W0102
                   expand_settings=False):
    """Return a list of child strings.

    Collects the stripped text of every `name` child of `parent_elem`. An empty
    element contributes an empty string.

    If there are no `name` children, `default` is used instead. A list default is
    copied so the caller never receives the shared default object; any other
    default (e.g. None) is returned unchanged. Passing the `type` sentinel as
    `default` yields an empty list.

    If `expand_settings` is set, each string is passed through `str.format` with
    the project `settings` object available as `{settings...}`.

    >>> parsechildstrs(parse_xml('<a><b>e</b><b>f</b></a>'), 'b')
    ['e', 'f']
    """
    # `.text` is None for an empty element, so guard before stripping
    res = [(child_elem.text or '').strip() for child_elem in parent_elem.findall(name)]

    if not res:
        if default is type:
            # sentinel meaning "no default supplied": nothing to fall back on
            return res

        if not isinstance(default, list):
            # non-list defaults are handed back as-is; there is nothing to expand
            return default

        res = list(default)

    if expand_settings:
        from chart.project import settings
        res = [r.format(settings=settings) for r in res]

    return res
1223
1224
def parsechildfloats(parent_elem, name):
    """Return the text of every `name` child of `parent_elem` as a list of floats.

    Each element's text is stripped of surrounding whitespace before conversion.

    >>> node = parse_xml('<a><b>1</b><b>2</b></a>')
    >>> parsechildfloats(node, 'b')
    [1.0, 2.0]
    """
    return [float(elem.text.strip()) for elem in parent_elem.findall(name)]
1237
1238
def is_xml_comment(elem):
    """Return True if `elem` is an lxml comment node, False otherwise."""
    comment_class = etree._Comment
    return isinstance(elem, comment_class)
1242
1243
def mixed_decoder(err):
    """Allow binary containing latin-1 symbols to be decoded with the utf-8 codec.

    This follows the codec error handler protocol: `err` is the Unicode error
    describing the undecodable span, and the return value is a
    `(replacement, resume_position)` tuple.

    Only the byte sequences listed in `REPLACEMENT` are translated. Anything else
    re-raises `err`, so an unrecognised byte surfaces as the original decoding
    error rather than as a malformed handler result.
    """
    REPLACEMENT = {
        b'\xb0': '\u00b0',  # degrees symbol
        b'\xb1': '\u00b1',  # plusminus symbol
    }

    if isinstance(err, UnicodeDecodeError):
        bad = bytes(err.object[err.start:err.end])
        if bad in REPLACEMENT:
            # resume decoding immediately after the replaced bytes
            return REPLACEMENT[bad], err.end

    raise err
1261
1262
def main():
    """Command line entry point.

    Small diagnostic tool for the conversion helpers in this module:

    - `--time` / `-t` is decoded with `xml_to_datetime` and printed together with
      its day of year.
    - `--timedelta` / `--dt` is decoded with `xml_to_timedelta` and printed together
      with its days, seconds and microseconds components.
    - `--minus` prefixes the `--timedelta` value with `-` before decoding. It
      exists because a value starting with `-` is awkward to pass on a command
      line, and it is rejected if `--timedelta` is absent.
    """
    from chart.common.args import ArgumentParser

    parser = ArgumentParser()
    parser.add_argument('--time', '-t', type=xml_to_datetime)
    # parser.add_argument('--timedelta', '--dt', type=ArgumentParser.timedelta)
    parser.add_argument('--timedelta', '--dt')
    parser.add_argument('--minus', action='store_true')

    args = parser.parse_args()

    if args.minus:
        if args.timedelta is None:
            # previously this fell through to `'-' + None` and raised TypeError
            raise SystemExit('--minus can only be used together with --timedelta')

        args.timedelta = '-' + args.timedelta

    if args.time:
        print('Datetime : ', args.time)
        print('Day of year : ', args.time.timetuple().tm_yday)

    if args.timedelta:
        td = xml_to_timedelta(args.timedelta)
        print('Timedelta : ', td)
        print(td.days, td.seconds, td.microseconds)


if __name__ == '__main__':
    main()