1#!/usr/bin/env python3
2
3"""Retrieve the ACTIVITY xml files, with caching."""
4
5import ast
6import logging
7from enum import Enum
8
9from chart.common.path import Path
10from chart.project import settings
11from chart.common.xml import XMLElement
12from chart.common.decorators import memoized
13from chart.common.decorators import memoized2
14from chart.common.exceptions import ConfigError
15from chart.common.xml import XMLSyntaxError
16from chart.browse.make_url import make_url
17from chart.project import SID
18
# Allowed values of the <class> element of an activity file
CLASS_ALGORITHM = 'algorithm'
CLASS_REPORT = 'report'

# Names of the XML elements read from activity files.
# Each name is defined exactly once (ELEM_EXECUTABLE and ELEM_DESCRIPTION used to be
# assigned twice with identical values).
ELEM_EXECUTABLE = 'executable'
ELEM_URL = 'url'
ELEM_TEST = 'test'
ELEM_EVENT = 'event'
ELEM_RELATED = 'related'
ELEM_GROUP = 'group'
ELEM_DESCRIPTION = 'description'
ELEM_REPORT = 'report'
ELEM_SID = 'sid'
ELEM_USE_REPORT_LOG = 'use-report-log'
ELEM_TYPE = 'type'
ELEM_TABLE = 'table'
ELEM_WITH_STATS = 'with-stats'
ELEM_CLASS = 'class'
ELEM_CALLING_CONVENTION = 'calling-convention'
ELEM_ENABLED = 'enabled'
ELEM_TRIGGER = 'trigger'
ELEM_OUTPUT_TABLE = 'output-table'
ELEM_TEMPLATE = 'template'
ELEM_LATENCY = 'latency'
ELEM_PRIORITY = 'priority'
ELEM_WEIGHT = 'weight'
ELEM_CONFIGURATION = 'configuration'
ELEM_PARAMETER = 'parameter'
ELEM_NAME = 'name'
ELEM_DATATYPE = 'datatype'
ELEM_VALUE = 'value'
ELEM_TIMEOUT = 'timeout'

# Allow high priority (higher numbers) or lower (lower or negative numbers) activities
DEFAULT_PRIORITY = 1.0

# Allow an activity to class itself as needing more resources than others. Each worker process
# is started with a maximum weight it will accept
DEFAULT_WEIGHT = 1.0

# Root logger, as in the original module
logger = logging.getLogger()
61
62
class Configuration:
    """Plain attribute holder for the <configuration> options of an activity.

    The class defines no members of its own; `Activity.configuration` creates an
    instance and sets one attribute per <parameter> it understands.
    """
66
class CallingConvention(Enum):
    """Allowed <calling-convention> values in the activities XML file.

    Every member additionally carries a human readable `description` attribute,
    attached immediately after the class definition.
    """

    NONE = 'none'
    FILENAME = 'filename'
    SID_TIMERANGE = 'sid+timerange'
    TIMERANGE = 'timerange'
    SID_TIMERANGE_TABLE = 'sid+timerange+table'
    EDL = 'edl'


# Attach a description to every member. TIMERANGE used to be left out, so reading
# `CallingConvention.TIMERANGE.description` raised AttributeError.
CallingConvention.NONE.description = 'System job with no parameters'
CallingConvention.FILENAME.description = 'Process a file from a local directory'
CallingConvention.SID_TIMERANGE.description = 'Pass a Source-ID and start/stop times'
CallingConvention.TIMERANGE.description = 'Pass start/stop times without a Source-ID'
CallingConvention.SID_TIMERANGE_TABLE.description = \
    'Pass a Source-ID, start/stop times and a table name'
CallingConvention.EDL.description = \
    'Process a product from the EUMETSAT DataLake via stored collection and name. '\
    'SID and start time should be set as job is created'
85
86
class UnknownActivity(Exception):
    """Attempt to create an Activity with an unknown name.

    Attributes:
        name: the activity name (or descriptive text) that could not be resolved.
    """

    def __init__(self, name):
        # Hand `name` to the base class so it ends up in `args`. With empty `args` the
        # exception cannot be unpickled (the constructor would be called without
        # `name`) and its repr() shows no detail.
        super().__init__(name)
        self.name = name

    def __str__(self):
        return 'Unknown activity {name}'.format(name=self.name)
96
97
class MissingAttribute(Exception):
    """A client tried to validate a job to an activity but an attribute was missing.

    Attributes:
        message: text describing the offending attributes.
    """

    def __init__(self, message):
        # Hand `message` to the base class so it ends up in `args`; with empty `args`
        # the exception cannot be unpickled and its repr() shows no detail.
        super().__init__(message)
        self.message = message

    def __str__(self):
        return 'Missing attribute: {message}'.format(message=self.message)
107
108
class ActivityTrigger:
    """Representation of a <trigger> element of an activity.

    Attributes (as filled in by `Activity.triggers`):
        trigger_type: an ActivityTriggerType member, or None if the trigger has no <type>.
        table: table object exposing a `name` attribute, or None if there is no <table>.
        with_stats: value of the <with-stats> flag.
        latency: value of the <latency> element, or None.
    """

    def __init__(self, trigger_type=None, table=None, with_stats=None, latency=None):
        self.trigger_type = trigger_type
        self.table = table
        self.with_stats = with_stats
        self.latency = latency

    def __repr__(self):
        # Only the parts that are actually set are shown
        parts = []
        if self.table is not None:
            parts.append(self.table.name)

        if self.trigger_type is not None:
            parts.append(self.trigger_type.value)

        if self.with_stats:
            parts.append('with-stats')

        return 'ActivityTrigger({parts})'.format(parts=', '.join(parts))

    def __getitem__(self, arg):
        """Dictionary style access for clients which treat triggers as dicts.

        The key 'type' gives the string value of the trigger type, or None when the
        trigger has no type (this used to raise AttributeError). Any other key is looked
        up among the instance attributes and raises KeyError if it does not exist.
        """
        if arg == 'type':
            if self.trigger_type is None:
                return None

            return self.trigger_type.value

        return self.__dict__[arg]
136
137
class ActivityTriggerType(Enum):
    """Allowed text of the <type> element inside an activity <trigger>.

    The member values are the literal strings used in the activity XML files.
    """

    ORBITAL = 'orbital'
    DAILY = 'daily'
    THREEDAILY = 'threedaily'
    HOURLY = 'hourly'
    # a Python identifier cannot start with a digit, hence the spelled out name
    TWENTYMINUTE = '20min'
146
147
148# don't use memoized because it breaks isinstance
149class Activity:
150 """Implementation of Activity class."""
151
152 _cache = {}
153
154 class __metaclass__(type):
155 """Cached constructor implementation."""
156
157 def __call__(cls, *args, **kwargs):
158 return cls.__new__(cls, *args, **kwargs)
159
160 def __new__(cls, name):
161 res = cls._cache.get(name)
162 if res is not None:
163 return res
164
165 res = object.__new__(cls)
166 cls._cache[name] = res
167 res.__init__(name)
168 return res
169
170 def __init__(self, name):
171 """Contructor.
172
173 `name` can be either an actual filename of an activity file, or the
174 name of an XML file in the project Activities directory."""
175 if '/' in name or '.' in name:
176 filename = Path(name)
177 try:
178 if filename.exists():
179 # local activity file
180 try:
181 self.elem = XMLElement(filename=filename)
182 except XMLSyntaxError:
183 raise UnknownActivity(name)
184
185 else:
186 raise IOError('{name} not found'.format(name=name))
187
188 except TypeError:
189 # can happen if a web client sends a really weird string
190 raise UnknownActivity('<>')
191
192 else:
193 # from the project activities directory
194 name = name.upper()
195 self.elem = None
196 for activity_dir in settings.ACTIVITY_DIRS:
197 fullname = activity_dir.child(name + '.xml')
198 if fullname.exists():
199 self.elem = XMLElement(filename=fullname)
200
201 if self.elem is None:
202 if '-' in name and '_' not in name:
203 raise UnknownActivity(
204 'Unknown activity "{name}" (did you mean "{attempt}"?)'.format(
205 name=name, attempt=name.replace('-', '_')))
206
207 raise UnknownActivity(name)
208
209 self.name = name
210
211 self.classname = self.elem.parse_str(ELEM_CLASS)
212 if self.classname not in (CLASS_ALGORITHM, CLASS_REPORT):
213 raise ConfigError('<class> in activity {activity} is not one of: '
214 'algorithm, report'.format(activity=self.name))
215
216 self.convention = CallingConvention(self.elem.parse_str(ELEM_CALLING_CONVENTION))
217
218 def __lt__(self, other):
219 return self.name < other.name
220
221 def __eq__(self, other):
222 return self.name == other.name
223
224 def __str__(self):
225 return 'Activity({name})'.format(name=self.name)
226
227 def __repr__(self):
228 return self.__str__()
229
230 def __hash__(self):
231 return hash(self.name)
232
233 @property
234 def description(self):
235 """Return the <description> of the activity."""
236 return self.elem.parse_str(ELEM_DESCRIPTION, None)
237
238 @property
239 def group(self):
240 """Return the <group> of the activity.
241
242 Should exist for reports only, used by the report viewer.
243 """
244 return self.elem.parse_str(ELEM_GROUP, None)
245
246 @property
247 def related(self):
248 """Like all <related><report> nodes."""
249 related_elem = self.elem.find(ELEM_RELATED)
250 if related_elem is None:
251 return []
252
253 return related_elem.parse_strs(ELEM_REPORT)
254
255 @property
256 def docstring(self):
257 """Pull the module level docstring from the code."""
258 # generate abstract syntax tree of the executable
259 # (assumes it is Python code)
260 tree = ast.parse(self.abs_executable.open().read())
261 # walk through the top level elements
262 # for elem in tree.body:
263 # look for strings
264 # if type(elem.value) == ast.Str:
265 # return the first one found as the docstring
266 # return elem.value.s
267
268 # no top level string objects found
269 return ast.get_docstring(tree)
270
271 @property
272 def enabled(self):
273 """Return the <enabled> node of the activity as a boolean."""
274 return self.elem.parse_bool(ELEM_ENABLED, True)
275
276 @property
277 def use_report_log(self):
278 """Return the <use-report-log> node of the activity as a boolean."""
279 return self.elem.parse_bool(ELEM_USE_REPORT_LOG, False)
280
281 @property
282 def url(self):
283 """Return the <url> element of the activity."""
284 return self.elem.parse_str(ELEM_URL, None, expand_settings=True)
285
286 @property
287 def browse_url(self):
288 """Return a URL which can browse an auto-generated description of the activity."""
289 from django.urls import reverse
290 return reverse('backend:activities/single',
291 kwargs=dict(activityname=self.name))
292
293 @property
294 def browse_source_url(self):
295 """See the Activity XML source code in the browser."""
296 return make_url(self.elem.filename())
297
298 @property
299 def browse_executable_url(self):
300 """Return a url which can be used to browse to the activity source XML file."""
301 exe = self.abs_executable
302 if exe is None:
303 return None
304
305 return make_url(exe)
306
307 @property
308 def browse_template_url(self):
309 """Return a browseable URL to the report template."""
310 if self.template is not None:
311 return make_url(settings.REPORT_TEMPLATE_DIR.child(self.template))
312
313 return None
314
315 @property
316 def executable(self):
317 """Return the content of the <executable> element."""
318 exe = self.elem.parse_str(ELEM_EXECUTABLE, None, expand_settings=True)
319 if exe is None and self.template is not None:
320 # Allow the executable name to be ommited for report Activities
321 exe = 'cmd/report.py'
322
323 return Path(exe)
324
325 @property
326 def abs_executable(self):
327 """Return the absolute activity executable filename."""
328 exe = self.executable
329
330 # Check for absolute path
331 if exe.is_file():
332 return exe
333
334 # Check for relative to DU root
335 if getattr(settings, 'DU_DIR', None) is not None and settings.DU_DIR.child(exe).exists():
336 return settings.DU_DIR.joinpath(exe)
337
338 # Check for relative to project root
339 if settings.PROJECT_HOME_DIR.child(exe).exists():
340 return settings.PROJECT_HOME_DIR.joinpath(exe)
341
342 # Check for relative to core root
343 if settings.CORE_HOME_DIR.child(exe).exists():
344 return settings.CORE_HOME_DIR.joinpath(exe)
345
346 raise ConfigError('Algorithm {alg} for activity {act} not found in {dirs}'.format(
347 alg=exe,
348 act=self.name,
349 dirs=', '.join((str(settings.PROJECT_HOME_DIR), str(settings.CORE_HOME_DIR)))))
350
351 @property
352 def test(self):
353 """Return the content of the <test> element.
354
355 This gives the location of an automated test verifying this activity.
356 """
357 # I don't think this is used anywhere
358 res = self.elem.parse_str(ELEM_TEST, None)
359 if res is None:
360 return None
361
362 return settings.PROJECT_HOME_DIR.joinpath(res)
363
364 @property
365 def browse_test_url(self):
366 """Return the a source browseable link to the file pointed to by the <test> element."""
367 if self.test is None:
368 return None
369
370 return make_url(self.test)
371
372 @property
373 @memoized2
374 def triggers(self):
375 """Return the trigger tables for this activity as a list.
376
377 Each element consists of a dictionary with keys
378 `type` (should be 'orbital')
379 and `tablename` (a string).
380
381 There is a weird bit of the algorithm that means if the activity has something like
382
383 <trigger>
384 <table>A</>
385 <table>B</>
386 </>
387
388 This gets yielded as:
389
390 ActivityTrigger(table='A'),
391 ActivityTrigger(table='B'),
392
393 I'm not sure if this is needed we should probably just block <trigger> elements
394 from containing multiple tables but the code allows it.
395 """
396 # avoid circular dependency
397 from chart.db.model.table import TableInfo
398
399 res = []
400 for trigger_elem in self.elem.findall(ELEM_TRIGGER):
401 if self.convention not in (CallingConvention.SID_TIMERANGE,
402 CallingConvention.SID_TIMERANGE_TABLE):
403 raise ConfigError('{name}: Actions with triggers must have calling '
404 'convention sid+timerange'.format(name=self.name))
405
406 trigger_types = trigger_elem.parse_str(ELEM_TYPE, None)
407 if trigger_types is None:
408 trigger_type = None
409
410 else:
411 trigger_type = ActivityTriggerType(trigger_types)
412
413 tables = trigger_elem.parse_str(ELEM_TABLE, None)
414 if tables is None:
415 table = None
416
417 else:
418 table = TableInfo(tables)
419
420 res.append(ActivityTrigger(
421 trigger_type=trigger_type,
422 table=table,
423 with_stats=trigger_elem.parse_bool(ELEM_WITH_STATS, False),
424 latency=trigger_elem.parse_timedelta(ELEM_LATENCY, None)))
425
426 return res
427
428 @property
429 def output_tables(self):
430 """List of tables written to by this activity."""
431 # avoid circular dependency
432 from chart.db.model.table import TableInfo
433 res = []
434
435 for t in self.elem.parse_strs(ELEM_OUTPUT_TABLE):
436 res.append(TableInfo(t))
437
438 return res
439
440 @property
441 def allow_multiple(self):
442 """Return true if this activity can process more than one job in a single invocation."""
443 return self.classname == CLASS_ALGORITHM
444
445 @property
446 @memoized2
447 def sids(self):
448 """Return a list of strings covering any <sid> elements. Wildcards not expanded."""
449 result = []
450 for sid_elem in self.elem.findall(ELEM_SID):
451 result.append(SID.from_string(sid_elem.text.strip(), wildcard=True))
452
453 return result
454
455 def match_sid(self, other):
456 """Test if this activity applies to this sid.
457
458 Some activity.xml files contain <sid> elements restricting triggers
459 to only certain sids.
460 Wildcards are allowed.
461 """
462 # if there were no clauses, the test passes
463 # return not has_clauses
464 count = 0
465 for sid in self.sids:
466 if sid.match(other):
467 return True
468
469 count += 1
470
471 # with no filters, everything matches
472 return count == 0
473
474 @property
475 def events(self):
476 """List of events raised by this activity."""
477 # circular dependancy
478 from chart.events.eventclass import EventClass
479 res = []
480 for e in self.elem.parse_strs(ELEM_EVENT):
481 res.append(EventClass(e))
482
483 return res
484
485 @property
486 def eventnames(self):
487 """List of events raised by this activity."""
488 return self.elem.parse_strs(ELEM_EVENT)
489
490 @property
491 def template(self):
492 """Name of template, used by xml_report type activities."""
493 tmpl = self.elem.parse_str(ELEM_TEMPLATE, None)
494 if tmpl is None:
495 return None
496
497 return Path(tmpl)
498
499 @property
500 def template_description(self):
501 """Return description field of the report template."""
502 if self.template is None:
503 return None
504
505 root = XMLElement(filename=settings.REPORT_TEMPLATE_DIR.child(self.template))
506 return root.parse_str(ELEM_DESCRIPTION, None)
507
508 def validate_call(self,
509 sensing_start=None,
510 sensing_stop=None,
511 sid=None,
512 orbit=None,
513 tablename=None,
514 filename=None):
515 """Check this job can be called with the supplied parameters."""
516 if sid == SID():
517 sid = None
518
519 missing = []
520 superflous = []
521 if self.convention in (CallingConvention.TIMERANGE,
522 CallingConvention.SID_TIMERANGE,
523 CallingConvention.SID_TIMERANGE_TABLE):
524 if sensing_start is None and orbit is None:
525 missing.append('sensing_start')
526
527 if sensing_stop is None and orbit is None:
528 missing.append('sensing_stop')
529
530 else:
531 if orbit is not None:
532 superflous.append('orbit')
533
534 if sensing_start is not None:
535 superflous.append('sensing_start')
536
537 if sensing_stop is not None:
538 superflous.append('sensing_stop')
539
540 if self.convention is CallingConvention.FILENAME:
541 if filename is None:
542 missing.append('filename')
543
544 elif filename is not None:
545 superflous.append('filename')
546
547 if self.convention in (CallingConvention.SID_TIMERANGE,
548 CallingConvention.SID_TIMERANGE_TABLE):
549 if sid is None:
550 missing.append('sid')
551
552 # elif sid is not None:
553 # superflous.append('sid')
554
555 if self.convention is CallingConvention.SID_TIMERANGE_TABLE:
556 if tablename is None:
557 missing.append('tablename')
558
559 elif tablename is not None:
560 superflous.append('tablename')
561
562 errors = []
563 if len(missing) > 0:
564 errors.append('Missing attributes: {miss}'.format(miss=', '.join(missing)))
565
566 if len(superflous) > 0:
567 errors.append('Superflous attributes: {super}'.format(super=', '.join(superflous)))
568
569 if len(errors) == 0:
570 return
571
572 raise MissingAttribute('. '.join(errors))
573
574 @property
575 def priority(self):
576 """Read the priority for this activity."""
577 return self.elem.parse_float(ELEM_PRIORITY, DEFAULT_PRIORITY)
578
579 @property
580 def weight(self):
581 """Read the weighting for this activity."""
582 return self.elem.parse_float(ELEM_WEIGHT, DEFAULT_WEIGHT)
583
584 @property
585 def timeout(self):
586 """Read the timeout for this activity."""
587 return self.elem.parse_timedelta(ELEM_TIMEOUT, None)
588
589 # @memoized2 - maybe worth it
590 @property
591 def configuration(self):
592 """Read activity <configuration> options"""
593 config_elem = self.elem.find(ELEM_CONFIGURATION)
594 if config_elem is None:
595 return None
596
597 result = Configuration()
598 for param_elem in config_elem.findall(ELEM_PARAMETER):
599 name = param_elem.parse_str(ELEM_NAME)
600 # <description> is ignored and this return type is nice and simple
601 # for clients to use
602 datatype = param_elem.parse_str(ELEM_DATATYPE)
603 # really, really should use a clever function from chart.common.traits here
604 # I'm only doing this because it's the initial implementation and only SYNTHETICS.xml
605 # is using configuration for now.
606 # If the system can used for anything else then it needs to call traits.py functions
607 # here - do not extend this code for more datatypes
608 if datatype == 'path':
609 setattr(result, name, Path(param_elem.parse_str(ELEM_VALUE)))
610
611 return result
612
@memoized
def all_activities():
    """Return a list of all Activity objects.

    Every `*.xml` file in each of the settings.ACTIVITY_DIRS directories gives one
    entry, sorted by filename within each directory.

    Can also be accessed as Activity.all()

    >>> print(len(Activity.all()))  # doctest: +SKIP
    ...
    """
    activities = []
    for activity_dir in settings.ACTIVITY_DIRS:
        activities.extend(
            Activity(xml_file.stem) for xml_file in sorted(activity_dir.glob('*.xml')))

    return activities


# Expose the helper and the exception class as attributes of Activity
Activity.all = all_activities  # (redefining builtin all) pylint: disable=W0622, W0612
Activity.UnknownActivity = UnknownActivity  # (unused variable) pylint: disable=W0612
632
633
@memoized
def all_reports():
    """Return all report Activities.

    These are the entries of `all_activities()` whose <class> is 'report'.
    Can also be accessed as Activity.all_reports().
    """
    # use the shared constant instead of repeating the literal class name
    return [activity for activity in all_activities() if activity.classname == CLASS_REPORT]


Activity.all_reports = all_reports
641
642
@memoized
def find_by_exe(exe):
    """Given an executable name, find the Activities which refer to it.

    Returns a list of every Activity whose `executable` compares equal to `exe`;
    the list is empty if there is none. Can also be accessed as Activity.find_by_exe().
    """
    return [activity for activity in Activity.all() if activity.executable == exe]


Activity.find_by_exe = find_by_exe
655
656
def display_activity(activity_name):
    """Display all information on `activity_name` as a two column table."""
    from chart.common.prettyprint import Table

    activity = Activity(activity_name)
    table = Table()

    # general information
    table.append(('Name', activity.name))
    table.append(('Description', activity.description))
    table.append(('URL', activity.url))
    table.append(('Enabled', activity.enabled))
    table.append(('Calling convention', activity.convention.value))
    table.append(('Docstring', activity.docstring))

    # SID restrictions, if any
    if len(activity.sids) > 0:
        sid_text = ', '.join(str(s) for s in activity.sids)
    else:
        sid_text = 'unrestricted'

    table.append(('SID', sid_text))

    if activity.template:
        table.append(('Template description', activity.template_description))

    # links for the source browser
    table.append(('Browse source url', activity.browse_source_url))
    table.append(('Browse exe url', activity.browse_executable_url))
    table.append(('Browse test url', activity.browse_test_url))
    table.append(('Browse template url', activity.browse_template_url))

    table.write()
683
684
def list_activities():
    """Show name, calling convention and description of all activities as a table."""
    from chart.common.prettyprint import Table

    table = Table(headings=('Name', 'Calling', 'Description'))
    for activity in Activity.all():
        row = (activity.name, activity.convention.value, activity.description)
        table.append(row)

    table.write()
693
694
def list_orbital_activities():
    """Show the names of all activities with per-orbit triggering.

    The name of every activity with at least one trigger of type ORBITAL is printed,
    one per line.
    """
    for activity in Activity.all():
        # Compare against the enum member rather than going through trigger['type'],
        # which fails for triggers that have no <type> element (trigger_type is None).
        if any(trigger.trigger_type is ActivityTriggerType.ORBITAL
               for trigger in activity.triggers):
            print(activity.name)
705
706
def test_match(activity, sid):
    """Print whether `activity` is enabled for `sid`.

    Writes a heading line followed by the result of `activity.match_sid(sid)`.
    """
    heading = 'Testing {a} against {s}...'.format(a=activity, s=sid)
    print(heading)
    outcome = activity.match_sid(sid)
    print(outcome)