1#!/usr/bin/env python3
  2
  3"""Retrieve the ACTIVITY xml files, with caching."""
  4
  5import ast
  6import logging
  7from enum import Enum
  8
  9from chart.common.path import Path
 10from chart.project import settings
 11from chart.common.xml import XMLElement
 12from chart.common.decorators import memoized
 13from chart.common.decorators import memoized2
 14from chart.common.exceptions import ConfigError
 15from chart.common.xml import XMLSyntaxError
 16from chart.browse.make_url import make_url
 17from chart.project import SID
 18
 19CLASS_ALGORITHM = 'algorithm'
 20CLASS_REPORT = 'report'
 21
 22ELEM_EXECUTABLE = 'executable'
 23ELEM_URL = 'url'
 24ELEM_TEST = 'test'
 25ELEM_EVENT = 'event'
 26ELEM_RELATED = 'related'
 27ELEM_GROUP = 'group'
 28ELEM_DESCRIPTION = 'description'
 29ELEM_REPORT = 'report'
 30ELEM_SID = 'sid'
 31ELEM_USE_REPORT_LOG = 'use-report-log'
 32ELEM_TYPE = 'type'
 33ELEM_TABLE = 'table'
 34ELEM_WITH_STATS = 'with-stats'
 35ELEM_CLASS = 'class'
 36ELEM_CALLING_CONVENTION = 'calling-convention'
 37ELEM_ENABLED = 'enabled'
 38ELEM_EXECUTABLE = 'executable'
 39ELEM_TRIGGER = 'trigger'
 40ELEM_OUTPUT_TABLE = 'output-table'
 41ELEM_TEMPLATE = 'template'
 42ELEM_DESCRIPTION = 'description'
 43ELEM_LATENCY = 'latency'
 44ELEM_PRIORITY = 'priority'
 45ELEM_WEIGHT = 'weight'
 46ELEM_CONFIGURATION = 'configuration'
 47ELEM_PARAMETER = 'parameter'
 48ELEM_NAME = 'name'
 49ELEM_DATATYPE = 'datatype'
 50ELEM_VALUE = 'value'
 51ELEM_TIMEOUT = 'timeout'
 52
 53# Allow high priority (higher numbers) or lower (lower or negative numbers) activities
 54DEFAULT_PRIORITY = 1.0
 55
 56# Allow an activity to class itself as needing more resources than others. Each worker process
 57# is started with a maximum weight it will accept
 58DEFAULT_WEIGHT = 1.0
 59
 60logger = logging.getLogger()
 61
 62
 63class Configuration:
 64    """Activity configuration options."""
 65    pass
 66
 67class CallingConvention(Enum):
 68    """Allowed <calling-convention> values in the activities XML file."""
 69
 70    NONE = 'none'
 71    FILENAME = 'filename'
 72    SID_TIMERANGE = 'sid+timerange'
 73    TIMERANGE = 'timerange'
 74    SID_TIMERANGE_TABLE = 'sid+timerange+table'
 75    EDL = 'edl'
 76
 77CallingConvention.NONE.description = 'System job with no parameters'
 78CallingConvention.FILENAME.description = 'Process a file from a local directory'
 79CallingConvention.SID_TIMERANGE.description = 'Pass a Source-ID and start/stop times'
 80CallingConvention.SID_TIMERANGE_TABLE.description = \
 81    'Pass a Source-ID, start/stop times and a table name'
 82CallingConvention.EDL.description = \
 83    'Process a product from the EUMETSAT DataLake via stored collection and name. '\
 84    'SID and start time should be set as job is created'
 85
 86
 87class UnknownActivity(Exception):
 88    """Attempt to create an Activity with an unknown name."""
 89
 90    def __init__(self, name):
 91        super(UnknownActivity, self).__init__()
 92        self.name = name
 93
 94    def __str__(self):
 95        return 'Unknown activity {name}'.format(name=self.name)
 96
 97
 98class MissingAttribute(Exception):
 99    """A client tried to validate a job to an activity but an attribute was missing."""
100
101    def __init__(self, message):
102        super(MissingAttribute, self).__init__()
103        self.message = message
104
105    def __str__(self):
106        return 'Missing attribute: {message}'.format(message=self.message)
107
108
class ActivityTrigger:
    """Representation of a <trigger> element of an activity.

    Attributes:
        trigger_type: Enum member whose `.value` is the <type> text, or None.
        table: Object with a `.name` attribute identifying the trigger table, or None.
        with_stats: Flag read from <with-stats>.
        latency: Value read from <latency>, or None.
    """

    def __init__(self, trigger_type=None, table=None, with_stats=None, latency=None):
        self.trigger_type = trigger_type
        self.table = table
        self.with_stats = with_stats
        self.latency = latency

    def __repr__(self):
        # only the parts which are actually set are shown
        parts = []
        if self.table is not None:
            parts.append(self.table.name)

        if self.trigger_type is not None:
            parts.append(self.trigger_type.value)

        if self.with_stats:
            parts.append('with-stats')

        return 'ActivityTrigger({parts})'.format(parts=', '.join(parts))

    def __getitem__(self, arg):
        """Allow dict-style access to the trigger.

        The key 'type' gives the text value of the trigger type, or None for a trigger
        which has no type. Any other key is looked up as an instance attribute and
        raises KeyError if there is no such attribute.
        """
        if arg == 'type':
            # a trigger may be built without a type; do not dereference None
            if self.trigger_type is None:
                return None

            return self.trigger_type.value

        return self.__dict__[arg]
136
137
class ActivityTriggerType(Enum):
    """Allowed <type> values inside a <trigger> element of an activity file.

    Each member value is the literal text expected in the XML.
    """

    ORBITAL = 'orbital'
    DAILY = 'daily'
    THREEDAILY = 'threedaily'
    HOURLY = 'hourly'
    TWENTYMINUTE = '20min'
146
147
148# don't use memoized because it breaks isinstance
149class Activity:
150    """Implementation of Activity class."""
151
152    _cache = {}
153
154    class __metaclass__(type):
155        """Cached constructor implementation."""
156
157        def __call__(cls, *args, **kwargs):
158            return cls.__new__(cls, *args, **kwargs)
159
160    def __new__(cls, name):
161        res = cls._cache.get(name)
162        if res is not None:
163            return res
164
165        res = object.__new__(cls)
166        cls._cache[name] = res
167        res.__init__(name)
168        return res
169
170    def __init__(self, name):
171        """Contructor.
172
173        `name` can be either an actual filename of an activity file, or the
174        name of an XML file in the project Activities directory."""
175        if '/' in name or '.' in name:
176            filename = Path(name)
177            try:
178                if filename.exists():
179                    # local activity file
180                    try:
181                        self.elem = XMLElement(filename=filename)
182                    except XMLSyntaxError:
183                        raise UnknownActivity(name)
184
185                else:
186                    raise IOError('{name} not found'.format(name=name))
187
188            except TypeError:
189                # can happen if a web client sends a really weird string
190                raise UnknownActivity('<>')
191
192        else:
193            # from the project activities directory
194            name = name.upper()
195            self.elem = None
196            for activity_dir in settings.ACTIVITY_DIRS:
197                fullname = activity_dir.child(name + '.xml')
198                if fullname.exists():
199                    self.elem = XMLElement(filename=fullname)
200
201            if self.elem is None:
202                if '-' in name and '_' not in name:
203                    raise UnknownActivity(
204                        'Unknown activity "{name}" (did you mean "{attempt}"?)'.format(
205                            name=name, attempt=name.replace('-', '_')))
206
207                raise UnknownActivity(name)
208
209        self.name = name
210
211        self.classname = self.elem.parse_str(ELEM_CLASS)
212        if self.classname not in (CLASS_ALGORITHM, CLASS_REPORT):
213            raise ConfigError('<class> in activity {activity} is not one of: '
214                            'algorithm, report'.format(activity=self.name))
215
216        self.convention = CallingConvention(self.elem.parse_str(ELEM_CALLING_CONVENTION))
217
218    def __lt__(self, other):
219        return self.name < other.name
220
221    def __eq__(self, other):
222        return self.name == other.name
223
224    def __str__(self):
225        return 'Activity({name})'.format(name=self.name)
226
227    def __repr__(self):
228        return self.__str__()
229
230    def __hash__(self):
231        return hash(self.name)
232
233    @property
234    def description(self):
235        """Return the <description> of the activity."""
236        return self.elem.parse_str(ELEM_DESCRIPTION, None)
237
238    @property
239    def group(self):
240        """Return the <group> of the activity.
241
242        Should exist for reports only, used by the report viewer.
243        """
244        return self.elem.parse_str(ELEM_GROUP, None)
245
246    @property
247    def related(self):
248        """Like all <related><report> nodes."""
249        related_elem = self.elem.find(ELEM_RELATED)
250        if related_elem is None:
251            return []
252
253        return related_elem.parse_strs(ELEM_REPORT)
254
255    @property
256    def docstring(self):
257        """Pull the module level docstring from the code."""
258        # generate abstract syntax tree of the executable
259        # (assumes it is Python code)
260        tree = ast.parse(self.abs_executable.open().read())
261        # walk through the top level elements
262        # for elem in tree.body:
263            # look for strings
264            # if type(elem.value) == ast.Str:
265                # return the first one found as the docstring
266                # return elem.value.s
267
268        # no top level string objects found
269        return ast.get_docstring(tree)
270
271    @property
272    def enabled(self):
273        """Return the <enabled> node of the activity as a boolean."""
274        return self.elem.parse_bool(ELEM_ENABLED, True)
275
276    @property
277    def use_report_log(self):
278        """Return the <use-report-log> node of the activity as a boolean."""
279        return self.elem.parse_bool(ELEM_USE_REPORT_LOG, False)
280
281    @property
282    def url(self):
283        """Return the <url> element of the activity."""
284        return self.elem.parse_str(ELEM_URL, None, expand_settings=True)
285
286    @property
287    def browse_url(self):
288        """Return a URL which can browse an auto-generated description of the activity."""
289        from django.urls import reverse
290        return reverse('backend:activities/single',
291                       kwargs=dict(activityname=self.name))
292
293    @property
294    def browse_source_url(self):
295        """See the Activity XML source code in the browser."""
296        return make_url(self.elem.filename())
297
298    @property
299    def browse_executable_url(self):
300        """Return a url which can be used to browse to the activity source XML file."""
301        exe = self.abs_executable
302        if exe is None:
303            return None
304
305        return make_url(exe)
306
307    @property
308    def browse_template_url(self):
309        """Return a browseable URL to the report template."""
310        if self.template is not None:
311            return make_url(settings.REPORT_TEMPLATE_DIR.child(self.template))
312
313        return None
314
315    @property
316    def executable(self):
317        """Return the content of the <executable> element."""
318        exe = self.elem.parse_str(ELEM_EXECUTABLE, None, expand_settings=True)
319        if exe is None and self.template is not None:
320            # Allow the executable name to be ommited for report Activities
321            exe = 'cmd/report.py'
322
323        return Path(exe)
324
325    @property
326    def abs_executable(self):
327        """Return the absolute activity executable filename."""
328        exe = self.executable
329
330        # Check for absolute path
331        if exe.is_file():
332            return exe
333
334        # Check for relative to DU root
335        if getattr(settings, 'DU_DIR', None) is not None and settings.DU_DIR.child(exe).exists():
336            return settings.DU_DIR.joinpath(exe)
337
338        # Check for relative to project root
339        if settings.PROJECT_HOME_DIR.child(exe).exists():
340            return settings.PROJECT_HOME_DIR.joinpath(exe)
341
342        # Check for relative to core root
343        if settings.CORE_HOME_DIR.child(exe).exists():
344            return settings.CORE_HOME_DIR.joinpath(exe)
345
346        raise ConfigError('Algorithm {alg} for activity {act} not found in {dirs}'.format(
347            alg=exe,
348            act=self.name,
349            dirs=', '.join((str(settings.PROJECT_HOME_DIR), str(settings.CORE_HOME_DIR)))))
350
351    @property
352    def test(self):
353        """Return the content of the <test> element.
354
355        This gives the location of an automated test verifying this activity.
356        """
357        # I don't think this is used anywhere
358        res = self.elem.parse_str(ELEM_TEST, None)
359        if res is None:
360            return None
361
362        return settings.PROJECT_HOME_DIR.joinpath(res)
363
364    @property
365    def browse_test_url(self):
366        """Return the a source browseable link to the file pointed to by the <test>    element."""
367        if self.test is None:
368            return None
369
370        return make_url(self.test)
371
372    @property
373    @memoized2
374    def triggers(self):
375        """Return the trigger tables for this activity as a list.
376
377        Each element consists of a dictionary with keys
378        `type` (should be 'orbital')
379        and `tablename` (a string).
380
381        There is a weird bit of the algorithm that means if the activity has something like
382
383        <trigger>
384         <table>A</>
385         <table>B</>
386        </>
387
388        This gets yielded as:
389
390        ActivityTrigger(table='A'),
391        ActivityTrigger(table='B'),
392
393        I'm not sure if this is needed we should probably just block <trigger> elements
394        from containing multiple tables but the code allows it.
395        """
396        # avoid circular dependency
397        from chart.db.model.table import TableInfo
398
399        res = []
400        for trigger_elem in self.elem.findall(ELEM_TRIGGER):
401            if self.convention not in (CallingConvention.SID_TIMERANGE,
402                                       CallingConvention.SID_TIMERANGE_TABLE):
403                raise ConfigError('{name}: Actions with triggers must have calling '
404                                  'convention sid+timerange'.format(name=self.name))
405
406            trigger_types = trigger_elem.parse_str(ELEM_TYPE, None)
407            if trigger_types is None:
408                trigger_type = None
409
410            else:
411                trigger_type = ActivityTriggerType(trigger_types)
412
413            tables = trigger_elem.parse_str(ELEM_TABLE, None)
414            if tables is None:
415                table = None
416
417            else:
418                table = TableInfo(tables)
419
420            res.append(ActivityTrigger(
421                trigger_type=trigger_type,
422                table=table,
423                with_stats=trigger_elem.parse_bool(ELEM_WITH_STATS, False),
424                latency=trigger_elem.parse_timedelta(ELEM_LATENCY, None)))
425
426        return res
427
428    @property
429    def output_tables(self):
430        """List of tables written to by this activity."""
431        # avoid circular dependency
432        from chart.db.model.table import TableInfo
433        res = []
434
435        for t in self.elem.parse_strs(ELEM_OUTPUT_TABLE):
436            res.append(TableInfo(t))
437
438        return res
439
440    @property
441    def allow_multiple(self):
442        """Return true if this activity can process more than one job in a single invocation."""
443        return self.classname == CLASS_ALGORITHM
444
445    @property
446    @memoized2
447    def sids(self):
448        """Return a list of strings covering any <sid> elements. Wildcards not expanded."""
449        result = []
450        for sid_elem in self.elem.findall(ELEM_SID):
451            result.append(SID.from_string(sid_elem.text.strip(), wildcard=True))
452
453        return result
454
455    def match_sid(self, other):
456        """Test if this activity applies to this sid.
457
458        Some activity.xml files contain <sid> elements restricting triggers
459        to only certain sids.
460        Wildcards are allowed.
461        """
462        # if there were no clauses, the test passes
463        # return not has_clauses
464        count = 0
465        for sid in self.sids:
466            if sid.match(other):
467                return True
468
469            count += 1
470
471        # with no filters, everything matches
472        return count == 0
473
474    @property
475    def events(self):
476        """List of events raised by this activity."""
477        # circular dependancy
478        from chart.events.eventclass import EventClass
479        res = []
480        for e in self.elem.parse_strs(ELEM_EVENT):
481            res.append(EventClass(e))
482
483        return res
484
485    @property
486    def eventnames(self):
487        """List of events raised by this activity."""
488        return self.elem.parse_strs(ELEM_EVENT)
489
490    @property
491    def template(self):
492        """Name of template, used by xml_report type activities."""
493        tmpl = self.elem.parse_str(ELEM_TEMPLATE, None)
494        if tmpl is None:
495            return None
496
497        return Path(tmpl)
498
499    @property
500    def template_description(self):
501        """Return description field of the report template."""
502        if self.template is None:
503            return None
504
505        root = XMLElement(filename=settings.REPORT_TEMPLATE_DIR.child(self.template))
506        return root.parse_str(ELEM_DESCRIPTION, None)
507
508    def validate_call(self,
509                      sensing_start=None,
510                      sensing_stop=None,
511                      sid=None,
512                      orbit=None,
513                      tablename=None,
514                      filename=None):
515        """Check this job can be called with the supplied parameters."""
516        if sid == SID():
517            sid = None
518
519        missing = []
520        superflous = []
521        if self.convention in (CallingConvention.TIMERANGE,
522                               CallingConvention.SID_TIMERANGE,
523                               CallingConvention.SID_TIMERANGE_TABLE):
524            if sensing_start is None and orbit is None:
525                missing.append('sensing_start')
526
527            if sensing_stop is None and orbit is None:
528                missing.append('sensing_stop')
529
530        else:
531            if orbit is not None:
532                superflous.append('orbit')
533
534            if sensing_start is not None:
535                superflous.append('sensing_start')
536
537            if sensing_stop is not None:
538                superflous.append('sensing_stop')
539
540        if self.convention is CallingConvention.FILENAME:
541            if filename is None:
542                missing.append('filename')
543
544        elif filename is not None:
545            superflous.append('filename')
546
547        if self.convention in (CallingConvention.SID_TIMERANGE,
548                               CallingConvention.SID_TIMERANGE_TABLE):
549            if sid is None:
550                missing.append('sid')
551
552        # elif sid is not None:
553            # superflous.append('sid')
554
555        if self.convention is CallingConvention.SID_TIMERANGE_TABLE:
556            if tablename is None:
557                missing.append('tablename')
558
559        elif tablename is not None:
560            superflous.append('tablename')
561
562        errors = []
563        if len(missing) > 0:
564            errors.append('Missing attributes: {miss}'.format(miss=', '.join(missing)))
565
566        if len(superflous) > 0:
567            errors.append('Superflous attributes: {super}'.format(super=', '.join(superflous)))
568
569        if len(errors) == 0:
570            return
571
572        raise MissingAttribute('. '.join(errors))
573
574    @property
575    def priority(self):
576        """Read the priority for this activity."""
577        return self.elem.parse_float(ELEM_PRIORITY, DEFAULT_PRIORITY)
578
579    @property
580    def weight(self):
581        """Read the weighting for this activity."""
582        return self.elem.parse_float(ELEM_WEIGHT, DEFAULT_WEIGHT)
583
584    @property
585    def timeout(self):
586        """Read the timeout for this activity."""
587        return self.elem.parse_timedelta(ELEM_TIMEOUT, None)
588
589    # @memoized2 - maybe worth it
590    @property
591    def configuration(self):
592        """Read activity <configuration> options"""
593        config_elem = self.elem.find(ELEM_CONFIGURATION)
594        if config_elem is None:
595            return None
596
597        result = Configuration()
598        for param_elem in config_elem.findall(ELEM_PARAMETER):
599            name = param_elem.parse_str(ELEM_NAME)
600            # <description> is ignored and this return type is nice and simple
601            # for clients to use
602            datatype = param_elem.parse_str(ELEM_DATATYPE)
603            # really, really should use a clever function from chart.common.traits here
604            # I'm only doing this because it's the initial implementation and only SYNTHETICS.xml
605            # is using configuration for now.
606            # If the system can used for anything else then it needs to call traits.py functions
607            # here - do not extend this code for more datatypes
608            if datatype == 'path':
609                setattr(result, name, Path(param_elem.parse_str(ELEM_VALUE)))
610
611        return result
612
@memoized
def all_activities():
    """Build the list of Activity objects for every XML file in the activity directories.

    The directories are visited in the order of settings.ACTIVITY_DIRS and the files
    within each directory in sorted order.

    Can also be accessed as Activity.all()

    >>> print(len(Activity.all()))  # doctest: +SKIP
    ...
    """
    return [Activity(xml_file.stem)
            for directory in settings.ACTIVITY_DIRS
            for xml_file in sorted(directory.glob('*.xml'))]


Activity.all = all_activities  # (redefining builtin all) pylint: disable=W0622, W0612
Activity.UnknownActivity = UnknownActivity  # (unused variable) pylint: disable=W0612
632
633
@memoized
def all_reports():
    """Return all report Activities.

    These are the members of `all_activities()` whose <class> is CLASS_REPORT, in the
    same order. Can also be accessed as Activity.all_reports()
    """
    # compare against the module constant instead of repeating the literal text
    return [a for a in all_activities() if a.classname == CLASS_REPORT]


Activity.all_reports = all_reports
641
642
@memoized
def find_by_exe(exe):
    """Collect every Activity whose `executable` compares equal to `exe`.

    Returns a list, empty if no activity refers to `exe`.
    Can also be accessed as Activity.find_by_exe()
    """
    return [activity for activity in Activity.all() if activity.executable == exe]


Activity.find_by_exe = find_by_exe
655
656
def display_activity(activity_name):
    """Write a two column table of everything known about `activity_name`."""
    from chart.common.prettyprint import Table

    activity = Activity(activity_name)
    table = Table()

    # rows are collected in display order and handed to the table at the end
    rows = [
        ('Name', activity.name),
        ('Description', activity.description),
        ('URL', activity.url),
        ('Enabled', activity.enabled),
        ('Calling convention', activity.convention.value),
        ('Docstring', activity.docstring),
    ]

    sids = activity.sids
    sid_text = ', '.join(str(s) for s in sids) if len(sids) > 0 else 'unrestricted'
    rows.append(('SID', sid_text))

    if activity.template:
        rows.append(('Template description', activity.template_description))

    rows.append(('Browse source url', activity.browse_source_url))
    rows.append(('Browse exe url', activity.browse_executable_url))
    rows.append(('Browse test url', activity.browse_test_url))
    rows.append(('Browse template url', activity.browse_template_url))

    for row in rows:
        table.append(row)

    table.write()
683
684
def list_activities():
    """Write a table of the name, calling convention and description of every activity."""
    from chart.common.prettyprint import Table

    table = Table(headings=('Name', 'Calling', 'Description'))
    for activity in Activity.all():
        row = (activity.name, activity.convention.value, activity.description)
        table.append(row)

    table.write()
693
694
def list_orbital_activities():
    """Show the names of all activities with per-orbit triggering.

    An activity is listed once if at least one of its triggers has type
    ActivityTriggerType.ORBITAL.
    """
    for a in Activity.all():
        # Compare the enum member directly. A trigger without a <type> has
        # `trigger_type` None, which simply fails the comparison instead of raising.
        if any(t.trigger_type is ActivityTriggerType.ORBITAL for t in a.triggers):
            print(a.name)
705
706
def test_match(activity, sid):
    """Print whether `activity` applies to `sid`, as given by `activity.match_sid`."""
    banner = 'Testing {a} against {s}...'.format(a=activity, s=sid)
    print(banner)
    outcome = activity.match_sid(sid)
    print(outcome)