1#!/usr/bin/env python3
  2
  3"""Utilities for handling EPS native format products."""
  4
  5import os
  6import re
  7import sys
  8import mmap
  9import logging
 10from datetime import datetime, timedelta
 11
 12import numpy as np
 13
 14from chart.common.path import Path
 15from chart.products.eps import isp
 16from chart.common.util import nvl
 17from chart.common.exceptions import BadFileError
 18from chart.project import SID
 19
# If we see a timestamp jump forwards by more than JUMP_FORWARD_THRESH,
# time_filter() drops that record and all subsequent records until the
# timestamps fall back to within the threshold.

JUMP_FORWARD_THRESH = timedelta(days=100*365)
# JUMP_FORWARD_THRESH = timedelta(seconds=1000000)
 26
 27
 28
 29def gentime_to_datetime(gentime):
 30    """Convert the EPS time, either a 14 character string with optional 'Z'
 31    prefix, accurate to seconds, or 17 character string with optional 'Z' prefix
 32    accurate to milliseconds into a datetime.
 33    """
 34    try:
 35        if len(gentime) == 14:
 36            return datetime.strptime(gentime, '%Y%m%d%H%M%S')
 37
 38        elif len(gentime) == 15:
 39            return datetime.strptime(gentime[:14], '%Y%m%d%H%M%S')
 40
 41        elif len(gentime) == 17:
 42            return (datetime.strptime(gentime[:14], '%Y%m%d%H%M%S').
 43                    replace(microsecond=int(gentime[14:17]) * 1000))
 44
 45        elif len(gentime) == 18:
 46            return (datetime.strptime(gentime[:14], '%Y%m%d%H%M%S').
 47                    replace(microsecond=int(gentime[14:17]) * 1000))
 48
 49        else:
 50            raise ValueError('Cannot interpret time "{time}" (expected format: YYYYMMDDhhmmss[Z] '
 51                             'or YYYYMMDDhhmmssuuu[Z])'.format(time=gentime))
 52
 53    except ValueError:
 54        raise ValueError('Cannot interpret time "{time}" (expected format: YYYYMMDDhhmmss[Z] or '
 55                         'YYYYMMDDhhmmssuuu[Z])'.format(time=gentime))
 56
 57
 58def datetime_to_gentime(t):
 59    """Convert datetime into the the EPS format (15 character string)."""
 60    return t.strftime('%Y%m%d%H%M%SZ')
 61
 62
 63def datetime_to_gentime_ms(t):
 64    """Convert datetime into the the EPS long datetime format (18 character string).
 65
 66    >>> datetime_to_gentime_ms(datetime(2011, 7, 11, 12, 6, 12, 456000))
 67    '20110711120612456Z'
 68    """
 69    return t.strftime('%Y%m%d%H%M%S') + '{0:03}Z'.format(t.microsecond // 1000)
 70
 71
# 'cdstime' objects are numpy arrays containing a timestamp in the
# form: big-endian 2-byte days since the CCSDS epoch, big-endian 4-byte
# milliseconds of day.  The epoch is isp.CCSDS_REFTIME; the doctest in
# datetime_to_cdstime (2010-09-04 -> 3899 days) implies 2000-01-01,
# not 2001-01-01 as a previous comment stated.
# This is the format used by the start and stop time fields of the
# EPS GRH records.
# It is also (?) the same format used for CCSDS UTC timestamps
# (which should not be used)

cdstime_desc = np.dtype([('DAYS', '>u2'),
                         ('MILLISECONDS', '>u4')])
 81
 82
 83def datetime_to_cdstime(dt):
 84    """Convert a datetime object into a modified cdstime object
 85    with a microseconds field added.
 86
 87    >>> datetime_to_cdstime(datetime(2010, 9, 4)) # doctest: +NORMALIZE_WHITESPACE
 88    array([(3899, 0)], dtype=[('DAYS', '>u2'), ('MILLISECONDS', '>u4')])
 89    """
 90
 91    diff = dt - isp.CCSDS_REFTIME
 92    cdstime = np.zeros((1,), dtype=cdstime_desc)
 93    cdstime['DAYS'] = diff.days
 94    cdstime['MILLISECONDS'] = diff.seconds * 1000 + diff.microseconds // 1000
 95    # cdstime['MICROSECONDS'] = diff.microseconds % 1000
 96    return cdstime
 97
 98
 99def cdstime_to_datetime(cdstime):
100    """Convert a cdstime object into a standard datetime."""
101    return isp.CCSDS_REFTIME + timedelta(days=int(cdstime['DAYS']),
102                                   milliseconds=int(cdstime['MILLISECONDS']))
103
104
# Main Product Header Record layout as a numpy "fields dict" dtype:
# each entry maps FIELD_NAME -> (fixed-width byte-string dtype, byte offset
# into the MPHR payload, counted from just after the 20-byte GRH; see
# TextRecord.__init__).  The recurring '+ 32' presumably skips each MPHR
# line's keyword/padding prefix — confirm against the EPS Generic Product
# Format specification.
mphr_desc = np.dtype({
        'PARENT_PRODUCT_NAME': ('S67', 32),
        'PARENT_PRODUCT_NAME_1': ('S67', 100 + 32),
        'PARENT_PRODUCT_NAME_2': ('S67', 200 + 32),
        'PARENT_PRODUCT_NAME_3': ('S67', 300 + 32),
        'PARENT_PRODUCT_NAME_4': ('S67', 400 + 32),
        'INSTRUMENT_ID': ('S4', 500 + 32),
        'INSTRUMENT_MODEL': ('S3', 537 + 32),
        'PRODUCT_TYPE': ('S3', 573 + 32),
        'PROCESSING_LEVEL': ('S2', 609 + 32),
        'SPACECRAFT_ID': ('S3', 644 + 32),
        'SENSING_START': ('S15', 680 + 32),
        'SENSING_END': ('S15', 728 + 32),
        'SENSING_START_THEORETICAL': ('S15', 776 + 32),
        'SENSING_END_THEORETICAL': ('S15', 824 + 32),
        'PROCESSING_CENTRE': ('S4', 872 + 32),
        'PROCESSOR_MAJOR_VERSION': ('S5', 909 + 32),
        'PROCESSOR_MINOR_VERSION': ('S5', 947 + 32),
        'FORMAT_MAJOR_VERSION': ('S5', 985 + 32),
        'FORMAT_MINOR_VERSION': ('S5', 1023 + 32),
        'PROCESSING_TIME_START': ('S15', 1061 + 32),
        'PROCESSING_TIME_END': ('S15', 1109 + 32),
        'PROCESSING_MODE': ('S1', 1157 + 32),
        'DISPOSITION_MODE': ('S1', 1191 + 32),
        'RECEIVING_GROUND_STATION': ('S3', 1225 + 32),
        'RECEIVE_TIME_START': ('S15', 1261 + 32),
        'RECEIVE_TIME_END': ('S15', 1309 + 32),
        'ORBIT_START': ('S5', 1357 + 32),
        'ORBIT_END': ('S5', 1395 + 32),
        'ACTUAL_PRODUCT_SIZE': ('S11', 1433 + 32),
        'STATE_VECTOR_TIME': ('S18', 1477 + 32),
        'SEMI_MAJOR_AXIS': ('S11', 1528 + 32),
        'ECCENTRICITY': ('S11', 1572 + 32),
        'INCLINATION': ('S11', 1616 + 32),
        'PERIGEE_ARGUMENT': ('S11', 1660 + 32),
        'RIGHT_ASCENSION': ('S11', 1704 + 32),
        'MEAN_ANOMALY': ('S11', 1748 + 32),
        'X_POSITION': ('S11', 1792 + 32),
        'Y_POSITION': ('S11', 1836 + 32),
        'Z_POSITION': ('S11', 1880 + 32),
        'X_VELOCITY': ('S11', 1924 + 32),
        'Y_VELOCITY': ('S11', 1968 + 32),
        'Z_VELOCITY': ('S11', 2012 + 32),
        'EARTH_SUN_DISTANCE_RATIO': ('S11', 2056 + 32),
        'LOC_TOLERANCE_RADIAL': ('S11', 2100 + 32),
        'LOC_TOLERANCE_CROSSTRACK': ('S11', 2144 + 32),
        'LOC_TOLERANCE_ALONGTRACK': ('S11', 2188 + 32),
        'YAW_ERROR': ('S11', 2232 + 32),
        'ROLL_ERROR': ('S11', 2276 + 32),
        'PITCH_ERROR': ('S11', 2320 + 32),
        'SUBSAT_LATITUDE_START': ('S11', 2364 + 32),
        'SUBSAT_LONGITUDE_START': ('S11', 2408 + 32),
        'SUBSAT_LATITUDE_END': ('S11', 2452 + 32),
        'SUBSAT_LONGITUDE_END': ('S11', 2496 + 32),
        'LEAP_SECOND': ('S2', 2540 + 32),
        'LEAP_SECOND_UTC': ('S15', 2575 + 32),
        'TOTAL_RECORDS': ('S6', 2623 + 32),
        'TOTAL_MPHR': ('S6', 2662 + 32),
        'TOTAL_SPHR': ('S6', 2701 + 32),
        'TOTAL_IPR': ('S6', 2740 + 32),
        'TOTAL_GEADR': ('S6', 2779 + 32),
        'TOTAL_GIADR': ('S6', 2818 + 32),
        'TOTAL_VEADR': ('S6', 2857 + 32),
        'TOTAL_VIADR': ('S6', 2896 + 32),
        'TOTAL_MDR': ('S6', 2935 + 32),
        'COUNT_DEG_INST_MDR': ('S6', 2974 + 32),
        'COUNT_DEG_PROC_MDR': ('S6', 3013 + 32),
        'COUNT_DEG_INST_MDR_BLOCKS': ('S6', 3052 + 32),
        'COUNT_DEG_PROC_MDR_BLOCKS': ('S6', 3091 + 32),
        'DURATION_OF_PRODUCT': ('S8', 3130 + 32),
        'MILLISECONDS_OF_DATA_PRESENT': ('S8', 3171 + 32),
        'MILLISECONDS_OF_DATA_MISSING': ('S8', 3212 + 32),
        'SUBSETTED_PRODUCT': ('S1', 3253 + 32),
        })
179
180
# Python type used to interpret each MPHR field (consumed by
# TextRecord.__getitem__): str fields are stripped, int fields parsed with
# int(), datetime fields parsed with gentime_to_datetime().
mphr_types = {
    'PARENT_PRODUCT_NAME': str,
    'PARENT_PRODUCT_NAME_1': str,
    'PARENT_PRODUCT_NAME_2': str,
    'PARENT_PRODUCT_NAME_3': str,
    'PARENT_PRODUCT_NAME_4': str,
    'INSTRUMENT_ID': str,
    'INSTRUMENT_MODEL': int,
    'PRODUCT_TYPE': str,
    'PROCESSING_LEVEL': str,
    'SPACECRAFT_ID': str,
    'SENSING_START': datetime,
    'SENSING_END': datetime,
    'SENSING_START_THEORETICAL': datetime,
    'SENSING_END_THEORETICAL': datetime,
    'PROCESSING_CENTRE': str,
    'PROCESSOR_MAJOR_VERSION': int,
    'PROCESSOR_MINOR_VERSION': int,
    'FORMAT_MAJOR_VERSION': int,
    'FORMAT_MINOR_VERSION': int,
    'PROCESSING_TIME_START': datetime,
    'PROCESSING_TIME_END': datetime,
    'PROCESSING_MODE': str,
    'DISPOSITION_MODE': str,
    'RECEIVING_GROUND_STATION': str,
    'RECEIVE_TIME_START': datetime,
    'RECEIVE_TIME_END': datetime,
    'ORBIT_START': int,
    'ORBIT_END': int,
    'ACTUAL_PRODUCT_SIZE': int,
    'STATE_VECTOR_TIME': datetime,
    'SEMI_MAJOR_AXIS': int,
    'ECCENTRICITY': int,
    'INCLINATION': int,
    'PERIGEE_ARGUMENT': int,
    'RIGHT_ASCENSION': int,
    'MEAN_ANOMALY': int,
    'X_POSITION': int,
    'Y_POSITION': int,
    'Z_POSITION': int,
    'X_VELOCITY': int,
    'Y_VELOCITY': int,
    'Z_VELOCITY': int,
    'EARTH_SUN_DISTANCE_RATIO': int,
    'LOC_TOLERANCE_RADIAL': int,
    'LOC_TOLERANCE_CROSSTRACK': int,
    'LOC_TOLERANCE_ALONGTRACK': int,
    'YAW_ERROR': int,
    'ROLL_ERROR': int,
    'PITCH_ERROR': int,
    'SUBSAT_LATITUDE_START': int,
    'SUBSAT_LONGITUDE_START': int,
    'SUBSAT_LATITUDE_END': int,
    'SUBSAT_LONGITUDE_END': int,
    'LEAP_SECOND': int,
    'LEAP_SECOND_UTC': str,
    'TOTAL_RECORDS': int,
    'TOTAL_MPHR': int,
    'TOTAL_SPHR': int,
    'TOTAL_IPR': int,
    'TOTAL_GEADR': int,
    'TOTAL_GIADR': int,
    'TOTAL_VEADR': int,
    'TOTAL_VIADR': int,
    'TOTAL_MDR': int,
    'COUNT_DEG_INST_MDR': int,
    'COUNT_DEG_PROC_MDR': int,
    'COUNT_DEG_INST_MDR_BLOCKS': int,
    'COUNT_DEG_PROC_MDR_BLOCKS': int,
    'DURATION_OF_PRODUCT': int,
    'MILLISECONDS_OF_DATA_PRESENT': int,
    'MILLISECONDS_OF_DATA_MISSING': int,
    'SUBSETTED_PRODUCT': str,
    }
255
256
# Generic Record Header: the 20 bytes at the start of every EPS record.
# Multi-byte fields are big-endian.  RECORD_SIZE covers the whole record
# including this header (the MPHR's is 3307, see EPSReader).
grh_desc = np.dtype([('RECORD_CLASS', np.uint8),
                      ('INSTRUMENT_GROUP', np.uint8),
                      ('RECORD_SUBCLASS', np.uint8),
                      ('RECORD_SUBCLASS_VERSION', np.uint8),
                      ('RECORD_SIZE', '>u4'),
                      ('RECORD_START_TIME', cdstime_desc),
                      ('RECORD_STOP_TIME', cdstime_desc)])
264
265
# Display names for GRH RECORD_CLASS values handled here
mdr_classnames = {
    8: 'MDR'}

# GRH RECORD_CLASS values used by EPSReader
CLASS_MPHR = 1
CLASS_MDR = 8

# GRH INSTRUMENT_GROUP value whose MDRs are skipped by gen_ccsds()
# (presumably "dummy MDR" — confirm against the EPS format spec)
INST_DMDR = 13
273
274
class TextRecord:
    """Representation of an EPS product text record (MPHR).

    Fields are fixed-width ASCII byte strings located by `np_desc` and
    converted to Python values according to `types` on access.
    """

    def __init__(self, np_desc, types, buff, offset):
        """Read the record starting at `offset` in `buff`.

        `np_desc` -- numpy dtype mapping field name to (byte-string dtype,
                     offset within the record payload)
        `types`   -- mapping field name to str, int or datetime
        `buff`    -- byte buffer (mmap, FakeMmap or bytes)
        `offset`  -- position of the record's 20-byte GRH in `buff`
        """
        self.types = types
        # 20-byte Generic Record Header precedes the text payload
        self.grh = np.frombuffer(buff[offset:offset + 20], dtype=grh_desc, count=1)[0]
        # 3306 is the end of the MPHR payload; this class is currently
        # only used for the MPHR
        self.data = np.frombuffer(buff[offset + 20:offset + 3306], dtype=np_desc, count=1)[0]

    def __getitem__(self, name):
        """Return field `name` converted via self.types.

        Raises KeyError for unknown field names and ValueError for
        unsupported conversion types.
        """
        # fields are stored as bytes; decode unconditionally (the previous
        # sys.version_info check supported Python 2, but this is a
        # python3-shebang module)
        data = self.data[name].decode()

        datatype = self.types[name]
        if datatype is str:
            return data.strip()

        if datatype is int:
            return int(data.strip())

        if datatype is datetime:
            return gentime_to_datetime(data)

        # previously fell through returning None silently
        raise ValueError('Unsupported type for field {name}'.format(name=name))
299
300
class FakeMmap:
    """This object pretends to be an mmap.
    This allows files >2GB to be opened on AIX machines.
    At runtime if the mmap() call fails we create a FakeMmap instead.
    """

    def __init__(self, filename):
        """Open `filename` for reading.

        Read-only mode: the previous 'w+b' mode truncated (destroyed) the
        product file it was supposed to be reading.
        """
        self.handle = open(str(filename), 'rb')

    def get_slice(self, start, stop):
        """Return bytes [start, stop) of the file, mimicking mmap slicing.

        Negative positions count back from the end of the file.
        """
        if start < 0 or stop < 0:
            # do not use len(self) here as it can throw overflow errors
            # on large files
            self.handle.seek(0, os.SEEK_END)
            size = self.handle.tell()
            # negative offsets are relative to EOF; the old code computed
            # `size - offset` which seeked *past* the end of the file
            if start < 0:
                start = size + start

            if stop < 0:
                stop = size + stop

        self.handle.seek(start)
        return self.handle.read(stop - start)

    def __getitem__(self, slices):
        if isinstance(slices, tuple):
            # concatenate the pieces; the old sum() raised TypeError because
            # bytes objects cannot be added to the integer 0 start value
            return b''.join(self.get_slice(nvl(s.start, 0), s.stop) for s in slices)

        elif isinstance(slices, slice):
            return self.get_slice(nvl(slices.start, 0), slices.stop)

        else:
            raise Exception('bad parameter to __getitem__')

    def __len__(self):
        """File size in bytes."""
        self.handle.seek(0, os.SEEK_END)
        return self.handle.tell()
361
362
class EPSReader:
    """Read EPS L0 products and extract certain MPHR fields and CCSDS packets,
    with optional APID filtering."""

    class BadEPSFile(BadFileError):
        """Exception, cannot open file as an EPS product."""

        pass

    def __init__(self,
                 obj,
                 level=None,
                 instrument=None,
                 ignore_truncation=False):
        """Read MDRs from `obj`, checking product has `level` and `instrument`.

        `obj` may be a Path, a filename string, an open binary file object
        or an existing mmap.
        Raises BadFileError / EPSReader.BadEPSFile / IOError on bad input.
        """
        self.ignore_truncation = ignore_truncation

        if isinstance(obj, Path):
            obj = str(obj)

        if isinstance(obj, str):
            self.filename = obj
            if os.stat(obj).st_size == 0:
                raise BadFileError('Cannot open zero length file ' + obj)

            fd = open(obj, 'rb')
            try:
                self.buf = mmap.mmap(fd.fileno(), 0, access=mmap.ACCESS_READ)
            except mmap.error as e:
                if e.errno == 12:  # "Not enough space", mmap has failed due to large file size
                    logging.debug('Cannot mmap file, opening as regular file object')
                    self.buf = FakeMmap(obj)

                else:
                    # any other mmap failure is a genuine error; previously it
                    # was swallowed, leaving self.buf unset
                    raise

        elif isinstance(obj, mmap.mmap):
            # was a comparison against the *Python 2* repr of the mmap type,
            # which never matched under Python 3
            self.buf = obj
            self.filename = None

        elif hasattr(obj, 'fileno'):
            # an open file object (the `file` builtin no longer exists in
            # Python 3, so duck-type on fileno(); mmap objects have no
            # fileno and are handled above)
            self.buf = mmap.mmap(obj.fileno(), 0, access=mmap.ACCESS_READ)
            self.filename = None

        else:
            raise IOError('Could not read input object')

        if len(self.buf) < 3307:
            raise IOError('Input file is too small ({size}, min size is 3307 bytes)'.format(
                    size=len(self.buf)))

        # the first record must be the MPHR; read its GRH and text fields
        self.mphr_grh = np.frombuffer(self.buf[:3306], dtype=grh_desc, count=1)[0]
        self.mphr = TextRecord(mphr_desc, mphr_types, self.buf, 0)

        # validate the structure before trusting any MPHR field values
        if self.mphr.grh['RECORD_CLASS'] != CLASS_MPHR or \
                self.mphr.grh['INSTRUMENT_GROUP'] != 0 or \
                self.mphr.grh['RECORD_SUBCLASS'] != 0 or \
                self.mphr.grh['RECORD_SIZE'] != 3307:
            raise EPSReader.BadEPSFile("Input is not an EPS product")

        if instrument is not None and instrument != self.mphr['INSTRUMENT_ID']:
            raise EPSReader.BadEPSFile('Require MPHR instrument {req}, found {act}'.format(
                    req=instrument,
                    act=self.mphr['INSTRUMENT_ID']))

        if level is not None and level != self.mphr['PROCESSING_LEVEL']:
            raise EPSReader.BadEPSFile('Product level is {act} not required level of {req}'.format(
                    act=self.mphr['PROCESSING_LEVEL'],
                    req=level))

    def get_scid(self):
        """Read SCID value from MPHR."""
        return self.mphr['SPACECRAFT_ID']

    scid = property(get_scid)

    @property
    def sid(self):
        """Return a SID for this product."""
        return SID(self.scid)

    def get_instrument(self):
        """Read instrument ID from MPHR."""
        return self.mphr['INSTRUMENT_ID']

    instrument = property(get_instrument)

    def gen_ccsds(self, apid=None):
        """Generator to extract CCSDS packets.

        Returned values are tuples of (timestamp, payload) where `payload` is the complete CCSDS
        packet as a numpy uint8 array.
        `apid`, if specified, is either a single APID or a collection of APIDs to be returned
        with other APIDs ignored.
        Raises BadFileError on a truncated product unless `ignore_truncation` was set.
        """
        if isinstance(apid, int):
            # generalisation: callers historically passed a single APID even
            # though the docstring promised a list; accept both
            apid = (apid,)

        offset = 3307  # skip the MPHR
        cc = 0  # number of MDRs yielded so far, for the truncation message
        while offset < len(self.buf):
            grh = np.frombuffer(self.buf[offset:offset + 20], dtype=grh_desc)[0]

            if grh['RECORD_SIZE'] <= 20:
                # a record cannot be smaller than its own header; stop scanning
                logging.warning('Found record with size ' + str(grh['RECORD_SIZE']))
                return

            if grh['RECORD_CLASS'] != CLASS_MDR:
                offset += grh['RECORD_SIZE']
                continue

            if apid is not None:
                # packet ID bytes of the embedded CCSDS header; APID is the
                # low 11 bits.  Indexing bytes yields ints in Python 3 — the
                # old ord() calls raised TypeError
                pi = self.buf[offset + 26:offset + 28]
                ccsds_apid = (pi[0] & 7) << 8 | pi[1]
                if ccsds_apid not in apid:
                    offset += grh['RECORD_SIZE']
                    continue

            if grh['INSTRUMENT_GROUP'] == INST_DMDR:
                # skip dummy MDRs
                offset += grh['RECORD_SIZE']
                continue

            record_size = grh['RECORD_SIZE']
            if len(self.buf) < offset + record_size:
                logging.error('Truncated file, {cc} records were recovered but some may have '
                              'been lost'.format(cc=cc))
                if not self.ignore_truncation:
                    raise BadFileError('Truncated product')

            # we read this as a numpy array because later on SFWriter calls .sum() on it.
            # ? change to allow fake mmap objects may cause a speed hit
            ccsds_payload = np.frombuffer(self.buf[offset + 26:offset + record_size],
                                          dtype=np.uint8)

            yield cdstime_to_datetime(grh['RECORD_START_TIME']), ccsds_payload
            offset += grh['RECORD_SIZE']

            cc += 1
499
500
def time_filter(gen):
    """Filter for generators returned by `EPSReader.gen_ccsds` or SFReader.gen_snacks.

    Yields only (timestamp, payload) pairs whose timestamps strictly increase:
    duplicates, backwards steps and implausible forward jumps (more than
    JUMP_FORWARD_THRESH) are dropped with a debug log message.  Some MHS 2008
    and 2009 products and GRAS 2007 products have this problem.
    """
    prev = None
    for timestamp, payload in gen:
        if prev is not None:
            if timestamp == prev:
                logging.debug('Found duplicated timestamp {t}'.format(t=timestamp))
                continue

            if timestamp < prev:
                logging.debug('Found timestamps going backwards from {prev} to {now}'.format(
                        prev=prev, now=timestamp))
                continue

            if timestamp - prev > JUMP_FORWARD_THRESH:
                logging.debug('Timestamp has jumped forwards by {jump} (threshold {thresh})'.format(
                        jump=timestamp - prev,
                        thresh=JUMP_FORWARD_THRESH))
                continue

        yield timestamp, payload
        prev = timestamp
529
530
class EPSFilename:
    """Decode an EPS standard filename into SCID, sensing start and sensing stop."""

    # don't move to eps.py because that imports numpy

    class NotEPSFilename(Exception):
        """Raised when a filename does not follow the EPS naming convention."""

        def __str__(self):
            return 'Not an EPS filename'

    matcher = re.compile(
        r'^(?P<dtype>...._..._..)_(?P<scid>...)_(?P<start>[0-9]{14})Z_(?P<stop>[0-9]{14})Z')

    def __init__(self, filename):
        """Parse `filename` (an object with a `name` attribute, e.g. a Path)."""
        parsed = self.matcher.match(filename.name)
        if parsed is None:
            raise EPSFilename.NotEPSFilename

        self.dtype = parsed.group('dtype')
        self.sid = SID(parsed.group('scid'))
        self.sensing_start = gentime_to_datetime(parsed.group('start'))
        self.sensing_stop = gentime_to_datetime(parsed.group('stop'))