1#!/usr/bin/env python3
2
3"""Utilities for handling EPS native format products."""
4
5import os
6import re
7import sys
8import mmap
9import logging
10from datetime import datetime, timedelta
11
12import numpy as np
13
14from chart.common.path import Path
15from chart.products.eps import isp
16from chart.common.util import nvl
17from chart.common.exceptions import BadFileError
18from chart.project import SID
19
# If we see a timestamp jump forwards by more than JUMP_FORWARD_THRESH,
# filter out the record and all subsequent records until the timestamps drop
# back to within the threshold (see `time_filter` below).

JUMP_FORWARD_THRESH = timedelta(days=100*365)
# JUMP_FORWARD_THRESH = timedelta(seconds=1000000)
26
27
28
def gentime_to_datetime(gentime):
    """Convert an EPS 'general time' string into a datetime.

    Accepted forms are a 14 character string (YYYYMMDDhhmmss, accurate to
    seconds) or a 17 character string (YYYYMMDDhhmmssuuu, accurate to
    milliseconds), each with an optional trailing 'Z' - so 14, 15, 17 or 18
    characters in total. (The trailing character is not validated; any 15th
    or 18th character is ignored, as before.)

    Raises ValueError for any other length or unparseable content.
    """
    # single error message, previously duplicated in two places
    message = ('Cannot interpret time "{time}" (expected format: YYYYMMDDhhmmss[Z] or '
               'YYYYMMDDhhmmssuuu[Z])'.format(time=gentime))

    try:
        if len(gentime) in (14, 15):
            return datetime.strptime(gentime[:14], '%Y%m%d%H%M%S')

        elif len(gentime) in (17, 18):
            # characters 14-16 are milliseconds
            return datetime.strptime(gentime[:14], '%Y%m%d%H%M%S').replace(
                microsecond=int(gentime[14:17]) * 1000)

    except ValueError:
        # fall through to the single raise below
        pass

    raise ValueError(message)
56
57
def datetime_to_gentime(t):
    """Render datetime `t` as an EPS short time string (15 characters,
    YYYYMMDDhhmmss with a trailing 'Z')."""
    return '{0:%Y%m%d%H%M%S}Z'.format(t)
61
62
def datetime_to_gentime_ms(t):
    """Render datetime `t` as an EPS long time string (18 characters,
    YYYYMMDDhhmmssuuu with a trailing 'Z'); sub-millisecond precision is dropped.

    >>> datetime_to_gentime_ms(datetime(2011, 7, 11, 12, 6, 12, 456000))
    '20110711120612456Z'
    """
    millis = t.microsecond // 1000
    return '{0:%Y%m%d%H%M%S}{1:03d}Z'.format(t, millis)
70
71
# 'cdstime' objects are numpy arrays containing a timestamp in the
# form: 2-byte count of days since the CCSDS epoch, 4-byte milliseconds of day.
# The epoch is 2000-01-01 (isp.CCSDS_REFTIME) - see the doctest of
# datetime_to_cdstime below: 2010-09-04 maps to day 3899.
# This is the format used by the start and stop time fields of the
# EPS GRH records.
# It is also (?) the same format used for CCSDS UTC timestamps
# (which should not be used)

# big-endian, matching the on-disk byte order of EPS products
cdstime_desc = np.dtype([('DAYS', '>u2'),
                         ('MILLISECONDS', '>u4')])
81
82
def datetime_to_cdstime(dt):
    """Convert a datetime object into a cdstime object
    (DAYS / MILLISECONDS since the CCSDS epoch, see `cdstime_desc`).

    Note: contrary to the commented-out line below there is no extra
    MICROSECONDS field; sub-millisecond precision is discarded.

    >>> datetime_to_cdstime(datetime(2010, 9, 4)) # doctest: +NORMALIZE_WHITESPACE
    array([(3899, 0)], dtype=[('DAYS', '>u2'), ('MILLISECONDS', '>u4')])
    """

    diff = dt - isp.CCSDS_REFTIME
    cdstime = np.zeros((1,), dtype=cdstime_desc)
    cdstime['DAYS'] = diff.days
    cdstime['MILLISECONDS'] = diff.seconds * 1000 + diff.microseconds // 1000
    # cdstime['MICROSECONDS'] = diff.microseconds % 1000
    return cdstime
97
98
def cdstime_to_datetime(cdstime):
    """Convert a cdstime object (see `cdstime_desc`) into a standard datetime.

    Inverse of `datetime_to_cdstime`.
    """
    return isp.CCSDS_REFTIME + timedelta(days=int(cdstime['DAYS']),
                                         milliseconds=int(cdstime['MILLISECONDS']))
103
104
# Layout of the ASCII MPHR record body, as a numpy structured dtype using the
# legacy "{field: (format, byte offset)}" dict specification.  Each entry gives
# the field's ASCII width and its offset within the record body.
# NOTE(review): offsets are written as line_start + 32, presumably skipping a
# fixed-width "KEYWORD = " prefix on each MPHR line - confirm against the EPS
# Generic Product Format specification.
mphr_desc = np.dtype({
    'PARENT_PRODUCT_NAME': ('S67', 32),
    'PARENT_PRODUCT_NAME_1': ('S67', 100 + 32),
    'PARENT_PRODUCT_NAME_2': ('S67', 200 + 32),
    'PARENT_PRODUCT_NAME_3': ('S67', 300 + 32),
    'PARENT_PRODUCT_NAME_4': ('S67', 400 + 32),
    'INSTRUMENT_ID': ('S4', 500 + 32),
    'INSTRUMENT_MODEL': ('S3', 537 + 32),
    'PRODUCT_TYPE': ('S3', 573 + 32),
    'PROCESSING_LEVEL': ('S2', 609 + 32),
    'SPACECRAFT_ID': ('S3', 644 + 32),
    'SENSING_START': ('S15', 680 + 32),
    'SENSING_END': ('S15', 728 + 32),
    'SENSING_START_THEORETICAL': ('S15', 776 + 32),
    'SENSING_END_THEORETICAL': ('S15', 824 + 32),
    'PROCESSING_CENTRE': ('S4', 872 + 32),
    'PROCESSOR_MAJOR_VERSION': ('S5', 909 + 32),
    'PROCESSOR_MINOR_VERSION': ('S5', 947 + 32),
    'FORMAT_MAJOR_VERSION': ('S5', 985 + 32),
    'FORMAT_MINOR_VERSION': ('S5', 1023 + 32),
    'PROCESSING_TIME_START': ('S15', 1061 + 32),
    'PROCESSING_TIME_END': ('S15', 1109 + 32),
    'PROCESSING_MODE': ('S1', 1157 + 32),
    'DISPOSITION_MODE': ('S1', 1191 + 32),
    'RECEIVING_GROUND_STATION': ('S3', 1225 + 32),
    'RECEIVE_TIME_START': ('S15', 1261 + 32),
    'RECEIVE_TIME_END': ('S15', 1309 + 32),
    'ORBIT_START': ('S5', 1357 + 32),
    'ORBIT_END': ('S5', 1395 + 32),
    'ACTUAL_PRODUCT_SIZE': ('S11', 1433 + 32),
    'STATE_VECTOR_TIME': ('S18', 1477 + 32),
    'SEMI_MAJOR_AXIS': ('S11', 1528 + 32),
    'ECCENTRICITY': ('S11', 1572 + 32),
    'INCLINATION': ('S11', 1616 + 32),
    'PERIGEE_ARGUMENT': ('S11', 1660 + 32),
    'RIGHT_ASCENSION': ('S11', 1704 + 32),
    'MEAN_ANOMALY': ('S11', 1748 + 32),
    'X_POSITION': ('S11', 1792 + 32),
    'Y_POSITION': ('S11', 1836 + 32),
    'Z_POSITION': ('S11', 1880 + 32),
    'X_VELOCITY': ('S11', 1924 + 32),
    'Y_VELOCITY': ('S11', 1968 + 32),
    'Z_VELOCITY': ('S11', 2012 + 32),
    'EARTH_SUN_DISTANCE_RATIO': ('S11', 2056 + 32),
    'LOC_TOLERANCE_RADIAL': ('S11', 2100 + 32),
    'LOC_TOLERANCE_CROSSTRACK': ('S11', 2144 + 32),
    'LOC_TOLERANCE_ALONGTRACK': ('S11', 2188 + 32),
    'YAW_ERROR': ('S11', 2232 + 32),
    'ROLL_ERROR': ('S11', 2276 + 32),
    'PITCH_ERROR': ('S11', 2320 + 32),
    'SUBSAT_LATITUDE_START': ('S11', 2364 + 32),
    'SUBSAT_LONGITUDE_START': ('S11', 2408 + 32),
    'SUBSAT_LATITUDE_END': ('S11', 2452 + 32),
    'SUBSAT_LONGITUDE_END': ('S11', 2496 + 32),
    'LEAP_SECOND': ('S2', 2540 + 32),
    'LEAP_SECOND_UTC': ('S15', 2575 + 32),
    'TOTAL_RECORDS': ('S6', 2623 + 32),
    'TOTAL_MPHR': ('S6', 2662 + 32),
    'TOTAL_SPHR': ('S6', 2701 + 32),
    'TOTAL_IPR': ('S6', 2740 + 32),
    'TOTAL_GEADR': ('S6', 2779 + 32),
    'TOTAL_GIADR': ('S6', 2818 + 32),
    'TOTAL_VEADR': ('S6', 2857 + 32),
    'TOTAL_VIADR': ('S6', 2896 + 32),
    'TOTAL_MDR': ('S6', 2935 + 32),
    'COUNT_DEG_INST_MDR': ('S6', 2974 + 32),
    'COUNT_DEG_PROC_MDR': ('S6', 3013 + 32),
    'COUNT_DEG_INST_MDR_BLOCKS': ('S6', 3052 + 32),
    'COUNT_DEG_PROC_MDR_BLOCKS': ('S6', 3091 + 32),
    'DURATION_OF_PRODUCT': ('S8', 3130 + 32),
    'MILLISECONDS_OF_DATA_PRESENT': ('S8', 3171 + 32),
    'MILLISECONDS_OF_DATA_MISSING': ('S8', 3212 + 32),
    'SUBSETTED_PRODUCT': ('S1', 3253 + 32),
    })
179
180
# Target Python type for each MPHR field, used by TextRecord.__getitem__ to
# convert the raw ASCII values: str fields are stripped, int fields parsed
# with int(), datetime fields parsed with gentime_to_datetime.
mphr_types = {
    'PARENT_PRODUCT_NAME': str,
    'PARENT_PRODUCT_NAME_1': str,
    'PARENT_PRODUCT_NAME_2': str,
    'PARENT_PRODUCT_NAME_3': str,
    'PARENT_PRODUCT_NAME_4': str,
    'INSTRUMENT_ID': str,
    'INSTRUMENT_MODEL': int,
    'PRODUCT_TYPE': str,
    'PROCESSING_LEVEL': str,
    'SPACECRAFT_ID': str,
    'SENSING_START': datetime,
    'SENSING_END': datetime,
    'SENSING_START_THEORETICAL': datetime,
    'SENSING_END_THEORETICAL': datetime,
    'PROCESSING_CENTRE': str,
    'PROCESSOR_MAJOR_VERSION': int,
    'PROCESSOR_MINOR_VERSION': int,
    'FORMAT_MAJOR_VERSION': int,
    'FORMAT_MINOR_VERSION': int,
    'PROCESSING_TIME_START': datetime,
    'PROCESSING_TIME_END': datetime,
    'PROCESSING_MODE': str,
    'DISPOSITION_MODE': str,
    'RECEIVING_GROUND_STATION': str,
    'RECEIVE_TIME_START': datetime,
    'RECEIVE_TIME_END': datetime,
    'ORBIT_START': int,
    'ORBIT_END': int,
    'ACTUAL_PRODUCT_SIZE': int,
    'STATE_VECTOR_TIME': datetime,
    'SEMI_MAJOR_AXIS': int,
    'ECCENTRICITY': int,
    'INCLINATION': int,
    'PERIGEE_ARGUMENT': int,
    'RIGHT_ASCENSION': int,
    'MEAN_ANOMALY': int,
    'X_POSITION': int,
    'Y_POSITION': int,
    'Z_POSITION': int,
    'X_VELOCITY': int,
    'Y_VELOCITY': int,
    'Z_VELOCITY': int,
    'EARTH_SUN_DISTANCE_RATIO': int,
    'LOC_TOLERANCE_RADIAL': int,
    'LOC_TOLERANCE_CROSSTRACK': int,
    'LOC_TOLERANCE_ALONGTRACK': int,
    'YAW_ERROR': int,
    'ROLL_ERROR': int,
    'PITCH_ERROR': int,
    'SUBSAT_LATITUDE_START': int,
    'SUBSAT_LONGITUDE_START': int,
    'SUBSAT_LATITUDE_END': int,
    'SUBSAT_LONGITUDE_END': int,
    'LEAP_SECOND': int,
    'LEAP_SECOND_UTC': str,
    'TOTAL_RECORDS': int,
    'TOTAL_MPHR': int,
    'TOTAL_SPHR': int,
    'TOTAL_IPR': int,
    'TOTAL_GEADR': int,
    'TOTAL_GIADR': int,
    'TOTAL_VEADR': int,
    'TOTAL_VIADR': int,
    'TOTAL_MDR': int,
    'COUNT_DEG_INST_MDR': int,
    'COUNT_DEG_PROC_MDR': int,
    'COUNT_DEG_INST_MDR_BLOCKS': int,
    'COUNT_DEG_PROC_MDR_BLOCKS': int,
    'DURATION_OF_PRODUCT': int,
    'MILLISECONDS_OF_DATA_PRESENT': int,
    'MILLISECONDS_OF_DATA_MISSING': int,
    'SUBSETTED_PRODUCT': str,
    }
255
256
# Generic Record Header (GRH): the 20-byte big-endian header that precedes
# every record in an EPS product (4 x uint8 + uint32 size + two 6-byte
# cdstime start/stop timestamps).
grh_desc = np.dtype([('RECORD_CLASS', np.uint8),
                     ('INSTRUMENT_GROUP', np.uint8),
                     ('RECORD_SUBCLASS', np.uint8),
                     ('RECORD_SUBCLASS_VERSION', np.uint8),
                     ('RECORD_SIZE', '>u4'),
                     ('RECORD_START_TIME', cdstime_desc),
                     ('RECORD_STOP_TIME', cdstime_desc)])
264
265
# Display name for MDR record classes (GRH RECORD_CLASS values)
mdr_classnames = {
    8: 'MDR'}

# GRH RECORD_CLASS values used below
CLASS_MPHR = 1
CLASS_MDR = 8

# GRH INSTRUMENT_GROUP value whose MDRs are skipped by gen_ccsds
# NOTE(review): presumably "dummy MDR" - confirm against the EPS spec
INST_DMDR = 13
273
274
class TextRecord:
    """Representation of an EPS product text record (MPHR).

    Parses the 20 byte GRH at `offset` into `self.grh` and the ASCII
    keyword/value body into `self.data`; fields are read with
    `record[name]`, converted according to the `types` mapping.
    """

    def __init__(self, np_desc, types, buff, offset):
        """`np_desc` is the numpy dtype of the record body (e.g. `mphr_desc`),
        `types` maps field name to Python type (e.g. `mphr_types`), `buff` is a
        buffer/mmap and `offset` the byte offset of the record's GRH.
        """
        self.types = types
        self.grh = np.frombuffer(buff[offset:offset + 20], dtype=grh_desc, count=1)[0]
        # body follows the 20-byte GRH; 3306 matches the itemsize implied by
        # the dtype offsets (3286 bytes of body)
        self.data = np.frombuffer(buff[offset + 20:offset + 3306], dtype=np_desc, count=1)[0]

    def __getitem__(self, name):
        """Return field `name` converted per the types mapping.

        Raises ValueError if the types mapping specifies an unhandled type.
        """
        # this module targets Python 3 (see shebang); the old Python 2 branch
        # returning raw bytes has been removed. EPS text records are ASCII.
        data = self.data[name].decode()

        datatype = self.types[name]
        if datatype is str:
            return data.strip()

        elif datatype is int:
            return int(data.strip())

        elif datatype is datetime:
            return gentime_to_datetime(data)

        # previously an unhandled type silently returned None
        raise ValueError('No conversion defined for field {name}'.format(name=name))
299
300
class FakeMmap:
    """This object pretends to be an mmap.
    This allows files >2GB to be opened on AIX machines.
    At runtime if the mmap() call fails we create a FakeMmap instead.

    Only the read operations used by EPSReader are supported: slicing
    (including a tuple of slices, concatenated) and len().
    """

    def __init__(self, filename):
        # open read-only: the previous 'w+b' mode truncated the very product
        # we were trying to read
        self.handle = open(str(filename), 'rb')

    def get_slice(self, start, stop):
        """Return bytes [start, stop); negative values are relative to EOF."""

        # resolve offsets relative to the file end. Do not use len(self) here
        # as it can throw overflow errors on large files.
        # (note: negative offsets must be *added* to the size; the previous
        # `size - start` produced positions past EOF)
        if start < 0:
            self.handle.seek(0, os.SEEK_END)
            start = self.handle.tell() + start

        if stop < 0:
            self.handle.seek(0, os.SEEK_END)
            stop = self.handle.tell() + stop

        self.handle.seek(start)
        return self.handle.read(stop - start)

    def __getitem__(self, slices):
        """Support buf[a:b] and buf[a:b, c:d] (slices concatenated)."""
        if isinstance(slices, tuple):
            # previously sum() was used, which raises TypeError on bytes
            return b''.join(self.get_slice(s.start if s.start is not None else 0, s.stop)
                            for s in slices)

        elif isinstance(slices, slice):
            return self.get_slice(slices.start if slices.start is not None else 0, slices.stop)

        else:
            raise Exception('bad parameter to __getitem__')

    def __len__(self):
        self.handle.seek(0, os.SEEK_END)
        return self.handle.tell()
361
362
class EPSReader:
    """Read EPS L0 products and extract certain MPHR fields and CCSDS packets,
    with optional APID filtering."""

    class BadEPSFile(BadFileError):
        """Exception, cannot open file as an EPS product."""

        pass

    def __init__(self,
                 obj,
                 level=None,
                 instrument=None,
                 ignore_truncation=False):
        """Read MDRs from `obj`, checking product has `level` and `instrument`.

        `obj` may be a Path, a filename string, an open binary file object or
        an mmap. If `ignore_truncation` is set, a truncated product logs an
        error instead of raising BadFileError during iteration.
        """
        self.ignore_truncation = ignore_truncation

        if isinstance(obj, Path):
            obj = str(obj)

        if isinstance(obj, str):
            self.filename = obj
            if os.stat(obj).st_size == 0:
                raise BadFileError('Cannot open zero length file ' + obj)

            fd = open(obj, 'rb')
            try:
                self.buf = mmap.mmap(fd.fileno(), 0, access=mmap.ACCESS_READ)
            except mmap.error as e:
                if e.errno == 12:  # ENOMEM, mmap has failed due to large file size
                    logging.debug('Cannot mmap file, opening as regular file object')
                    self.buf = FakeMmap(obj)

                else:
                    # previously other errors fell through leaving self.buf
                    # unset, causing a confusing AttributeError later
                    raise

        elif isinstance(obj, mmap.mmap):
            # (the old check compared str(type(obj)) against the Python 2
            # repr and never matched on Python 3)
            self.buf = obj
            self.filename = None

        elif hasattr(obj, 'fileno'):
            # any open file object; the Python 2 builtin `file` type used here
            # before does not exist on Python 3
            self.buf = mmap.mmap(obj.fileno(), 0, access=mmap.ACCESS_READ)
            self.filename = None

        else:
            raise IOError('Could not read input object')

        if len(self.buf) < 3307:
            raise IOError('Input file is too small ({size}, min size is 3307 bytes)'.format(
                size=len(self.buf)))

        self.mphr_grh = np.frombuffer(self.buf[:3306], dtype=grh_desc, count=1)[0]
        self.mphr = TextRecord(mphr_desc, mphr_types, self.buf, 0)

        if instrument is not None and instrument != self.mphr['INSTRUMENT_ID']:
            raise EPSReader.BadEPSFile('Require MPHR instrument {req}, found {act}'.format(
                req=instrument,
                act=self.mphr['INSTRUMENT_ID']))

        # sanity-check the first record really is an MPHR
        if self.mphr.grh['RECORD_CLASS'] != CLASS_MPHR or \
                self.mphr.grh['INSTRUMENT_GROUP'] != 0 or \
                self.mphr.grh['RECORD_SUBCLASS'] != 0 or \
                self.mphr.grh['RECORD_SIZE'] != 3307:
            raise EPSReader.BadEPSFile("Input is not an EPS product")

        if level is not None and level != self.mphr['PROCESSING_LEVEL']:
            raise EPSReader.BadEPSFile('Product level is {act} not required level of {req}'.format(
                act=self.mphr['PROCESSING_LEVEL'],
                req=level))

    def get_scid(self):
        """Read SCID value from MPHR."""
        return self.mphr['SPACECRAFT_ID']

    scid = property(get_scid)

    @property
    def sid(self):
        """Return a SID for this product."""
        return SID(self.scid)

    def get_instrument(self):
        """Read instrument ID from MPHR."""
        return self.mphr['INSTRUMENT_ID']

    instrument = property(get_instrument)

    def gen_ccsds(self, apid=None):
        """Generator to extract CCSDS packets.

        Returned values are tuples of (timestamp, payload) where `payload` is
        the complete CCSDS packet as a numpy uint8 array.
        `apid`, if specified, is a single APID to be returned with other APIDs
        ignored.
        """

        offset = 3307  # skip the MPHR
        cc = 0  # count of yielded records, used in the truncation message
        while offset < len(self.buf):
            grh = np.frombuffer(self.buf[offset:offset + 20], dtype=grh_desc)[0]

            if grh['RECORD_SIZE'] <= 20:
                # a record cannot be smaller than its own header; stop reading
                logging.warning('Found record with size ' + str(grh['RECORD_SIZE']))
                return

            if grh['RECORD_CLASS'] != CLASS_MDR:
                offset += grh['RECORD_SIZE']
                continue

            if apid is not None:
                # APID is the low 11 bits of the first 2 bytes of the CCSDS
                # primary header at offset 26 in the MDR. Indexing bytes gives
                # ints on Python 3 (the old ord() calls raised TypeError).
                pi = self.buf[offset + 26:offset + 28]
                ccsds_apid = (pi[0] & 7) << 8 | pi[1]
                if ccsds_apid != apid:
                    offset += grh['RECORD_SIZE']
                    continue

            if grh['INSTRUMENT_GROUP'] == INST_DMDR:
                offset += grh['RECORD_SIZE']
                continue

            record_size = grh['RECORD_SIZE']
            if len(self.buf) < offset + record_size:
                logging.error('Truncated file, {cc} record were recovered but some may have been '
                              'lost'.format(cc=cc))
                if not self.ignore_truncation:
                    raise BadFileError('Truncated product')

            # we read this as a numpy array because later on SFWriter calls .sum() on it.
            # ? change to allow fake mmap objects may cause a speed hit
            ccsds_payload = np.frombuffer(self.buf[offset + 26:offset + record_size],
                                          dtype=np.uint8)

            yield cdstime_to_datetime(grh['RECORD_START_TIME']), ccsds_payload
            offset += grh['RECORD_SIZE']

            cc += 1
499
500
def time_filter(gen):
    """Filter the (timestamp, payload) stream produced by `EPSReader.gen_ccsds`
    or SFReader.gen_snacks so its output is strictly time-ordered.

    Duplicated timestamps, timestamps going backwards, and forward jumps of
    more than JUMP_FORWARD_THRESH (relative to the last yielded record) are
    dropped with a debug log. Some MHS 2008 and 2009 products and GRAS 2007
    products exhibit these problems.
    """

    last_good = None
    for timestamp, payload in gen:
        if last_good is not None:
            if timestamp == last_good:
                logging.debug('Found duplicated timestamp {t}'.format(t=timestamp))
                continue

            if timestamp < last_good:
                logging.debug('Found timestamps going backwards from {prev} to {now}'.format(
                    prev=last_good, now=timestamp))
                continue

            if timestamp - last_good > JUMP_FORWARD_THRESH:
                logging.debug('Timestamp has jumped forwards by {jump} (threshold {thresh})'.format(
                    jump=timestamp - last_good,
                    thresh=JUMP_FORWARD_THRESH))
                continue

        yield timestamp, payload
        last_good = timestamp
529
530
class EPSFilename:
    """Decode an EPS standard filename into SCID, sensing start and sensing stop."""

    # don't move to eps.py because that imports numpy

    class NotEPSFilename(Exception):
        """The given filename does not follow the EPS naming convention."""

        def __str__(self):
            return 'Not an EPS filename'

    matcher = re.compile(
        r'^(?P<dtype>...._..._..)_(?P<scid>...)_(?P<start>[0-9]{14})Z_(?P<stop>[0-9]{14})Z')

    def __init__(self, filename):
        """`filename` is a Path-like object; only its `name` component is examined."""
        fields = EPSFilename.matcher.match(filename.name)
        if fields is None:
            raise EPSFilename.NotEPSFilename

        self.dtype = fields.group('dtype')
        self.sid = SID(fields.group('scid'))
        self.sensing_start = gentime_to_datetime(fields.group('start'))
        self.sensing_stop = gentime_to_datetime(fields.group('stop'))