1#!/usr/bin/env python3
2
3"""CHART website 'ingestion' page.
4Displays a summary of recent file ingestions into the DB.
5"""
6
7from io import StringIO
8from datetime import datetime, timedelta
9
10from django.shortcuts import render
11
12from chart.db.connection import db_connect
13from chart.project import settings
14from chart.common.util import timedelta_div
15from chart.common.prettyprint import Table
16from chart.common.prettyprint import show_timedelta_s
17
# Module-level connection to the JOBS database, opened once at import time
# and shared by every request this view handles.
db_conn = db_connect('JOBS')
19
20
def index(request):  # (unused argument) pylint: disable=W0613
    """Show summary of recent ingestion activity.

    Builds an HTML table with one row per PDU datatype covering the last
    24 hours: number of ingested PDUs, maximum and average ingestion
    latency, and any gaps in sensing-time coverage.  Renders the table
    into the ``system/ingestion.html`` template.
    """
    from chart.products import eps  # to speed up webserver startup

    # Only report a gap if the delta from the sensing_stop of the last PDU
    # to the sensing_start of the next is greater than max_gap.
    max_gap = timedelta(seconds=60)

    # Bug fix: was `StringIO.StringIO()`, which raises AttributeError under
    # `from io import StringIO` — the class itself is imported, not the module.
    res = StringIO()

    # TODO: honour user-supplied window, e.g.
    #   start_time = user_time_to_datetime(request.GET['start'])
    #   stop_time = user_time_to_datetime(request.GET['stop'])
    # Take a single timestamp so the window is exactly one day wide.
    now = datetime.utcnow()
    start_time = now - timedelta(days=1)
    stop_time = now

    t = Table(headings=('Datatype',
                        'PDUs',
                        'Max latency',
                        'Avg latency',
                        'Gaps'))

    # Datatypes seen in the window, merged with a fixed set we always report
    # on (so a completely silent datatype still shows up with zero PDUs).
    dtypes = set(row[0] for row in db_conn.query(
        "SELECT DISTINCT substr(filename,1,15) "
        "FROM jobs "
        "WHERE category=:category "
        "AND gen_time BETWEEN :start_time AND :stop_time "
        "AND activity='PDU_INGESTER'",
        category=settings.DEFAULT_CATEGORY,
        start_time=start_time,
        stop_time=stop_time)) | set(
            ('AVHR_xxx_00_M02',
             'HKTM_xxx_00_M02',
             'HIRS_xxx_00_M02',
             'MHSx_xxx_00_M02',
             'NOAA_GAC_00_N19',
             'AMSA_xxx_00_M02',
             'SEMx_xxx_00_M02',
             'GOME_xxx_00_M02',
             'GRAS_xxx_00_M02'))

    for dtype in sorted(dtypes):
        pdus = db_conn.query(
            "SELECT jobs.filename,processes.execute_stop "
            "FROM jobs "
            "LEFT OUTER JOIN processes ON jobs.process_id=processes.id "
            "WHERE category=:category "
            "AND jobs.status='COMPLETED' "
            "AND jobs.gen_time BETWEEN :start_time AND :stop_time "
            "AND substr(jobs.filename,1,15)=:dtype "
            "ORDER BY jobs.filename",
            category=settings.DEFAULT_CATEGORY,
            dtype=dtype,
            start_time=start_time,
            stop_time=stop_time).fetchall()

        pdu_count = len(pdus)
        if pdu_count == 0:
            t.append((dtype, 0, '', '', ''))
            continue

        max_latency = None
        total_latency = timedelta()
        gaps = []
        last_stop_time = start_time
        # Any file with longer duration than this is assumed to not be a
        # PDU and is excluded from the gaps report.
        max_pdu_duration = timedelta(seconds=200)
        for filename, ingestion_time in pdus:
            # Sensing start/stop are encoded in fixed positions of the
            # filename (see eps.gentime_to_datetime for the format).
            sensing_start = eps.gentime_to_datetime(filename[16:30])
            sensing_stop = eps.gentime_to_datetime(filename[32:46])
            if (sensing_stop - sensing_start) > max_pdu_duration:
                continue

            if ingestion_time is None:
                # LEFT OUTER JOIN can yield NULL execute_stop (no matching
                # process row); skip rather than crash on None arithmetic.
                continue

            latency = ingestion_time - sensing_start
            max_latency = max(max_latency, latency) if max_latency is not None else latency
            total_latency += latency
            if (sensing_start - last_stop_time) > max_gap:
                gaps.append((last_stop_time, sensing_start))

            last_stop_time = sensing_stop

        # Check for a final gap between the last PDU and the window end.
        if last_stop_time < stop_time and \
                (stop_time - last_stop_time) > max_gap:
            gaps.append((last_stop_time, stop_time))

        avg_latency = total_latency / pdu_count
        gap_str = []
        for gap in gaps:
            # Estimate missing PDU count assuming a nominal 180s PDU
            # duration, rounding to nearest integer.
            gap_str.append('{num_pdus} PDUs gap: {start} - {stop}'.format(
                num_pdus=int(timedelta_div(gap[1] - gap[0], timedelta(seconds=180)) + 0.5),
                start=gap[0],
                stop=gap[1]))

        t.append((dtype,
                  pdu_count,
                  show_timedelta_s(max_latency),
                  show_timedelta_s(avg_latency),
                  '<ul>' + ''.join('<li>' + g + '</li>' for g in gap_str) + '</ul>'))

    t.write_html(res)

    return render(request,
                  'system/ingestion.html',
                  {'table': res.getvalue()})