def generate_missingdays_select(tabname: str = "readings") -> Tuple[str, list]:
    """Build the SQL that reports, per year, how many days lack a value.

    For every column (``dwdts`` plus the data fields of ``tabname``) the
    query subtracts the number of non-null entries of that year from the
    ``days`` value stored in table ``years``. Only years between the
    station's ``isodate_from`` and ``isodate_to`` (first four characters
    each) are reported, newest year first.

    The statement contains two ``?`` placeholders; both are compared against
    a ``station`` column (first in the aggregated subquery, then in the join
    with ``stations``).

    :param tabname: table to aggregate the readings from
    :return: tuple of the SQL text and the list of column names, in the
        order in which they appear after ``year`` and ``days``
    """
    columns = ["dwdts"]
    columns += get_data_fields(tabname)
    delimiter = ", \n"

    # per column: days of the year minus the days that carry a value
    missing_cols = delimiter.join(
        f"y.days - case when r.{col} is not null then r.{col} else 0 end as {col}"
        for col in columns
    )
    # per column: number of rows of the year with a non-null value
    present_cols = delimiter.join(
        f"sum(case when {col} is not null then 1 else 0 end) as {col}"
        for col in columns
    )

    sql = "select " + (
        f"y.year, y.days, {missing_cols} "
        "from (select year, days from years) y "
        f"left outer join (select year, {present_cols} from {tabname} where station = ? group by year) r "
        "on y.year = r.year "
        "join stations s on s.station = ? "
        "where y.year between substr(s.isodate_from, 1, 4) and substr(s.isodate_to, 1, 4) "
        "order by y.year desc; "
    )
    return sql, columns
def show_overview(station: int, tabname: str = "readings", fields: List[str] = None, with_rows: bool = False) -> List[Timeframe]:
    """Compute the timeframes of a station and print them to stdout.

    Each timeframe is printed as ``from -days-> to`` followed by its
    indicator string. The sample rows of every timeframe are always fetched
    via ``get_two`` and stored in ``rows``; they are only printed when
    ``with_rows`` is set.

    :param station: station id
    :param tabname: table to read from
    :param fields: data columns to look at; defaults to all data fields of
        ``tabname``
    :param with_rows: also print the (up to two) sample rows per timeframe
    :return: the timeframes, with ``rows`` populated
    """
    assert isinstance(station, int)
    assert isinstance(tabname, str)
    if not fields:
        fields = get_data_fields(tabname=tabname)
    assert isinstance(fields, list)
    assert isinstance(with_rows, bool)

    timeframes = overview(station=station, tabname=tabname, fields=fields)
    for frame in timeframes:
        frame.rows = get_two(station, frame.ts_to.dwdts(), tabname=tabname, fields=fields)

    print()
    for frame in timeframes:
        span = f"{frame.ts_from} -{frame.days}-> {frame.ts_to}"
        print(f"{span:30s} {frame.indicators}")
        if not with_rows:
            continue
        # [1:-1] strips the enclosing tuple brackets from the row's text form
        print(" " + str(frame.rows[0])[1:-1])
        if len(frame.rows) == 2:
            print(" " + str(frame.rows[1])[1:-1])
    print(f"{len(timeframes)} timeframes")
    print(fields)
    print()
    return timeframes
def get_indicator_select(tabname: str = "readings", fields: List[str] = None) -> str:
    """Build the SQL that maps every data column to a presence indicator.

    The resulting statement selects ``dwdts`` plus, for each field, ``'x'``
    when the value is not null and ``'-'`` otherwise. It filters on a single
    ``station`` placeholder (``?``) and orders by ``dwdts``.

    :param tabname: table to select from
    :param fields: columns to turn into indicators; defaults to all data
        fields of ``tabname``
    :return: the SQL text
    """
    columns = fields if fields else get_data_fields(tabname)

    indicator_exprs = []
    for col in columns:
        indicator_exprs.append(
            f" case when {col} is not null then 'x' else '-' end as {col}"
        )
    select_list = ", \n".join(indicator_exprs)

    return "select \n dwdts, \n" + f"{select_list} \nfrom {tabname} \nwhere station = ? \norder by dwdts"
def get_two(station: int, dwdts: str, tabname: str = "readings", fields: List[str] = None):
    """Fetch the first two rows of a station at or after a given ``dwdts``.

    :param station: station id bound to the first placeholder
    :param dwdts: lower bound for column ``dwdts``; any ``-`` characters are
        removed before it is used
    :param tabname: table to select from
    :param fields: columns to return after ``dwdts``; defaults to all data
        fields of ``tabname``
    :return: list of at most two rows ``(dwdts, *fields)``, ordered by
        ``dwdts``
    """
    # replace() is a no-op when there is no dash, so no membership test needed
    lower_bound = dwdts.replace("-", "")
    columns = fields or get_data_fields(tabname)
    column_list = ", ".join(columns)

    sql = "select " + f"dwdts, {column_list} from {tabname} where station = ? and dwdts >= ? order by dwdts limit 2"
    with johanna.Connection(f"from dwdts = {lower_bound}", quiet=True) as c:
        cursor = c.cur.execute(sql, (station, lower_bound))
        rows = cursor.fetchall()
    return rows
def spot_check_overview():
    """Run overview, display and persistence for a few hard-coded stations.

    Uses table ``readings`` with all of its data fields and includes the
    sample rows of every timeframe.
    """
    tabname = "readings"
    # All data fields by default. For a narrower check substitute a subset,
    # e.g. ['resp', 'resp_form', 'temp2m_max', 'temp2m_min'] or
    # ['temp2m_avg', 'temp2m_max', 'temp2m_min'].
    fields = get_data_fields()
    print(fields)
    with_rows = True

    stations = [2444, 2290, 5906]
    for station in stations:
        timeframes = overview(
            station=station,
            tabname=tabname,
            fields=fields,
            with_rows=with_rows,
        )
        show_timeframes(timeframes, fields, with_rows=with_rows)
        _persist(timeframes, fields, station, with_rows=with_rows)
def overview(station: int, tabname: str = "readings", fields: List[str] = None, with_rows: bool = False) -> List[Timeframe]:
    """Split the readings of a station into timeframes of equal availability.

    The rows produced by ``get_indicator_select`` are scanned in ``dwdts``
    order. A new timeframe starts whenever the indicator string (the
    concatenated indicator columns of a row) changes. When two consecutive
    rows are more than one apart (``ts - ts0 > 1``), an additional timeframe
    labelled ``"no data"`` is inserted to cover the gap.

    :param station: station id
    :param tabname: table to read from
    :param fields: data columns to look at; defaults to all data fields of
        ``tabname``
    :param with_rows: additionally fetch sample rows for every timeframe via
        ``get_two`` (starting at the timeframe's ``ts_to``) into ``rows``
    :return: the timeframes in chronological order with ``ts_from``,
        ``ts_to`` and ``days`` set; an empty list when the station has no
        rows at all
    """
    assert isinstance(station, int)
    assert isinstance(tabname, str)
    if not fields:
        fields = get_data_fields(tabname=tabname)
    assert isinstance(fields, list)
    assert isinstance(with_rows, bool)

    sql = get_indicator_select(tabname=tabname, fields=fields)
    with johanna.Connection(f"select from {tabname}") as c:
        rows = c.cur.execute(sql, (station, )).fetchall()

    # Nothing stored for this station: there is no first row to start from.
    if not rows:
        return []

    tfs = []
    ts0 = PointInTime(rows[0][0])
    srow0 = "".join(rows[0][1:])  # indicator string
    tf = Timeframe(ts0, None, srow0, None, None)
    tfs.append(tf)
    for row in rows[1:]:
        ts = PointInTime(row[0])
        srow = "".join(row[1:])  # indicator string
        if ts - ts0 > 1:  # not next day
            # we passed an occurrence of '---------' ('-' only)
            # -> insert n/a interval:
            #    [x, _, old] -> [x, ts0, old], [ts0+1, ts-1, n/a], [ts, _, new]
            tf.ts_to = ts0
            tfs.append(Timeframe(ts0.next(), ts.prev(), "no data", None, None))
            tf = Timeframe(ts, None, srow, None, None)
            tfs.append(tf)
        elif srow != srow0:
            tf.ts_to = ts0
            tf = Timeframe(ts, None, srow, None, None)
            tfs.append(tf)
        ts0 = ts
        srow0 = srow

    # Close the last open timeframe. ts0 always holds the most recent point
    # in time, also when there was only a single row and the loop never ran.
    tf.ts_to = ts0

    for tf in tfs:
        tf.days = tf.ts_to - tf.ts_from + 1
    if with_rows:
        for tf in tfs:
            tf.rows = get_two(station, tf.ts_to.dwdts(), tabname=tabname, fields=fields)
    return tfs