def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None):
    """constructor"""
    TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider)
    # Hold our parsing results as an array of dicts
    self.data = []
    self.regime = None
    # Sometimes, we get products that are not really in CLI format but
    # are RER (record event reports) with a CLI AWIPS ID
    if self.wmo[:2] != "CD":
        LOG.info(
            "Product %s skipped due to wrong header", self.get_product_id()
        )
        return
    for section in self.find_sections():
        if not HEADLINE_RE.findall(section.replace("\n", " ")):
            continue
        # We have meat!
        self.compute_diction(section)
        valid, station = self.parse_cli_headline(section)
        data = self.parse_data(section)
        self.data.append(
            dict(
                cli_valid=valid,
                cli_station=station,
                db_station=None,
                data=data,
            )
        )

def database_save(self, txn):
    """Save this product to the database"""
    table = "mcd" if self.afos == "SWOMCD" else "mpd"
    # Remove any previous entries
    sql = f"DELETE from {table} where product_id = %s and num = %s"
    txn.execute(sql, (self.get_product_id(), self.discussion_num))
    if txn.rowcount > 0:
        LOG.info(
            "mcd.database_save %s %s removed %s entries",
            self.get_product_id(),
            self.discussion_num,
            txn.rowcount,
        )
    giswkt = "SRID=4326;%s" % (self.geometry.wkt,)
    sql = (
        f"INSERT into {table} (product, product_id, geom, issue, expire, "
        "num, year, watch_confidence, concerning) "
        "values (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )
    args = (
        self.text,
        self.get_product_id(),
        giswkt,
        self.sts,
        self.ets,
        self.discussion_num,
        self.valid.year,
        self.find_watch_probability(),
        self.concerning,
    )
    txn.execute(sql, args)

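# A minimal usage sketch for database_save(): the caller supplies a live
# psycopg2 cursor and commits afterwards.  The connection details below are
# hypothetical, shown only for illustration:
#
#   pgconn = psycopg2.connect("dbname=postgis")
#   txn = pgconn.cursor()
#   prod.database_save(txn)
#   pgconn.commit()
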
def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None):
    """Constructor

    Args:
      text (string): the raw PTS product that is to be parsed
      utcnow (datetime, optional): in case of ambiguity with time
      ugc_provider (dict, optional): unused in this class
      nwsli_provider (dict, optional): unused in this class
    """
    TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider)
    LOG.info("==== SPCPTS Processing: %s", self.get_product_id())
    load_conus_data(self.valid)
    self.issue = None
    self.expire = None
    self.day = None
    self.outlook_type = None
    self.outlook_collections = dict()
    self.set_metadata()
    self.find_issue_expire()
    self.find_outlooks()
    self.quality_control()

def init_projection(self):
    """Setup Grid and projection details"""
    if self.metadata["map_projection"] == 3:
        self.init_llc()
    elif self.metadata["map_projection"] == 1:
        self.init_mercator()
    elif self.metadata["map_projection"] == 5:
        self.init_stereo()
    else:
        LOG.info("Unknown Projection: %s", self.metadata["map_projection"])

def do_sql_observed(self, cursor, _hml):
    """Process the observed portion of the dataset"""
    ob = _hml.data["observed"]
    if ob["dataframe"] is None:
        return
    df = ob["dataframe"]
    if df.empty:
        return
    for col in ["primary", "secondary"]:
        if ob[col + "Name"] is None:
            continue
        key = "%s[%s]" % (ob[col + "Name"], ob[col + "Units"])
        # Check that we have some non-null data
        df2 = df[pd.notnull(df[col])]
        if df2.empty:
            continue
        minvalid = df2["valid"].min()
        maxvalid = df2["valid"].max()
        cursor.execute(
            """
            DELETE from hml_observed_data WHERE station = %s and
            valid >= %s and valid <= %s and
            key = get_hml_observed_key(%s)
            """,
            (_hml.station, minvalid, maxvalid, key),
        )
        for _, row in df2.iterrows():
            val = row[col]
            if val is None:
                continue
            cursor.execute(
                "INSERT into hml_observed_data "
                "(station, valid, key, value) "
                "VALUES (%s, %s, get_hml_observed_key(%s), %s) "
                "RETURNING key",
                (_hml.station, row["valid"], key, val),
            )
            if cursor.fetchone()[0] is not None:
                continue
            # Delete the bad row
            cursor.execute(
                "DELETE from hml_observed_data WHERE station = %s and "
                "valid = %s and key is null",
                (_hml.station, row["valid"]),
            )
            # Need to create a new unit!
            cursor.execute(
                "INSERT into hml_observed_keys(id, label) VALUES ("
                "(SELECT coalesce(max(id) + 1, 0) from hml_observed_keys),"
                "%s) RETURNING id",
                (key,),
            )
            LOG.info("Created key %s for %s", cursor.fetchone()[0], key)

def _resent_match(prod, txn, warning_table, vtec):
    """Check if this is a resent match."""
    txn.execute(
        f"SELECT max(updated) as maxtime from {warning_table} "
        "WHERE eventid = %s and significance = %s and wfo = %s and "
        "phenomena = %s",
        (vtec.etn, vtec.significance, vtec.office, vtec.phenomena),
    )
    maxtime = txn.fetchone()["maxtime"]
    if maxtime is not None and maxtime == prod.valid:
        LOG.info("RESENT Match, skipping SQL for %s!", prod.get_product_id())
        return True
    return False

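# Hedged illustration of the _resent_match() contract: given a parsed
# product `prod`, a dict-style cursor `txn`, and one `vtec` entry, it
# returns True when the product's valid time equals the newest `updated`
# time already stored, flagging a re-sent product that needs no SQL work.
# The table name below is hypothetical:
#
#   if _resent_match(prod, txn, "warnings_2024", vtec):
#       continue  # nothing new to store
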
def convert_key(text):
    """Convert a key value to something we store"""
    if text is None:
        return None
    if text == "YESTERDAY":
        return "today"
    if text == "TODAY":
        return "today"
    if text == "MONTH TO DATE":
        return "month"
    if text.startswith("SINCE "):
        return text.replace("SINCE ", "").replace(" ", "").lower()
    LOG.info("convert_key() failed for |%s|", text)
    return "fail"

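# Illustrative expectations for convert_key(), derived from the branches
# above (a sketch, not an exhaustive test):
#
#   convert_key("TODAY")          -> "today"
#   convert_key("YESTERDAY")      -> "today"  (lumped in with today)
#   convert_key("MONTH TO DATE")  -> "month"
#   convert_key("SINCE JUN 1")    -> "jun1"
#   convert_key("BOGUS")          -> "fail" (and the failure is logged)
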
def str2multipolygon(s):
    """Convert string PTS data into a polygon.

    Args:
      s (str): the cryptic string that we attempt to make valid polygons from
    """
    segments = get_segments_from_text(s)
    # Simple case whereby the segment is its own circle, thank goodness
    if len(segments) == 1:
        res = look_for_closed_polygon(segments[0])
        if res:
            return res
    # Keep track of generated polygons
    polys = []
    # currentpoly is our present subject of interest
    currentpoly = copy.deepcopy(CONUS["poly"])
    for i, segment in enumerate(segments):
        # debug_draw(segment, currentpoly)
        LOG.info(
            " Iterate: %s/%s, len(segment): %s (%.2f %.2f) (%.2f %.2f)",
            i + 1,
            len(segments),
            len(segment),
            segment[0][0],
            segment[0][1],
            segment[-1][0],
            segment[-1][1],
        )
        currentpoly = segment_logic(segment, currentpoly, polys)
    polys.append(currentpoly)

    res = []
    LOG.info(
        " Resulted in len(polys): %s, now quality controlling", len(polys)
    )
    for i, poly in enumerate(polys):
        if not poly.is_valid:
            LOG.info(" ERROR: polygon %s is invalid!", i)
            continue
        if poly.area == CONUS["poly"].area:
            LOG.info(" polygon %s is just CONUS, skipping", i)
            continue
        LOG.info(" polygon: %s has area: %s", i, poly.area)
        res.append(poly)
    if not res:
        raise Exception(
            "Processed no geometries, this is a bug!\n"
            " s is %s\n"
            " segments is %s" % (repr(s), repr(segments))
        )
    return MultiPolygon(res)

def get_folders(drive):
    """Return a dict of Google Drive Folders"""
    f = {}

    # Whoa, just because maxResults=999 and the returned items is less
    # than 999, it does not mean the list was complete
    folders = (
        drive.files()
        .list(
            q="mimeType = 'application/vnd.google-apps.folder'",
            maxResults=999,
        )
        .execute()
    )
    folder_list = folders["items"]
    i = 0
    while "nextPageToken" in folders:
        folders = (
            drive.files()
            .list(
                pageToken=folders["nextPageToken"],
                q="mimeType = 'application/vnd.google-apps.folder'",
                maxResults=999,
            )
            .execute()
        )
        folder_list = folder_list + folders["items"]
        i += 1
        if i > 10:
            LOG.info("get_folders iterator reached 10, aborting")
            break
    for item in folder_list:
        f[item["id"]] = dict(title=item["title"], parents=[], basefolder=None)
        for parent in item["parents"]:
            f[item["id"]]["parents"].append(parent["id"])

    for thisfolder in f:
        # title = f[thisfolder]['title']
        if not f[thisfolder]["parents"]:
            continue
        parentfolder = f[thisfolder]["parents"][0]
        if parentfolder not in f:
            LOG.info("ERROR: parentfolder: %s not in f", parentfolder)
            continue
        while parentfolder in f and len(f[parentfolder]["parents"]) > 0:
            parentfolder = f[parentfolder]["parents"][0]
        f[thisfolder]["basefolder"] = parentfolder
    return f

def cleanvalue(val):
    """cleanup the mess that is found in the Google Sheets for values

    Args:
      val (str): The value to clean up

    Returns:
      the cleaned value!
    """
    if val is None or val.strip() == "":
        return None
    if NUMBER_RE.match(val):
        return float(val)
    if CLEANVALUE_XREF.get(val):
        return CLEANVALUE_XREF[val]
    if val.lower() in [
        "did not collect",
        ".",
        "n/a",
        "clay",
        "silty clay",
        "silty clay loam",
        "clay loam",
        "sandy clay loam",
        "silt loam",
        "silty loam",
        "sandy loam",
        "sandy clay",
        "sand",
        "loam",
        "silt",
        "loamy sand",
    ]:
        return val.lower()
    if val.find("%") > -1:
        val = val.replace("%", "")
        if NUMBER_RE.match(val):
            return float(val)
    if val.find("<") > -1:
        return "< %s" % (val.replace("<", "").strip(),)
    if val not in CLEANVALUE_COMPLAINED:
        LOG.info(
            "cscap_utils.cleanvalue(%s) is unaccounted for, return None",
            repr(val),
        )
        CLEANVALUE_COMPLAINED.append(val)
    return None

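# Illustrative expectations for cleanvalue(), derived from the branches
# above and assuming NUMBER_RE matches plain decimal numbers:
#
#   cleanvalue("2.5")    -> 2.5
#   cleanvalue("50%")    -> 50.0
#   cleanvalue("< 0.2")  -> "< 0.2"
#   cleanvalue("SAND")   -> "sand"
#   cleanvalue("  ")     -> None
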
def init_llc(self):
    """Initialize Lambert Conic Conformal"""
    self.metadata["proj"] = pyproj.Proj(
        proj="lcc",
        lat_0=self.metadata["latin"],
        lat_1=self.metadata["latin"],
        lat_2=self.metadata["latin"],
        lon_0=self.metadata["lov"],
        a=6371200.0,
        b=6371200.0,
    )

    # s = 1.0
    # if self.metadata['proj_center_flag'] != 0:
    #     s = -1.0
    psi = M_PI_2 - abs(math.radians(self.metadata["latin"]))
    cos_psi = math.cos(psi)
    # r_E = RE_METERS / cos_psi
    alpha = math.pow(math.tan(psi / 2.0), cos_psi) / math.sin(psi)

    x0, y0 = self.metadata["proj"](
        self.metadata["lon1"], self.metadata["lat1"]
    )
    self.metadata["x0"] = x0
    self.metadata["y0"] = y0
    # self.metadata['dx'] *= alpha
    # self.metadata['dy'] *= alpha
    self.metadata["y1"] = y0 + (self.metadata["dy"] * self.metadata["ny"])

    (self.metadata["lon_ul"], self.metadata["lat_ul"]) = self.metadata[
        "proj"
    ](self.metadata["x0"], self.metadata["y1"], inverse=True)

    LOG.info(
        "lat1: %.5f y0: %5.f y1: %.5f lat_ul: %.3f "
        "lat_ur: %.3f lon_ur: %.3f alpha: %.5f dy: %.3f",
        self.metadata["lat1"],
        y0,
        self.metadata["y1"],
        self.metadata["lat_ul"],
        self.metadata["lat_ur"],
        self.metadata["lon_ur"],
        alpha,
        self.metadata["dy"],
    )

def parser(msg, call_id, add_metar=False):
    """Parse the message (single line) into a dict

    Args:
      msg (str): the single line of data to parse into a dict
      call_id (str): hard coded call_id as the data can't be trusted, sigh
      add_metar (bool, optional): should a METAR be generated? Default: False

    Returns:
      dict or None
    """
    match = DS3505_RE.match(msg)
    if not match:
        return
    data = match.groupdict()
    # Seems like these obs with this flag are 'bad'
    if data["srcflag"] in ["A", "B"]:
        return
    data["valid"] = datetime.strptime(
        "%s %s" % (data["yyyymmdd"], data["hhmi"]), "%Y%m%d %H%M"
    ).replace(tzinfo=timezone.utc)
    data["call_id"] = call_id
    data["lat"] = _d1000(data["lat"])
    data["lon"] = _d1000(data["lon"])
    data["wind_speed_mps"] = _d10(data["wind_speed_mps"])
    data["airtemp_c"] = _d10(data["airtemp_c"])
    data["dewpointtemp_c"] = _d10(data["dewpointtemp_c"])
    data["mslp_hpa"] = _d10(data["mslp_hpa"])
    for elem in ["drct", "ceiling_m", "vsby_m", "elevation"]:
        data[elem] = _tonumeric(data[elem])

    data["extra"] = {}
    try:
        parse_extra(data, msg[105:])
    except Exception:
        pass
    if add_metar:
        try:
            gen_metar(data)
        except Exception:
            LOG.info(
                json.dumps(data, indent=True, sort_keys=True, default=str)
            )
            raise

    return data

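# A minimal usage sketch for parser(); `line` is a hypothetical raw
# fixed-width DS3505 record read from an NCEI file:
#
#   data = parser(line, "AMW", add_metar=True)
#   if data is not None:
#       print(data["valid"], data["airtemp_c"], data["extra"])
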
def init_stereo(self):
    """Compute Polar Stereographic"""
    self.metadata["proj"] = pyproj.Proj(
        proj="stere",
        lat_ts=60,
        lat_0=90,
        lon_0=self.metadata["lov"],
        x_0=0,
        y_0=0,
        a=6371200.0,
        b=6371200.0,
    )
    # First point!
    x0, y0 = self.metadata["proj"](
        self.metadata["lon1"], self.metadata["lat1"]
    )
    self.metadata["x0"] = x0
    self.metadata["y0"] = y0

    self.metadata["y1"] = y0 + (self.metadata["dy"] * self.metadata["ny"])
    (self.metadata["lon_ul"], self.metadata["lat_ul"]) = self.metadata[
        "proj"
    ](x0, self.metadata["y1"], inverse=True)
    LOG.info(
        "lon_ul: %.2f lat_ul: %.2f "
        "lon_ll: %.2f lat_ll: %.2f "
        " lov: %.2f latin: %.2f lat1: %.2f lat2: %.2f "
        "y0: %5.f y1: %.5f dx: %.3f dy: %.3f",
        self.metadata["lon_ul"],
        self.metadata["lat_ul"],
        self.metadata["lon1"],
        self.metadata["lat1"],
        self.metadata["lov"],
        self.metadata["latin"],
        self.metadata["lat1"],
        self.metadata["lat2"],
        y0,
        self.metadata["y1"],
        self.metadata["dx"],
        self.metadata["dy"],
    )

def look_for_closed_polygon(segment):
    """Simple logic to see if our polygon is already closed."""
    if segment[0][0] == segment[-1][0] and segment[0][1] == segment[-1][1]:
        LOG.info("Single closed polygon found, done and done")
        return MultiPolygon([Polygon(segment)])

    # Slightly bad line-work, whereby the start and end points are very close
    # to each other
    if (
        (segment[0][0] - segment[-1][0]) ** 2
        + (segment[0][1] - segment[-1][1]) ** 2
    ) ** 0.5 < 0.05:
        LOG.info(
            "assuming linework error, begin: (%.2f %.2f) end: (%.2f %.2f)",
            segment[0][0],
            segment[0][1],
            segment[-1][0],
            segment[-1][1],
        )
        segment[-1] = segment[0]
        return MultiPolygon([Polygon(segment)])

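# Illustrative behavior, derived from the logic above: an explicitly closed
# ring comes back as a single-polygon MultiPolygon, and endpoints within
# ~0.05 degrees of each other are snapped shut first.
#
#   ring = [(0, 0), (0, 1), (1, 1), (1, 0), (0, 0)]
#   look_for_closed_polygon(ring)  # -> MultiPolygon([Polygon(ring)])
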
def draw_outlooks(self):
    """For debugging, draw the outlooks on a simple map for inspection!"""
    from descartes.patch import PolygonPatch
    import matplotlib.pyplot as plt

    for day, collect in self.outlook_collections.items():
        for outlook in collect.outlooks:
            fig = plt.figure(figsize=(12, 8))
            ax = fig.add_subplot(111)
            # pylint: disable=unsubscriptable-object
            ax.plot(
                CONUS["line"][:, 0],
                CONUS["line"][:, 1],
                color="b",
                label="Conus",
            )
            for poly in outlook.geometry:
                patch = PolygonPatch(
                    poly,
                    fc="tan",
                    label="Outlook %.1f" % (poly.area,),
                    zorder=2,
                )
                ax.add_patch(patch)
                ax.plot(
                    poly.exterior.xy[0],
                    poly.exterior.xy[1],
                    lw=2,
                    color="r",
                )
            ax.set_title(
                "Day %s Category %s Threshold %s"
                % (day, outlook.category, outlook.threshold)
            )
            ax.legend(loc=3)
            fn = (
                "/tmp/%s_%s_%s_%s.png"
                % (
                    day,
                    self.issue.strftime("%Y%m%d%H%M"),
                    outlook.category,
                    outlook.threshold,
                )
            ).replace(" ", "_")
            LOG.info(":: creating plot %s", fn)
            fig.savefig(fn)
            del fig
            del ax

def init_mercator(self):
    """Compute mercator projection stuff"""
    self.metadata["proj"] = pyproj.Proj(
        proj="merc",
        lat_ts=self.metadata["latin"],
        x_0=0,
        y_0=0,
        a=6371200.0,
        b=6371200.0,
    )
    x0, y0 = self.metadata["proj"](
        self.metadata["lon1"], self.metadata["lat1"]
    )
    self.metadata["x0"] = x0
    self.metadata["y0"] = y0
    x1, y1 = self.metadata["proj"](
        self.metadata["lon2"], self.metadata["lat2"]
    )
    self.metadata["x1"] = x1
    self.metadata["y1"] = y1
    self.metadata["dx"] = (x1 - x0) / self.metadata["nx"]
    self.metadata["dy"] = (y1 - y0) / self.metadata["ny"]
    (self.metadata["lon_ul"], self.metadata["lat_ul"]) = self.metadata[
        "proj"
    ](self.metadata["x0"], self.metadata["y1"], inverse=True)
    LOG.info(
        "latin: %.2f lat_ul: %.3f lon_ul: %.3f "
        "y0: %5.f y1: %.5f dx: %.3f dy: %.3f",
        self.metadata["latin"],
        self.metadata["lat_ul"],
        self.metadata["lon_ul"],
        y0,
        y1,
        self.metadata["dx"],
        self.metadata["dy"],
    )

def process_latlon(self):
    """Parse the segment looking for the 'standard' LAT...LON encoding"""
    data = self.unixtext.replace("\n", " ")
    search = LAT_LON_PREFIX.search(data)
    if search is None:
        return None
    pos = search.start()
    newdata = data[pos + 9:]
    # Go find our next non-digit, non-space character, if we find it, we
    # should truncate our string, this could be improved, I suspect
    search = re.search(r"[^\s0-9]", newdata)
    if search is not None:
        pos2 = search.start()
        newdata = newdata[:pos2]

    poly = str2polygon(newdata)
    if poly is None:
        return None

    # check 0, PGUM polygons are east longitude akrherz/pyIEM#74
    if self.tp.source == "PGUM":
        newpts = [[0 - pt[0], pt[1]] for pt in poly.exterior.coords]
        poly = Polygon(newpts)

    # check 1, is the polygon valid?
    if not poly.is_valid:
        self.tp.warnings.append(
            "LAT...LON polygon is invalid!\n%s" % (poly.exterior.xy,)
        )
        return None
    # check 2, is the exterior ring of the polygon clockwise?
    if poly.exterior.is_ccw:
        # No longer a warning as it was too much noise
        LOG.info(
            "LAT...LON polygon exterior is CCW, reversing\n%s",
            poly.exterior.xy,
        )
        poly = Polygon(
            zip(poly.exterior.xy[0][::-1], poly.exterior.xy[1][::-1])
        )
    self.giswkt = "SRID=4326;%s" % (
        dumps(MultiPolygon([poly]), rounding_precision=6),
    )
    return poly

def get_number(text):
    """Convert a string into a number, preferable a float!"""
    if text is None:
        return None
    text = text.strip()
    if text == "":
        retval = None
    elif text == "MM":
        retval = None
    elif text == "T":
        retval = TRACE_VALUE
    else:
        number = re.findall(r"[\-\+]?\d*\.\d+|[\-\+]?\d+", text)
        if len(number) == 1:
            if text.find(".") > 0:
                retval = float(number[0])
            else:
                retval = int(number[0])
        else:
            LOG.info("get_number() failed for |%s|", text)
            retval = None
    return retval

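# Illustrative expectations for get_number(), derived from the branches
# above:
#
#   get_number("T")     -> TRACE_VALUE (trace precipitation)
#   get_number("MM")    -> None (missing sentinel)
#   get_number(" 12 ")  -> 12 (int, as no decimal point is present)
#   get_number("-1.5")  -> -1.5 (float)
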
def sql(self, txn):
    """Do database work

    Args:
      txn (psycopg2.cursor): database cursor
    """
    for day, collect in self.outlook_collections.items():
        txn.execute(
            """
            DELETE from spc_outlooks where product_issue = %s
            and expire = %s and outlook_type = %s and day = %s
            """,
            (self.valid, self.expire, self.outlook_type, day),
        )
        if txn.rowcount > 0:
            LOG.info(
                "Removed %s previous spc_outlook entries", txn.rowcount
            )

        for outlook in collect.outlooks:
            if outlook.geometry.is_empty:
                continue
            sql = """
                INSERT into spc_outlooks(product_issue, issue, expire,
                threshold, category, day, outlook_type, geom)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
            """
            args = (
                self.valid,
                collect.issue,
                collect.expire,
                outlook.threshold,
                outlook.category,
                collect.day,
                self.outlook_type,
                "SRID=4326;%s" % (outlook.geometry.wkt,),
            )
            txn.execute(sql, args)

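# A minimal usage sketch for sql(): the delete-then-insert pattern above
# makes reprocessing idempotent per (product_issue, expire, outlook_type,
# day).  The cursor setup is an assumption for illustration:
#
#   txn = pgconn.cursor()
#   spcpts.sql(txn)
#   pgconn.commit()
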
def __init__(self, fobj):
    """Create a GNIFile instance with a compressed file object

    Args:
      fobj (file): A fileobject
    """
    fobj.seek(0)
    # WMO HEADER
    self.wmo = (fobj.read(21)).strip().decode("utf-8")
    d = zlib.decompressobj()
    hdata = d.decompress(fobj.read())
    self.metadata = self.read_header(hdata[21:])
    self.init_projection()
    totsz = len(d.unused_data)
    # 5120 value chunks, so we need to be careful!
    sdata = b""
    chunk = b"x\xda"
    i = 0
    for part in d.unused_data.split(b"x\xda"):
        if part == b"" and i == 0:
            continue
        chunk += part
        try:
            sdata += zlib.decompress(chunk)
            i += 1
            totsz -= len(chunk)
            chunk = b"x\xda"
        except Exception:
            chunk += b"x\xda"
    if totsz != 0:
        LOG.info("Totalsize left: %s", totsz)
    # np.frombuffer is the supported replacement for the deprecated
    # np.fromstring when reading raw bytes
    self.data = np.reshape(
        np.frombuffer(sdata, np.int8),
        (self.metadata["numlines"] + 1, self.metadata["linesize"]),
    )

def compute_wfos(self, txn):
    """Figure out which WFOs are impacted by this polygon"""
    for day, collect in self.outlook_collections.items():
        for outlook in collect.outlooks:
            if outlook.geometry.is_empty:
                continue
            sql = """
                select distinct wfo from ugcs
                WHERE st_contains(ST_geomFromEWKT('SRID=4326;%s'), centroid)
                and substr(ugc,3,1) = 'C' and wfo is not null
                and end_ts is null ORDER by wfo ASC
            """ % (outlook.geometry.wkt,)
            txn.execute(sql)
            for row in txn.fetchall():
                outlook.wfos.append(row["wfo"])
            LOG.info(
                "Day: %s Category: %s Threshold: %s #WFOS: %s %s",
                day,
                outlook.category,
                outlook.threshold,
                len(outlook.wfos),
                ",".join(outlook.wfos),
            )

def segment_logic(segment, currentpoly, polys):
    """Our segment parsing logic."""
    if segment[0] == segment[-1] and len(segment) > 2:
        LOG.info(" segment is closed polygon!")
        lr = LinearRing(LineString(segment))
        if not lr.is_ccw:
            LOG.info(" polygon is clockwise (exterior), done.")
            polys.append(currentpoly)
            return Polygon(segment)
        LOG.info(" polygon is CCW (interior), testing intersection")
        if currentpoly.intersection(lr).is_empty:
            LOG.info(" failed intersection with currentpoly, abort")
            return currentpoly
        interiors = [ln for ln in currentpoly.interiors]
        interiors.append(lr)
        newp = Polygon(currentpoly.exterior, interiors)
        if not newp.is_valid:
            LOG.info(" adding interior invalid, buffering")
            newp = newp.buffer(0)
        if newp.is_valid:
            LOG.info(
                " polygon is interior to currentpoly, area: %.2f ",
                currentpoly.area,
            )
            return newp
        raise Exception(
            "Adding interior polygon resulted in an invalid geometry, "
            "aborting"
        )

    # All open lines need to intersect the CONUS, ensure that happens
    ls = LineString(segment)
    ls = clean_segment(ls)
    if isinstance(ls, MultiLineString):
        for _ls in ls:
            LOG.info(" look out below, recursive we go.")
            currentpoly = segment_logic(_ls.coords, currentpoly, polys)
        return currentpoly
    if ls is None:
        LOG.info(" aborting as clean_segment failed...")
        return currentpoly
    LOG.info(
        " new segment start: %.4f %.4f end: %.4f %.4f",
        ls.coords[0][0],
        ls.coords[0][1],
        ls.coords[-1][0],
        ls.coords[-1][1],
    )

    # If this line segment does not intersect the current polygon of interest,
    # we should check any previous polygons to see if it intersects it. We
    # could be dealing with invalid ordering in the file, sigh.
    if currentpoly.intersection(ls).is_empty:
        LOG.info(" ls does not intersect currentpoly, looking for match")
        found = False
        for i, poly in enumerate(polys):
            intersect = poly.intersection(ls)
            if intersect.is_empty or isinstance(intersect, MultiLineString):
                continue
            LOG.info(
                " found previous polygon i:%s area: %.1f that intersects",
                i,
                poly.area,
            )
            found = True
            polys.append(currentpoly)
            currentpoly = polys.pop(i)
            break
        if not found:
            LOG.info(" setting currentpoly back to CONUS")
            polys.append(currentpoly)
            currentpoly = copy.deepcopy(CONUS["poly"])

    # Results in either [currentpoly] or [polya, polyb, ...]
    geomcollect = split(currentpoly, ls)
    if len(geomcollect) > 2:
        LOG.info(" line intersects polygon 3+ times, can't handle")
        return currentpoly
    if len(geomcollect) == 1:
        res = geomcollect.geoms[0]
    else:
        (polya, polyb) = geomcollect.geoms[0], geomcollect.geoms[1]
        # Linear reference our splitter's start and end distance
        startdist = polya.exterior.project(Point(ls.coords[0]))
        enddist = polya.exterior.project(Point(ls.coords[-1]))
        # if the end is further down the line, we want this polygon
        res = polya if enddist > startdist else polyb
    if res.area > 0.01:
        LOG.info(" taking polygon.area = %.4f", res.area)
        return res
    return currentpoly

def process_metar(mstr, now):
    """Do the METAR Processing"""
    mtr = None
    while mtr is None:
        try:
            mtr = Metar(mstr, now.month, now.year)
        except MetarParserError as exp:
            msg = str(exp)
            tokens = ERROR_RE.findall(str(exp))
            orig_mstr = mstr
            if tokens:
                for token in tokens[0].split():
                    mstr = mstr.replace(" %s" % (token,), "")
                if orig_mstr == mstr:
                    LOG.info("Can't fix badly formatted metar: %s", mstr)
                    return None
            else:
                LOG.info("MetarParserError: %s", msg)
                return None
        except Exception as exp:
            LOG.info("Double Fail: %s %s", mstr, exp)
            return None
    if mtr is None or mtr.time is None:
        return None

    ob = OB()
    ob.metar = mstr[:254]
    ob.valid = now
    if mtr.temp:
        ob.tmpf = mtr.temp.value("F")
    if mtr.dewpt:
        ob.dwpf = mtr.dewpt.value("F")
    if mtr.wind_speed:
        ob.sknt = mtr.wind_speed.value("KT")
    if mtr.wind_gust:
        ob.gust = mtr.wind_gust.value("KT")

    # Calc some stuff
    if ob.tmpf is not None and ob.dwpf is not None:
        ob.relh = (
            relative_humidity_from_dewpoint(
                ob.tmpf * units("degF"), ob.dwpf * units("degF")
            )
            .to(units("percent"))
            .magnitude
        )
        if ob.sknt is not None:
            ob.feel = (
                mcalc_feelslike(
                    ob.tmpf * units.degF,
                    ob.dwpf * units.degF,
                    ob.sknt * units("knots"),
                )
                .to(units("degF"))
                .magnitude
            )

    if mtr.wind_dir and mtr.wind_dir.value() != "VRB":
        ob.drct = mtr.wind_dir.value()

    if mtr.vis:
        ob.vsby = mtr.vis.value("SM")

    # see pull request #38
    if mtr.press and mtr.press != mtr.press_sea_level:
        ob.alti = mtr.press.value("IN")

    if mtr.press_sea_level:
        ob.mslp = mtr.press_sea_level.value("MB")

    if mtr.precip_1hr:
        ob.p01i = mtr.precip_1hr.value("IN")

    # Do something with sky coverage
    for i, (c, h, _) in enumerate(mtr.sky):
        setattr(ob, "skyc%s" % (i + 1), c)
        if h is not None:
            setattr(ob, "skyl%s" % (i + 1), h.value("FT"))

    if mtr.max_temp_6hr:
        ob.max_tmpf_6hr = mtr.max_temp_6hr.value("F")
    if mtr.min_temp_6hr:
        ob.min_tmpf_6hr = mtr.min_temp_6hr.value("F")
    if mtr.max_temp_24hr:
        ob.max_tmpf_24hr = mtr.max_temp_24hr.value("F")
    if mtr.min_temp_24hr:
        # was previously assigned to min_tmpf_6hr, a copy-paste bug
        ob.min_tmpf_24hr = mtr.min_temp_24hr.value("F")
    if mtr.precip_3hr:
        ob.p03i = mtr.precip_3hr.value("IN")
    if mtr.precip_6hr:
        ob.p06i = mtr.precip_6hr.value("IN")
    if mtr.precip_24hr:
        ob.p24i = mtr.precip_24hr.value("IN")

    # Presentwx
    if mtr.weather:
        pwx = []
        for wx in mtr.weather:
            val = "".join([a for a in wx if a is not None])
            if val == "" or val == len(val) * "/":
                continue
            pwx.append(val)
        ob.wxcodes = pwx

    return ob

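# A minimal usage sketch for process_metar(); the METAR string is
# hypothetical and `utcnow` is assumed to be a timezone-aware datetime:
#
#   ob = process_metar("KAMW 120253Z 18010KT 10SM CLR 21/12 A3002", utcnow)
#   if ob is not None:
#       print(ob.tmpf, ob.dwpf, ob.sknt)
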
def sql(txn, stid, data):
    """Persist what data we have to the IEM schema database

    In general, the IEM database's atomic data is based on the parsing of
    the METAR product.  Since we would not want the two to conflict, the
    METAR format is again used to drive the data for the database insert.

    Args:
      txn (cursor): database transaction
      stid (str): station identifier to use with the database
      data (dict): what we got from previous parsing

    Returns:
      int or None: number of rows inserted
    """
    # First problem, which metar source to use?
    # If this is a US site, likely best to always use it
    metar = data.get("metar")
    if metar is None:
        metar = data["extra"].get("REM", {}).get("MET", "")
    if len(metar) > 20 and (len(stid) == 3 or stid[0] == "P"):
        # Split off the cruft
        metar = metar.strip().replace(";", " ").replace("METAR ", "")
        metar = metar.replace("COR ", "").rstrip("=")
    table = "t%s" % (data["valid"].year,)
    ob = process_metar(metar, data["valid"])
    if ob is None:
        return
    stid = stid if len(stid) == 4 and stid[0] != "K" else stid[-3:]
    _sql = f"""
        INSERT into {table} (station, valid, tmpf, dwpf, vsby, drct,
        sknt, gust, p01i, alti, skyc1, skyc2, skyc3, skyc4, skyl1,
        skyl2, skyl3, skyl4, metar, mslp, wxcodes, p03i, p06i, p24i,
        max_tmpf_6hr, max_tmpf_24hr, min_tmpf_6hr, min_tmpf_24hr,
        report_type, relh, feel) values (%s, %s, %s, %s, %s, %s, %s,
        %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
        %s, %s, %s, %s, %s, %s, 2, %s, %s) RETURNING valid
    """
    args = (
        stid,
        ob.valid,
        ob.tmpf,
        ob.dwpf,
        ob.vsby,
        ob.drct,
        ob.sknt,
        ob.gust,
        ob.p01i,
        ob.alti,
        ob.skyc1,
        ob.skyc2,
        ob.skyc3,
        ob.skyc4,
        ob.skyl1,
        ob.skyl2,
        ob.skyl3,
        ob.skyl4,
        metar,
        ob.mslp,
        ob.wxcodes,
        ob.p03i,
        ob.p06i,
        ob.p24i,
        ob.max_tmpf_6hr,
        ob.max_tmpf_24hr,
        ob.min_tmpf_6hr,
        ob.min_tmpf_24hr,
        ob.relh,
        ob.feel,
    )
    try:
        txn.execute(_sql, args)
    except Exception:
        LOG.info(metar)
        LOG.info(args)
        raise
    return txn.rowcount

def clean_segment(ls):
    """Attempt to get this segment cleaned up.

    Args:
      ls (LineString): the inbound linework

    Returns:
      LineString, MultiLineString, or None
    """

    def _test(val):
        """Our tester."""
        return isinstance(val, MultiPoint) and len(val) == 2

    # If this intersects twice, we are golden
    res = LineString(CONUS["poly"].exterior.coords).intersection(ls)
    if _test(res):
        return ls

    # First and last point of the ls need to be exterior to the CONUS
    for idx in [0, -1]:
        pt = Point(ls.coords[idx])
        if not pt.within(CONUS["poly"]):
            continue
        pt = CONUS["poly"].exterior.interpolate(
            CONUS["poly"].exterior.project(pt)
        )
        if pt.within(CONUS["poly"]):
            LOG.info(" idx: %s is still within, evasive action", idx)
            for xoff, yoff in [
                [-0.01, -0.01],
                [-0.01, 0.0],
                [-0.01, 0.01],
                [0.0, -0.01],
                [0.0, 0.0],
                [0.0, 0.01],
                [0.01, -0.01],
                [0.01, 0.0],
                [0.01, 0.01],
            ]:
                pt2 = translate(pt, xoff=xoff, yoff=yoff)
                if not pt2.within(CONUS["poly"]):
                    pt = pt2
                    LOG.info(" idx: %s is now %s", idx, pt)
                    break
        LOG.info(
            " fix idx: %s to new: %.4f %.4f Inside: %s",
            idx,
            pt.x,
            pt.y,
            pt.within(CONUS["poly"]),
        )
        coords = list(ls.coords)
        coords[idx] = (pt.x, pt.y)
        ls = LineString(coords)

    res = LineString(CONUS["poly"].exterior.coords).intersection(ls)
    if _test(res):
        return ls

    # Are we doing 3+ intersections already
    if isinstance(res, MultiPoint) and len(res) > 2:
        return MultiLineString(
            [
                r
                for r in ls.intersection(CONUS["poly"])
                if isinstance(r, LineString)
            ]
        )

    LOG.info(" clean_segment failed with res: %s", res)
    return None

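# Hedged summary of clean_segment() outcomes, per the logic above: a
# segment already crossing the CONUS hull exactly twice is returned as-is;
# endpoints just inside the hull are nudged outside; three or more
# crossings are reduced to a MultiLineString; anything else yields None.
#
#   fixed = clean_segment(LineString(segment))
#   if fixed is None:
#       ...  # the caller abandons this segment
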
def to_metar(textprod, text):
    """Create a METAR object, if possible"""
    # Do some cleaning and whitespace trimming
    text = sanitize(text)
    if len(text) < 10:
        return
    attempt = 1
    mtr = None
    original_text = text
    valid = textprod.valid
    while attempt < 6 and mtr is None:
        try:
            mtr = METARReport(text, month=valid.month, year=valid.year)
        except MetarParserError as inst:
            tokens = ERROR_RE.findall(str(inst))
            if tokens:
                if tokens[0] == text or text.startswith(tokens[0]):
                    if not SA_RE.match(text):
                        LOG.info(
                            "%s Aborting due to non-replace %s",
                            textprod.get_product_id(),
                            str(inst),
                        )
                    return
                # So tokens contains a series of groups that needs updated
                newtext = text
                for token in tokens[0].split():
                    newtext = newtext.replace(" %s" % (token,), "")
                if newtext != text:
                    text = newtext
                else:
                    LOG.info("unparsed groups regex fail: %s", inst)
            if str(inst).find("day is out of range for month") > -1:
                if valid.day < 10:
                    valid = valid.replace(day=1) - timedelta(days=1)
        attempt += 1

    if mtr is not None:
        # Attempt to figure out more things
        if mtr.station_id is None:
            LOG.info("Aborting due to station_id being None |%s|", text)
            return None
        if mtr.time is None:
            LOG.info("Aborting due to time being None |%s|", text)
            return None
        # don't allow data more than an hour into the future
        ceiling = (textprod.utcnow + timedelta(hours=1)).replace(tzinfo=None)
        if mtr.time > ceiling:
            # careful, we may have obs from the previous month
            if ceiling.day < 5 and mtr.time.day > 15:
                prevmonth = ceiling - timedelta(days=10)
                mtr.time = mtr.time.replace(
                    year=prevmonth.year, month=prevmonth.month
                )
            else:
                LOG.info(
                    "Aborting due to time in the future "
                    "ceiling: %s mtr.time: %s",
                    ceiling,
                    mtr.time,
                )
                return None
        mtr.code = original_text
        mtr.iemid = (
            mtr.station_id[-3:]
            if mtr.station_id[0] == "K"
            else mtr.station_id
        )
        mtr.network = textprod.nwsli_provider.get(mtr.iemid, dict()).get(
            "network"
        )
        mtr.tzname = textprod.nwsli_provider.get(mtr.iemid, dict()).get(
            "tzname"
        )
    return mtr

def get_jabbers(self, uri, _uri2=None):
    """Make this into jabber messages"""
    jmsgs = []
    for mtr in self.metars:
        msg = None
        for weatheri in mtr.weather:
            for wx in weatheri:
                if wx is not None and "GR" in wx:
                    msg = "Hail"
        if TORNADO_RE.findall(mtr.code):
            msg = "Tornado"
        elif FUNNEL_RE.findall(mtr.code):
            msg = "Funnel Cloud"
        # Search for Peak wind gust info....
        elif mtr.over_wind_threshold():
            _msg = mtr.wind_message()
            if _msg:
                msg = _msg
        elif mtr.station_id in JABBER_SITES:
            # suck
            if JABBER_SITES[mtr.station_id] != mtr.time:
                JABBER_SITES[mtr.station_id] = mtr.time
                channels = ["METAR.%s" % (mtr.station_id,)]
                if mtr.type == "SPECI":
                    channels.append("SPECI.%s" % (mtr.station_id,))
                mstr = "%s %s" % (mtr.type, mtr.code)
                jmsgs.append(
                    [mstr, mstr, dict(channels=",".join(channels))]
                )
        if msg:
            row = self.nwsli_provider.get(mtr.iemid, dict())
            wfo = row.get("wfo")
            if wfo is None or wfo == "":
                LOG.info(
                    "Unknown WFO for id: %s, skipping alert", mtr.iemid
                )
                continue
            channels = ["METAR.%s" % (mtr.station_id,)]
            if mtr.type == "SPECI":
                channels.append("SPECI.%s" % (mtr.station_id,))
            channels.append(wfo)
            st = row.get("state")
            nm = row.get("name")
            extra = ""
            if mtr.code.find("$") > 0:
                extra = "(Caution: Maintenance Check Indicator)"
            url = "%s%s" % (uri, mtr.network)
            jtxt = "%s,%s (%s) ASOS %s reports %s\n%s %s" % (
                nm,
                st,
                mtr.iemid,
                extra,
                msg,
                mtr.code,
                url,
            )
            jhtml = (
                f'<p><a href="{url}">{nm},{st}</a> ({mtr.iemid}) ASOS '
                f"{extra} reports <strong>{msg}</strong>"
                f"<br/>{mtr.code}</p>"
            )
            xtra = {
                "channels": ",".join(channels),
                "lat": str(row.get("lat")),
                "long": str(row.get("lon")),
            }
            xtra["twitter"] = (
                "%s,%s (%s) ASOS reports %s -- %s"
                % (nm, st, mtr.iemid, msg, mtr.code)
            )[:TWEET_CHARS]
            jmsgs.append([jtxt, jhtml, xtra])
    return jmsgs

def to_iemaccess(self, txn, force_current_log=False, skip_current=False):
    """Persist parsed data to IEMAccess Database.

    Args:
      txn (psycopg2.cursor): database cursor / transaction
      force_current_log (boolean): should this ob always go to current_log
      skip_current (boolean): should this ob always skip current table
    """
    gts = self.time.replace(tzinfo=timezone.utc)
    iem = Observation(self.iemid, self.network, gts)
    # Load the observation from the database, if the same time exists!
    iem.load(txn)

    # Need to figure out if we have a duplicate ob, if so, check
    # the length of the raw data, if greater, take the temps
    if iem.data["raw"] is None or len(iem.data["raw"]) < len(self.code):
        if self.temp:
            val = self.temp.value("F")
            # Place reasonable bounds on the temperature before saving it!
            if val > -90 and val < 150:
                iem.data["tmpf"] = round(val, 1)
        if self.dewpt:
            val = self.dewpt.value("F")
            # Place reasonable bounds on the dew point before saving it!
            if val > -150 and val < 100:
                iem.data["dwpf"] = round(val, 1)
        # Database only allows len 254
        iem.data["raw"] = self.code[:254]
    # Always take a COR
    if self.code.find(" COR ") > -1:
        iem.data["raw"] = self.code[:254]

    wind_logic(iem, self)

    if self.max_temp_6hr:
        iem.data["max_tmpf_6hr"] = round(self.max_temp_6hr.value("F"), 1)
        if self.tzname and _is_same_day(iem.data["valid"], self.tzname):
            iem.data["max_tmpf_cond"] = iem.data["max_tmpf_6hr"]
    if self.min_temp_6hr:
        iem.data["min_tmpf_6hr"] = round(self.min_temp_6hr.value("F"), 1)
        if self.tzname and _is_same_day(iem.data["valid"], self.tzname):
            iem.data["min_tmpf_cond"] = iem.data["min_tmpf_6hr"]
    if self.max_temp_24hr:
        iem.data["max_tmpf_24hr"] = round(self.max_temp_24hr.value("F"), 1)
    if self.min_temp_24hr:
        iem.data["min_tmpf_24hr"] = round(self.min_temp_24hr.value("F"), 1)
    if self.precip_3hr:
        iem.data["p03i"] = trace(self.precip_3hr)
    if self.precip_6hr:
        iem.data["p06i"] = trace(self.precip_6hr)
    if self.precip_24hr:
        iem.data["p24i"] = trace(self.precip_24hr)
    # We assume the value is zero, sad!
    iem.data["phour"] = 0
    if self.precip_1hr:
        iem.data["phour"] = trace(self.precip_1hr)

    if self.snowdepth:
        iem.data["snowd"] = self.snowdepth.value("IN")
    if self.vis:
        iem.data["vsby"] = self.vis.value("SM")
    if self.press:
        iem.data["alti"] = self.press.value("IN")
    if self.press_sea_level:
        iem.data["mslp"] = self.press_sea_level.value("MB")
    if self.press_sea_level and self.press:
        alti = self.press.value("MB")
        mslp = self.press_sea_level.value("MB")
        if abs(alti - mslp) > 25:
            LOG.info(
                "PRESSURE ERROR %s %s ALTI: %s MSLP: %s",
                iem.data["station"],
                iem.data["valid"],
                alti,
                mslp,
            )
            if alti > mslp:
                iem.data["mslp"] += 100.0
            else:
                iem.data["mslp"] -= 100.0
    # Do something with sky coverage
    for i, (cov, hgh, _) in enumerate(self.sky):
        iem.data["skyc%s" % (i + 1)] = cov
        if hgh is not None:
            iem.data["skyl%s" % (i + 1)] = hgh.value("FT")
    # Presentwx
    if self.weather:
        pwx = []
        for wx in self.weather:
            val = "".join([a for a in wx if a is not None])
            if val == "" or val == len(val) * "/":
                continue
            pwx.append(val)
        iem.data["wxcodes"] = pwx

    # Ice Accretion
    for hr in [1, 3, 6]:
        key = "ice_accretion_%shr" % (hr,)
        iem.data[key] = trace(getattr(self, key))
    return iem, iem.save(txn, force_current_log, skip_current)

def quality_control(self):
    """Run some checks against what was parsed"""
    # 1. Do polygons overlap for the same outlook
    LOG.info("==== Running Quality Control Checks")
    for day, collect in self.outlook_collections.items():
        # Everything should be smaller than General Thunder, for conv
        tstm = self.get_outlook("CATEGORICAL", "TSTM", day)
        for outlook in collect.outlooks:
            rewrite = False
            # case of single polygon
            if tstm and len(outlook.geometry) == 1:
                if outlook.geometry.area > tstm.geometry.area:
                    rewrite = True
                    msg = (
                        "Discarding polygon as it is larger than TSTM: "
                        "Day: %s %s %s Area: %.2f TSTM Area: %.2f"
                    ) % (
                        day,
                        outlook.category,
                        outlook.threshold,
                        outlook.geometry.area,
                        tstm.geometry.area,
                    )
                    LOG.info(msg)
                    self.warnings.append(msg)
            # clip polygons to the CONUS
            good_polys = []
            for poly in outlook.geometry:
                intersect = CONUS["poly"].intersection(poly)
                if isinstance(intersect, GeometryCollection):
                    for p in intersect:
                        if isinstance(p, Polygon):
                            good_polys.append(p)
                        else:
                            LOG.info("Discarding %s as not polygon", p)
                else:
                    if isinstance(intersect, Polygon):
                        good_polys.append(intersect)
                    else:
                        LOG.info(
                            "Discarding %s as not polygon", intersect
                        )
            outlook.geometry = MultiPolygon(good_polys)

            good_polys = []
            for poly1, poly2 in itertools.permutations(
                outlook.geometry, 2
            ):
                if poly1.contains(poly2):
                    rewrite = True
                    msg = (
                        "Discarding overlapping exterior polygon: "
                        "Day: %s %s %s Area: %.2f"
                    ) % (
                        day,
                        outlook.category,
                        outlook.threshold,
                        poly1.area,
                    )
                    LOG.info(msg)
                    self.warnings.append(msg)
                elif tstm is not None and poly1.area > tstm.geometry.area:
                    rewrite = True
                    msg = (
                        "Discarding polygon as it is larger than TSTM: "
                        "Day: %s %s %s Area: %.2f"
                    ) % (
                        day,
                        outlook.category,
                        outlook.threshold,
                        poly1.area,
                    )
                    LOG.info(msg)
                    self.warnings.append(msg)
                else:
                    if poly1 not in good_polys:
                        good_polys.append(poly1)
            if rewrite:
                outlook.geometry = MultiPolygon(good_polys)
    # 2. Do the time bounds make sense, limited scope here
    if (
        self.day == 1
        and (self.issue - self.valid).total_seconds() > 8 * 3600
    ):
        self.warnings.append(
            "time_bounds_check: day: %s issue: %s valid: %s expire: %s"
            % (self.day, self.issue, self.valid, self.expire)
        )

def find_outlooks(self):
    """Find the outlook sections within the text product!"""
    if self.text.find("&&") == -1:
        self.warnings.append("Product contains no &&, adding...")
        self.text = self.text.replace("\n... ", "\n&&\n... ")
        self.text += "\n&& "
    for segment in self.text.split("&&")[:-1]:
        day = self.day
        if day is None:
            day = get_day(segment)
        # We need to figure out the probabilistic or category
        tokens = re.findall(r"\.\.\.\s+(.*)\s+\.\.\.", segment)
        if not tokens:
            continue
        category = tokens[0].strip()
        point_data = {}
        # Now we loop over the lines looking for data
        threshold = None
        for line in segment.split("\n"):
            if (
                re.match(
                    (
                        r"^(D[3-8]\-?[3-8]?|EXTM|MRGL|ENH|SLGT|MDT|ELEV|"
                        r"HIGH|CRIT|TSTM|SIGN|IDRT|SDRT|0\.[0-9][0-9]) "
                    ),
                    line,
                )
                is not None
            ):
                newthreshold = line.split()[0]
                if threshold is not None and threshold == newthreshold:
                    point_data[threshold] += " 99999999 "
                threshold = newthreshold
            if threshold is None:
                continue
            if threshold not in point_data:
                point_data[threshold] = ""
            point_data[threshold] += line.replace(threshold, " ")

        if day is not None:
            issue, expire = compute_times(
                self.afos, self.issue, self.expire, day
            )
            collect = self.outlook_collections.setdefault(
                day, SPCOutlookCollection(issue, expire, day)
            )
        # We need to duplicate, in the case of day-day spans
        for threshold in list(point_data.keys()):
            if threshold == "TSTM" and self.afos == "PFWF38":
                LOG.info("Failing to parse TSTM in PFWF38")
                del point_data[threshold]
                continue
            match = DMATCH.match(threshold)
            if match:
                data = match.groupdict()
                if data.get("day2") is not None:
                    day1 = int(data["day1"])
                    day2 = int(data["day2"])
                    LOG.info("Duplicating threshold %s-%s", day1, day2)
                    for i in range(day1, day2 + 1):
                        key = "D%s" % (i,)
                        point_data[key] = point_data[threshold]
                    del point_data[threshold]
        for threshold in point_data:
            match = DMATCH.match(threshold)
            if match:
                day = int(match.groupdict()["day1"])
                issue, expire = compute_times(
                    self.afos, self.issue, self.expire, day
                )
                collect = self.outlook_collections.setdefault(
                    day, SPCOutlookCollection(issue, expire, day)
                )
            LOG.info(
                "--> Start Day: %s Category: '%s' Threshold: '%s' =====",
                day,
                category,
                threshold,
            )
            mp = str2multipolygon(point_data[threshold])
            if DMATCH.match(threshold):
                threshold = "0.15"
            LOG.info("----> End threshold is: %s", threshold)
            collect.outlooks.append(SPCOutlook(category, threshold, mp))
