def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None): """ constructor """ TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider)
def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None): """constructor""" TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider) self.data = [] self.parse_data()
def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None): """Constructor Args: text (string): the raw PTS product that is to be parsed utcnow (datetime, optional): in case of ambuigity with time ugc_provider (dict, optional): unused in this class nwsli_provider (dict, optional): unused in this class """ TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider) LOG.info("==== SPCPTS Processing: %s", self.get_product_id()) load_conus_data(self.valid) self.issue = None self.expire = None self.day = None self.outlook_type = None self.outlook_collections = dict() self.set_metadata() self.find_issue_expire() self.find_outlooks() self.quality_control()
def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None): """ constructor """ TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider) # Hold our parsing results as an array of dicts self.data = [] self.regime = None # Sometimes, we get products that are not really in CLI format but # are RER (record event reports) with a CLI AWIPS ID if self.wmo[:2] != 'CD': print(('Product %s skipped due to wrong header') % (self.get_product_id(), )) return for section in self.find_sections(): if len(HEADLINE_RE.findall(section.replace("\n", " "))) == 0: continue # We have meat! self.compute_diction(section) valid, station = self.parse_cli_headline(section) data = self.parse_data(section) self.data.append( dict(cli_valid=valid, cli_station=station, data=data))
def __init__(self, text, utcnow=None): """Constructor Args: text (str): text to parse """ TextProduct.__init__(self, text, utcnow=utcnow) self.data = _processor(self)
def process(order):
    """ Process this timestamp """
    cursor = PGCONN.cursor()
    ts = datetime.datetime.strptime(order[:6], "%y%m%d").replace(
        tzinfo=pytz.utc)
    base = ts - datetime.timedelta(days=2)
    ceiling = ts + datetime.timedelta(days=2)
    subprocess.call("tar -xzf %s" % (order, ), shell=True)
    inserts = 0
    deletes = 0
    filesparsed = 0
    bad = 0
    for fn in glob.glob("%s[0-2][0-9].*" % (order[:6], )):
        content = re.sub(
            BAD_CHARS, "",
            open(fn, 'rb').read().decode('ascii', 'ignore'))
        # Now we are getting closer, let's split by the delimiter as we
        # may have multiple products in one file!
        for bulletin in content.split("\001"):
            if bulletin == '':
                continue
            try:
                bulletin = noaaport_text(bulletin)
                prod = TextProduct(bulletin, utcnow=ts, parse_segments=False)
                prod.source = XREF_SOURCE.get(prod.source, prod.source)
            except Exception as exp:
                if DEBUG:
                    print('Parsing Failure %s' % (exp, ))
                bad += 1
                continue
            if prod.valid < base or prod.valid > ceiling:
                # print('Timestamp out of bounds %s %s %s' % (
                #     base, prod.valid, ceiling))
                bad += 1
                continue
            table = "products_%s_%s" % (
                prod.valid.year,
                ("0712" if prod.valid.month > 6 else "0106"))
            cursor.execute(
                """
                DELETE from """ + table + """ WHERE pil = %s and
                entered = %s and source = %s and data = %s
                """, (prod.afos, prod.valid, prod.source, bulletin))
            deletes += cursor.rowcount
            cursor.execute(
                """INSERT into """ + table + """
                (data, pil, entered, source, wmo)
                values (%s,%s,%s,%s,%s)
                """, (bulletin, prod.afos, prod.valid, prod.source, prod.wmo))
            inserts += 1
        os.unlink(fn)
        filesparsed += 1
    print(("%s Files Parsed: %s Inserts: %s Deletes: %s Bad: %s"
           ) % (order, filesparsed, inserts, deletes, bad))
    cursor.close()
    PGCONN.commit()

def __init__(self, text):
    ''' constructor '''
    TextProduct.__init__(self, text)
    self.geometry = self.parse_geometry()
    self.discussion_num = self.parse_discussion_num()
    self.attn_wfo = self.parse_attn_wfo()
    self.attn_rfc = self.parse_attn_rfc()
    self.areas_affected = self.parse_areas_affected()
    self.watch_prob = self.find_watch_probability()
    self.cwsus = []

def __init__(self, text, utcnow=None): """Constructor Args: text (str): text to parse """ TextProduct.__init__(self, text, utcnow=utcnow) self.data = None self.issue = None self.do_parsing()
def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None): """Constructor Args: text (string): the raw string to process""" TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider) self.metars = [] self.split_and_parse()
def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None):
    ''' constructor '''
    TextProduct.__init__(self, text, utcnow=utcnow,
                         ugc_provider=ugc_provider,
                         nwsli_provider=nwsli_provider)

def mycmp(orig, mod, data, data2, line, line2): p1 = findp(orig) p2 = findp(mod) if p1 == 'T' and p2 == 'M': tp = TextProduct(data) tp2 = TextProduct(data2) print(("%s %s -> %s\n" "%s -> %s\nhttps://mesonet.agron.iastate.edu/p.php?pid=%s\n" "%s -> %s\nhttps://mesonet.agron.iastate.edu/p.php?pid=%s\n") % ( orig[0], p1, p2, tp.afos, line, tp.get_product_id(), tp2.afos, line2, tp2.get_product_id()))
def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None):
    ''' constructor '''
    TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider)
    self.sigmets = []
    if self.afos in ['SIGC', 'SIGW', 'SIGE', 'SIGAK1', 'SIGAK2']:
        self.process_SIGC()
    elif self.afos[:2] == 'WS':
        self.process_WS()
    else:
        self.process_ocean()

def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None): """ constructor """ TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider) # Hold our parsing results as an array of dicts self.station = "%s%s" % (self.source[0], self.afos[3:]) self.df = None self.parser()
def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None): ''' constructor ''' # Make sure we are CRLF above all else if text.find("\r\r\n") == -1: text = text.replace("\n", "\r\r\n") # Get rid of extraneous whitespace on right hand side only text = "\r\r\n".join([a.rstrip() for a in text.split("\r\r\n")]) TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider) self.nwsli_provider = nwsli_provider self.skip_con = self.get_skip_con()
def __init__(self, text, utcnow=None): """Constructor Args: text (str): text to parse """ TextProduct.__init__(self, text, utcnow=utcnow) self.saw = int(self.afos[3:].strip()) self.action = self.find_action() self.geometry = self.find_polygon() self.ww_num = self.find_ww_num() (self.sts, self.ets) = self.find_time() self.ww_type = self.find_ww_type()
def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None):
    ''' constructor '''
    TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider)
    self.geometry = self.parse_geometry()
    self.discussion_num = self.parse_discussion_num()
    self.attn_wfo = self.parse_attn_wfo()
    self.attn_rfc = self.parse_attn_rfc()
    self.areas_affected = self.parse_areas_affected()
    self.watch_prob = self.find_watch_probability()
    self.sts, self.ets = self.find_valid_times()
    self.cwsus = []

def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None): """ constructor """ TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider) self.sigmets = [] if self.afos in ["SIGC", "SIGW", "SIGE", "SIGAK1", "SIGAK2"]: self.process_SIGC() elif self.afos[:2] == "WS": self.process_WS() else: self.process_ocean()
def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None):
    ''' constructor '''
    TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider)
    self.geometry = self.parse_geometry()
    self.discussion_num = self.parse_discussion_num()
    self.attn_wfo = self.parse_attn_wfo()
    self.attn_rfc = self.parse_attn_rfc()
    self.areas_affected = self.parse_areas_affected()
    self.watch_prob = self.find_watch_probability()
    self.cwsus = []

def __init__(self, text, utcnow=None): """Constructor Args: text (str): text to parse """ TextProduct.__init__(self, text, utcnow=utcnow) self.saw = int(self.afos[3:].strip()) self.action = self.find_action() self.geometry = self.find_polygon() self.ww_num = self.find_ww_num() (self.sts, self.ets) = self.find_time() self.ww_type = self.find_ww_type() self.affected_wfos = []
def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None): ''' constructor ''' # Make sure we are CRLF above all else if text.find("\r\r\n") == -1: text = text.replace("\n", "\r\r\n") # Get rid of extraneous whitespace on right hand side only text = "\r\r\n".join([a.rstrip() for a in text.split("\r\r\n")]) TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider) # Which time partitioned table does this product belong to # defaults to current UTC valid self.db_year = self.valid.year self.nwsli_provider = nwsli_provider self.skip_con = self.get_skip_con()
def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None): """ constructor """ self.lsrs = [] self.duplicates = 0 TextProduct.__init__( self, text, utcnow=utcnow, ugc_provider=ugc_provider, nwsli_provider=nwsli_provider, )
def main(): """Go""" os.chdir("/mesonet/tmp") sts = datetime.datetime(2011, 7, 19) ets = datetime.datetime(2011, 8, 3) interval = datetime.timedelta(days=1) now = sts while now < ets: out = open('%s.data' % (now.strftime("%Y%m%d"),), 'w') subprocess.call(("tar -zxf /mesonet/ARCHIVE/raw/noaaport/%s/%s.tgz" ) % (now.year, now.strftime("%Y%m%d")), shell=True) for q in range(0, 24): print("%s %s" % (now, q)) fn = "%s%02i.txt" % (now.strftime("%Y%m%d"), q) if not os.path.isfile(fn): print('Missing %s' % (fn,)) continue o = open(fn).read() prods = o.split("\003") for prod in prods: try: p = TextProduct(prod) except Exception as exp: continue if p.afos is not None and p.afos[:3] in ['HML', ]: out.write(prod + "\003") os.unlink(fn) out.close() now += interval
def main(): """Go""" os.chdir("/mesonet/tmp/noaaport") sts = datetime.datetime(2017, 10, 10) sts = sts.replace(tzinfo=pytz.utc) ets = datetime.datetime(2017, 11, 3) ets = ets.replace(tzinfo=pytz.utc) interval = datetime.timedelta(days=1) now = sts while now < ets: subprocess.call(("tar -zxf /mesonet/ARCHIVE/raw/noaaport/%s/%s.tgz") % (now.year, now.strftime("%Y%m%d")), shell=True) out = open("%s.txt" % (now.strftime("%Y%m%d"), ), 'w') for hour in tqdm.tqdm(range(0, 24), desc=now.strftime("%m%d")): fn = "%s%02i.txt" % (now.strftime("%Y%m%d"), hour) if not os.path.isfile(fn): print('Missing %s' % (fn, )) continue fp = open(fn).read() prods = fp.split("\003") for prod in prods: if prod.find("RRSTAR") == -1: continue try: tp = TextProduct(prod, utcnow=now) except Exception as _exp: continue if tp.afos == 'RRSTAR' and tp.source == 'KWOH': out.write(prod + "\003") os.unlink(fn) out.close() now += interval
def main(): """Go""" os.chdir("/mesonet/tmp/noaaport") sts = utc(2018, 4, 24) ets = utc(2018, 6, 28) interval = datetime.timedelta(days=1) now = sts while now < ets: subprocess.call( ("tar -zxf /mesonet/ARCHIVE/raw/noaaport/%s/%s.tgz") % (now.year, now.strftime("%Y%m%d")), shell=True, ) out = open("%s.txt" % (now.strftime("%Y%m%d"), ), "w") for hour in tqdm.tqdm(range(0, 24), desc=now.strftime("%m%d")): fn = "%s%02i.txt" % (now.strftime("%Y%m%d"), hour) if not os.path.isfile(fn): print("Missing %s" % (fn, )) continue # careful here to keep bad bytes from causing issues fp = open(fn, "rb").read() prods = fp.decode("utf-8", "ignore").split("\003") for prod in prods: if prod.find("RRSTAR") == -1: continue try: tp = TextProduct(prod, utcnow=now) except Exception: continue if tp.afos == "RRSTAR": out.write(prod + "\003") os.unlink(fn) out.close() now += interval
def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None): """ constructor """ TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider) self.geometry = self.parse_geometry() self.discussion_num = self.parse_discussion_num() self.attn_wfo = self.parse_attn_wfo() self.attn_rfc = self.parse_attn_rfc() self.areas_affected = self.parse_areas_affected() self.concerning = self.parse_concerning() self.watch_prob = self.find_watch_probability() self.sts, self.ets = self.find_valid_times() self.cwsus = []
def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None): """ constructor """ # Make sure we are CRLF above all else if text.find("\r\r\n") == -1: text = text.replace("\n", "\r\r\n") # Get rid of extraneous whitespace on right hand side only text = "\r\r\n".join([a.rstrip() for a in text.split("\r\r\n")]) TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider) # Which time partitioned table does this product belong to # defaults to current UTC valid self.db_year = self.valid.year self.skip_con = self.get_skip_con() # If there was no/bad MND header, a backwards way to know is that the # product time zone will be None, add a warning if self.z is None: self.warnings.append("Could not find local timezone in text.")
def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None): """Constructor Args: text (string): the raw PTS product that is to be parsed utcnow (datetime, optional): in case of ambuigity with time ugc_provider (dict, optional): unused in this class nwsli_provider (dict, optional): unused in this class """ TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider) print("==== SPCPTS Processing: %s" % (self.get_product_id(), )) load_conus_data(self.valid) self.issue = None self.expire = None self.day = None self.outlook_type = None self.outlook_collections = dict() self.set_metadata() self.find_issue_expire() self.find_outlooks() self.quality_control()
def parser(text, utcnow=None, ugc_provider=None, nwsli_provider=None): """Omnibus parser of NWS Text Data This is intended to be a catch-all parser of text data. As it currently stands, it does not correctly hand products off to the correct sub-processor, but some day it will! Args: text (str): The actual product text, this can have the <cntr>-a character to start the string. utcnow (datetime, optional): What is the current time, this is useful for when ingesting old data. Many times, the product does not contain enough information to assign a current valid timestamp to it. So we need to know the current timestamp to do the relative computation. ugc_provider (dict, optional): Provides NWS UGC metadata, the dictionary keys are UGC codes. nwsli_provider (dict, optional): Provides NWS Location Identifiers to allow lookup of geographic information for station identifiers. Returns: TextProduct: A TextProduct instance """ tmp = text[:100].replace("\r\r\n", "\n") m = WMO_RE.search(tmp) if m is not None: d = m.groupdict() if d["cccc"] == "KWNP": return spacewx.parser(text, utcnow, ugc_provider, nwsli_provider) tokens = AFOSRE.findall(tmp) if not tokens: raise TextProductException("Could not locate AFOS Identifier") afos = tokens[0] if afos[:3] == "CLI": return cli.parser(text, utcnow, ugc_provider, nwsli_provider) elif afos[:3] == "TCP": return nhc.parser(text, utcnow, ugc_provider, nwsli_provider) elif afos[:3] == "HWO": return hwo.parser(text, utcnow, ugc_provider, nwsli_provider) elif afos in ["SWOMCD", "FFGMPD"]: return mcd.parser(text, utcnow, ugc_provider, nwsli_provider) elif afos[:3] == "LSR": return lsr.parser(text, utcnow, ugc_provider, nwsli_provider) elif afos[:3] == "TAF": return taf.parser(text, utcnow, ugc_provider, nwsli_provider) elif afos[:3] == "SPS": return sps.parser(text, utcnow, ugc_provider, nwsli_provider) return TextProduct(text, utcnow, ugc_provider, nwsli_provider)
def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None): """ constructor """ TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider) # hold our parsing results self.data = [] lines = self.unixtext.split("\n") if len(lines[3]) < 10: meat = ("".join(lines[4:])).split("=") else: meat = ("".join(lines[3:])).split("=") for piece in meat: if piece == "": continue res = process(piece) if res is None: self.warnings.append("DSM RE Match Failure: '%s'" % (piece, )) continue res.compute_times(utcnow) self.data.append(res)
def __init__(self, text, utcnow=None, ugc_provider=None, nwsli_provider=None): """ constructor """ TextProduct.__init__(self, text, utcnow, ugc_provider, nwsli_provider) # Hold our parsing results as an array of dicts self.data = [] self.regime = None # Sometimes, we get products that are not really in CLI format but # are RER (record event reports) with a CLI AWIPS ID if self.wmo[:2] != 'CD': print(('Product %s skipped due to wrong header' ) % (self.get_product_id(),)) return for section in self.find_sections(): if not HEADLINE_RE.findall(section.replace("\n", " ")): continue # We have meat! self.compute_diction(section) valid, station = self.parse_cli_headline(section) data = self.parse_data(section) self.data.append(dict(cli_valid=valid, cli_station=station, data=data))
def really_process_data(txn, data):
    ''' We are called with a hard coded AFOS PIL '''
    tp = TextProduct(data)
    if tp.afos is None:
        compute_afos(tp)
    utc = tp.valid
    table = "products_%s_0106" % (utc.year,)
    if utc.month > 6:
        table = "products_%s_0712" % (utc.year,)
    sql = """INSERT into """ + table + """
        (pil, data, source, wmo, entered) values(%s,%s,%s,%s,%s)"""
    sqlargs = (tp.afos, tp.text, tp.source, tp.wmo,
               utc.strftime("%Y-%m-%d %H:%M+00"))
    txn.execute(sql, sqlargs)
    if tp.afos[:3] == 'FRH':
        return
    jmsgs = tp.get_jabbers(
        common.SETTINGS.get('pywwa_product_url', 'pywwa_product_url'))
    for jmsg in jmsgs:
        JABBER.send_message(*jmsg)

def really_process_data(txn, data): """ We are called with a hard coded AFOS PIL """ tp = TextProduct(data) if tp.afos is None: compute_afos(tp) sql = ("INSERT into products " "(pil, data, source, wmo, entered) values(%s,%s,%s,%s,%s)") sqlargs = ( tp.afos, tp.text, tp.source, tp.wmo, tp.valid.strftime("%Y-%m-%d %H:%M+00"), ) txn.execute(sql, sqlargs) if tp.afos[:3] == "FRH": return jmsgs = tp.get_jabbers( common.SETTINGS.get("pywwa_product_url", "pywwa_product_url")) for jmsg in jmsgs: JABBER.send_message(*jmsg)
def mycmp(orig, mod, data, data2, line, line2): p1 = findp(orig) p2 = findp(mod) if p1 == 'T' and p2 == 'M': tp = TextProduct(data) tp2 = TextProduct(data2) print(("%s %s -> %s\n" "%s -> %s\nhttps://mesonet.agron.iastate.edu/p.php?pid=%s\n" "%s -> %s\nhttps://mesonet.agron.iastate.edu/p.php?pid=%s\n") % (orig[0], p1, p2, tp.afos, line, tp.get_product_id(), tp2.afos, line2, tp2.get_product_id()))
def run(ts, data):
    p = TextProduct(data)
    today = p.valid
    indices = ['TONIGHT']
    for i in range(1, 8):
        d2 = (today + datetime.timedelta(days=i)).strftime("%A").upper()
        indices.append(d2)
        indices.append(d2 + " NIGHT")

    def find_index(dstring):
        tokens = dstring.replace(".", "").split("AND")
        i = []
        for token in tokens:
            d2 = token.strip()
            if d2 not in indices:
                # print('%s not in indices' % (d2,))
                i.append(None)
            else:
                i.append(indices.index(d2))
        return i

    polk = ugc.UGC("IA", "Z", 60)
    for seg in p.segments:
        if polk not in seg.ugcs:
            continue
        tokens = seg.unixtext.split(".TONIGHT...")
        if len(tokens) != 2:
            continue
        meat = ".TONIGHT..." + tokens[1]
        running = ""
        sections = []
        for line in meat.split("\n"):
            if len(line) > 0 and line[0] == '.':
                # new section starts, flush what we accumulated so far
                if running != "":
                    sections.append(running)
                running = ""
            running += line + " "
        ids = []
        for section in sections:
            if section.find(" 0 PERCENT") > 0:
                idexs = find_index(section.split("...", 1)[0])
                for i in idexs:
                    ids.append(i)
        return ids

def do(ts):
    """ Process this timestamp """
    cursor = PGCONN.cursor()
    fn = ts.strftime("/mnt/mesonet2/data/nwstg/NWSTG_%Y%m%d.tar.Z")
    if not os.path.isfile(fn):
        print("MISSING FILE: %s" % (fn, ))
        return
    subprocess.call("uncompress %s" % (fn, ), shell=True)
    tar = tarfile.open(fn[:-2], 'r')
    for member in tar.getmembers():
        f = tar.extractfile(member)
        tar2 = tarfile.open(fileobj=f, mode='r')
        for member2 in tar2.getmembers():
            f2 = tar2.extractfile(member2)
            if not f2.name.startswith("TEXT_"):
                continue
            content = (re.sub(BAD_CHARS, "", f2.read())).replace("\r\r", "")
            parts = f2.name.strip().split("_")
            ttaaii = parts[1]
            source = parts[2]
            if source[0] not in ['K', 'P']:
                continue
            if source in ['KWBC', 'KWAL']:
                continue
            delimiter = "%s %s" % (ttaaii, source)
            # Filter content back to the start of the ttaaii
            pos = content.find(delimiter)
            if pos == -1:
                print('Skipping, can not find %s in product %s' % (
                    delimiter, f2.name))
                continue
            content = content[pos:]
            awipsid = find_awipsid(content)
            if (awipsid is not None and
                    (awipsid.startswith("RR") or awipsid.startswith("MTR") or
                     awipsid in ["TSTNCF", "WTSNCF"])):
                print('Skip %s %s' % (f2.name, awipsid))
                continue
            # Now we are getting closer, let's split by the delimiter as we
            # may have multiple products in one file!
            for bulletin in content.split(delimiter):
                if len(bulletin) == 0:
                    continue
                bulletin = "000\n%s%s" % (delimiter, bulletin)
                try:
                    prod = TextProduct(bulletin, utcnow=ts)
                except Exception:
                    print('Parsing Failure %s' % (f2.name, ))
                    continue
                if prod.valid.year != ts.year:
                    print('Invalid timestamp, year mismatch')
                    continue
                table = "products_%s_%s" % (
                    prod.valid.year,
                    "0712" if prod.valid.month > 6 else "0106")
                print('SAVE %s %s %s %s %s' % (
                    f2.name, prod.valid.strftime("%Y%m%d%H%M"), awipsid,
                    prod.afos, table))
                cursor.execute(
                    """INSERT into """ + table + """
                    (data, pil, entered, source, wmo)
                    values (%s,%s,%s,%s,%s)""",
                    (bulletin, prod.afos, prod.valid, source, ttaaii))
    cursor.close()
    PGCONN.commit()
    subprocess.call("compress %s" % (fn[:-2], ), shell=True)

def process():
    """ Process this timestamp """
    for tarfn in glob.glob("9957*tar.Z"):
        cursor = PGCONN.cursor()
        subprocess.call("uncompress %s" % (tarfn, ), shell=True)
        ts = datetime.datetime.strptime(tarfn[9:17], '%Y%m%d')
        ts = ts.replace(hour=23, minute=59, tzinfo=pytz.utc)
        tar = tarfile.open(tarfn[:-2], 'r')
        memory = []
        for member in tar.getmembers():
            fobj = tar.extractfile(member)
            content = re.sub(BAD_CHARS, "", fobj.read()) + ENDDELIM
            pos = 0
            good = 0
            bad = 0
            deleted = 0
            for match in re.finditer(DELIMITER, content):
                pos1 = match.start()
                bulletin = "000 \r\r" + content[pos:pos1]
                pos = match.end()
                if len(bulletin) < 20:
                    bad += 1
                    continue
                bulletin = noaaport_text(bulletin)
                try:
                    prod = TextProduct(bulletin, utcnow=ts,
                                       parse_segments=False)
                except Exception as exp:
                    bad += 1
                    print('Parsing Failure %s\n%s' % (fobj.name, exp))
                    continue
                if prod.valid.year != ts.year:
                    bad += 1
                    print('Invalid timestamp, year mismatch')
                    continue
                table = "products_%s_%s" % (
                    prod.valid.year,
                    ("0712" if prod.valid.month > 6 else "0106"))
                key = "%s_%s_%s" % (
                    prod.afos, prod.valid.strftime("%Y%m%d%H%M"),
                    prod.source)
                if key not in memory:
                    cursor.execute(
                        """
                        DELETE from """ + table + """ WHERE pil = %s and
                        entered = %s and source = %s
                        """, (prod.afos, prod.valid, prod.source))
                    deleted += cursor.rowcount
                    memory.append(key)
                cursor.execute(
                    """INSERT into """ + table + """
                    (data, pil, entered, source, wmo)
                    values (%s,%s,%s,%s,%s)
                    """, (bulletin, prod.afos, prod.valid, prod.source,
                          prod.wmo))
                good += 1
        subprocess.call("compress %s" % (tarfn[:-2], ), shell=True)
        print(("Processed %s Good: %s Bad: %s Deleted: %s"
               ) % (tarfn, good, bad, deleted))
        if len(content) > 1000 and good < 5:
            print("ABORT!")
            sys.exit()
        cursor.close()
        PGCONN.commit()

def __init__(self, text, utcnow=None):
    ''' constructor '''
    self.lsrs = []
    self.duplicates = 0
    TextProduct.__init__(self, text, utcnow=utcnow)

import os
import datetime
import subprocess

from pyiem.nws.product import TextProduct

os.chdir("/mesonet/tmp")
sts = datetime.datetime(2011, 7, 19)
ets = datetime.datetime(2011, 8, 3)
interval = datetime.timedelta(days=1)
now = sts
while now < ets:
    out = open('%s.data' % (now.strftime("%Y%m%d"),), 'w')
    subprocess.call(("tar -zxf /mesonet/ARCHIVE/raw/noaaport/%s/%s.tgz"
                     ) % (now.year, now.strftime("%Y%m%d")), shell=True)
    for q in range(0, 24):
        print("%s %s" % (now, q))
        fn = "%s%02i.txt" % (now.strftime("%Y%m%d"), q)
        if not os.path.isfile(fn):
            print('Missing %s' % (fn,))
            continue
        o = open(fn).read()
        prods = o.split("\003")
        for prod in prods:
            try:
                p = TextProduct(prod)
            except Exception:
                continue
            if p.afos is not None and p.afos[:3] in ['HML', ]:
                out.write(prod + "\003")
        os.unlink(fn)
    out.close()
    now += interval