def dump_xml(obs1, xmldata, iteration):
    """Write the raw XML payload to xml_dump<iteration>.xml for debugging.

    Only writes when obs1.dump_xml_flag is truthy; otherwise this is a
    no-op apart from the entry trace.  xmldata must be bytes (file is
    opened in binary mode).
    """
    trace_print(1, "dumpxml_entry")
    if obs1.dump_xml_flag:
        trace_print(1, "dump_xml")
        dump_name = "xml_dump" + str(iteration) + ".xml"
        # context manager guarantees the handle is closed even if write fails
        with open(dump_name, 'wb') as fh:
            fh.write(xmldata)
def get_last_csv_row(st_file):
    """Return the last line of the CSV file, or "" when unreadable/empty.

    Narrowed from a bare except: OSError covers a missing/unreadable
    file, IndexError covers an empty file (readlines()[-1]).  The
    explicit close() was redundant inside the with-block and is removed.
    """
    try:
        with open(st_file, "r", encoding="utf-8", errors="ignore") as csv_1:
            final_line = csv_1.readlines()[-1]
        trace_print(1, "final line:", final_line)
        return final_line
    except (OSError, IndexError):
        trace_print(3, "csv file not found... continue...")
        return ""
def main_obs_loop(obs1_list):
    """ main loop - runs schedule and test for cut csv condition """
    # One iteration of the collection loop; the caller spins this in a
    # while True (see weather_obs_app).
    run_minutes = datetime.now().minute
    if ((run_minutes == 59)):
        # every hour check to see if need to cut
        trace_print(1, "Num minutes running: ", str(run_minutes))
        foreach_obs(obs_cut_csv_file, obs1_list)
    else:
        trace_print(1, "run pending")
        schedule.run_pending()
        # schedule.run_all()
    # NOTE(review): sleep placed at function level so the minute-59 cut
    # path does not busy-loop - confirm against the original layout
    time.sleep(60)
def check_resume_file(obs_setting):
    """Locate an existing station CSV that a --resume run can reuse.

    Looks for a file stamped with tomorrow's date first (stations such
    as Guam or Hawaii can be a day ahead of local time), then falls back
    to today's date.  Returns whatever hunt_for_noaa_csv_files yields,
    which may be empty when no candidate exists.
    """
    now = datetime.now()
    trace_print(4, "station_id", obs_setting.station_id)
    glob_today = create_station_glob_filter(obs_setting.station_id,
                                            "csv", now)
    glob_tomorrow = create_station_glob_filter(obs_setting.station_id,
                                               "csv", now + timedelta(hours=24))
    found = hunt_for_noaa_csv_files(obs_setting.data_dir, glob_tomorrow)
    if len(found) < 1:
        found = hunt_for_noaa_csv_files(obs_setting.data_dir, glob_today)
    return found
def obs_sanity_check(obs1, xml_data, data_row):
    """Verify the time/wind columns of a parsed row carry real values.

    Columns 9, 19, 17, 16 correspond to observation_time, wind_mph,
    wind_dir and wind_string.  On the first placeholder ("<no...") the
    raw XML is dumped, tagged with seconds-since-midnight so the dump
    can be matched to the bad row, and False is returned.  Returns True
    when every checked column holds data.
    """
    for col in (9, 19, 17, 16):
        value = data_row[col]
        if not value.startswith("<no"):
            trace_print(1, "data checked: ", str(value))
            continue
        now = datetime.now()
        day_start = now.replace(hour=0, minute=0, second=0, microsecond=0)
        stamp = (now - day_start).seconds
        # force a dump regardless of the user's xml flag, then restore
        obs1.dump_xml_flag = True
        dump_xml(obs1, xml_data, stamp)
        obs1.dump_xml_flag = False
        trace_print(4, "potential bad xml - see xml dump at ", str(stamp))
        return False
    return True
def create_station_file_name2(
        station="https://w1.weather.gov/xml/current_obs/KDCA.xml",
        ext='csv'):
    """Build the station CSV file name from the live observation time.

    Fetches the station's current XML, parses the observation_time
    column (index 9), and formats <ID>_Y<yyyy>_M<mm>_D<dd>_H<hh>.<ext>.
    Returns "" when the feed yields no usable XML.
    The local previously named `min` shadowed the builtin; renamed.
    """
    w_xml = get_weather_from_NOAA(station)
    if (obs_check_xml_data(w_xml) == False):
        return ""
    headers, row = get_data_from_NOAA_xml(w_xml)
    obs_date = get_obs_time(row[9])
    # station id is the 4-letter code embedded in the URL, e.g. KDCA
    station_id = station[-8:-4]
    year, month, day, hour, minute, am = map(
        str, obs_date.strftime("%Y %m %d %H %M %p").split())
    file_n = station_id + '_Y' + year + '_M' + \
        month + '_D' + day + '_H' + hour + "." + ext
    trace_print(4, "my_p", str(am))
    return file_n
def get_data_from_NOAA_xml(xmldata):
    """ parse noaa observatin data from xml into list """
    # Returns (csv_headers, row) with the row re-aligned to the
    # canonical csv_headers list; tags missing from the feed become
    # the '<no_value_provided>' placeholder.
    tree = ET.fromstring(xmldata)
    h1 = []
    r1 = []
    r1_final = []
    global csv_headers
    trace_print(4, "parsing NOAA xml")
    # collect tag names and text in document order
    for child in tree:
        h1.append(child.tag)
        r1.append(child.text)
    for ch in csv_headers:
        if not r1:
            # feed exhausted - pad remaining canonical columns
            r1_final.append('')
        elif (ch in h1):
            # NOTE(review): pop(0) assumes the feed emits tags in the
            # same order as csv_headers; a reordered feed would shift
            # values into the wrong columns - confirm upstream ordering
            r1_final.append(transform_observation(ch, r1.pop(0)))
        else:
            r1_final.append('<no_value_provided>')
    # headers returned are always the canonical list, not the feed's
    h1 = csv_headers
    return h1, r1_final
def duration_cut_check(t_last, hour_cycle):
    """Decide whether a new CSV should be cut.

    Any calendar rollover (year, month or day greater than t_last's)
    forces a cut.  Within the same day, a cut happens when the hour
    difference is non-zero and lands on a multiple of hour_cycle.
    Returns True to cut, False to keep appending.
    """
    trace_print(1, "Duration check")
    t_now = datetime.now()
    if t_now.year > t_last.year:
        trace_print(1, "Duration year check")
        return True
    if t_now.month > t_last.month:
        trace_print(1, "Duration month check")
        return True
    if t_now.day > t_last.day:
        trace_print(1, "Duration day check")
        return True
    hour_delta = t_now.hour - t_last.hour
    if hour_delta == 0:
        # still inside the same hour - nothing to do
        return False
    if hour_cycle > 0 and hour_delta % hour_cycle == 0:
        trace_print(1, "Duration cycle check at ", str(hour_cycle))
        return True
    return False
def weather_obs_app_append(obs1):
    """Append the current observation to the station CSV.

    Fetches and parses the feed, maintains prior/current observation
    timestamps, skips duplicate rows, and otherwise appends via
    weather_csv_driver.  Returns False when no usable XML arrived.
    """
    content = get_weather_from_NOAA(obs1.primary_station)
    if (obs_check_xml_data(content) == False):
        return False
    xmld1 = get_data_from_NOAA_xml(content)
    dump_xml(obs1, content, datetime.now().minute)
    # test if last row and what is coming in are equal
    # if --resume is specified - then we need to set prior to current.
    try:
        obs1.prior_obs_time = obs1.current_obs_time
    except AttributeError:
        # narrowed from bare except: first pass has no current_obs_time
        # yet, so seed the prior timestamp from the feed itself
        obs1.prior_obs_time = get_obs_time(xmld1[1][9])
    obs1.current_obs_time = get_obs_time(xmld1[1][9])
    trace_print(4, "current_obs_time(append): ", str(obs1.current_obs_time))
    trace_print(4, "prior_obs_time(append): ", str(obs1.prior_obs_time))
    if (duplicate_observation(obs1, xmld1[1])):
        trace_print(3, 'duplicate append, exit up')
        # error on double start
        obs1.prior_obs_time = obs1.current_obs_time
        return
    weather_csv_driver(obs1, 'a', obs1.station_file, xmld1[0], xmld1[1])
    return
def check_parms1(obs_setting, args):
    """Apply standalone command-line flags to an ObsSetting.

    Handles --duration, --cut, --append and --resume.  Always returns
    True.  Removed the stray `duration_interval = int(args.interval)`
    line: argparse never defines --interval, so it raised
    AttributeError whenever --duration was supplied, and the local it
    assigned was never used.
    """
    if (args.duration):
        obs_setting.duration_interval = int(args.duration)
        trace_print(1, "duration interval: ", str(args.duration))
    if (args.cut):
        obs_setting.set_cut_process()
        trace_print(1, "cut specified")
    if (args.append):
        obs_setting.set_append_processing()
        trace_print(1, "append specified")
    # collect asssumes append
    if (args.resume):
        obs_setting.set_resume_processing()
        trace_print(1, "resume specified")
    return True
def get_obs_time(obs_date):
    """Convert a NOAA observation_time string to a datetime.

    Only the first 20 characters are parsed; the timezone is
    deliberately ignored because the stamp merely names the CSV output
    file.  With the module-level obs_time_debug flag set,
    obs_debug_t_delta hours are added so tests can simulate a shifted
    clock.
    """
    t_str = obs_date
    if (obs_time_debug):
        trace_print(4, "Local observation time ( get_obs_time): ", t_str)
        # actual timezone is not important for obs file output.
        parsed = parser.parse(t_str[:20])
        # adjust stamp for specific test
        parsed = parsed + timedelta(hours=obs_debug_t_delta)
        trace_print(4, "Debug obs_date:", str(parsed))
        return parsed
    trace_print(4, "Local observation time ( get_obs_time): ", t_str)
    # actual timezone is not important for obs file output.
    parsed = parser.parse(t_str[:20])
    trace_print(4, "get_obs_time return()")
    return parsed
def get_weather_from_NOAA(station):
    """ simple get xml data, and print the md5 """
    # Returns the response bytes, or "" on failure; callers detect the
    # failure via obs_check_xml_data's length test.
    trace_print(4, "url request")
    try:
        with urllib.request.urlopen(station) as response:
            xml = response.read()
            trace_print(4, "xml md5: ", hashlib.md5(xml).hexdigest())
    except (OSError, ValueError):
        # narrowed from bare except: URLError/HTTPError are OSError
        # subclasses; ValueError covers a malformed station URL
        trace_print(4, "URL request error")
        xml = ""
    return xml
def duplicate_observation(obs1, current_obs):
    """ test last line of csv for duplicate """
    """ finds observation times and compares"""
    # Compares the observation_time field of current_obs (index 9)
    # against the same field recovered from the CSV's last line.
    r_csv_file = get_obs_csv_path(obs1, obs1.station_file)
    last_one = get_last_csv_row(r_csv_file)
    if (len(last_one) < 4):
        # empty or missing file - nothing to be a duplicate of
        return False
    # rows are written QUOTE_ALL, so splitting on ',"' recovers the
    # quoted fields; NOTE(review): index 7 is presumed to line up with
    # the observation_time column - verify against csv_headers order
    last_obs = last_one.split(',\"')
    last_obs_dt = last_obs[7]
    # strip the trailing quote left by the split
    last_obs_dt = last_obs_dt[:-1]
    trace_print(1, "last_obs:", last_obs_dt, "len ", str(len(last_obs_dt)))
    trace_print(1, "current_obs: ", current_obs[6], " ", current_obs[9],
                "len ", str(len(current_obs[9])))
    if (current_obs[9] == last_obs_dt):
        trace_print(1, "Is equal")
        return True
    return False
def run_cut_operation(obs1, obs_cut_time): trace_print(4, "running cut operation") # sychronize obs_time for new day - so file name will be corrrect # last observation at 11:50 or so - add 10 minutes for file create. obs1.station_file = create_station_file_name(obs1.station_id, "csv", obs_cut_time) # start a new day cycle obs1.prior_obs_time = obs_cut_time obs1.current_obs_time = obs_cut_time trace_print(4, "New Station file (cut):", obs1.station_file) # create new file with cannocial headers weather_csv_driver(obs1, 'c', obs1.station_file, csv_headers, []) schedule.cancel_job(obs1.job1) # we rassigned the next station file # new writes should go there. obs1.job1 = None t_begin = datetime.now() trace_print(4, "Time of last cut:", t_begin.strftime("%A, %d. %B %Y %I:%M%p")) # this will reschedule job with new file. weather_obs_app_start(obs1)
def obs_check_xml_data(xmldata):
    """Return True when xmldata looks substantive (4+ chars/bytes)."""
    if len(xmldata) >= 4:
        return True
    trace_print(4, "No XML data to process")
    return False
def weather_obs_init():
    """ init the app, get args and establish globals """
    # Parses the command line and returns a list of ObsSetting objects,
    # one per station.  NOTE(review): formatting reconstructed; the
    # nested check_resume_file/check_params2 duplicate module-level
    # functions of the same names - confirm against the original layout.
    parser = argparse.ArgumentParser(description='NOAA weather obsevation')
    parser.add_argument('--init', help='Initialize CSV')
    parser.add_argument('--station', help='URL of station')
    parser.add_argument('--collect',
                        help='Run collectiion in background - Y/N',
                        action="store_true")
    parser.add_argument('--append', help='Append data to CSV file - specifed')
    parser.add_argument('-d', '--duration',
                        help='Duration cycle - default - 24 hours ')
    parser.add_argument('-c', '--cut', action="store_true")
    parser.add_argument('-x', '--xml', action="store_true")
    parser.add_argument('-r', '--resume', help='resume append and cut',
                        action="store_true")
    parser.add_argument('-j', '--json', help="generate json data to file")
    parser.add_argument('-f', '--file', help="read stations from file specified")
    parser.add_argument('--dir', help='data directory offet- default is cwd ')
    args = parser.parse_args()
    trace_print(1, "parsing args...")
    # cannocial header
    # can't depend on xml feed to complete every value
    global csv_headers

    def check_resume_file(obs_setting):
        # find an existing CSV (tomorrow's stamp first, then today's)
        # that a --resume run can reuse
        today = datetime.now()
        day_1 = timedelta(hours=24)
        tomorrow = today + day_1
        trace_print(4, "station_id", obs_setting.station_id)
        today_glob = create_station_glob_filter(obs_setting.station_id,
                                                "csv", today)
        # Guam or Hawaii might be actually ahead.
        tomorrow_glob = create_station_glob_filter(obs_setting.station_id,
                                                   "csv", tomorrow)
        last_file = hunt_for_noaa_csv_files(obs_setting.data_dir, tomorrow_glob)
        if len(last_file) < 1:
            last_file = hunt_for_noaa_csv_files(obs_setting.data_dir, today_glob)
        return last_file

    def check_params2(obs_setting, args):
        # per-station file/dir/init/append/resume/collect settings
        obs_setting.station_file = create_station_file_name2(
            obs_setting.primary_station)
        if (args.dir):
            obs_setting.set_data_dir(args.dir)
            # obs_setting.data_dir = args.dir
            # if (os.path.exists( os.getcwd() + os.sep + obs_setting.data_dir)):
            #     trace_print(4, "data dir exists: ", str(obs_setting.data_dir))
            # else:
            #     trace_print(1, "Data dir does not exist")
            #     os.mkdir( os.getcwd() + os.sep + obs_setting.data_dir)
            #     trace_print(1, " directory created")
        if (obs_setting.append_data_specified == False):
            trace_print(4, "Station filename: ", obs_setting.station_file)
            obs_setting.init_csv = True
        # initialize a CSV until we prove we are appending.
        if (args.init):
            obs_setting.set_init_processing(args.init)
        if (obs_setting.append_data_specified == True):
            obs_setting.station_file = args.append
            obs_setting.init_csv = False
        if (obs_setting.resume == True):
            trace_print(4, "resume here")
            # now = datetime.now()
            # file_id = obs_setting.station_id + "_Y" + str(now.year)
            # file_id = obs_setting.station_file
            # TODO - support yesterday, today, and tomorrow.
            # Guam is actually tomorrow in many cases
            # so resume will not work if just today and yesterday
            # 24 hours +/- otherwise just create a new file
            data_path = obs_setting.get_data_dir_path()
            trace_print(3, "data path ", data_path)
            obs_setting.station_file = check_resume_file(obs_setting)
            trace_print(3, "station_file", obs_setting.station_file)
            if (len(obs_setting.station_file) < 4):
                obs_setting.station_file = create_station_file_name2(
                    obs_setting.primary_station)
                obs_setting.init_csv = True
                obs_setting.append_data = False
                obs_setting.append_data_specified = True
                trace_print(3, "Resume - No file file on current day")
            trace_print(4, "Station id ( append ): ", obs_setting.station_file)
        if (args.xml == True):
            obs_setting.set_xml_dump_flag(True)
        if (args.collect):
            trace_print(4, "collect in station setting")
            obs_setting.collect_data = True
            if (obs_setting.init_csv == False) and (obs_setting.append_data_specified == False):
                obs_setting.station_file = create_station_file_name2(
                    obs_setting.primary_station)
                trace_print(4, "Station filename (collect): ", obs_setting.station_file)
        return True

    if (args.file):
        # multi-station mode: one ObsSetting per line of the file
        try:
            with open(args.file, "r") as obs_file1:
                obs_entry_list = obs_file1.readlines()
                trace_print(4, str(obs_entry_list))
        except:
            # NOTE(review): bare except, and a failed open leaves
            # obs_entry_list undefined for the loop below
            print("Unable to open: ", args.file)
        setting_list = []
        # entries must be on the first 47 lines - no more or less - discard \n or other stuff
        for entry in obs_entry_list:
            setting_list.append(ObsSetting(entry[0:47]))
        trace_print(4, str(setting_list))
        for entry in setting_list:
            check_parms1(entry, args)
            trace_print(4, "Station id: ", entry.station_id)
            check_params2(entry, args)
        return setting_list
    # check station and fill out appropriate values
    if (args.station):
        obs_setting = ObsSetting(args.station)
        check_parms1(obs_setting, args)
        trace_print(4, "Station id: ", obs_setting.station_id)
        check_params2(obs_setting, args)
    else:
        trace_print(3, "Error: No station given - please use --station")
        trace_print(3, " see readme")
        sys.exit(4)
    obs_setting_list = []
    obs_setting_list.append(obs_setting)
    return obs_setting_list
def set_xml_dump_flag(self, flag):
    """Enable or disable raw-XML debug dumps for this station."""
    self.dump_xml_flag = flag
    trace_print(7, "Dump xml flag: ", str(flag))
def weather_obs_app_start(obs1): """ top level start of collection """ # if appending and scheduling - skip over to collect trace_print(3, "weather_obs_app_starT() enter ") if (obs1.append_data != True): content = get_weather_from_NOAA(obs1.primary_station) if (obs_check_xml_data(content) == False): return False xmld1 = get_data_from_NOAA_xml(content) obs_string = xmld1[1][9] trace_print(4, "raw observation string: ", obs_string) obs_time_stamp = get_obs_time(obs_string) obs1.prior_obs_time = obs_time_stamp obs1.current_obs_time = obs_time_stamp trace_print(4, "current_obs_time(start): ", str(obs1.current_obs_time)) trace_print(4, "prior_obs_time:(start) ", str(obs1.prior_obs_time)) weather_csv_driver(obs1, 'w', obs1.station_file, xmld1[0], xmld1[1]) trace_print(4, "Initializing new file (app_start): ", str(obs1.station_file)) dump_xml(obs1, content, datetime.now().minute) if (obs1.collect_data == True): if obs1.job1: trace_print(4, "schedule job set - exit()") return trace_print(4, "schedule job @ ", str(obs1.primary_station), " -> ", str(obs1.station_file)) obs1.append_data = True obs1.job1 = schedule.every().hour.at(":20").do(weather_collect_driver, obs1) return
def weather_collect_driver(obs1):
    """ Appends ( only ) csv file with data from obs xml """
    # Scheduled hourly: fetch, sanity-check, de-duplicate, append, and
    # keep the prior/current timestamps that drive the cut logic.
    trace_print(4, "weather_collect_driver")
    xmldata = get_weather_from_NOAA(obs1.primary_station)
    if (obs_check_xml_data(xmldata) == False):
        return False
    outdata = get_data_from_NOAA_xml(xmldata)
    # check data and dump xml for post-mortem
    # data feed from noaa has unexpected output
    # check to see if wind is missing.
    obs_sanity_check(obs1, xmldata, outdata[1])
    # use for cut logic.
    # if local time crossed midnight - cut a new file.
    # save prior - obs_time_prior
    # curent to - obs_time_curent.
    trace_print(4, "current_obs_time(driver_before): ",
                str(obs1.current_obs_time))
    trace_print(4, "prior_obs_time(driver_before): ",
                str(obs1.prior_obs_time))
    # if it comes in at zero hour ( mindnight) then reset current and prior
    obs1.prior_obs_time = obs1.current_obs_time
    obs1.current_obs_time = get_obs_time(outdata[1][9])
    if (obs1.prior_obs_time.hour == 23):
        # day rollover: collapse prior onto current so the cut check
        # does not fire on the 23 -> 0 hour transition
        trace_print(4, "Special driver processing at hour 23")
        obs1.prior_obs_time = obs1.current_obs_time
    trace_print(4, "current_obs_time(driver): ", str(obs1.current_obs_time))
    trace_print(4, "prior_obs_time(driver): ", str(obs1.prior_obs_time))
    if (duplicate_observation(obs1, outdata[1])):
        trace_print(3, " duplicate in collect. exiting...")
        return True
    weather_csv_driver(obs1, 'a', obs1.station_file, outdata[0], outdata[1])
    obs1.obs_iteration = obs1.obs_iteration + 1
    dump_xml(obs1, xmldata, obs1.obs_iteration)
    return True
def weather_csv_driver(obs1, mode, csv_file, w_header, w_row):
    """Write CSV data for a station.

    mode 'w' writes header then row, 'a' appends the row, and 'c' (cut)
    writes the header only.  Returns True on success, False when the
    file name is too short to contain a station id.  The redundant
    explicit close() inside the with-block was removed - the context
    manager already closes the file.
    """
    cut_mode = False
    trace_print(4, 'csv_driver')
    # if ( mode != 'w' ) and ( mode != 'a' ):
    #     trace_print( 1, " mode is invalid")
    #     return False
    if (len(csv_file) < 4):
        print("CSV file must contain station name")
        return False
    if (mode == 'c'):
        # cut file request is active
        # denote the special mode and change it to write.
        cut_mode = True
        mode = 'w'
    r_csv_file = get_obs_csv_path(obs1, csv_file)
    trace_print(4, "data_dir location: ", str(r_csv_file))
    # newline parm so that excel in windows doesn't have blank line in csv
    # https://stackoverflow.com/questions/3348460/csv-file-written-with-python-has-blank-lines-between-each-row
    with open(r_csv_file, mode, newline='') as weather_file:
        weather_writer = csv.writer(weather_file, delimiter=',',
                                    quotechar='"', quoting=csv.QUOTE_ALL)
        if (mode == 'w'):
            trace_print(4, "csv_driver: header")
            weather_writer.writerow(w_header)
            if (cut_mode == False):
                trace_print(4, "csv_driver: row_with_header")
                weather_writer.writerow(w_row)
        elif (mode == 'a'):
            trace_print(4, "csv_drver: row_only")
            weather_writer.writerow(w_row)
    csv_write_time = datetime.now()
    trace_print(4, "csv_write_time: ",
                csv_write_time.strftime("%A, %d. %B %Y %I:%M%p"))
    return True
def check_params2(obs_setting, args):
    """ check station-level parms - file name, data dir, init/append/resume/collect """
    obs_setting.station_file = create_station_file_name2(
        obs_setting.primary_station)
    if (args.dir):
        obs_setting.set_data_dir(args.dir)
        # obs_setting.data_dir = args.dir
        # if (os.path.exists( os.getcwd() + os.sep + obs_setting.data_dir)):
        #     trace_print(4, "data dir exists: ", str(obs_setting.data_dir))
        # else:
        #     trace_print(1, "Data dir does not exist")
        #     os.mkdir( os.getcwd() + os.sep + obs_setting.data_dir)
        #     trace_print(1, " directory created")
    if (obs_setting.append_data_specified == False):
        trace_print(4, "Station filename: ", obs_setting.station_file)
        obs_setting.init_csv = True
    # initialize a CSV until we prove we are appending.
    if (args.init):
        obs_setting.set_init_processing(args.init)
    if (obs_setting.append_data_specified == True):
        obs_setting.station_file = args.append
        obs_setting.init_csv = False
    if (obs_setting.resume == True):
        trace_print(4, "resume here")
        # now = datetime.now()
        # file_id = obs_setting.station_id + "_Y" + str(now.year)
        # file_id = obs_setting.station_file
        # TODO - support yesterday, today, and tomorrow.
        # Guam is actually tomorrow in many cases
        # so resume will not work if just today and yesterday
        # 24 hours +/- otherwise just create a new file
        data_path = obs_setting.get_data_dir_path()
        trace_print(3, "data path ", data_path)
        obs_setting.station_file = check_resume_file(obs_setting)
        trace_print(3, "station_file", obs_setting.station_file)
        if (len(obs_setting.station_file) < 4):
            # no resumable file found - fall back to a brand new CSV
            obs_setting.station_file = create_station_file_name2(
                obs_setting.primary_station)
            obs_setting.init_csv = True
            obs_setting.append_data = False
            obs_setting.append_data_specified = True
            trace_print(3, "Resume - No file file on current day")
        trace_print(4, "Station id ( append ): ", obs_setting.station_file)
    if (args.xml == True):
        obs_setting.set_xml_dump_flag(True)
    if (args.collect):
        trace_print(4, "collect in station setting")
        obs_setting.collect_data = True
        if (obs_setting.init_csv == False) and (obs_setting.append_data_specified == False):
            obs_setting.station_file = create_station_file_name2(
                obs_setting.primary_station)
            trace_print(4, "Station filename (collect): ", obs_setting.station_file)
    return True
def set_duration(self, duration):
    """Store the cut-cycle duration, coerced to int."""
    value = int(duration)
    self.duration_interval = value
    trace_print(7, "duration interval: ", str(value))
def weather_obs_app(): obs1_list = weather_obs_init() # currently all options are same as first entry obs1 = obs1_list[0] if (obs1.init_csv == True): trace_print(4, "Init... ") foreach_obs(weather_obs_app_start, obs1_list) if (obs1.append_data_specified == True): if (obs1.resume == True): trace_print(1, "resume - with append") trace_print(1, "Appending data") # resume sets init_csv - have to retest again # resume sets thsi when a new file has to be created # resume starts next day. # try to resume same day - if not start a new day csv if (obs1.init_csv == False): trace_print(4, "Append processing start") foreach_obs(weather_obs_app_append, obs1_list) if (obs1.collect_data == True): run_minutes = 0 t_begin = datetime.now() trace_print(4, "starting time: ", t_begin.strftime("%A, %d. %B %Y %I:%M%p")) if (obs1.append_data_specified == True): foreach_obs(weather_obs_app_start, obs1_list) delay_t = 60 - t_begin.minute trace_print(4, "minutes till the next hour: ", str(delay_t)) while True: main_obs_loop(obs1_list)
def _trace(self, s, *t1):
    """Emit a trace line prefixed with this station's id."""
    msg1 = " " + s + ''.join(t1)
    trace_print(4, self.station_id, msg1)