def neighbour_checks(station_info, restart_id = "", end_id = "", distances=np.array([]), angles=np.array([]), second = False, masking = False, doZip=False, plots = False, diagnostics = False):
    """
    Run through neighbour checks on list of stations passed

    :param list station_info: list of lists - [[ID, lat, lon, elev]] - strings
    :param str restart_id: which station to start on
    :param str end_id: which station to end on
    :param array distances: array of distances between station pairs
    :param array angles: array of angles between station pairs
    :param bool second: do the second run
    :param bool masking: apply the flags to the data to mask the observations.
    :param bool doZip: gzip the output files once written
    :param bool plots: create plots from each test
    :param bool diagnostics: print extra material to screen
    """
    first = not second

    qc_code_version = subprocess.check_output(['svnversion']).strip()

    # if distances and angles not calculated, then do so
    if (len(distances) == 0) or (len(angles) == 0):
        print "calculating distances and bearings matrix"
        distances, angles = get_distances_angles(station_info)

    # extract before truncating the array
    neighbour_elevations = np.array(station_info[:,3], dtype=float)
    neighbour_ids = np.array(station_info[:,0])
    neighbour_info = np.array(station_info[:,:])

    # sort truncated run
    startindex = 0
    if restart_id != "":
        startindex, = np.where(station_info[:,0] == restart_id)

    if end_id != "":
        endindex, = np.where(station_info[:,0] == end_id)
        if endindex != len(station_info) - 1:
            station_info = station_info[startindex: endindex+1]
            distances = distances[startindex:endindex+1,:]
            angles = angles[startindex:endindex+1,:]
        else:
            station_info = station_info[startindex:]
            distances = distances[startindex:,:]
            angles = angles[startindex:,:]
    else:
        station_info = station_info[startindex:]
        distances = distances[startindex:,:]
        angles = angles[startindex:,:]

    # process each station in turn
    for st, stat in enumerate(station_info):

        print dt.datetime.strftime(dt.datetime.now(), "%A, %d %B %Y, %H:%M:%S")
        print "Neighbour Check"
        print "{:35s} {}".format("Station Identifier :", stat[0])

        if not plots and not diagnostics:
            logfile = file(LOG_OUTFILE_LOCS+stat[0]+'.log', 'a') # append to file if second iteration.
            logfile.write(dt.datetime.strftime(dt.datetime.now(), "%A, %d %B %Y, %H:%M:%S\n"))
            logfile.write("Neighbour Check\n")
            logfile.write("{:35s} {}\n".format("Station Identifier :", stat[0]))
        else:
            logfile = ""

        process_start_time = time.time()

        station = utils.Station(stat[0], float(stat[1]), float(stat[2]), float(stat[3]))

        # if running through the first time
        if first:
            if os.path.exists(os.path.join(NETCDF_DATA_LOCS, station.id + "_internal.nc.gz")):
                # if gzip file, unzip here
                subprocess.call(["gunzip", os.path.join(NETCDF_DATA_LOCS, station.id + "_internal.nc.gz")])
                time.sleep(5) # make sure it is unzipped before proceeding

            # read in the data
            ncdfp.read(os.path.join(NETCDF_DATA_LOCS, station.id + "_internal.nc"), station, process_vars, carry_thru_vars, diagnostics = diagnostics)

            if plots or diagnostics:
                print "{:35s} {}\n".format("Total station record size :", len(station.time.data))
            else:
                logfile.write("{:35s} {}\n".format("Total station record size :", len(station.time.data)))

            match_to_compress = utils.create_fulltimes(station, process_vars, DATASTART, DATAEND, carry_thru_vars)

        # or if second pass through?
        elif second:
            if os.path.exists(os.path.join(NETCDF_DATA_LOCS, station.id + "_internal2.nc.gz")):
                # if gzip file, unzip here
                subprocess.call(["gunzip", os.path.join(NETCDF_DATA_LOCS, station.id + "_internal2.nc.gz")])
                time.sleep(5) # make sure it is unzipped before proceeding

            ncdfp.read(os.path.join(NETCDF_DATA_LOCS, station.id + "_internal2.nc"), station, process_vars, carry_thru_vars, diagnostics = diagnostics)

            if plots or diagnostics:
                print "{:35s} {}\n".format("Total station record size :", len(station.time.data))
            else:
                logfile.write("{:35s} {}\n".format("Total station record size :", len(station.time.data)))

            match_to_compress = utils.create_fulltimes(station, process_vars, DATASTART, DATAEND, carry_thru_vars)

        # select neighbours
        neighbour_distances = distances[st,:]
        neighbour_bearings = angles[st,:]

        # have to add in start index so that can use location in distance file.
        # neighbours = n_utils.get_neighbours(st+startindex, np.float(stat[3]), neighbour_distances, neighbour_bearings, neighbour_elevations)

        # return all neighbours up to a limit from the distance and elevation offsets (500km and 300m respectively)
        neighbours, neighbour_quadrants = n_utils.get_all_neighbours(st+startindex, np.float(stat[3]), neighbour_distances, neighbour_bearings, neighbour_elevations)

        if plots or diagnostics:
            print "{:14s} {:10s} {:10s}".format("Neighbour","Distance","Elevation")
            for n in neighbours:
                print "{:14s} {:10.1f} {:10.1f}".format(neighbour_ids[n],neighbour_distances[n],neighbour_elevations[n])
        else:
            logfile.write("{:14s} {:10s} {:10s}\n".format("Neighbour","Distance","Elevation"))
            for n in neighbours:
                logfile.write("{:14s} {:10.1f} {:10.1f}\n".format(neighbour_ids[n],neighbour_distances[n],neighbour_elevations[n]))

        # if sufficient neighbours
        if len(neighbours) >= 3:

            for variable, col in FLAG_OUTLIER_DICT.items():
                # NOTE - this requires multiple reads of the same file
                #        but does make it easier to understand and code
                st_var = getattr(station, variable)

                if plots or diagnostics:
                    print "Length of {} record: {}".format(variable, len(st_var.data.compressed()))
                else:
                    logfile.write("Length of {} record: {}\n".format(variable, len(st_var.data.compressed())))

                if len(st_var.data.compressed()) > 0:

                    final_neighbours = n_utils.select_neighbours(station, variable, neighbour_info[neighbours], neighbours, neighbour_distances[neighbours], neighbour_quadrants, NETCDF_DATA_LOCS, DATASTART, DATAEND, logfile, second = second, diagnostics = diagnostics, plots = plots)

                    # now read in final set of neighbours and process
                    neigh_flags = np.zeros(len(station.time.data)) # count up how many neighbours think this obs is bad
                    neigh_count = np.zeros(len(station.time.data)) # number of neighbours at each time stamp
                    dpd_flags = np.zeros(len(station.time.data)) # count of neighbour dewpoint depression flags (column 31) at each time stamp
                    reporting_accuracies = np.zeros(len(neighbours)) # reporting accuracy of each neighbour

                    all_data = np.ma.zeros([len(final_neighbours), len(station.time.data)]) # store all the neighbour values

                    for nn, nn_loc in enumerate(final_neighbours):

                        neigh_details = neighbour_info[nn_loc]
                        neigh = utils.Station(neigh_details[0], float(neigh_details[1]), float(neigh_details[2]), float(neigh_details[3]))

                        if first:
                            ncdfp.read(os.path.join(NETCDF_DATA_LOCS, neigh.id + "_internal.nc"), neigh, [variable], diagnostics = diagnostics, read_input_station_id = False)
                        elif second:
                            ncdfp.read(os.path.join(NETCDF_DATA_LOCS, neigh.id + "_internal2.nc"), neigh, [variable], diagnostics = diagnostics, read_input_station_id = False)

                        dummy = utils.create_fulltimes(neigh, [variable], DATASTART, DATAEND, [], do_input_station_id = False)
                        all_data[nn, :] = utils.apply_filter_flags(getattr(neigh, variable))

                        if diagnostics:
                            print neigh_details

                        n_utils.detect(station, neigh, variable, neigh_flags, neigh_count, DATASTART, DATAEND, distance = neighbour_distances[nn_loc], diagnostics = diagnostics, plots = plots)

                        reporting_accuracies[nn] = utils.reporting_accuracy(getattr(neigh, variable).data)

                        dpd_flags += neigh.qc_flags[:,31]
                    # gone through all neighbours

                    # if at least 2/3 of neighbours have flagged this point (and at least 3 neighbours)
                    some_flags, = np.where(neigh_flags > 0)
                    outlier_locs, = np.where(np.logical_and((neigh_count[some_flags] >= 3), (neigh_flags[some_flags].astype("float")/neigh_count[some_flags] > 2./3.)))

                    # flag where < 3 neighbours
                    locs = np.where(neigh_count[some_flags] < 3)
                    station.qc_flags[some_flags[locs], col] = -1

                    if len(outlier_locs) >= 1:
                        station.qc_flags[some_flags[outlier_locs], col] = 1

                        # print number flagged and copy into attribute
                        if plots or diagnostics:
                            utils.print_flagged_obs_number(logfile, "Neighbour", variable, len(outlier_locs), noWrite = True)
                        else:
                            utils.print_flagged_obs_number(logfile, "Neighbour", variable, len(outlier_locs))
                        st_var = getattr(station, variable)
                        st_var.flags[some_flags[outlier_locs]] = 1

                    else:
                        if plots or diagnostics:
                            utils.print_flagged_obs_number(logfile, "Neighbour", variable, len(outlier_locs), noWrite = True)
                        else:
                            utils.print_flagged_obs_number(logfile, "Neighbour", variable, len(outlier_locs))

                    if plots:
                        n_utils.plot_outlier(station, variable, some_flags[outlier_locs], all_data, DATASTART)

                    # unflagging using neighbours
                    n_utils.do_unflagging(station, variable, all_data, reporting_accuracies, neigh_count, dpd_flags, FLAG_COL_DICT, DATASTART, logfile, plots = plots, diagnostics = diagnostics)

                else:
                    if plots or diagnostics:
                        print "No observations to assess for {}".format(variable)
                    else:
                        logfile.write("No observations to assess for {}\n".format(variable))

            # variable loop
        else:
            if plots or diagnostics:
                print "Fewer than 3 neighbours"
            else:
                logfile.write("Fewer than 3 neighbours\n")

        print dt.datetime.strftime(dt.datetime.now(), "%A, %d %B %Y, %H:%M:%S")
        print "processing took {:4.0f}s\n\n".format(time.time() - process_start_time)

        # end of neighbour check
        utils.append_history(station, "Neighbour Outlier Check")

        # clean up months
        qc_tests.clean_up.clu(station, ["temperatures","dewpoints","windspeeds","winddirs","slp"], [44,45,46,47,48], FLAG_COL_DICT, DATASTART, DATAEND, logfile, plots = plots)

        if diagnostics or plots: raw_input("stop")

        # masking (at least call from here - optional call from internal?)
        # write to file
        if first:
            ncdfp.write(os.path.join(NETCDF_DATA_LOCS, station.id + "_external.nc"), station, process_vars, os.path.join(INPUT_FILE_LOCS,'attributes.dat'), opt_var_list = carry_thru_vars, compressed = match_to_compress, processing_date = '', qc_code_version = qc_code_version)
            # gzip the raw file
        elif second:
            ncdfp.write(os.path.join(NETCDF_DATA_LOCS, station.id + "_external2.nc"), station, process_vars, os.path.join(INPUT_FILE_LOCS,'attributes.dat'), opt_var_list = carry_thru_vars, compressed = match_to_compress, processing_date = '', qc_code_version = qc_code_version)
            # gzip the raw file

        # masking - apply the flags and copy masked data to flagged_obs attribute
        if masking:
            station = utils.mask(station, process_vars, logfile)

            # write to file
            if first:
                ncdfp.write(os.path.join(NETCDF_DATA_LOCS, station.id + "_mask.nc"), station, process_vars, os.path.join(INPUT_FILE_LOCS,'attributes.dat'), opt_var_list = carry_thru_vars, compressed = match_to_compress, processing_date = '', qc_code_version = qc_code_version)
            elif second:
                ncdfp.write(os.path.join(NETCDF_DATA_LOCS, station.id + "_mask2.nc"), station, process_vars, os.path.join(INPUT_FILE_LOCS,'attributes.dat'), opt_var_list = carry_thru_vars, compressed = match_to_compress, processing_date = '', qc_code_version = qc_code_version)

            if plots or diagnostics:
                print "Masking completed\n"
                print dt.datetime.strftime(dt.datetime.now(), "%A, %d %B %Y, %H:%M:%S\n")
                print "processing took {:4.0f}s\n\n".format(time.time() - process_start_time)
            else:
                logfile.write("Masking completed\n")
                logfile.write(dt.datetime.strftime(dt.datetime.now(), "%A, %d %B %Y, %H:%M:%S\n"))
                logfile.write("processing took {:4.0f}s\n\n".format(time.time() - process_start_time))
                logfile.close()

    # gzip up all the raw files
    if doZip:
        for st, stat in enumerate(station_info):
            if first:
                subprocess.call(["gzip", os.path.join(NETCDF_DATA_LOCS, stat[0]+"_internal.nc")])
                if masking:
                    subprocess.call(["gzip", os.path.join(NETCDF_DATA_LOCS, stat[0]+"_external.nc")])
                    subprocess.call(["gzip", os.path.join(NETCDF_DATA_LOCS, stat[0]+"_mask.nc")])
            elif second:
                subprocess.call(["gzip", os.path.join(NETCDF_DATA_LOCS, stat[0]+"_internal2.nc")])
                if masking:
                    subprocess.call(["gzip", os.path.join(NETCDF_DATA_LOCS, stat[0]+"_external2.nc")])
                    subprocess.call(["gzip", os.path.join(NETCDF_DATA_LOCS, stat[0]+"_mask2.nc")])

    print "Neighbour Checks completed\n"

    return # neighbour_checks

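# --- Illustrative sketch (not part of the original pipeline) ----------------
# The neighbour check above flags an observation when at least three
# neighbours reported a value at that time stamp and more than 2/3 of them
# flagged it; time stamps covered by fewer than 3 neighbours get a tentative
# flag of -1.  The helper below reproduces that decision on plain arrays.
# The name and signature are illustrative only; it assumes numpy is already
# imported as np at module level, as elsewhere in this file.
def _neighbour_vote_sketch(neigh_flags, neigh_count):
    """Return +1 (outlier), -1 (too few neighbours) or 0 (pass) per time stamp."""
    decision = np.zeros(len(neigh_flags), dtype=int)
    some_flags, = np.where(neigh_flags > 0)

    # too few neighbours to make a judgement
    too_few, = np.where(neigh_count[some_flags] < 3)
    decision[some_flags[too_few]] = -1

    # at least 3 neighbours and more than 2/3 of them agree the value is bad
    outliers, = np.where(np.logical_and(neigh_count[some_flags] >= 3,
                                        neigh_flags[some_flags].astype("float") / neigh_count[some_flags] > 2./3.))
    decision[some_flags[outliers]] = 1
    return decision

# e.g. _neighbour_vote_sketch(np.array([3., 1., 0.]), np.array([4., 2., 5.]))
#      -> array([ 1, -1,  0])
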
def internal_checks(station_info, restart_id="", end_id="", second=False, all_checks=True, duplicate=False, odd=False, frequent=False, diurnal=False, gap=False, records=False, streaks=False, climatological=False, spike=False, humidity=False, cloud=False, variance=False, winds=False, diagnostics=False, plots=False):
    '''
    Run through internal checks on list of stations passed

    :param list station_info: list of lists - [[ID, lat, lon, elev]] - strings
    :param str restart_id: which station to start on
    :param str end_id: which station to end on
    :param bool second: do the second run
    :param bool all_checks: run all the checks
    :param bool duplicate/odd/frequent/diurnal/gap/records/streaks/climatological/spike/humidity/cloud/variance/winds: run each test separately
    :param bool diagnostics: print extra material to screen
    :param bool plots: create plots from each test [many files if all stations/all tests]
    '''
    first = not second

    if all_checks:
        duplicate = True
        odd = True
        frequent = True
        diurnal = True
        gap = True
        records = True
        streaks = True
        climatological = True
        spike = True
        humidity = True
        cloud = True
        variance = True
        winds = True
    else:
        print "single tests selected"

    qc_code_version = subprocess.check_output(['svnversion']).strip()

    # sort truncated run
    startindex = 0
    if restart_id != "":
        startindex, = np.where(station_info[:, 0] == restart_id)

    if end_id != "":
        endindex, = np.where(station_info[:, 0] == end_id)
        if endindex != len(station_info) - 1:
            station_info = station_info[startindex:endindex + 1]
        else:
            station_info = station_info[startindex:]
    else:
        station_info = station_info[startindex:]

    for st, stat in enumerate(station_info):
        # if st%100 != 0: continue # do every nth station

        print dt.datetime.strftime(dt.datetime.now(), "%A, %d %B %Y, %H:%M:%S")
        print "{:35s} {:d}/{:d}".format("Station Number : ", st + 1, len(station_info))
        print "{:35s} {}".format("Station Identifier :", stat[0])

        if plots or diagnostics:
            logfile = ""
        else:
            if first:
                logfile = file(LOG_OUTFILE_LOCS + stat[0] + '.log', 'w')
            elif second:
                logfile = file(LOG_OUTFILE_LOCS + stat[0] + '.log', 'a') # append to file if second iteration.
            logfile.write(dt.datetime.strftime(dt.datetime.now(), "%A, %d %B %Y, %H:%M:%S\n"))
            logfile.write("Internal Checks\n")
            logfile.write("{:35s} {}\n".format("Station Identifier :", stat[0]))

        process_start_time = time.time()

        station = utils.Station(stat[0], float(stat[1]), float(stat[2]), float(stat[3]))

        # latitude and longitude check
        if np.abs(station.lat) > 90.:
            if plots or diagnostics:
                print "{} {} {} {} {} {}\n".format(station.id, "Latitude Check", DATASTART.year, DATAEND.year, "All", "Unphysical latitude {}".format(station.lat))
            else:
                logfile.write("{} {} {} {} {} {}\n".format(station.id, "Latitude Check", DATASTART.year, DATAEND.year, "All", "Unphysical latitude {}".format(station.lat)))
                logfile.close()
            continue

        if np.abs(station.lon) > 180.:
            if plots or diagnostics:
                print "{} {} {} {} {} {}\n".format(station.id, "Longitude Check", DATASTART.year, DATAEND.year, "All", "Unphysical longitude {}".format(station.lon))
            else:
                logfile.write("{} {} {} {} {} {}\n".format(station.id, "Longitude Check", DATASTART.year, DATAEND.year, "All", "Unphysical longitude {}".format(station.lon)))
                logfile.close()
            continue

        # if running through the first time
        if first:
            if os.path.exists(os.path.join(NETCDF_DATA_LOCS, station.id + ".nc.gz")):
                # if gzip file, unzip here
                subprocess.call(["gunzip", os.path.join(NETCDF_DATA_LOCS, station.id + ".nc.gz")])
                time.sleep(5) # make sure it is unzipped before proceeding

            # read in the data
            ncdfp.read(os.path.join(NETCDF_DATA_LOCS, station.id + ".nc"), station, process_vars, opt_var_list=carry_thru_vars, diagnostics=diagnostics)

            if plots or diagnostics:
                print "{:35s} {}\n".format("Total station record size :", len(station.time.data))
            else:
                logfile.write("{:35s} {}\n".format("Total station record size :", len(station.time.data)))

            match_to_compress = utils.create_fulltimes(station, process_vars, DATASTART, DATAEND, carry_thru_vars)

            station.qc_flags = np.zeros([len(station.time.data), 69]) # changed to include updated wind tests

            # get reporting accuracies and frequencies.
            for var in process_vars:
                st_var = getattr(station, var)
                st_var.reporting_stats = utils.monthly_reporting_statistics(st_var, DATASTART, DATAEND)

        # or if second pass through?
        elif second:
            ncdfp.read(os.path.join(NETCDF_DATA_LOCS, station.id + "_mask.nc"), station, process_vars, opt_var_list=carry_thru_vars, diagnostics=diagnostics)

            print "{:35s} {}\n".format("Total station record size :", len(station.time.data))

            match_to_compress = utils.create_fulltimes(station, process_vars, DATASTART, DATAEND, carry_thru_vars)

        # Add history text to netcdf file

        # Reporting Changes - TODO

        # Duplicate months - check on temperature ONLY
        if duplicate:
            qc_tests.duplicate_months.dmc(station, ['temperatures'], process_vars, [0], DATASTART, DATAEND, logfile, diagnostics=diagnostics, plots=plots)

        # Odd Clusters
        if odd:
            qc_tests.odd_cluster.occ(station, ['temperatures', 'dewpoints', 'windspeeds', 'slp'], [54, 55, 56, 57], DATASTART, logfile, diagnostics=diagnostics, plots=plots, second=second)
            utils.apply_windspeed_flags_to_winddir(station, diagnostics=diagnostics)

        # Frequent Values
        if frequent:
            qc_tests.frequent_values.fvc(station, ['temperatures', 'dewpoints', 'slp'], [1, 2, 3], DATASTART, DATAEND, logfile, diagnostics=diagnostics, plots=plots)

        # Diurnal Cycle
        if diurnal:
            if np.abs(station.lat) <= 60.:
                qc_tests.diurnal_cycle.dcc(station, ['temperatures'], process_vars, [4], logfile, diagnostics=diagnostics, plots=plots)
            else:
                if plots or diagnostics:
                    print "Diurnal Cycle Check not run as station latitude ({}) > 60\n".format(station.lat)
                else:
                    logfile.write("Diurnal Cycle Check not run as station latitude ({}) > 60\n".format(station.lat))

        # Distributional Gap
        if gap:
            qc_tests.distributional_gap.dgc(station, ['temperatures', 'dewpoints', 'slp'], [5, 6, 7], DATASTART, DATAEND, logfile, diagnostics=diagnostics, plots=plots, GH=True)

        # Records
        if records:
            qc_tests.records.krc(station, ['temperatures', 'dewpoints', 'windspeeds', 'slp'], [8, 9, 10, 11], logfile, diagnostics=diagnostics, plots=plots)
            utils.apply_windspeed_flags_to_winddir(station, diagnostics=diagnostics)

        # Streaks and Repetitions
        if streaks:
            qc_tests.streaks.rsc(station, ['temperatures', 'dewpoints', 'windspeeds', 'slp', 'winddirs'], [[12, 16, 20], [13, 17, 21], [14, 18, 22], [15, 19, 23], [66, 67, 68]], DATASTART, DATAEND, logfile, diagnostics=diagnostics, plots=plots)
            utils.apply_windspeed_flags_to_winddir(station, diagnostics=diagnostics)

        # Climatological Outlier
        if climatological:
            qc_tests.climatological.coc(station, ['temperatures', 'dewpoints'], [24, 25], DATASTART, DATAEND, logfile, diagnostics=diagnostics, plots=plots)
            # column 26 kept spare for slp

        # Spike
        if spike:
            qc_tests.spike.sc(station, ['temperatures', 'dewpoints', 'slp', 'windspeeds'], [27, 28, 29, 65], DATASTART, DATAEND, logfile, diagnostics=diagnostics, plots=plots, second=second)
            utils.apply_windspeed_flags_to_winddir(station, diagnostics=diagnostics)

        # Humidity cross checks
        if humidity:
            qc_tests.humidity.hcc(station, [30, 31, 32], DATASTART, DATAEND, logfile, diagnostics=diagnostics, plots=plots)

        # Cloud cross check
        if cloud:
            qc_tests.clouds.ccc(station, [33, 34, 35, 36, 37, 38, 39, 40], logfile, diagnostics=diagnostics, plots=plots)

        # Variance
        if variance:
            qc_tests.variance.evc(station, ['temperatures', 'dewpoints', 'slp', 'windspeeds'], [58, 59, 60, 61], DATASTART, DATAEND, logfile, diagnostics=diagnostics, plots=plots)
            utils.apply_windspeed_flags_to_winddir(station, diagnostics=diagnostics)

        # Winds
        if winds:
            qc_tests.winds.wdc(station, [62, 63, 64], DATASTART, DATAEND, logfile, diagnostics=diagnostics, plots=plots)

        # are flags actually applied?
        if diagnostics or plots: raw_input("stop")

        # write to file
        if first:
            ncdfp.write(os.path.join(NETCDF_DATA_LOCS, station.id + "_internal.nc"), station, process_vars, os.path.join(INPUT_FILE_LOCS, 'attributes.dat'), opt_var_list=carry_thru_vars, compressed=match_to_compress, processing_date='', qc_code_version=qc_code_version)
            # gzip the raw file
            subprocess.call(["gzip", os.path.join(NETCDF_DATA_LOCS, station.id + ".nc")])
        elif second:
            ncdfp.write(os.path.join(NETCDF_DATA_LOCS, station.id + "_internal2.nc"), station, process_vars, os.path.join(INPUT_FILE_LOCS, 'attributes.dat'), opt_var_list=carry_thru_vars, compressed=match_to_compress, processing_date='', qc_code_version=qc_code_version)
            # gzip the raw file
            subprocess.call(["gzip", os.path.join(NETCDF_DATA_LOCS, station.id + "_mask.nc")])

        logfile.write(dt.datetime.strftime(dt.datetime.now(), "%A, %d %B %Y, %H:%M:%S\n"))
        logfile.write("processing took {:4.0f}s\n\n".format(time.time() - process_start_time))
        logfile.close()

    print "Internal Checks completed\n"

    return # internal_checks

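# --- Illustrative sketch (not part of the original pipeline) ----------------
# utils.create_fulltimes (called above) pads the sparse station record onto a
# complete hourly time axis between DATASTART and DATAEND, and the returned
# index is later used to compress the data back when writing.  The exact
# behaviour lives in utils; the toy helper below only illustrates the idea of
# matching observed hours onto a full axis and is an assumption, not the real
# implementation.
def _match_to_full_hours_sketch(obs_hours, n_hours):
    """Return indices of the full hourly axis at which observations exist."""
    full_axis = np.arange(n_hours)
    return np.in1d(full_axis, obs_hours).nonzero()[0]

# e.g. _match_to_full_hours_sketch(np.array([0, 3, 5]), 8) -> array([0, 3, 5])
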
def internal_checks(restart_id = "", end_id = "", all_checks = True, duplicate = False, odd = False, frequent = False, diurnal = False, gap = False, records = False, streaks = False, climatological = False, spike = False, humidity = False, cloud = False, variance = False, winds = False, pressure = False, precipitation = False, diagnostics = False, plots = False, doMonth = False):
    '''
    Run through internal checks on list of stations passed

    :param str restart_id: which station to start on
    :param str end_id: which station to end on
    :param bool all_checks: run all the checks
    :param bool duplicate/odd/frequent/diurnal/gap/records/streaks/climatological/spike/humidity/cloud/variance/winds/pressure/precipitation: run each test separately
    :param bool diagnostics: print extra material to screen
    :param bool plots: create plots from each test [many files if all stations/all tests]
    :param bool doMonth: a monthly append process
    '''
    if all_checks:
        duplicate = True
        odd = True
        frequent = True
        diurnal = True
        gap = True
        records = True
        streaks = True
        climatological = True
        spike = True
        humidity = True
        cloud = True
        variance = True
        winds = True
        pressure = True
        precipitation = True
    else:
        print "single tests selected"

    # qc_code_version = subprocess.check_output(['svnversion']).strip()
    qc_code_version = subprocess.check_output(['svn', 'info', 'file:///home/h05/rdunn/svn/hadisd_py_qc/branches/monthly/'])
    for line in qc_code_version.split("\n"):
        if line.split(":")[0] == "Revision":
            qc_code_version = line.split(":")[1]
            break

    # get station information
    try:
        station_info = np.genfromtxt(os.path.join(INPUT_FILE_LOCS, STATION_LIST), dtype=(str))
    except IOError:
        print "station list not found"
        sys.exit()

    # sort truncated run
    startindex = [0]
    if restart_id != "":
        startindex, = np.where(station_info[:,0] == restart_id)

    if end_id != "":
        endindex, = np.where(station_info[:,0] == end_id)
        if endindex != len(station_info) - 1:
            station_info = station_info[startindex[0]: endindex[0]+1]
        else:
            station_info = station_info[startindex[0]:]
    else:
        station_info = station_info[startindex[0]:]

    for st, stat in enumerate(station_info):
        # if st%100 != 0: continue # do every nth station

        print dt.datetime.strftime(dt.datetime.now(), "%A, %d %B %Y, %H:%M:%S")
        print "{:35s} {:d}/{:d}".format("Station Number : ", st + 1, len(station_info))
        print "{:35s} {}".format("Station Identifier :", stat[0])
        if doMonth:
            print "Running with incomplete final year"

        # set up the log file
        logfile = file(LOG_OUTFILE_LOCS+stat[0]+'.log', 'w')
        logfile.write(dt.datetime.strftime(dt.datetime.now(), "%A, %d %B %Y, %H:%M:%S\n"))
        logfile.write("Internal Checks\n")
        logfile.write("{:35s} {}\n".format("Station Identifier :", stat[0]))

        process_start_time = time.time()

        station = utils.Station(stat[0], float(stat[1]), float(stat[2]), float(stat[3]))

        # latitude and longitude check
        if np.abs(station.lat) > 90.:
            if plots or diagnostics:
                print "{} {} {} {} {} {}\n".format(station.id, "Latitude Check", DATASTART.year, DATAEND.year, "All", "Unphysical latitude {}".format(station.lat))
            else:
                logfile.write("{} {} {} {} {} {}\n".format(station.id, "Latitude Check", DATASTART.year, DATAEND.year, "All", "Unphysical latitude {}".format(station.lat)))
                logfile.close()
            continue

        # check if station longitude outside of bounds
        if np.abs(station.lon) > 180.:
            if plots or diagnostics:
                print "{} {} {} {} {} {}\n".format(station.id, "Longitude Check", DATASTART.year, DATAEND.year, "All", "Unphysical longitude {}".format(station.lon))
            else:
                logfile.write("{} {} {} {} {} {}\n".format(station.id, "Longitude Check", DATASTART.year, DATAEND.year, "All", "Unphysical longitude {}".format(station.lon)))
station.id,"Longitude Check",DATASTART.year, DATAEND.year,"All", "Unphysical longitude {}".format(station.lon))) logfile.close() continue # check if file is zipped if os.path.exists(os.path.join(NETCDF_DATA_LOCS, "hadisd.{}_19310101-{}_{}_raw.nc.gz".format(LONG_VERSION, END_TIME, station.id))): # if gzip file, unzip here subprocess.call(["gunzip",os.path.join(NETCDF_DATA_LOCS, "hadisd.{}_19310101-{}_{}_raw.nc.gz".format(LONG_VERSION, END_TIME, station.id))]) time.sleep(5) # make sure it is unzipped before proceeding # read in the data ncdfp.read(os.path.join(NETCDF_DATA_LOCS, "hadisd.{}_19310101-{}_{}_raw.nc".format(LONG_VERSION, END_TIME, station.id)), station, process_vars, opt_var_list = carry_thru_vars, diagnostics = diagnostics) if plots or diagnostics: print "{:35s} {}\n".format("Total station record size :",len(station.time.data)) else: logfile.write("{:35s} {}\n".format("Total station record size :",len(station.time.data))) match_to_compress = utils.create_fulltimes(station, process_vars, DATASTART, DATAEND, carry_thru_vars) station.qc_flags = np.zeros([len(station.time.data),71]) # changed to include updated wind tests, station level pressure & precipitation # get reporting accuracies and frequencies. for var in process_vars: st_var = getattr(station, var) st_var.reporting_stats = utils.monthly_reporting_statistics(st_var, DATASTART, DATAEND) # Add history text to netcdf file # Reporting Changes - TODO # Duplicate months - check on temperature ONLY if duplicate: # no change as result of incomplete year qc_tests.duplicate_months.dmc(station, ['temperatures'], process_vars, [0], DATASTART, DATAEND, logfile, diagnostics = diagnostics, plots = plots) # Odd Clusters if odd: # no change as result of incomplete year qc_tests.odd_cluster.occ(station,['temperatures','dewpoints','windspeeds','slp'], [54,55,56,57], DATASTART, logfile, diagnostics = diagnostics, plots = plots) utils.apply_flags_from_A_to_B(station, "slp", "stnlp", diagnostics = diagnostics) utils.apply_flags_from_A_to_B(station, "windspeeds", "winddirs", diagnostics = diagnostics) # Frequent Values if frequent: qc_tests.frequent_values.fvc(station, ['temperatures', 'dewpoints','slp'], [1,2,3], DATASTART, DATAEND, logfile, diagnostics = diagnostics, plots = plots, doMonth = doMonth) utils.apply_flags_from_A_to_B(station, "slp", "stnlp", diagnostics = diagnostics) # Diurnal Cycle if diurnal: if np.abs(station.lat) <= 60.: qc_tests.diurnal_cycle.dcc(station, ['temperatures'], process_vars, [4], DATASTART, DATAEND, logfile, diagnostics = diagnostics, plots = plots, doMonth = doMonth) else: if plots or diagnostics: print "Diurnal Cycle Check not run as station latitude ({}) > 60\n".format(station.lat) else: logfile.write("Diurnal Cycle Check not run as station latitude ({}) > 60\n".format(station.lat)) # Distributional Gap if gap: qc_tests.distributional_gap.dgc(station, ['temperatures','dewpoints','slp'], [5,6,7], DATASTART, DATAEND, logfile, diagnostics = diagnostics, plots = plots, GH = True, doMonth = doMonth) utils.apply_flags_from_A_to_B(station, "slp", "stnlp", diagnostics = diagnostics) # Records if records: qc_tests.records.krc(station, ['temperatures','dewpoints','windspeeds','slp'], [8,9,10,11], logfile, diagnostics = diagnostics, plots = plots) utils.apply_flags_from_A_to_B(station, "windspeeds", "winddirs", diagnostics = diagnostics) utils.apply_flags_from_A_to_B(station, "slp", "stnlp", diagnostics = diagnostics) # Streaks and Repetitions if streaks: qc_tests.streaks.rsc(station, 
            utils.apply_flags_from_A_to_B(station, "windspeeds", "winddirs", diagnostics = diagnostics)
            utils.apply_flags_from_A_to_B(station, "slp", "stnlp", diagnostics = diagnostics)

        # Climatological Outlier
        if climatological:
            qc_tests.climatological.coc(station, ['temperatures','dewpoints'], [24,25], DATASTART, DATAEND, logfile, diagnostics = diagnostics, plots = plots, doMonth = doMonth)
            # column 26 kept spare for slp

        # Spike
        if spike:
            qc_tests.spike.sc(station, ['temperatures','dewpoints','slp','windspeeds'], [27,28,29,65], DATASTART, DATAEND, logfile, diagnostics = diagnostics, plots = plots, doMonth = doMonth)
            utils.apply_flags_from_A_to_B(station, "windspeeds", "winddirs", diagnostics = diagnostics)
            utils.apply_flags_from_A_to_B(station, "slp", "stnlp", diagnostics = diagnostics)

        # Humidity cross checks
        if humidity:
            qc_tests.humidity.hcc(station, [30,31,32], DATASTART, DATAEND, logfile, diagnostics = diagnostics, plots = plots)

        # Cloud cross check
        if cloud:
            qc_tests.clouds.ccc(station, [33,34,35,36,37,38,39,40], logfile, diagnostics = diagnostics, plots = plots)

        # Variance
        if variance:
            qc_tests.variance.evc(station, ['temperatures','dewpoints','slp','windspeeds'], [58,59,60,61], DATASTART, DATAEND, logfile, diagnostics = diagnostics, plots = plots, doMonth = doMonth)
            utils.apply_flags_from_A_to_B(station, "windspeeds", "winddirs", diagnostics = diagnostics)
            utils.apply_flags_from_A_to_B(station, "slp", "stnlp", diagnostics = diagnostics)

        # Winds
        if winds:
            qc_tests.winds.wdc(station, [62,63,64], DATASTART, DATAEND, logfile, diagnostics = diagnostics, plots = plots, doMonth = doMonth)

        # Pressure
        if pressure:
            qc_tests.pressure.spc(station, [69], DATASTART, DATAEND, logfile, diagnostics = diagnostics, plots = plots, doMonth = doMonth)

        # Precipitation
        if precipitation:
            qc_tests.precipitation.pcc(station, [70], DATASTART, DATAEND, logfile, diagnostics = diagnostics, plots = plots)

        # are flags actually applied?
        sys.stdout.flush()

        if diagnostics or plots: raw_input("stop")

        # write to file
        ncdfp.write(os.path.join(NETCDF_DATA_LOCS, "hadisd.{}_19310101-{}_{}_internal.nc".format(LONG_VERSION, END_TIME, station.id)), station, process_vars, os.path.join(INPUT_FILE_LOCS,'attributes.dat'), opt_var_list = carry_thru_vars, compressed = match_to_compress, processing_date = dt.datetime.strftime(dt.datetime.now(), "%d-%b-%Y"), qc_code_version = qc_code_version)

        # gzip the raw file
        subprocess.call(["gzip", os.path.join(NETCDF_DATA_LOCS, "hadisd.{}_19310101-{}_{}_raw.nc".format(LONG_VERSION, END_TIME, station.id))])

        logfile.write(dt.datetime.strftime(dt.datetime.now(), "%A, %d %B %Y, %H:%M:%S\n"))
        logfile.write("processing took {:4.0f}s\n\n".format(time.time() - process_start_time))
        logfile.close()

        # clean up
        gc.collect()

    print "Internal Checks completed\n"

    return # internal_checks

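# --- Illustrative sketch (not part of the original pipeline) ----------------
# The monthly-update version of internal_checks above extracts the QC code
# version from the "Revision" line of `svn info` output.  A self-contained
# version of that parsing, run on a plain string rather than a subprocess
# call; the function name and the trailing strip() are illustrative additions.
def _parse_svn_revision_sketch(svn_info_text):
    """Return the value of the 'Revision' field, or an empty string."""
    for line in svn_info_text.split("\n"):
        if line.split(":")[0] == "Revision":
            return line.split(":")[1].strip()
    return ""

# e.g. _parse_svn_revision_sketch("Path: monthly\nRevision: 123\nNode Kind: directory")
#      -> "123"
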
def neighbour_checks(station_info, restart_id = "", end_id = "", distances=np.array([]), angles=np.array([]), second = False, masking = False, doZip=False, plots = False, diagnostics = False):
    """
    Run through neighbour checks on list of stations passed

    :param list station_info: list of lists - [[ID, lat, lon, elev]] - strings
    :param str restart_id: which station to start on
    :param str end_id: which station to end on
    :param array distances: array of distances between station pairs
    :param array angles: array of angles between station pairs
    :param bool second: do the second run
    :param bool masking: apply the flags to the data to mask the observations.
    :param bool doZip: gzip the output files once written
    :param bool plots: create plots from each test
    :param bool diagnostics: print extra material to screen
    """
    first = not second

    qc_code_version = subprocess.check_output(['svnversion']).strip()

    # if distances and angles not calculated, then do so
    if (len(distances) == 0) or (len(angles) == 0):
        print "calculating distances and bearings matrix"
        distances, angles = get_distances_angles(station_info)

    # extract before truncating the array
    neighbour_elevations = np.array(station_info[:,3], dtype=float)
    neighbour_ids = np.array(station_info[:,0])
    neighbour_info = np.array(station_info[:,:])

    # sort truncated run
    startindex = 0
    if restart_id != "":
        startindex, = np.where(station_info[:,0] == restart_id)

    if end_id != "":
        endindex, = np.where(station_info[:,0] == end_id)
        if endindex != len(station_info) - 1:
            station_info = station_info[startindex: endindex+1]
            distances = distances[startindex:endindex+1,:]
            angles = angles[startindex:endindex+1,:]
        else:
            station_info = station_info[startindex:]
            distances = distances[startindex:,:]
            angles = angles[startindex:,:]
    else:
        station_info = station_info[startindex:]
        distances = distances[startindex:,:]
        angles = angles[startindex:,:]

    # process each station in turn
    for st, stat in enumerate(station_info):

        print dt.datetime.strftime(dt.datetime.now(), "%A, %d %B %Y, %H:%M:%S")
        print "Neighbour Check"
        print "{:35s} {}".format("Station Identifier :", stat[0])

        if not plots and not diagnostics:
            logfile = file(LOG_OUTFILE_LOCS+stat[0]+'.log', 'a') # append to file if second iteration.
            logfile.write(dt.datetime.strftime(dt.datetime.now(), "%A, %d %B %Y, %H:%M:%S\n"))
            logfile.write("Neighbour Check\n")
            logfile.write("{:35s} {}\n".format("Station Identifier :", stat[0]))
        else:
            logfile = ""

        process_start_time = time.time()

        station = utils.Station(stat[0], float(stat[1]), float(stat[2]), float(stat[3]))

        # if running through the first time
        if first:
            if os.path.exists(os.path.join(NETCDF_DATA_LOCS, station.id + "_internal.nc.gz")):
                # if gzip file, unzip here
                subprocess.call(["gunzip", os.path.join(NETCDF_DATA_LOCS, station.id + "_internal.nc.gz")])
                time.sleep(5) # make sure it is unzipped before proceeding

            # read in the data
            ncdfp.read(os.path.join(NETCDF_DATA_LOCS, station.id + "_internal.nc"), station, process_vars, carry_thru_vars, diagnostics = diagnostics)

            if plots or diagnostics:
                print "{:35s} {}\n".format("Total station record size :", len(station.time.data))
            else:
                logfile.write("{:35s} {}\n".format("Total station record size :", len(station.time.data)))

            match_to_compress = utils.create_fulltimes(station, process_vars, DATASTART, DATAEND, carry_thru_vars)

        # or if second pass through?
        elif second:
            if os.path.exists(os.path.join(NETCDF_DATA_LOCS, station.id + "_internal2.nc.gz")):
                # if gzip file, unzip here
                subprocess.call(["gunzip", os.path.join(NETCDF_DATA_LOCS, station.id + "_internal2.nc.gz")])
                time.sleep(5) # make sure it is unzipped before proceeding

            ncdfp.read(os.path.join(NETCDF_DATA_LOCS, station.id + "_internal2.nc"), station, process_vars, carry_thru_vars, diagnostics = diagnostics)

            if plots or diagnostics:
                print "{:35s} {}\n".format("Total station record size :", len(station.time.data))
            else:
                logfile.write("{:35s} {}\n".format("Total station record size :", len(station.time.data)))

            match_to_compress = utils.create_fulltimes(station, process_vars, DATASTART, DATAEND, carry_thru_vars)

        # select neighbours
        neighbour_distances = distances[st,:]
        neighbour_bearings = angles[st,:]

        # have to add in start index so that can use location in distance file.
        # neighbours = n_utils.get_neighbours(st+startindex, np.float(stat[3]), neighbour_distances, neighbour_bearings, neighbour_elevations)

        # return all neighbours up to a limit from the distance and elevation offsets (500km and 300m respectively)
        neighbours, neighbour_quadrants = n_utils.get_all_neighbours(st+startindex, np.float(stat[3]), neighbour_distances, neighbour_bearings, neighbour_elevations)

        if plots or diagnostics:
            print "{:14s} {:10s} {:10s}".format("Neighbour","Distance","Elevation")
            for n in neighbours:
                print "{:14s} {:10.1f} {:10.1f}".format(neighbour_ids[n],neighbour_distances[n],neighbour_elevations[n])
        else:
            logfile.write("{:14s} {:10s} {:10s}\n".format("Neighbour","Distance","Elevation"))
            for n in neighbours:
                logfile.write("{:14s} {:10.1f} {:10.1f}\n".format(neighbour_ids[n],neighbour_distances[n],neighbour_elevations[n]))

        # if sufficient neighbours
        if len(neighbours) >= 3:

            for variable, col in FLAG_OUTLIER_DICT.items():
                # NOTE - this requires multiple reads of the same file
                #        but does make it easier to understand and code
                st_var = getattr(station, variable)

                if plots or diagnostics:
                    print "Length of {} record: {}".format(variable, len(st_var.data.compressed()))
                else:
                    logfile.write("Length of {} record: {}\n".format(variable, len(st_var.data.compressed())))

                if len(st_var.data.compressed()) > 0:

                    final_neighbours = n_utils.select_neighbours(station, variable, neighbour_info[neighbours], neighbours, neighbour_distances[neighbours], neighbour_quadrants, NETCDF_DATA_LOCS, DATASTART, DATAEND, logfile, second = second, diagnostics = diagnostics, plots = plots)

                    # now read in final set of neighbours and process
                    neigh_flags = np.zeros(len(station.time.data)) # count up how many neighbours think this obs is bad
                    neigh_count = np.zeros(len(station.time.data)) # number of neighbours at each time stamp
                    dpd_flags = np.zeros(len(station.time.data)) # count of neighbour dewpoint depression flags (column 31) at each time stamp
                    reporting_accuracies = np.zeros(len(neighbours)) # reporting accuracy of each neighbour

                    all_data = np.ma.zeros([len(final_neighbours), len(station.time.data)]) # store all the neighbour values

                    for nn, nn_loc in enumerate(final_neighbours):

                        neigh_details = neighbour_info[nn_loc]
                        neigh = utils.Station(neigh_details[0], float(neigh_details[1]), float(neigh_details[2]), float(neigh_details[3]))

                        if first:
                            ncdfp.read(os.path.join(NETCDF_DATA_LOCS, neigh.id + "_internal.nc"), neigh, [variable], diagnostics = diagnostics, read_input_station_id = False)
                        elif second:
                            ncdfp.read(os.path.join(NETCDF_DATA_LOCS, neigh.id + "_internal2.nc"), neigh, [variable], diagnostics = diagnostics, read_input_station_id = False)

                        dummy = utils.create_fulltimes(neigh, [variable], DATASTART, DATAEND, [], do_input_station_id = False)
                        all_data[nn, :] = utils.apply_filter_flags(getattr(neigh, variable))

                        if diagnostics:
                            print neigh_details

                        n_utils.detect(station, neigh, variable, neigh_flags, neigh_count, DATASTART, DATAEND, distance = neighbour_distances[nn_loc], diagnostics = diagnostics, plots = plots)

                        reporting_accuracies[nn] = utils.reporting_accuracy(getattr(neigh, variable).data)

                        dpd_flags += neigh.qc_flags[:,31]
                    # gone through all neighbours

                    # if at least 2/3 of neighbours have flagged this point (and at least 3 neighbours)
                    some_flags, = np.where(neigh_flags > 0)
                    outlier_locs, = np.where(np.logical_and((neigh_count[some_flags] >= 3), (neigh_flags[some_flags].astype("float")/neigh_count[some_flags] > 2./3.)))

                    # flag where < 3 neighbours
                    locs = np.where(neigh_count[some_flags] < 3)
                    station.qc_flags[some_flags[locs], col] = -1

                    if len(outlier_locs) >= 1:
                        station.qc_flags[some_flags[outlier_locs], col] = 1

                        # print number flagged and copy into attribute
                        if plots or diagnostics:
                            utils.print_flagged_obs_number(logfile, "Neighbour", variable, len(outlier_locs), noWrite = True)
                        else:
                            utils.print_flagged_obs_number(logfile, "Neighbour", variable, len(outlier_locs))
                        st_var = getattr(station, variable)
                        st_var.flags[some_flags[outlier_locs]] = 1

                    else:
                        if plots or diagnostics:
                            utils.print_flagged_obs_number(logfile, "Neighbour", variable, len(outlier_locs), noWrite = True)
                        else:
                            utils.print_flagged_obs_number(logfile, "Neighbour", variable, len(outlier_locs))

                    if plots:
                        n_utils.plot_outlier(station, variable, some_flags[outlier_locs], all_data, DATASTART)

                    # unflagging using neighbours
                    n_utils.do_unflagging(station, variable, all_data, reporting_accuracies, neigh_count, dpd_flags, FLAG_COL_DICT, DATASTART, logfile, plots = plots, diagnostics = diagnostics)

                else:
                    if plots or diagnostics:
                        print "No observations to assess for {}".format(variable)
                    else:
                        logfile.write("No observations to assess for {}\n".format(variable))

            # variable loop
        else:
            if plots or diagnostics:
                print "Fewer than 3 neighbours"
            else:
                logfile.write("Fewer than 3 neighbours\n")

        print dt.datetime.strftime(dt.datetime.now(), "%A, %d %B %Y, %H:%M:%S")
        print "processing took {:4.0f}s\n\n".format(time.time() - process_start_time)

        # end of neighbour check
        utils.append_history(station, "Neighbour Outlier Check")

        # clean up months
        qc_tests.clean_up.clu(station, ["temperatures","dewpoints","slp","windspeeds","winddirs"], [44,45,46,47,48], FLAG_COL_DICT, DATASTART, DATAEND, logfile, plots = plots, diagnostics = diagnostics)

        if diagnostics or plots: raw_input("stop")

        # masking (at least call from here - optional call from internal?)
        # write to file
        if first:
            ncdfp.write(os.path.join(NETCDF_DATA_LOCS, station.id + "_external.nc"), station, process_vars, os.path.join(INPUT_FILE_LOCS,'attributes.dat'), opt_var_list = carry_thru_vars, compressed = match_to_compress, processing_date = '', qc_code_version = qc_code_version)
            # gzip the raw file
        elif second:
            ncdfp.write(os.path.join(NETCDF_DATA_LOCS, station.id + "_external2.nc"), station, process_vars, os.path.join(INPUT_FILE_LOCS,'attributes.dat'), opt_var_list = carry_thru_vars, compressed = match_to_compress, processing_date = '', qc_code_version = qc_code_version)
            # gzip the raw file

        # masking - apply the flags and copy masked data to flagged_obs attribute
        if masking:
            station = utils.mask(station, process_vars, logfile, FLAG_COL_DICT)

            # write to file
            if first:
                ncdfp.write(os.path.join(NETCDF_DATA_LOCS, station.id + "_mask.nc"), station, process_vars, os.path.join(INPUT_FILE_LOCS,'attributes.dat'), opt_var_list = carry_thru_vars, compressed = match_to_compress, processing_date = '', qc_code_version = qc_code_version)
            elif second:
                ncdfp.write(os.path.join(NETCDF_DATA_LOCS, station.id + "_mask2.nc"), station, process_vars, os.path.join(INPUT_FILE_LOCS,'attributes.dat'), opt_var_list = carry_thru_vars, compressed = match_to_compress, processing_date = '', qc_code_version = qc_code_version)

            if plots or diagnostics:
                print "Masking completed\n"
                print dt.datetime.strftime(dt.datetime.now(), "%A, %d %B %Y, %H:%M:%S\n")
                print "processing took {:4.0f}s\n\n".format(time.time() - process_start_time)
            else:
                logfile.write("Masking completed\n")
                logfile.write(dt.datetime.strftime(dt.datetime.now(), "%A, %d %B %Y, %H:%M:%S\n"))
                logfile.write("processing took {:4.0f}s\n\n".format(time.time() - process_start_time))
                logfile.close()

    # looped through all stations

    # gzip up all the raw files
    if doZip:
        for st, stat in enumerate(station_info):
            if first:
                subprocess.call(["gzip", os.path.join(NETCDF_DATA_LOCS, stat[0]+"_internal.nc")])
                if masking:
                    subprocess.call(["gzip", os.path.join(NETCDF_DATA_LOCS, stat[0]+"_external.nc")])
                    subprocess.call(["gzip", os.path.join(NETCDF_DATA_LOCS, stat[0]+"_mask.nc")])
            elif second:
                subprocess.call(["gzip", os.path.join(NETCDF_DATA_LOCS, stat[0]+"_internal2.nc")])
                if masking:
                    subprocess.call(["gzip", os.path.join(NETCDF_DATA_LOCS, stat[0]+"_external2.nc")])
                    subprocess.call(["gzip", os.path.join(NETCDF_DATA_LOCS, stat[0]+"_mask2.nc")])

    print "Neighbour Checks completed\n"

    return # neighbour_checks

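# --- Illustrative sketch (not part of the original pipeline) ----------------
# n_utils.get_all_neighbours (used above) returns candidate neighbours within
# distance and elevation-difference limits (500 km and 300 m are quoted in the
# comments).  The helper below is a simplified illustration of that kind of
# selection, not the real routine, which also tracks the bearing quadrant of
# each neighbour; the thresholds, name and signature are assumptions.
def _candidate_neighbours_sketch(target_index, elevations, distances, sep_limit=500., elev_limit=300.):
    """Return indices of stations within sep_limit km and elev_limit m of the target."""
    elev_diffs = np.abs(elevations - elevations[target_index])
    candidates, = np.where(np.logical_and(distances <= sep_limit, elev_diffs <= elev_limit))
    # exclude the target station itself
    return candidates[candidates != target_index]
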
def make_hum_heat_vars(station_info, restart_id="", end_id="", diagnostics=False, plots=False):
    """
    Make the humidity and heat-stress variable netCDF files

    Make two sets of output files containing the humidity and heat-stress
    parameters calculated on an hourly basis from the QC'd HadISD data

    :param list station_info: station information list
    :param str restart_id: first station to process
    :param str end_id: last station to process
    :param bool diagnostics: verbose output to screen
    :param bool plots: make plots (placeholder)
    """
    # sort truncated run
    startindex = 0
    if restart_id != "":
        startindex, = np.where(station_info[:, 0] == restart_id)

    if end_id != "":
        endindex, = np.where(station_info[:, 0] == end_id)
        if endindex != len(station_info) - 1:
            station_info = station_info[startindex:endindex + 1]
        else:
            station_info = station_info[startindex:]
    else:
        station_info = station_info[startindex:]

    for st, stat in enumerate(station_info):

        print dt.datetime.strftime(dt.datetime.now(), "%A, %d %B %Y, %H:%M:%S")
        print "{:35s} {:d}/{:d}".format("Station Number : ", st + 1, len(station_info))
        print "{:35s} {}".format("Station Identifier :", stat[0])

        if plots or diagnostics:
            logfile = ""
        else:
            logfile = file(LOG_OUTFILE_LOCS + stat[0] + '.log', 'a')
            logfile.write(dt.datetime.strftime(dt.datetime.now(), "%A, %d %B %Y, %H:%M:%S\n"))
            logfile.write("Calculating Humidity and Heat Stress variables\n")
            logfile.write("{:35s} {}\n".format("Station Identifier :", stat[0]))

        process_start_time = time.time()

        station = utils.Station(stat[0], float(stat[1]), float(stat[2]), float(stat[3]))

        if os.path.exists(os.path.join(NETCDF_DATA_LOCS, station.id + "_mask.nc.gz")):
            # if gzip file, unzip here
            subprocess.call(["gunzip", os.path.join(NETCDF_DATA_LOCS, station.id + "_mask.nc.gz")])
            time.sleep(5) # make sure it is unzipped before proceeding

        # read in the data
        ncdfp.read(os.path.join(NETCDF_DATA_LOCS, station.id + "_mask.nc"), station, process_vars, diagnostics=diagnostics, read_qc_flags=False, read_flagged_obs=False)

        match_to_compress = utils.create_fulltimes(station, process_vars, DATASTART, DATAEND, do_qc_flags=False, do_flagged_obs=False)

        # run through calculations, each one should add a new variable to object.
        """
        1) Use T and P to get e [to get es, use Td]
        2) Use e, P, Td and T to get Tw
        3) If Tw < 0C, recalculate e w.r.t ice, and re-obtain Tw - keep both!
        4) Use e and P to calculate q
        5) Use e and es to get rh (use appropriate es too) - or q and qs
        what P to use if no measurement - using monthly mean probably isn't appropriate in this instance??
        """
        station = humidity.run_calcs(station, logfile)

        # run through heat stress calculations
        station = heat_stress.run_calcs(station, logfile)

        if diagnostics or plots: raw_input("stop")

        # adjust this to work with the desired output file - will need a separate write function - output humidity in one set, heat indices in another?
        humidity_vars = ["temperatures", "dewpoints", "slp", "vapour_pressure", "saturation_vapour_pressure", "wetbulb_temperature", "specific_humidity", "relative_humidity"]
        ncdfp.write(os.path.join(NETCDF_DATA_LOCS, station.id + "_humidity.nc"), station, humidity_vars, os.path.join(INPUT_FILE_LOCS, 'attributes.dat'), compressed=match_to_compress, processing_date='', qc_code_version='', write_QC_flags=False, write_flagged_obs=False, least_significant_digit=5)

        heat_stress_vars = ["temperatures", "dewpoints", "windspeeds", "THI", "WBGT", "humidex", "apparent_t", "heat_index"]
        ncdfp.write(os.path.join(NETCDF_DATA_LOCS, station.id + "_heat_stress.nc"), station, heat_stress_vars, os.path.join(INPUT_FILE_LOCS, 'attributes.dat'), compressed=match_to_compress, processing_date='', qc_code_version='', write_QC_flags=False, write_flagged_obs=False, least_significant_digit=5)

        # gzip the raw file
        # subprocess.call(["gzip", "-f", os.path.join(NETCDF_DATA_LOCS, station.id + "_humidity.nc")])
        # subprocess.call(["gzip", "-f", os.path.join(NETCDF_DATA_LOCS, station.id + "_heat_stress.nc")])
        # subprocess.call(["gzip", os.path.join(NETCDF_DATA_LOCS, station.id + "_mask.nc")])

        logfile.write(dt.datetime.strftime(dt.datetime.now(), "%A, %d %B %Y, %H:%M:%S\n"))
        logfile.write("processing took {:4.0f}s\n\n".format(time.time() - process_start_time))
        logfile.close()

    print "Humidity and Heat Stress Indices calculated"

    return # make_hum_heat_vars
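
# --- Illustrative sketch (not part of the original pipeline) ----------------
# humidity.run_calcs (called above) derives vapour pressure, wet-bulb
# temperature, specific and relative humidity from T, Td and P, following the
# numbered steps in the block comment.  The exact formulations live in the
# humidity module; the helper below is only a standard Magnus-type sketch of
# steps (1) and (4), and is an assumption, not the HadISD implementation.
def _vapour_pressure_and_q_sketch(dewpoint, pressure):
    """Vapour pressure (hPa) from dewpoint (deg C), and specific humidity (g/kg)."""
    e = 6.112 * np.exp((17.62 * dewpoint) / (243.12 + dewpoint))  # Magnus formula over water (assumed coefficients)
    q = 1000. * (0.622 * e) / (pressure - 0.378 * e)              # from vapour pressure and pressure (hPa)
    return e, q

# e.g. _vapour_pressure_and_q_sketch(10., 1013.) -> (about 12.3 hPa, about 7.6 g/kg)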