import glob
import os
import re
import subprocess

import dateutil.parser as dp
import numpy as np
import pandas as pd
import pyart

# Local modules; the import paths are assumptions based on how the names are used below
import calib_functions
import SETTINGS
from database_handler import DataBaseHandler


def make_hourly_files():
    rh = DataBaseHandler(table_name="process_hourly_zdr")

    # List only date directories
    inputdir = SETTINGS.ZDR_CALIB_DIR
    outdir = SETTINGS.ZDR_CALIB_DIR
    pattern = re.compile(r'(\d{8})')
    proc_dates = [x for x in os.listdir(inputdir) if pattern.match(x)]
    proc_dates.sort()

    # For each date where the vertical scans have already been processed,
    # calculate hourly values of melting layer height and ZDR
    for date in proc_dates:
        print(date)
        identifier = f'{date}'
        result = rh.get_result(identifier)

        # If there is a 'success' or 'not enough rain' identifier, the date has
        # already been processed; otherwise carry on and process the data
        if rh.ran_successfully(identifier) or result == 'not enough rain':
            print(f'[INFO] Already processed {date}')
        else:
            if calib_functions.calc_hourly_ML(outdir, date):
                rh.insert_success(identifier)
            else:
                rh.insert_failure(identifier, 'not enough rain')
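# DataBaseHandler is not defined in this module. As a rough illustration (not
# the project's actual implementation), the interface these scripts rely on
# could be satisfied by a minimal sqlite3-backed class like the sketch below;
# the table schema and constructor arguments are assumptions.
import sqlite3


class _SketchDataBaseHandler:
    """Hypothetical stand-in showing the interface used by these scripts."""

    def __init__(self, table_name, db_path='results.db'):
        self.table = table_name
        self.conn = sqlite3.connect(db_path)
        self.conn.execute(
            f'CREATE TABLE IF NOT EXISTS {self.table} '
            '(identifier TEXT PRIMARY KEY, result TEXT)')

    def get_result(self, identifier):
        # Return the stored result string for this identifier, or None
        row = self.conn.execute(
            f'SELECT result FROM {self.table} WHERE identifier = ?',
            (identifier,)).fetchone()
        return row[0] if row else None

    def ran_successfully(self, identifier):
        return self.get_result(identifier) == 'success'

    def insert_success(self, identifier):
        self._upsert(identifier, 'success')

    def insert_failure(self, identifier, reason):
        self._upsert(identifier, reason)

    def delete_result(self, identifier):
        self.conn.execute(
            f'DELETE FROM {self.table} WHERE identifier = ?', (identifier,))
        self.conn.commit()

    def _upsert(self, identifier, result):
        self.conn.execute(
            f'INSERT OR REPLACE INTO {self.table} (identifier, result) '
            'VALUES (?, ?)', (identifier, result))
        self.conn.commit()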
def process_volume_scans(args):
    """
    Processes the volume scans for each day with rain present, to calculate Z bias

    :param args: (namespace) Namespace object built from arguments parsed from command line
    """
    date = args.date[0]
    print('Processing ', date)
    day_dt = dp.parse(date)
    min_date = dp.parse(SETTINGS.MIN_START_DATE)
    max_date = dp.parse(SETTINGS.MAX_END_DATE)

    if day_dt < min_date or day_dt > max_date:
        raise ValueError(
            f'Date must be in range {SETTINGS.MIN_START_DATE} - {SETTINGS.MAX_END_DATE}'
        )

    # Directory for input radar data
    inputdir = SETTINGS.VOLUME_DIR
    # Directory for ZDR / melting-layer data
    zdrdir = SETTINGS.ZDR_CALIB_DIR
    # Directory for output calibration data
    outdir = SETTINGS.Z_CALIB_DIR

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    rh = DataBaseHandler(table_name="process_vol_scans")
    identifier = f'{date}'

    # If there is a 'success' or 'no suitable rays' identifier, the date has
    # already been processed; otherwise continue to process the data
    result = rh.get_result(identifier)
    if rh.ran_successfully(identifier) or result == 'no suitable rays':
        print(f'[INFO] Already processed {date}')
    else:
        mlfile = f'{zdrdir}/{date}/hourly_ml_zdr.csv'
        if os.path.exists(mlfile):
            print("Found ML file, processing data")
            ml_zdr = pd.read_csv(mlfile, index_col=0, parse_dates=True)
            raddir = os.path.join(inputdir, date)
            if calib_functions.calibrate_day_att(raddir, outdir, date, ml_zdr):
                rh.insert_success(identifier)
                print("File successfully processed")
            else:
                rh.insert_failure(identifier, 'no suitable rays')
                print("No suitable rays")
        else:
            rh.insert_failure(identifier, 'no hourly ml file')
            print("No hourly ml file")
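# For reference, calibrate_day_att() receives ml_zdr as a pandas DataFrame read
# with index_col=0 and parse_dates=True, i.e. a time-indexed table of hourly
# melting layer heights and ZDR biases. The column names below are assumptions;
# a file of roughly this shape would parse correctly:
#
#   ,ML_height,ZDR_bias
#   2018-10-25 00:00:00,2100.0,0.15
#   2018-10-25 01:00:00,2050.0,0.17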
def process_vert_scans(args):
    """
    Processes all vertical scans for a given day to extract values of ZDR bias
    and melting layer height

    :param args: (namespace) Namespace object built from arguments parsed from command line
    """
    plot = args.make_plots[0]
    day = args.date[0]
    YYYY, MM, DD = day[:4], day[4:6], day[6:8]
    day_dt = dp.parse(day)
    min_date = dp.parse(SETTINGS.MIN_START_DATE)
    max_date = dp.parse(SETTINGS.MAX_END_DATE)

    if day_dt < min_date or day_dt > max_date:
        raise ValueError(
            f'Date must be in range {SETTINGS.MIN_START_DATE} - {SETTINGS.MAX_END_DATE}'
        )

    # Directory for input radar data
    raddir = SETTINGS.INPUT_DIR
    # Directory for weather station data
    wxdir = SETTINGS.WXDIR
    # Directory for processed data
    outdir = SETTINGS.ZDR_CALIB_DIR

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    rh = DataBaseHandler(table_name="process_vert_scans")

    # For the given day of radar data, check whether rain was observed by the
    # weather station at the site. If yes, process the vertical scans to
    # calculate a value of ZDR and an estimate of the height of the melting
    # layer, save both to file and record a success identifier.
    expected_file = f'{outdir}/{day}/day_ml_zdr.csv'
    identifier = f'{YYYY}.{MM}.{DD}'

    # If there is a 'success', 'no rain' or 'insufficient data' identifier, the
    # day has already been processed; otherwise carry on and process the data
    result = rh.get_result(identifier)
    if rh.ran_successfully(identifier) or result in ('no rain', 'insufficient data'):
        print(f'[INFO] Already processed {day}')
    else:
        # Construct the NOAA filename based on the date
        nfile = f'{wxdir}NOAA-{YYYY}-{MM}.txt'

        # Read the whitespace-delimited text file into a table to extract the
        # rain amount, indexed by day of month
        data = pd.read_table(nfile, sep=r'\s+', header=6)
        data2 = data.set_index("DAY")

        # Extract rain amount for the current day
        rain = data2.loc[DD, "RAIN"]

        # If there was less than 1 mm of rain (or no valid reading), skip the day
        if not np.isfinite(rain) or rain < 1.0:
            print('no rain')
            rh.insert_failure(identifier, 'no rain')
        # Otherwise process the day's data
        else:
            print('processing day ', day)
            if calib_functions.process_zdr_scans(outdir, raddir, day, expected_file, plot):
                rh.insert_success(identifier)
            else:
                rh.insert_failure(identifier, 'insufficient data')
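# For reference, the NOAA-YYYY-MM.txt files read above are parsed with
# pd.read_table(..., sep=r'\s+', header=6): a whitespace-delimited monthly
# summary whose seventh line holds the column headers, including DAY and RAIN
# (a daily rain total in mm, judging by the 1.0 threshold). The exact layout
# shown here is an assumption for illustration:
#
#   DAY  TEMP  RAIN  ...
#   01   12.3   0.0  ...
#   02   11.8   4.2  ...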
def loop_over_hours(args):
    """
    Processes each file for each hour passed in the command line arguments.

    :param args: (namespace) Namespace object built from attributes parsed from command line
    """
    scan_type = args.scan_type[0]
    hours = args.hours

    # Failure types are: 'bad_num' (different number of variables in raw vs nc),
    # 'failure' (RadxConvert does not complete) and 'bad_output' (no output file found)
    rh = DataBaseHandler(table_name="convert_ele_results")
    failure_count = 0
    mapped_scan_type = _map_scan_type(scan_type)

    for hour in hours:
        print(f'[INFO] Processing: {hour}')
        input_files = _get_input_files(hour, scan_type)
        year, month, day = hour[:4], hour[4:6], hour[6:8]
        date = year + month + day

        for dbz_file in input_files:
            if failure_count >= SETTINGS.EXIT_AFTER_N_FAILURES:
                raise ValueError(
                    '[WARN] Exiting after failure count reaches limit: '
                    f'{SETTINGS.EXIT_AFTER_N_FAILURES}')

            fname = os.path.basename(dbz_file)
            input_dir = os.path.dirname(dbz_file)

            # This is the file identifier used in the database
            identifier = f'{year}.{month}.{day}.{os.path.splitext(fname)[0]}'

            # If this file has already been processed successfully,
            # go to the next iteration of the loop, i.e. the next file
            if rh.ran_successfully(identifier):
                print(f'[INFO] Already ran {dbz_file} successfully')
                continue

            # Otherwise, remove any previous results for this file and process it
            rh.delete_result(identifier)

            # Get expected variables
            fname_base = fname[:16]
            time_digits = fname[8:14]
            pattern = f'{input_dir}/{fname_base}*.{scan_type}'
            expected_vars = set([
                os.path.splitext(os.path.basename(name)[16:])[0]
                for name in glob.glob(pattern)
            ])

            # Process the uncalibrated data (this is where output is generated)
            script_cmd = f"RadxConvert -v -params {SETTINGS.PARAMS_FILE} -f {dbz_file}"
            print(f'[INFO] Running: {script_cmd}')

            # If RadxConvert fails, record a 'failure' outcome in the database
            if subprocess.call(script_cmd, shell=True) != 0:
                print('[ERROR] RadxConvert call resulted in an error')
                rh.insert_failure(identifier, 'failure')
                failure_count += 1
                continue

            # Check for the expected netCDF output
            if mapped_scan_type == 'VER':
                scan_dir_name = 'vert'
            else:
                scan_dir_name = mapped_scan_type.lower()

            # TODO: this should probably be a default path that is formatted
            expected_file = f'{SETTINGS.OUTPUT_DIR}/{scan_dir_name}/{date}/' \
                f'ncas-mobile-x-band-radar-1_sandwith_{date}-{time_digits}_{mapped_scan_type}_v1.nc'

            # Read the netCDF file to find its variables.
            # If the file can't be found, record a 'bad_output' failure
            try:
                rad2 = pyart.io.read(expected_file, delay_field_loading=True)
            except FileNotFoundError:
                print(f'[ERROR] Expected file {expected_file} not found')
                rh.insert_failure(identifier, 'bad_output')
                failure_count += 1
                continue

            output_vars = set(rad2.fields.keys())
            print('[INFO] Checking that the output variables match those in the input files')

            # Check that the variables in the nc file include all the variables
            # in the input files. If not, record a 'bad_num' failure
            if not expected_vars.issubset(output_vars):
                print('[ERROR] Output variables are not the same as input files: '
                      f'{output_vars} != {expected_vars}')
                failure_count += 1
                rh.insert_failure(identifier, 'bad_num')
                continue

            print(f'[INFO] All expected variables were found: {expected_vars}')

            # If all of the above is successful, record a success identifier
            rh.insert_success(identifier)
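# _map_scan_type() and _get_input_files() are defined elsewhere in the package.
# Judging by their use above, the former maps a raw scan-type suffix to the
# scan label used in output filenames, and the latter lists the raw files for
# one hour. Hypothetical sketches, with the mapping and directory layout
# assumed purely for illustration (only 'VER' is confirmed by the code above):

def _map_scan_type_sketch(scan_type):
    # Assumed mapping of raw suffixes to output scan labels
    mapping = {'vol': 'SUR', 'ele': 'VER', 'azi': 'RHI'}
    return mapping.get(scan_type, scan_type.upper())


def _get_input_files_sketch(hour, scan_type):
    # Assumed layout: raw files for an hour live under INPUT_DIR/<YYYYMMDD>/
    date = hour[:8]
    return sorted(glob.glob(f'{SETTINGS.INPUT_DIR}/{date}/{hour}*.{scan_type}'))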
def loop_over_files(args):
    """
    Runs RadxConvert on each file passed in the command line arguments, using
    the parameter file selected by params_index, and checks that the output
    file contains the same variables as the input.

    :param args: (namespace) Namespace object built from arguments parsed from command line
    """
    params_index = args.params_index[0]
    params_file = f'{SETTINGS.PARAMS_FILE}{params_index}'
    input_files = args.files
    print("input_files= ", input_files)
    scan_type = args.scan_type[0]
    failure_count = 0

    rh = DataBaseHandler(table_name="apply_calib_rhi")

    for ncfile in input_files:
        if failure_count >= SETTINGS.EXIT_AFTER_N_FAILURES:
            raise ValueError(
                '[WARN] Exiting after failure count reaches limit: '
                f'{SETTINGS.EXIT_AFTER_N_FAILURES}')

        print("ncfile= ", ncfile)
        fname = os.path.basename(ncfile)
        ncdate = os.path.basename(ncfile).split('_')[2].replace('-', '')
        YYYY, MM, DD = ncdate[0:4], ncdate[4:6], ncdate[6:8]
        date = ncdate[0:8]

        identifier = f'{ncdate}'

        # If there is a success identifier, continue to the next file in the loop
        if rh.ran_successfully(identifier):
            print(f'[INFO] Already processed {ncdate} successfully')
            continue

        # Otherwise, remove any previous results for this file and process it
        rh.delete_result(identifier)

        # Read the input uncalibrated netCDF file and extract its list of variables
        try:
            rad1 = pyart.io.read(ncfile, delay_field_loading=True)
        except IOError:
            print(f'[ERROR] Could not open file {ncfile}')
            rh.insert_failure(identifier, 'failure')
            failure_count += 1
            continue

        input_vars = rad1.fields.keys()

        # Process the data
        script_cmd = f"RadxConvert -v -params {params_file} -f {ncfile}"
        print(f'[INFO] Running: {script_cmd}')
        if subprocess.call(script_cmd, shell=True) != 0:
            print('[ERROR] RadxConvert call resulted in an error')
            rh.insert_failure(identifier, 'failure')
            failure_count += 1
            continue

        # Look for the file generated from uncalib_v1 in calib_v1
        expected_file = f'{SETTINGS.OUTPUT_DIR}/{scan_type}/{date}/{fname}'
        print("[INFO] Checking that the output file has been produced.")

        # Read the calibrated output netCDF file and extract its list of variables
        try:
            rad2 = pyart.io.read(expected_file, delay_field_loading=True)
        except IOError:
            print(f'[ERROR] Expected file {expected_file} not found')
            rh.insert_failure(identifier, 'bad_output')
            failure_count += 1
            continue

        output_vars = rad2.fields.keys()
        print(f'[INFO] Found expected file {expected_file}')
        print('[INFO] Checking that the output variables match those in the input files')

        # Check that the variables in the calibrated nc file are identical to
        # the variables in the uncalibrated input file.
        # If not, record a 'bad_vars' failure
        keys_not_found = [key for key in input_vars if key not in output_vars]

        if keys_not_found:
            print('[ERROR] Output variables are not the same as input variables: '
                  f'{output_vars} != {input_vars}')
            failure_count += 1
            rh.insert_failure(identifier, 'bad_vars')
            continue

        print(f'[INFO] All expected variables were found: {output_vars}')
        rh.insert_success(identifier)
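# These functions all consume argparse Namespace objects whose attributes are
# lists (args.date[0], args.scan_type[0], ...), i.e. arguments declared with
# nargs=1. A minimal sketch of a compatible command line entry point for
# process_vert_scans, with the option names assumed for illustration:

import argparse


def _main_sketch():
    parser = argparse.ArgumentParser(
        description='Process vertical scans for one day')
    parser.add_argument('date', nargs=1, type=str,
                        help='Day to process, as YYYYMMDD')
    parser.add_argument('-p', '--make_plots', nargs=1, type=int, default=[0],
                        help='Make diagnostic plots (1) or not (0)')
    args = parser.parse_args()
    process_vert_scans(args)


if __name__ == '__main__':
    _main_sketch()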