def loop_over_hours(args):
    """
    Process each input file for each hour passed in the command line arguments.

    For every file returned by ``_get_input_files(hour, scan_type)`` the
    function:

    1. skips the file if the results database already holds a success for it;
    2. otherwise deletes any previous result and runs ``RadxConvert``;
    3. reads the expected netCDF output file and checks that every variable
       found among the input files is present in it;
    4. records the outcome through ``DataBaseHandler`` (table
       ``convert_ele_results``).

    Failure types written to the database:

    * ``failure``    - RadxConvert returned a non-zero exit status
    * ``bad_output`` - the expected output file was not found
    * ``bad_num``    - an expected variable is missing from the output file

    :param args: (namespace) Namespace object built from attributes parsed
        from command line. ``args.scan_type[0]`` and ``args.hours`` are read;
        each hour string is sliced as ``YYYYMMDD...``.
    :raises ValueError: when the number of failures reaches
        ``SETTINGS.EXIT_AFTER_N_FAILURES``
    """
    scan_type = args.scan_type[0]
    hours = args.hours

    # A single handler is shared by every file processed in this call.
    rh = DataBaseHandler(table_name="convert_ele_results")
    failure_count = 0
    mapped_scan_type = _map_scan_type(scan_type)

    for hour in hours:
        print(f'[INFO] Processing: {hour}')
        input_files = _get_input_files(hour, scan_type)

        year, month, day = hour[:4], hour[4:6], hour[6:8]
        date = year + month + day

        for dbz_file in input_files:
            # Checked before each file so the run stops as soon as the limit
            # has been reached by the previous files.
            if failure_count >= SETTINGS.EXIT_AFTER_N_FAILURES:
                raise ValueError(
                    '[WARN] Exiting after failure count reaches limit: '
                    f'{SETTINGS.EXIT_AFTER_N_FAILURES}')

            input_dir, fname = os.path.split(dbz_file)

            # This is the file identifier used in the database
            identifier = f'{year}.{month}.{day}.{os.path.splitext(fname)[0]}'

            # Files already processed successfully are not processed again.
            if rh.ran_successfully(identifier):
                print(f'[INFO] Already ran {dbz_file} successfully')
                continue

            # No success on record: remove previous results for this file
            # before re-processing it.
            rh.delete_result(identifier)

            # Expected variables: every file sharing the first 16 characters
            # of this file's name contributes one variable name, taken from
            # the part of its name after those 16 characters (extension
            # removed).
            fname_base = fname[:16]
            time_digits = fname[8:14]
            pattern = f'{input_dir}/{fname_base}*.{scan_type}'
            expected_vars = {
                os.path.splitext(os.path.basename(name)[16:])[0]
                for name in glob.glob(pattern)
            }

            # 'Process the uncalibrated data' (where output is generated).
            # The command is passed as an argument list (no shell) so that
            # paths containing spaces or shell metacharacters are handed to
            # RadxConvert unchanged. The string form is kept for logging only.
            script_cmd = f"RadxConvert -v -params {SETTINGS.PARAMS_FILE} -f {dbz_file}"
            print(f'[INFO] Running: {script_cmd}')
            cmd_args = [
                'RadxConvert', '-v',
                '-params', str(SETTINGS.PARAMS_FILE),
                '-f', dbz_file,
            ]

            # If RadxConvert fails, create a failure outcome in the database
            if subprocess.call(cmd_args) != 0:
                print('[ERROR] RadxConvert call resulted in an error')
                rh.insert_failure(identifier, 'failure')
                failure_count += 1
                continue

            # Check for expected netcdf output. 'VER' scans are written to a
            # 'vert' directory; other scan types use their lower-cased name.
            if mapped_scan_type == 'VER':
                scan_dir_name = 'vert'
            else:
                scan_dir_name = mapped_scan_type.lower()

            # This should probably be a default path that is formatted
            expected_file = (
                f'{SETTINGS.OUTPUT_DIR}/{scan_dir_name}/{date}/'
                f'ncas-mobile-x-band-radar-1_sandwith_{date}-{time_digits}_{mapped_scan_type}_v1.nc'
            )

            # Read netcdf file to find variables.
            # If the file can't be found, create a bad_output failure.
            try:
                rad2 = pyart.io.read(expected_file, delay_field_loading=True)
            except FileNotFoundError:
                print(f'[ERROR] Expected file {expected_file} not found')
                rh.insert_failure(identifier, 'bad_output')
                failure_count += 1
                continue
            else:
                output_vars = set(rad2.fields.keys())

            print(
                '[INFO] Checking that the output variables match those in the input files'
            )

            # Every variable seen in the input files must be present in the
            # nc file (extra output variables are tolerated).
            # If not, create a failure identifier called bad_num.
            if not expected_vars.issubset(output_vars):
                print(
                    '[ERROR] Output variables are not the same as input files: '
                    f'{output_vars} != {expected_vars}')
                failure_count += 1
                rh.insert_failure(identifier, 'bad_num')
                continue

            print(
                f'[INFO] All expected variable were found: {expected_vars}'
            )

            # If all of the above is successful, create a success identifier
            rh.insert_success(identifier)
def loop_over_files(args):
    """
    Run RadxConvert on each netCDF file given on the command line and verify
    the output.

    For every file in ``args.files`` the function:

    1. skips the file if the results database already holds a success for it;
    2. otherwise deletes any previous result, reads the input file to list
       its variables and runs ``RadxConvert`` with the parameter file
       ``SETTINGS.PARAMS_FILE`` + ``args.params_index[0]``;
    3. reads the expected output file and checks that every input variable
       is present in it;
    4. records the outcome through ``DataBaseHandler`` (table
       ``apply_calib_rhi``).

    Failure types written to the database:

    * ``failure``    - the input file could not be opened, or RadxConvert
      returned a non-zero exit status
    * ``bad_output`` - the expected output file could not be opened
    * ``bad_vars``   - an input variable is missing from the output file

    :param args: (namespace) Namespace object built from attributes parsed
        from command line. ``args.params_index[0]``, ``args.files`` and
        ``args.scan_type[0]`` are read. The third underscore-separated field
        of each file name is used (with '-' removed) as the database
        identifier, and its first 8 characters as the date directory.
    :raises ValueError: when the number of failures reaches
        ``SETTINGS.EXIT_AFTER_N_FAILURES``
    """
    params_index = args.params_index[0]
    params_file = f'{SETTINGS.PARAMS_FILE}{params_index}'
    input_files = args.files
    print("input_files= ", input_files)
    scan_type = args.scan_type[0]

    # A single handler is shared by every file processed in this call.
    rh = DataBaseHandler(table_name="apply_calib_rhi")
    failure_count = 0

    for ncfile in input_files:
        # Checked before each file so the run stops as soon as the limit has
        # been reached by the previous files.
        if failure_count >= SETTINGS.EXIT_AFTER_N_FAILURES:
            raise ValueError(
                '[WARN] Exiting after failure count reaches limit: '
                f'{SETTINGS.EXIT_AFTER_N_FAILURES}')

        print("ncfile= ", ncfile)

        fname = os.path.basename(ncfile)
        # Third '_'-separated field of the file name with the '-' removed;
        # its first 8 characters name the date directory of the output.
        ncdate = fname.split('_')[2].replace('-', '')
        date = ncdate[0:8]

        identifier = f'{ncdate}'

        # If there is a success identifier, continue to next file in the loop
        if rh.ran_successfully(identifier):
            print(f'[INFO] Already processed {ncdate} successfully')
            continue

        # No success on record: remove previous results for this file before
        # re-processing it.
        rh.delete_result(identifier)

        # Read input uncalibrated netcdf file and extract list of variables.
        # An unreadable input counts as a failure and the file is skipped;
        # without the skip the variable comparison below would run against
        # missing or stale input variables.
        try:
            rad1 = pyart.io.read(ncfile, delay_field_loading=True)
        except IOError:
            print(f'[ERROR] Could not open file, {ncfile}')
            rh.insert_failure(identifier, 'failure')
            failure_count += 1
            continue
        else:
            input_vars = rad1.fields.keys()

        # Process the data. The command is passed as an argument list (no
        # shell) so that paths containing spaces or shell metacharacters are
        # handed to RadxConvert unchanged. The string form is kept for
        # logging only.
        script_cmd = f"RadxConvert -v -params {params_file} -f {ncfile}"
        print(f'[INFO] Running: {script_cmd}')
        cmd_args = ['RadxConvert', '-v', '-params', params_file, '-f', ncfile]

        if subprocess.call(cmd_args) != 0:
            print('[ERROR] RadxConvert call resulted in an error')
            rh.insert_failure(identifier, 'failure')
            failure_count += 1
            continue

        # The output is expected under OUTPUT_DIR with the same file name as
        # the input (original note: looks for the file generated from
        # uncalib_v1 in calib_v1).
        expected_file = f'{SETTINGS.OUTPUT_DIR}/{scan_type}/{date}/{fname}'

        print("[INFO] Checking that the output file has been produced.")

        # Read the output netcdf file and extract its list of variables
        try:
            rad2 = pyart.io.read(expected_file, delay_field_loading=True)
        except IOError:
            print(f'[ERROR] Expected file {expected_file} not found')
            rh.insert_failure(identifier, 'bad_output')
            failure_count += 1
            continue
        else:
            output_vars = rad2.fields.keys()

        print(f'[INFO] Found expected file {expected_file}')
        print(
            '[INFO] Checking that the output variables match those in the input files'
        )

        # Every variable of the uncalibrated input file must be present in
        # the calibrated nc file (extra output variables are tolerated).
        # If not, create a failure identifier called bad_vars.
        keys_not_found = [key for key in input_vars if key not in output_vars]

        if keys_not_found:
            print(
                '[ERROR] Output variables are not the same as input variables: '
                f'{output_vars} != {input_vars}')
            failure_count += 1
            rh.insert_failure(identifier, 'bad_vars')
            continue

        print(f'[INFO] All expected variable were found: {output_vars}')
        rh.insert_success(identifier)