Example #1
def make_hourly_files():

    rh = DataBaseHandler(table_name="process_hourly_zdr")

    #list only date directories
    inputdir = SETTINGS.ZDR_CALIB_DIR
    outdir = SETTINGS.ZDR_CALIB_DIR

    pattern = re.compile(r'(\d{8})')
    proc_dates = [x for x in os.listdir(inputdir) if pattern.match(x)]
    proc_dates.sort()

    #For each date where the vertical scans have already been processed,
    #we now want to calculate hourly values of melting layer height and ZDR

    for date in proc_dates:
        print(date)
        identifier = date
        result = rh.get_result(identifier)
        # If there is neither a 'success' nor a 'not enough rain' result, the date
        # hasn't been processed yet, so carry on and process the data
        if rh.ran_successfully(identifier) or result == 'not enough rain':
            print(f'[INFO] Already processed {date}')
        else:
            if calib_functions.calc_hourly_ML(outdir, date):
                rh.insert_success(identifier)
            else:
                rh.insert_failure(identifier, 'not enough rain')
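
Every example here tracks its progress through a DataBaseHandler, whose implementation is not shown. Below is a minimal sqlite3-backed sketch of the interface the examples rely on (get_result, ran_successfully, insert_success, insert_failure, delete_result); the schema, database path and 'success' sentinel are assumptions for illustration, not the project's actual code.

import sqlite3

class DataBaseHandler:
    """Hypothetical sketch of the results-tracking interface used in these examples."""

    def __init__(self, table_name, db_path='results.db'):
        # table_name comes from trusted code, so f-string SQL is acceptable here
        self.table = table_name
        self.conn = sqlite3.connect(db_path)
        self.conn.execute(
            f'CREATE TABLE IF NOT EXISTS {table_name} '
            '(identifier TEXT PRIMARY KEY, result TEXT)')

    def get_result(self, identifier):
        # Return the stored result string, or None if the identifier is absent
        row = self.conn.execute(
            f'SELECT result FROM {self.table} WHERE identifier = ?',
            (identifier,)).fetchone()
        return row[0] if row else None

    def ran_successfully(self, identifier):
        return self.get_result(identifier) == 'success'

    def insert_success(self, identifier):
        self._upsert(identifier, 'success')

    def insert_failure(self, identifier, reason):
        self._upsert(identifier, reason)

    def delete_result(self, identifier):
        self.conn.execute(
            f'DELETE FROM {self.table} WHERE identifier = ?', (identifier,))
        self.conn.commit()

    def _upsert(self, identifier, result):
        self.conn.execute(
            f'INSERT OR REPLACE INTO {self.table} (identifier, result) VALUES (?, ?)',
            (identifier, result))
        self.conn.commit()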
Example #2
def process_volume_scans(args):
    """ 
    Processes the volume scans for each day with rain present, to calculate Z bias
    
    :param args: (namespace) Namespace object built from arguments parsed from command line
    """

    date = args.date[0]
    print(f'Processing {date}')
    day_dt = dp.parse(date)
    min_date = dp.parse(SETTINGS.MIN_START_DATE)
    max_date = dp.parse(SETTINGS.MAX_END_DATE)

    if day_dt < min_date or day_dt > max_date:
        raise ValueError(
            f'Date must be in range {SETTINGS.MIN_START_DATE} - {SETTINGS.MAX_END_DATE}'
        )

    #Directory for input radar data
    inputdir = SETTINGS.VOLUME_DIR

    #Directory for zdr_ml data
    zdrdir = SETTINGS.ZDR_CALIB_DIR

    #Directory for output calibration data
    outdir = SETTINGS.Z_CALIB_DIR
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    rh = DataBaseHandler(table_name="process_vol_scans")

    identifier = date

    # Skip dates already marked as a success or as having no suitable rays;
    # otherwise carry on and process the data
    result = rh.get_result(identifier)
    if rh.ran_successfully(identifier) or result == 'no suitable rays':
        print(f'[INFO] Already processed {date}')

    else:
        mlfile = f'{zdrdir}/{date}/hourly_ml_zdr.csv'
        if os.path.exists(mlfile):
            print("found ml file, processing data")
            ml_zdr = pd.read_csv(mlfile, index_col=0, parse_dates=True)
            raddir = os.path.join(inputdir, date)
            if calib_functions.calibrate_day_att(raddir, outdir, date, ml_zdr):
                rh.insert_success(identifier)
                print("File successfully processed")
            else:
                rh.insert_failure(identifier, 'no suitable rays')
                print("No suitable rays")
        else:
            rh.insert_failure(identifier, 'no hourly ml file')
            print("No hourly ml file")
Example #3
def process_vert_scans(args):
    """ 
    Processes all vertical scans for given day to extract values of ZDR bias and melting layer height
    
    :param args: (namespace) Namespace object built from arguments parsed from command line
    """

    plot = args.make_plots[0]
    day = args.date[0]
    YYYY, MM, DD = day[:4], day[4:6], day[6:8]
    day_dt = dp.parse(day)
    min_date = dp.parse(SETTINGS.MIN_START_DATE)
    max_date = dp.parse(SETTINGS.MAX_END_DATE)

    if day_dt < min_date or day_dt > max_date:
        raise ValueError(f'Date must be in range {SETTINGS.MIN_START_DATE} - {SETTINGS.MAX_END_DATE}')

    #Directory for input radar data
    raddir = SETTINGS.INPUT_DIR
    
    #Directory for weather station data
    wxdir = SETTINGS.WXDIR
    
    #Directory for processed data
    outdir = SETTINGS.ZDR_CALIB_DIR
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    
    rh = DataBaseHandler(table_name="process_vert_scans")
    
    #For given day of radar data, look to see if rain was observed by the weather station at the site. 
    #If yes, then process the vertical scans to calculate a value of ZDR and an estimate of the height of the melting layer. 
    #Save melting layer height and zdr values to file. Save a success file. 
    
    expected_file = f'{outdir}/{day}/day_ml_zdr.csv'
   
    identifier = f'{YYYY}.{MM}.{DD}'
 
    # If the date has already been processed successfully, or was marked as
    # 'no rain' or 'insufficient data', skip it; otherwise carry on and process it

    result = rh.get_result(identifier)
    if rh.ran_successfully(identifier) or result in ('no rain', 'insufficient data'):
        print(f'[INFO] Already processed {day}')
     
    else:
        #Construct the NOAA filename based on the date
        nfile = os.path.join(wxdir, f'NOAA-{YYYY}-{MM}.txt')
            
        #Use pandas read_table to read the text file into a table to extract the rain amount
        data = pd.read_table(nfile, sep=r'\s+', header=6)
        #Set the index column
        data2 = data.set_index("DAY")
        #Extract rain amount for the current day
        rain = data2.loc[DD,"RAIN"]
            
        # If there was less than 1 mm of rain, or no valid reading, record a
        # failure and move on to the next day
        if rain < 1.0 or not np.isfinite(rain):
            print('no rain')
            rh.insert_failure(identifier, 'no rain')
        # Otherwise process the day's data
        else:
            print(f'processing day {day}')
            if calib_functions.process_zdr_scans(outdir, raddir, day, expected_file, plot):
                rh.insert_success(identifier)
            else:
                rh.insert_failure(identifier, 'insufficient data')
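
The weather-station lookup in process_vert_scans depends on pandas parsing a fixed text layout. A self-contained illustration, assuming a whitespace-delimited file with six preamble lines followed by a header row containing DAY and RAIN columns (the real NOAA files may differ):

import io
import pandas as pd

# Hypothetical stand-in for a NOAA monthly summary file
sample = """\
hypothetical preamble line 1
line 2
line 3
line 4
line 5
line 6
DAY RAIN
1 0.0
2 3.2
"""
data = pd.read_table(io.StringIO(sample), sep=r'\s+', header=6)
data2 = data.set_index('DAY')
print(data2.loc[2, 'RAIN'])  # 3.2
# Note: DAY parses as an integer here; indexing with the zero-padded string DD,
# as process_vert_scans does, assumes the real files keep DAY non-numeric.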
Example #4
def loop_over_hours(args):
    """
    Processes each file for each hour passed in the command line arguments.

    :param args: (namespace) Namespace object built from attributes parsed
    from command line
    """

    scan_type = args.scan_type[0]
    hours = args.hours

    # Error types are: bad_num (different number of variables in raw vs nc),
    # failure (RadxConvert doesn't complete) and bad_output (no output file found)
    rh = DataBaseHandler(table_name="convert_ele_results")

    failure_count = 0
    mapped_scan_type = _map_scan_type(scan_type)

    for hour in hours:

        print(f'[INFO] Processing: {hour}')

        input_files = _get_input_files(hour, scan_type)

        year, month, day = hour[:4], hour[4:6], hour[6:8]
        date = year + month + day

        for dbz_file in input_files:

            if failure_count >= SETTINGS.EXIT_AFTER_N_FAILURES:
                raise ValueError(
                    '[WARN] Exiting after failure count reaches limit: '
                    f'{SETTINGS.EXIT_AFTER_N_FAILURES}')

            fname = os.path.basename(dbz_file)
            input_dir = os.path.dirname(dbz_file)

            #This is the file identifier used in the database
            identifier = f'{year}.{month}.{day}.{os.path.splitext(fname)[0]}'

            # Check if this file has already been processed successfully
            #If yes, then go to the next iteration of the loop, i.e. next file
            if rh.ran_successfully(identifier):
                print(f'[INFO] Already ran {dbz_file} successfully')
                continue

            #If there is no success identifier then continue processing the file
            # Remove previous results for this file
            rh.delete_result(identifier)

            # Get expected variables
            fname_base = fname[:16]
            time_digits = fname[8:14]

            pattern = f'{input_dir}/{fname_base}*.{scan_type}'
            expected_vars = {
                os.path.splitext(os.path.basename(name)[16:])[0]
                for name in glob.glob(pattern)
            }

            # Process the uncalibrated data with RadxConvert (this generates the output)
            script_cmd = f"RadxConvert -v -params {SETTINGS.PARAMS_FILE} -f {dbz_file}"
            print(f'[INFO] Running: {script_cmd}')
            #If RadxConvert fails, create a failure outcome in the database
            if subprocess.call(script_cmd, shell=True) != 0:
                print('[ERROR] RadxConvert call resulted in an error')
                rh.insert_failure(identifier, 'failure')
                failure_count += 1
                continue

            # Check for expected netcdf output
            # Map the scan type onto its output directory name
            scan_dir_name = 'vert' if mapped_scan_type == 'VER' else mapped_scan_type.lower()

            # Ideally this path template would come from a configurable default
            expected_file = f'{SETTINGS.OUTPUT_DIR}/{scan_dir_name}/{date}/' \
                            f'ncas-mobile-x-band-radar-1_sandwith_{date}-{time_digits}_{mapped_scan_type}_v1.nc'

            # Read the output netcdf file to find its variables
            # If the file can't be found, record a 'bad_output' failure
            try:
                rad2 = pyart.io.read(expected_file, delay_field_loading=True)
            except FileNotFoundError:
                print(f'[ERROR] Expected file {expected_file} not found')
                rh.insert_failure(identifier, 'bad_output')
                failure_count += 1
                continue
            else:
                output_vars = set(rad2.fields.keys())

            print(
                '[INFO] Checking that the output variables match those in the input files'
            )

            # Check that the output nc file contains every variable found in the
            # input files; if not, record a 'bad_num' failure
            if not expected_vars.issubset(output_vars):
                print(
                    '[ERROR] Output variables are not the same as input variables: '
                    f'{output_vars} != {expected_vars}')
                failure_count += 1
                rh.insert_failure(identifier, 'bad_num')
                continue
            else:
                print(
                    f'[INFO] All expected variables were found: {expected_vars}'
                )

            # If all of the above is successful, create a success identifier
            rh.insert_success(identifier)
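
The slicing in loop_over_hours implies a raw-file naming convention: an 8-digit date and a 6-digit time, padded to a 16-character base shared by every variable of one scan, followed by the variable name and the scan-type extension. A worked example with a made-up filename:

import os

dbz_file = '/raw/20180101/20180101123000_1dBZ.ele'  # hypothetical name
fname = os.path.basename(dbz_file)
fname_base = fname[:16]                     # '20180101123000_1' (date + time prefix)
time_digits = fname[8:14]                   # '123000' (HHMMSS)
variable = os.path.splitext(fname[16:])[0]  # 'dBZ'
# glob.glob(f'{input_dir}/{fname_base}*.ele') then yields one file per variable,
# which is how expected_vars is assembled above.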
def loop_over_files(args):
    """ 
    Applies calibration to each netcdf file passed in the command line arguments
    by running RadxConvert, then checks the calibrated output.

    :param args: (namespace) Namespace object built from arguments parsed from command line
    """

    params_index = args.params_index[0]
    params_file = f'{SETTINGS.PARAMS_FILE}{params_index}'
    input_files = args.files
    print("input_files= ", input_files)
    scan_type = args.scan_type[0]

    failure_count = 0
    # Create the results handler once rather than on every loop iteration
    rh = DataBaseHandler(table_name="apply_calib_rhi")

    for ncfile in input_files:

        if failure_count >= SETTINGS.EXIT_AFTER_N_FAILURES:
            raise ValueError(
                '[WARN] Exiting after failure count reaches limit: '
                f'{SETTINGS.EXIT_AFTER_N_FAILURES}')

        print("ncfile= ", ncfile)
        fname = os.path.basename(ncfile)
        ncdate = fname.split('_')[2].replace('-', '')

        date = ncdate[0:8]

        identifier = ncdate

        # If there is a success identifier, skip to the next file in the loop
        if rh.ran_successfully(identifier):
            print(f'[INFO] Already processed {ncdate} successfully')
            continue
        #If there is no success identifier then continue processing the file
        # Remove previous results for this file
        rh.delete_result(identifier)

        # Read the input uncalibrated netcdf file and extract its list of variables
        try:
            rad1 = pyart.io.read(ncfile, delay_field_loading=True)
        except IOError:
            print(f'[ERROR] Could not open file {ncfile}')
            rh.insert_failure(identifier, 'failure')
            failure_count += 1
            continue
        else:
            input_vars = rad1.fields.keys()

        # Process the data
        script_cmd = f"RadxConvert -v -params {params_file} -f {ncfile}"
        print(f'[INFO] Running: {script_cmd}')
        if subprocess.call(script_cmd, shell=True) != 0:
            print('[ERROR] RadxConvert call resulted in an error')
            rh.insert_failure(identifier, 'failure')
            failure_count += 1
            continue

        # Look in calib_v1 for the file generated from uncalib_v1
        expected_file = f'{SETTINGS.OUTPUT_DIR}/{scan_type}/{date}/{fname}'

        print("[INFO] Checking that the output file has been produced.")
        # Read the calibrated output netcdf file and extract its list of variables
        try:
            rad2 = pyart.io.read(expected_file, delay_field_loading=True)
        except IOError:
            print(f'[ERROR] Expected file {expected_file} not found')
            rh.insert_failure(identifier, 'bad_output')
            failure_count += 1
            continue
        else:
            output_vars = rad2.fields.keys()

        print(f'[INFO] Found expected file {expected_file}')

        print(
            '[INFO] Checking that the output variables match those in the input files'
        )
        # Check that every variable in the uncalibrated input file is present in
        # the calibrated output file; if not, record a 'bad_vars' failure
        keys_not_found = [key for key in input_vars if key not in output_vars]

        if keys_not_found:
            print(
                '[ERROR] Output variables are not the same as input variables: '
                f'{output_vars} != {input_vars}')
            failure_count += 1
            rh.insert_failure(identifier, 'bad_vars')
            continue
        else:
            print(f'[INFO] All expected variables were found: {output_vars}')

        rh.insert_success(identifier)
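
For completeness, a hypothetical command-line wiring for loop_over_files; the real scripts' parsers are not shown here, but nargs=1 matches the args.params_index[0] and args.scan_type[0] indexing used above:

import argparse

def main():
    # Assumed argument layout, inferred from how loop_over_files reads args
    parser = argparse.ArgumentParser(description='Apply calibration via RadxConvert')
    parser.add_argument('--params_index', nargs=1, required=True,
                        help='Suffix appended to SETTINGS.PARAMS_FILE')
    parser.add_argument('--scan_type', nargs=1, required=True,
                        help='Scan type, e.g. rhi')
    parser.add_argument('files', nargs='+',
                        help='Uncalibrated netcdf files to process')
    loop_over_files(parser.parse_args())

if __name__ == '__main__':
    main()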