def gdb(gdb_file, select_table='all', input_fields=None):
    """ Reads the contents of a gdb table into a Pandas dataframe """
    from osgeo import ogr  # bare 'import ogr' is deprecated in GDAL 3.x
    import numpy as np
    import pandas as pd

    # Initialize file
    driver = ogr.GetDriverByName("OpenFileGDB")
    gdb_obj = driver.Open(gdb_file)

    # Parse layers by index, mapping layer name to index
    tables = {gdb_obj.GetLayerByIndex(i).GetName(): i for i in range(gdb_obj.GetLayerCount())}
    table_names = sorted(tables.keys()) if select_table == 'all' else [select_table]

    def read_table(table_name):
        table = gdb_obj.GetLayer(tables[table_name])
        table_def = table.GetLayerDefn()
        table_fields = [table_def.GetFieldDefn(i).GetName() for i in range(table_def.GetFieldCount())]
        # Use all fields unless a subset was requested; warn about missing fields
        if input_fields is None:
            use_fields = table_fields
        else:
            missing_fields = set(input_fields) - set(table_fields)
            if missing_fields:
                report("Fields {} not found in table {}".format(", ".join(missing_fields), table_name))
            use_fields = [field for field in input_fields if field not in missing_fields]
        data = np.array([[row.GetField(f) for f in use_fields] for row in table])
        return pd.DataFrame(data=data, columns=use_fields)

    # Mixing 'return' and 'yield' in one function would make every call return
    # a generator; return a single frame or a generator of (name, frame) pairs
    if select_table != 'all':
        return read_table(select_table)
    return ((name, read_table(name)) for name in table_names)

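# Usage sketch for gdb() above. The geodatabase path, table name, and field
# names below are hypothetical placeholders, not files shipped with this repo.
def _example_read_gdb():
    # Single-table mode returns one dataframe
    soils = gdb(r"C:\data\soils.gdb", select_table='MUAGGATT',
                input_fields=['mukey', 'hydgrp'])
    print(soils.head())
    # 'all' mode returns a generator of (table_name, dataframe) pairs
    for table_name, df in gdb(r"C:\data\soils.gdb"):
        print(table_name, df.shape)
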
def parse(self, field, data):
    try:
        return FieldParser.parse(self, field, data)
    except ValueError as e:
        report(e)  # log and skip the field; re-raise here to debug
        return None

def main():
    fields.expand('horizon', max_horizons)
    region_filter = None
    class_filter = [200, 210, 211]
    for i, region, class_num, class_name, scenarios in get_scenarios(region_filter, class_filter):
        report(f"Working on Region {region} {class_name}...")
        selection = report_region(scenarios, region, class_name, class_num)
        write.selected_scenarios(selection, i == 0)

def main():
    years = range(2015, 2020)  # range(2010, 2016)
    overwrite_raster = True
    overwrite_combos = False
    regions = ['07'] + list(nhd_regions)
    for region in regions:
        nhd_raster = nhd_raster_path.format(vpus_nhd[region], region)
        arcpy.env.snapRaster = nhd_raster
        arcpy.env.mask = nhd_raster
        for year in years:
            print(region, year)
            cdl_raster = cdl_path.format(year)
            combined_raster = combined_raster_path.format(region, year)
            combinations_table = combo_path.format(region, year)
            # Overlay the NHD catchment and CDL rasters, unless already done
            if overwrite_raster or not os.path.exists(combined_raster):
                report("Performing raster overlay for Region {}, {}...".format(region, year))
                overlay_rasters(combined_raster, cdl_raster, nhd_raster)
                report(f"Combined raster saved to {combined_raster}")
            # Tabulate unique combinations from the overlaid raster
            if overwrite_combos or not os.path.exists(combinations_table):
                report("Building combinations table for Region {}, {}...".format(region, year))
                combos = generate_combos(combined_raster, year)
                combos.to_csv(combinations_table, index=False)

def pwc_outfile(class_num=None, class_name=None, path=None):
    """
    Read a PWC output file (BatchOutputVVWM.txt) into a dataframe
    :param class_num: Numerical class ID (str, int)
    :param class_name: Descriptive class name (str)
    :param path: Override the default input path (optional, str)
    :return: Dataframe of PWC output (df)
    """
    pwc_header = ['line_num', 'run_id'] + pwc_durations
    tables = []
    if path is None:
        path = pwc_outfile_path
    # Look for all chunked outputs (so far the number of chunks has never exceeded 10)
    for i in range(10):
        for koc in kocs:
            p = os.path.join(path.format(class_num, class_name, i, koc), 'BatchOutputVVWM.txt')
            try:
                new_table = pd.read_csv(p, names=pwc_header, delimiter=r'\s+')
                tables.append(new_table)
                report(f"Read file {p}")
            except FileNotFoundError:
                break
    # Guard against an empty concat (mirrors the check in pwc_infile)
    if not tables:
        print(f"No outfiles found for {class_name}")
        return None
    table = pd.concat(tables, axis=0)

    # Adjust line number so that header is not included
    table['line_num'] = table.line_num.astype(np.int32) - 1

    # Split the Batch Run ID field into constituent parts
    data = table.pop('run_id').str.split('_', expand=True)
    data.columns = ['bunk', 'koc', 'scenario_id', 'rep']
    data['koc'] = data.koc.str.slice(3).astype(np.int32)
    table = pd.concat([data, table], axis=1)

    # Reshape from wide to long: one row per (run, duration)
    table = table.melt(id_vars=[f for f in table.columns if f not in pwc_durations],
                       value_vars=pwc_durations, var_name='duration', value_name='conc')
    return table

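# Minimal sketch of the run_id parsing and wide-to-long melt performed in
# pwc_outfile(), using synthetic data. Assumes run IDs follow the pattern
# 'prefix_koc<N>_<scenario>_<rep>'; the duration labels and values below are
# made-up stand-ins for pwc_durations and real concentrations.
def _example_parse_run_ids():
    import numpy as np
    import pandas as pd
    durations = ['1', '4', '21']  # stand-in for pwc_durations
    table = pd.DataFrame({'run_id': ['batch_koc100_scenA_1', 'batch_koc1000_scenB_1'],
                          '1': [0.1, 0.2], '4': [0.3, 0.4], '21': [0.5, 0.6]})
    parts = table.pop('run_id').str.split('_', expand=True)
    parts.columns = ['bunk', 'koc', 'scenario_id', 'rep']
    parts['koc'] = parts.koc.str.slice(3).astype(np.int32)  # 'koc100' -> 100
    table = pd.concat([parts, table], axis=1)
    long_table = table.melt(id_vars=['bunk', 'koc', 'scenario_id', 'rep'],
                            value_vars=durations, var_name='duration', value_name='conc')
    print(long_table)
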
def pwc_infile(class_num, class_name, path=None, fixed_base=None):
    """
    Read the tabular scenarios that were used to parameterize the PWC run for a given crop
    :param class_num: Numerical class ID (str, int)
    :param class_name: Descriptive class name (str)
    :param path: Override the default input path (optional, str)
    :param fixed_base: Override the default table name (optional, str)
    :return: Pandas dataframe of the PWC input scenarios
    """
    if path is None:
        path = pwc_outfile_path  # "{}_Corn_all_{}_koc{}"
    tables = []
    # Look for all chunked tables (so far the number of chunks has never exceeded 10)
    for i in range(10):
        try:
            # Read the input table from the koc10 folder. It should be identical in all folders
            p = path.format(class_num, class_name, i, 10)
            if fixed_base is None:
                base = os.path.basename(p).replace("_koc10", ".csv")
            else:
                base = fixed_base
            new_table = pd.read_csv(os.path.join(p, base), dtype={'area': np.int64})
            tables.append(new_table)
            report(f"Read file {p}")
        except FileNotFoundError:
            break
    if tables:
        # Join together all the chunks
        table = pd.concat(tables, axis=0)
        table['region'] = table.region.astype('str').str.zfill(2)
        return table
    else:
        print(f"No infiles found for {class_name}")
        return None

def chunk_combinations(combos):
    """
    Break the master combinations table into smaller chunks to avoid memory overflow.
    :param combos: Master scenarios table (df)
    """
    from parameters import chunk_size
    n_combinations = combos.shape[0]
    # Ceiling division: count the partial chunk at the end, if any
    n_chunks = (n_combinations + chunk_size - 1) // chunk_size
    if n_combinations > chunk_size:
        report(f"Breaking {n_combinations} combinations into {n_chunks} chunks", 1)
        for i, start_row in enumerate(range(0, n_combinations, chunk_size)):
            end_row = min(start_row + chunk_size, n_combinations)
            report(f"Processing chunk {i + 1}...", 2)
            chunk = combos.iloc[start_row:end_row]
            yield i + 1, chunk
    else:
        report("Processing all combinations...", 2)
        yield 1, combos

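# Toy demonstration of the chunking logic in chunk_combinations(). The real
# chunk_size comes from parameters.py; 4 here is an arbitrary stand-in.
def _example_chunking():
    import pandas as pd
    combos = pd.DataFrame({'combo_id': range(10)})
    chunk_size = 4
    for i, start_row in enumerate(range(0, combos.shape[0], chunk_size)):
        chunk = combos.iloc[start_row:start_row + chunk_size]
        print(f"chunk {i + 1}: {chunk.shape[0]} rows")  # prints 4, 4, 2
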
def scenarios_and_recipes(regions, years, mode, class_filter=None):
    """
    Main program routine. Creates scenario and recipe (if applicable) files
    for specified NHD Plus Hydroregions and years. Years and regions provided
    must have corresponding input data. Specify paths to input data in paths.py
    Mode may be either 'sam' or 'pwc'. In 'sam' mode, recipes are created and
    aggregations are performed. In 'pwc' mode, crop-specific scenario tables
    are written instead.
    :param regions: NHD Plus Hydroregions to process (list of strings)
    :param years: Years to process (list of integers)
    :param mode: 'sam' or 'pwc'
    :param class_filter: Only process the specified crop classes (optional, list)
    """
    report("Reading input files...")

    # Read and modify data indexed to weather grid
    report("Reading weather-indexed data...")
    met_params = read.met()
    met_params = modify.met(met_params)

    # Read crop related params. This uses several read functions since the data are differently indexed
    report("Reading crop data...")
    crop_params = read.crop()
    crop_dates = read.crop_dates()
    irrigation = read.irrigation()

    # Read and modify data indexed to soil
    report("Reading soils data...")
    soil_params = read.soil()
    soil_params, aggregation_key = modify.soils(soil_params, mode)

    # Create a filter if only processing certain crops
    if class_filter is not None:
        class_filter = pd.DataFrame({pwc_selection_field: class_filter})

    # Soils, watersheds and combinations are broken up by NHD region
    for region in regions:
        report("Processing Region {}...".format(region))

        # Read curve numbers
        curve_numbers = read.curve_numbers(region)

        # Read and modify met/crop/land cover/soil/watershed combinations
        report("Reading combinations...")
        combinations = read.combinations(region, years)
        report("Processing combinations...")
        combinations = modify.combinations(combinations, crop_params, mode, aggregation_key)

        # Generate watershed 'recipes' for SAM and aggregate combinations after recipe fields removed
        if mode == 'sam':
            report("Creating watershed recipes and aggregating combinations...", 1)
            watershed_params = pd.read_csv(condensed_nhd_path.format(region))[['gridcode', 'comid']]
            recipes, recipe_map, combinations = create_recipes(combinations, watershed_params)
            write.recipes(region, recipes, recipe_map)

        # Create and modify scenarios, and write to file
        report("Creating scenarios...", 1)
        if mode == 'sam':
            # Because SAM datasets do not exclude any scenarios, break into pieces to avoid memory overload
            for chunk_num, chunk in chunk_combinations(combinations):
                scenarios = create_scenarios(chunk, soil_params, met_params, crop_params,
                                             crop_dates, irrigation, curve_numbers)
                # Retain only the desired crops, if a filter is specified
                if class_filter is not None:
                    scenarios = scenarios.merge(class_filter, on=pwc_selection_field, how='inner')
                    if scenarios.empty:
                        continue
                scenarios = modify.scenarios(scenarios, mode, region, write_qc=False)
                report("Writing to file...", 2)
                write.scenarios(scenarios, mode, region, name=chunk_num)
        elif mode == 'pwc':
            scenarios = create_scenarios(combinations, soil_params, met_params, crop_params,
                                         crop_dates, irrigation, curve_numbers)
            scenarios = modify.scenarios(scenarios, mode, region)
            # For PWC, apply sampling and write crop-specific tables
            for crop_num, crop_name, crop_scenarios in select_pwc_scenarios(scenarios, crop_params):
                if crop_num in (200, 210, 211):
                    report("Writing table for Region {} {}...".format(region, crop_name), 2)
                    write.scenarios(crop_scenarios, mode, region, name=crop_name, num=crop_num)

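# Example invocation (a sketch): build SAM scenarios and recipes for one
# region and year, then PWC scenario tables restricted to a few crop classes.
# The region/year values are illustrative; input data must exist under paths.py.
def _example_build_scenarios():
    scenarios_and_recipes(['07'], [2015], 'sam')
    scenarios_and_recipes(['07'], [2015], 'pwc', class_filter=[200, 210, 211])
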
def check_raster_RAT(raster):
    # Build a raster attribute table if the raster doesn't already have one
    if not raster.hasRAT:
        report("Building RAT for {}".format(raster.catalogPath), 1)
        arcpy.BuildRasterAttributeTable_management(raster)

def scenarios(in_scenarios, mode, region, write_qc=True):
    """
    Modify a table of field scenario parameters. This is primarily for computing
    parameters that are linked to multiple indices (e.g., land cover and soil).
    The major functions here include the assignment of runoff curve numbers,
    setting root and evaporation depth, and performing QAQC. QAQC parameters
    are specified in fields_and_qc.csv.
    :param in_scenarios: Input scenarios table (df)
    :param mode: 'sam' or 'pwc'
    :param region: NHD Plus region (str)
    :param write_qc: Write the results of the QAQC to file (bool)
    :return: Modified scenarios table (df)
    """
    from parameters import anetd

    # Assigns 'cover' and 'fallow' curve numbers for each scenario based on hydrologic soil group
    in_scenarios['cn_cov'] = in_scenarios['cn_fal'] = -1.

    # Do non-cultivated crops (cultivated == 0), then cultivated crops (cultivated == 1)
    for cultivated, col in enumerate(('non-cultivated', 'cultivated')):
        # Convert from HSG number (hydro_group) to letter
        # For drained soils, fallow is set to D condition
        for hsg_num, hsg_letter in enumerate(hydro_soil_group[col]):
            sel = (in_scenarios.hydro_group == hsg_num + 1) & (in_scenarios.cultivated == cultivated)
            in_scenarios.loc[sel, 'cn_cov'] = in_scenarios.loc[sel, f'cn_cov_{hsg_letter}']
            in_scenarios.loc[sel, 'cn_fal'] = in_scenarios.loc[sel, f'cn_fal_{hsg_letter}']

    # Calculate max irrigation rate by the USDA curve number method
    in_scenarios['max_irrigation'] = 0.2 * ((2540. / in_scenarios.cn_cov) - 25.4)  # cm

    # Ensure that root and evaporation depths are 0.5 cm or more shallower than soil depth
    in_scenarios['root_depth'] = np.minimum(in_scenarios.root_zone_max.values - 0.5,
                                            in_scenarios.max_root_depth)
    in_scenarios['evaporation_depth'] = np.minimum(in_scenarios.root_zone_max.values - 0.5, anetd)

    # Choose output fields and perform data correction
    report("Performing data correction...", 3)
    fields.refresh()
    in_scenarios = in_scenarios.reset_index()
    # Set index columns aside up front so the QC report can identify scenarios in either mode
    index_cols = in_scenarios[['scenario_id', pwc_selection_field]]
    if mode == 'pwc':
        qc_table = fields.perform_qc(in_scenarios[fields.fetch('pwc_qc')]).copy()
        in_scenarios = in_scenarios[qc_table.max(axis=1) < 2]
        fields.expand('horizon', max_horizons)
    else:
        fields.expand("depth_weight", depth_bins)
        in_scenarios = in_scenarios[fields.fetch('sam_scenario')]
        qc_table = fields.perform_qc(in_scenarios)
        in_scenarios = in_scenarios.mask(qc_table == 2, fields.fill(), axis=1)
    if write_qc:
        qc_table = pd.concat([index_cols, qc_table], axis=1)
        write.qc_report(region, mode, qc_table)
    if mode == 'pwc':
        in_scenarios = in_scenarios[~in_scenarios.sam_only.fillna(0).astype(bool)]
    return in_scenarios[fields.fetch(mode + '_scenario')]

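# Worked example of the max irrigation formula above (USDA curve number
# method, metric units): potential retention S = 2540 / CN - 25.4 (cm), and
# the initial abstraction 0.2 * S caps the irrigation rate. For cn_cov = 80:
#   S = 2540 / 80 - 25.4 = 31.75 - 25.4 = 6.35 cm
#   max_irrigation = 0.2 * 6.35 = 1.27 cm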