Example #1
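Shown without its module header; a plausible set of imports, where the Bandit-specific paths are assumptions:

import os

import bandit_cfg as bc                # Bandit's config reader (assumed import path)
from git_version import git_version    # returns the repo's current revision (assumed import path)
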
def main():
    # parser = argparse.ArgumentParser(description='Check the current revision of the local NHMparamdb against GIT')
    # parser.add_argument('-u', '--update', help='Update the local NHMparamdb', action='store_true')
    #
    # args = parser.parse_args()

    config = bc.Cfg('bandit.cfg')

    # Get NHMparamdb version that is currently used for the merged parameter database
    with open('{}/00-REVISION'.format(config.merged_paramdb_dir), 'r') as fhdl:
        m_rev = fhdl.readline().strip()

    # Get the currently available revision of the NhmParamDb
    c_rev = git_version(config.paramdb_dir)

    if m_rev != c_rev:
        print('A newer version of the NhmParamDb is available.')

        print('  NhmParamDb revision used by bandit: {}'.format(m_rev))
        print('NhmParamDb revision available on GIT: {}'.format(c_rev))
        print('\nTo update the NhmParamDb first change into directory: {}'.
              format(config.paramdb_dir))
        print("Then type 'git pull'")
        print('After the update is completed change into directory: {}'.format(
            os.getcwd()))
        print("Then type 'create_merged_db'")
    else:
        print('NhmParamDb is up-to-date.')
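
For reference, `git_version()` is not defined in these snippets; a minimal sketch of what it plausibly does, assuming git is on PATH and the directory is a clone of the NhmParamDb repository:

import subprocess

def git_version(repo_dir):
    """Return the commit hash of HEAD for the repository at repo_dir."""
    return subprocess.check_output(['git', 'rev-parse', 'HEAD'],
                                   cwd=repo_dir).strip().decode('ascii')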
Example #2
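Shown without its module header; a plausible set of imports (the `bandit_cfg` path is an assumption):

import argparse
import errno
import os
import shutil

import bandit_cfg as bc   # Bandit's config reader (assumed import path)
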
def main():
    parser = argparse.ArgumentParser(
        description='Setup new job for Bandit extraction')
    parser.add_argument('-c',
                        '--config',
                        help='Name of configuration file',
                        nargs='?',
                        default='bandit.cfg',
                        type=str)
    parser.add_argument('jobdir', help='Name of new job directory')

    args = parser.parse_args()

    config = bc.Cfg(args.config)

    print('Creating new job for Bandit extraction')
    print('Config file: {}'.format(args.config))

    # Check that the various required directories and files defined in bandit.cfg exist
    if os.path.splitext(config.cbh_dir)[1] == '.nc':
        print('INFO: Using netCDF format for CBH files')
        cbh_dir_tmp = os.path.split(config.cbh_dir)[0]
    else:
        cbh_dir_tmp = config.cbh_dir

    if not os.path.exists(cbh_dir_tmp):
        print("Location of the CBH files (cbh_dir) does not exist!")
        exit(2)
    elif not os.path.exists(config.paramdb_dir):
        print(
            "Location of the NHM parameter database (paramdb_dir) does not exist!"
        )
        exit(2)
    elif not os.path.exists(config.merged_paramdb_dir):
        print(
            "Location of the merged parameters database (merged_paramdb_dir) does not exist!"
        )
        exit(2)
    elif not os.path.exists(config.geodatabase_filename):
        print("The geodatabase file (geodatabase_filename) does not exist!")
        exit(2)
    elif not os.path.exists(config.output_dir):
        print("The main jobs directory (output_dir) does not exist!")
        exit(2)

    # Define the path to the new job directory
    tl_jobsdir = config.output_dir

    # check for / create output directory
    new_job_dir = '{}/{}'.format(tl_jobsdir, args.jobdir)

    try:
        os.mkdir(new_job_dir)
        print('\tJob directory created: {}'.format(new_job_dir))
    except OSError as err:
        if err.errno == errno.ENOENT:
            print('\tThe top-level jobs directory does not exist: {}'.format(
                tl_jobsdir))
            exit(2)
        elif err.errno == errno.EACCES:
            print('\tYou have insufficient privileges to create: {}'.format(
                new_job_dir))
            exit(2)
        elif err.errno == errno.EEXIST:
            print('\tFile/Directory already exists: {}'.format(new_job_dir))
            print(
                '\tNew bandit.cfg and control.default files will be copied here.'
            )
        else:
            print('\tOther error')
            print(err)
            raise

    # Copy bandit.cfg to job directory
    print('\tCreating bandit.cfg file for new job')
    config.update_value('output_dir', new_job_dir)
    config.write('{}/bandit.cfg'.format(new_job_dir))

    # Copy the control.default file to the job directory
    print('\tCopying control.default to new job')
    shutil.copy('{}/control.default'.format(tl_jobsdir),
                '{}/control.default'.format(new_job_dir))

    print('\nNew job directory has been created.')
    print(
        'Make sure to update outlets, cutoffs, and hru_noroute parameters as needed in bandit.cfg '
        + 'before running bandit.')
Example #3
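Shown without its module header; a plausible set of imports, with the Bandit-specific paths marked as assumptions:

import argparse
import os
import sys

import bandit_cfg as bc                # assumed import path
from git_version import git_version    # assumed import path
from paramdb import ParamDbRegion      # assumed import path
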
def main():
    parser = argparse.ArgumentParser(
        description='Setup new job for Bandit extraction')
    parser.add_argument('-c',
                        '--config',
                        help='Name of configuration file',
                        nargs='?',
                        default='bandit.cfg',
                        type=str)

    args = parser.parse_args()

    config = bc.Cfg(args.config)

    paramdb_dir = config.paramdb_dir
    merged_paramdb_dir = config.merged_paramdb_dir

    # check for / create output directory
    try:
        os.makedirs(merged_paramdb_dir)
        print('Creating directory for merged parameters: {}'.format(
            merged_paramdb_dir))
    except OSError:
        print("\tUsing existing directory for merged parameters: {}".format(
            merged_paramdb_dir))

    # Write the git revision number of the NhmParamDb repo to a file in the merged params directory
    with open('{}/00-REVISION'.format(merged_paramdb_dir), 'w') as fhdl:
        fhdl.write('{}\n'.format(git_version(paramdb_dir)))

    # Create NhmParamDb object and retrieve the parameters
    pdb = ParamDbRegion(paramdb_dir, verbose=True, verify=True)
    param_info = pdb.available_parameters

    # Overwrite the data for tosegment and hru_segment with their respective
    # NHM counterparts.
    pdb.parameters['tosegment'].data = pdb.parameters['tosegment_nhm'].data
    pdb.parameters['hru_segment'].data = pdb.parameters['hru_segment_nhm'].data

    # =======================================================================
    # Process all the parameters, skipping special-handling cases
    for pp in param_info:
        sys.stdout.write('\r                                       ')
        sys.stdout.write('\rProcessing {}'.format(pp))
        sys.stdout.flush()

        cparam = pdb.parameters.get(pp)

        with open('{}/{}.csv'.format(merged_paramdb_dir, pp), 'w') as ff:
            ff.write(cparam.toparamdb())

        # write the serialized param to a file
        # with open('{}/{}.msgpack'.format(merged_paramdb_dir, pp), 'wb') as ff:
        #     msgpack.dump(cparam.tostructure(), ff)

    # Write the parameters.xml and dimensions.xml files to the merged_db directory
    pdb.write_parameters_xml(merged_paramdb_dir)
    pdb.write_dimensions_xml(merged_paramdb_dir)

    # =======================================================================
    # Lastly there are a few non-parameter mapping variables that are needed
    # during the checkout process. It's easier/faster to create them once
    # here rather than create them on the fly during checkout.

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Process nhm_seg related mappings
    # pp = 'nhm_seg'
    sys.stdout.write('\r                                       ')
    sys.stdout.write('\rProcessing segment mappings')
    sys.stdout.flush()

    # write the serialized segment mappings to a file
    # with open('{}/segment_nhm_to_local.msgpack'.format(merged_paramdb_dir), 'wb') as ff:
    #     msgpack.dump(segment_nhm_to_local, ff)

    # with open('{}/segment_nhm_to_region.msgpack'.format(merged_paramdb_dir), 'wb') as ff:
    #     msgpack.dump(pdb.segment_nhm_to_region, ff)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Process nhm_id related mappings
    sys.stdout.write('\r                                       ')
    sys.stdout.write('\rProcessing hru mappings')
Example #4
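Shown without its module header. Standard-library and third-party imports follow directly from usage; the Bandit package paths below are assumptions:

import argparse
import datetime
import errno
import glob
import logging
import os
import re
import sys
from collections import OrderedDict

import networkx as nx
import numpy as np
from future.utils import iteritems       # assumed source of iteritems()

import bandit_cfg as bc                   # assumed import path
import dynamic_parameters as dyn_params   # assumed import path
import prms_geo                           # assumed import path
import prms_nwis                          # assumed import path
# Also used below and assumed to come with the Bandit/pyPRMS packages:
# CbhAscii, CbhNetcdf, ControlFile, ModelOutput, ParamDb, ParameterSet,
# ValidParams, parse_gages, git_version, PARAMETERS_XML, HRU_DIMS, __version__
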
def main():
    # Command line arguments
    parser = argparse.ArgumentParser(
        description='Extract model subsets from the National Hydrologic Model')
    parser.add_argument('-O',
                        '--output_dir',
                        help='Output directory for subset')
    parser.add_argument('-p',
                        '--param_filename',
                        help='Name of output parameter file')
    parser.add_argument('-s',
                        '--streamflow_filename',
                        help='Name of streamflow data file')
    parser.add_argument('-P',
                        '--paramdb_dir',
                        help='Location of parameter database')
    parser.add_argument('-M',
                        '--merged_paramdb_dir',
                        help='Location of merged parameter database')
    parser.add_argument('-C', '--cbh_dir', help='Location of CBH files')
    parser.add_argument('-g',
                        '--geodatabase_filename',
                        help='Full path to NHM geodatabase')
    parser.add_argument('-j', '--job', help='Job directory to work in')
    parser.add_argument('-v',
                        '--verbose',
                        help='Output additional information',
                        action='store_true')
    parser.add_argument('--check_DAG',
                        help='Verify the streamflow network',
                        action='store_true')
    parser.add_argument('--output_cbh',
                        help='Output CBH files for subset',
                        action='store_true')
    parser.add_argument('--output_shapefiles',
                        help='Output shapefiles for subset',
                        action='store_true')
    parser.add_argument('--output_streamflow',
                        help='Output streamflows for subset',
                        action='store_true')
    parser.add_argument('--cbh_netcdf',
                        help='Enable netCDF output for CBH files',
                        action='store_true')
    parser.add_argument('--param_netcdf',
                        help='Enable netCDF output for parameter file',
                        action='store_true')
    parser.add_argument(
        '--add_gages',
        metavar="KEY=VALUE",
        nargs='+',
        help='Add arbitrary streamgages to POIs of form gage_id=segment. '
             'Segment must exist in the model subset. Additional streamgages '
             'are marked as poi_type=0.')
    parser.add_argument(
        '--no_filter_params',
        help='Output all parameters regardless of modules selected',
        action='store_true')
    args = parser.parse_args()

    stdir = os.getcwd()

    # TODO: Add to command line arguments
    single_poi = False

    if args.job:
        if os.path.exists(args.job):
            # Change into job directory before running extraction
            os.chdir(args.job)
            # print('Working in directory: {}'.format(args.job))
        else:
            print('ERROR: Invalid jobs directory: {}'.format(args.job))
            exit(-1)

    # Setup the logging
    bandit_log = logging.getLogger('bandit')
    bandit_log.setLevel(logging.DEBUG)

    log_fmt = logging.Formatter('%(levelname)s: %(name)s: %(message)s')

    # Handler for file logs
    flog = logging.FileHandler('bandit.log')
    flog.setLevel(logging.DEBUG)
    flog.setFormatter(log_fmt)

    # Handler for console logs
    clog = logging.StreamHandler()
    clog.setLevel(logging.ERROR)
    clog.setFormatter(log_fmt)

    bandit_log.addHandler(flog)
    bandit_log.addHandler(clog)

    bandit_log.info('========== START {} =========='.format(
        datetime.datetime.now().isoformat()))

    addl_gages = None
    if args.add_gages:
        addl_gages = parse_gages(args.add_gages)
        bandit_log.info('Additional streamgages specified on the command line')

    config = bc.Cfg('bandit.cfg')

    # Override configuration variables with any command line parameters
    for kk, vv in iteritems(args.__dict__):
        if kk not in [
                'job', 'verbose', 'cbh_netcdf', 'add_gages', 'param_netcdf',
                'no_filter_params'
        ]:
            if vv:
                bandit_log.info(
                    'Overriding configuration for {} with {}'.format(kk, vv))
                config.update_value(kk, vv)

    # Where to output the subset
    outdir = config.output_dir

    # The control file to use
    control_filename = config.control_filename

    # What to name the output parameter file
    param_filename = config.param_filename

    # Location of the NHM parameter database
    paramdb_dir = config.paramdb_dir

    # Location of the merged parameter database
    merged_paramdb_dir = config.merged_paramdb_dir

    streamgage_file = config.streamgage_file

    # List of outlets
    # dsmost_seg = config.outlets

    # List of upstream cutoffs
    # uscutoff_seg = config.cutoffs

    # List of additional HRUs (have no route to segment within subset)
    # hru_noroute = config.hru_noroute

    # List of output variables to subset
    try:
        include_model_output = config.include_model_output
        output_vars_dir = config.output_vars_dir
        output_vars = config.output_vars
    except KeyError:
        include_model_output = False

    # Control what is checked and output for subset
    check_dag = config.check_DAG

    try:
        output_cbh = config.output_cbh

        # Location of the NHM CBH files
        cbh_dir = config.cbh_dir
    except KeyError:
        output_cbh = False

    try:
        output_streamflow = config.output_streamflow

        # What to name the streamflow output file
        obs_filename = config.streamflow_filename
    except KeyError:
        output_streamflow = False

    try:
        output_shapefiles = config.output_shapefiles

        # Full path and filename to the geodatabase to use for outputting shapefile subsets
        geo_file = config.geodatabase_filename
    except KeyError:
        output_shapefiles = False

    # Load the control file
    ctl = ControlFile(control_filename)

    if ctl.has_dynamic_parameters:
        if config.dyn_params_dir:
            if os.path.exists(config.dyn_params_dir):
                dyn_params_dir = config.dyn_params_dir
            else:
                bandit_log.error('dyn_params_dir: {}, does not exist.'.format(
                    config.dyn_params_dir))
                exit(2)
        else:
            bandit_log.error(
                'Control file has dynamic parameters but dyn_params_dir is not specified in the config file'
            )
            exit(2)

    # Load master list of valid parameters
    vpdb = ValidParams()

    # Build list of parameters required for the selected control file modules
    required_params = vpdb.get_params_for_modules(modules=ctl.modules.values())

    # TODO: make sure dynamic parameter filenames are correct
    # Write an updated control file
    # ctl.write('somefile')

    # Date range for pulling NWIS streamgage observations
    if isinstance(config.start_date, datetime.date):
        st_date = config.start_date
    else:
        st_date = datetime.datetime(
            *[int(x) for x in re.split('-| |:', config.start_date)])

    if isinstance(config.end_date, datetime.date):
        en_date = config.end_date
    else:
        en_date = datetime.datetime(
            *[int(x) for x in re.split('-| |:', config.end_date)])
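    # Illustrative example: a start_date of '1980-10-01 00:00:00' splits into
    # ['1980', '10', '01', '00', '00', '00'] and becomes
    # datetime.datetime(1980, 10, 1, 0, 0).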

    # ===============================================================
    params_file = '{}/{}'.format(merged_paramdb_dir, PARAMETERS_XML)

    # Output revision of NhmParamDb and the revision used by merged paramdb
    nhmparamdb_revision = git_version(paramdb_dir)
    bandit_log.info('Parameters based on NhmParamDb revision: {}'.format(
        nhmparamdb_revision))

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Read hru_nhm_to_local and hru_nhm_to_region
    # Create segment_nhm_to_local and segment_nhm_to_region

    # TODO: since hru_nhm_to_region and hru_nhm_to_local are only needed for
    #       CBH files we should 'soft-fail' if the files are missing and just
    #       output a warning and turn off CBH output if it was selected.
    # hru_nhm_to_region = get_parameter('{}/hru_nhm_to_region.msgpack'.format(cbh_dir))
    # hru_nhm_to_local = get_parameter('{}/hru_nhm_to_local.msgpack'.format(cbh_dir))

    # Load the NHMparamdb
    print('Loading NHM ParamDb')
    pdb = ParamDb(merged_paramdb_dir)
    nhm_params = pdb.parameters
    nhm_global_dimensions = pdb.dimensions

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Get tosegment_nhm
    # NOTE: tosegment is now tosegment_nhm and the regional tosegment is gone.
    tosegment = nhm_params.get('tosegment').data
    nhm_seg = nhm_params.get('nhm_seg').data

    if args.verbose:
        print('Generating stream network from tosegment_nhm')

    # Build the stream network
    dag_ds = nx.DiGraph()
    for ii, vv in enumerate(tosegment):
        #     dag_ds.add_edge(ii+1, vv)
        if vv == 0:
            dag_ds.add_edge(ii + 1, 'Out_{}'.format(ii + 1))
        else:
            dag_ds.add_edge(ii + 1, vv)
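    # NOTE: terminal segments (tosegment == 0) are given a synthetic
    # 'Out_<segment>' node above so every true outlet remains an edge.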

    # nx.draw_networkx(dag_ds)
    bandit_log.debug('Number of NHM downstream nodes: {}'.format(
        dag_ds.number_of_nodes()))
    bandit_log.debug('Number of NHM downstream edges: {}'.format(
        dag_ds.number_of_edges()))

    if check_dag:
        if not nx.is_directed_acyclic_graph(dag_ds):
            bandit_log.error('Cycles and/or loops found in stream network')

            for xx in nx.simple_cycles(dag_ds):
                bandit_log.error('Cycle found for segment {}'.format(xx))

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Build dictionary which maps poi_gage_id to poi_gage_segment
    # poi_gage_segment_tmp = get_parameter('{}/poi_gage_segment.msgpack'.format(merged_paramdb_dir))['data']
    # poi_gage_id_tmp = get_parameter('{}/poi_gage_id.msgpack'.format(merged_paramdb_dir))['data']
    poi_gage_segment_tmp = nhm_params.get('poi_gage_segment').data
    poi_gage_id_tmp = nhm_params.get('poi_gage_id').data

    # Create dictionary to lookup nhm_segment for a given poi_gage_id
    poi_id_to_seg = dict(zip(poi_gage_id_tmp, poi_gage_segment_tmp))

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Read streamgage ids from file - one streamgage id per row
    with open(streamgage_file, 'r') as fhdl:
        streamgages = fhdl.read().splitlines()

    # =====================================
    # dag_ds should not change below here
    # For each streamgage:
    #   1) lookup nhm_segment (if any) and use as outlet
    #   2) create output directory
    #   3) subset the stream network, HRUs, params, etc

    uscutoff_seg = []

    for sg in streamgages:
        print('Working on streamgage {}'.format(sg))

        while True:
            # Create the upstream graph
            dag_us = dag_ds.reverse()
            bandit_log.debug('Number of NHM upstream nodes: {}'.format(
                dag_us.number_of_nodes()))
            bandit_log.debug('Number of NHM upstream edges: {}'.format(
                dag_us.number_of_edges()))

            # Trim the u/s graph to remove segments above the u/s cutoff segments
            try:
                for xx in uscutoff_seg:
                    try:
                        dag_us.remove_nodes_from(
                            nx.dfs_predecessors(dag_us, xx))

                        # Also remove the cutoff segment itself
                        dag_us.remove_node(xx)
                    except KeyError:
                        print(
                            'WARNING: nhm_segment {} does not exist in stream network'
                            .format(xx))
            except TypeError:
                bandit_log.error(
                    'Selected cutoffs must be a list (an empty list is fine), not None ({})'
                    .format(outdir))
                exit(200)

            bandit_log.debug(
                'Number of NHM upstream nodes (trimmed): {}'.format(
                    dag_us.number_of_nodes()))
            bandit_log.debug(
                'Number of NHM upstream edges (trimmed): {}'.format(
                    dag_us.number_of_edges()))

            # Lookup the outlet for the current streamgage
            try:
                dsmost_seg = [poi_id_to_seg[sg]]

                if dsmost_seg[0] == 0:
                    # POI stream segment was never properly assigned in paramdb
                    bandit_log.error(
                        'Streamgage {} has segment = 0. Skipping.'.format(sg))
                    break
                elif len(dsmost_seg) > 1:
                    # Should never have more than one segment per streamgage
                    bandit_log.info(
                        'Streamgage {} has more than one stream segment.'.
                        format(sg))
                    break
            except KeyError:
                bandit_log.error(
                    'Streamgage {} does not exist in poi_gage_id'.format(sg))
                break

            sg_dir = '{}/{}'.format(outdir, sg)

            try:
                os.makedirs(sg_dir)
            except OSError as exception:
                if exception.errno != errno.EEXIST:
                    raise
                else:
                    pass

            # =======================================
            # Given a d/s segment (dsmost_seg) create a subset of u/s segments
            if args.verbose:
                print('\tExtracting model subset')

            # Get all unique segments u/s of the starting segment
            uniq_seg_us = set()
            if dsmost_seg:
                for xx in dsmost_seg:
                    try:
                        pred = nx.dfs_predecessors(dag_us, xx)
                        uniq_seg_us = uniq_seg_us.union(
                            set(pred.keys()).union(set(pred.values())))
                    except KeyError:
                        bandit_log.error(
                            'KeyError: Segment {} does not exist in stream network'
                            .format(xx))
                # print('\nKeyError: Segment {} does not exist in stream network'.format(xx))

                # Get a subgraph in the dag_ds graph and return the edges
                dag_ds_subset = dag_ds.subgraph(uniq_seg_us).copy()

                # 2018-02-13 PAN: It is possible to have outlets specified which are not truly
                #                 outlets in the most conservative sense (e.g. a point where
                #                 the stream network exits the study area). This occurs when
                #                 doing headwater extractions where all segments for a headwater
                #                 are specified in the configuration file. Instead of creating
                #                 output edges for all specified 'outlets' the set difference
                #                 between the specified outlets and nodes in the graph subset
                #                 which have no edges is performed first to reduce the number of
                #                 outlets to the 'true' outlets of the system.
                node_outlets = [ee[0] for ee in dag_ds_subset.edges()]
                true_outlets = set(dsmost_seg).difference(set(node_outlets))
                bandit_log.debug('node_outlets: {}'.format(','.join(
                    map(str, node_outlets))))
                bandit_log.debug('true_outlets: {}'.format(','.join(
                    map(str, true_outlets))))

                # Add the downstream segments that exit the subgraph
                for xx in true_outlets:
                    dag_ds_subset.add_edge(xx, 'Out_{}'.format(xx))
            else:
                # No outlets specified so pull the CONUS
                dag_ds_subset = dag_ds

            # Create list of toseg ids for the model subset
            try:
                # networkx 1.x
                toseg_idx = list(
                    set(xx[0] for xx in dag_ds_subset.edges_iter()))
            except AttributeError:
                # networkx 2.x
                toseg_idx = list(set(xx[0] for xx in dag_ds_subset.edges))

            toseg_idx0 = [xx - 1
                          for xx in toseg_idx]  # 0-based version of toseg_idx

            bandit_log.info('Number of segments in subset: {}'.format(
                len(toseg_idx)))

            # NOTE: With monolithic nhmParamDb files hru_segment becomes hru_segment_nhm and the regional hru_segments are gone.
            # 2019-09-16 PAN: This initially assumed hru_segment in the monolithic paramdb was ALWAYS
            #                 ordered 1..nhru. This is not always the case so the nhm_id parameter
            #                 needs to be loaded and used to map the nhm HRU ids to their
            #                 respective indices.
            hru_segment = nhm_params.get('hru_segment').data
            nhm_id = nhm_params.get('nhm_id').data

            nhm_id_to_idx = {}
            for ii, vv in enumerate(nhm_id):
                # keys are 1-based, values are 0-based
                nhm_id_to_idx[vv] = ii

            bandit_log.info('Number of NHM hru_segment entries: {}'.format(
                len(hru_segment)))

            # Create a dictionary mapping segments to HRUs
            seg_to_hru = {}
            for ii, vv in enumerate(hru_segment):
                # keys are 1-based, values in arrays are 1-based
                seg_to_hru.setdefault(vv, []).append(ii + 1)

            # Get HRU ids ordered by the segments in the model subset - entries are 1-based
            hru_order_subset = []
            for xx in toseg_idx:
                if xx in seg_to_hru:
                    for yy in seg_to_hru[xx]:
                        hru_order_subset.append(yy)
                else:
                    bandit_log.warning(
                        'Stream segment {} has no HRUs connected to it.'.
                        format(xx))
                    # raise ValueError('Stream segment has no HRUs connected to it.')

            # Append the additional non-routed HRUs to the list
            # if len(hru_noroute) > 0:
            #     for xx in hru_noroute:
            #         if hru_segment[xx-1] == 0:
            #             bandit_log.info('User-supplied HRU {} is not connected to any stream segment'.format(xx))
            #             hru_order_subset.append(xx)
            #         else:
            #             bandit_log.error('User-supplied HRU {} routes to stream segment {} - Skipping.'.format(xx,
            #                                                                                            hru_segment[xx-1]))

            hru_order_subset0 = [xx - 1 for xx in hru_order_subset]

            bandit_log.info('Number of HRUs in subset: {}'.format(
                len(hru_order_subset)))

            # Use hru_order_subset to pull selected indices for parameters with nhru dimensions
            # hru_order_subset contains the in-order indices for the subset of hru_segments
            # toseg_idx contains the in-order indices for the subset of tosegments

            # Renumber the tosegment list
            new_tosegment = []

            # Map old dag_ds_subset indices to new
            for xx in toseg_idx:
                if list(dag_ds_subset.neighbors(xx))[0] in toseg_idx:
                    new_tosegment.append(
                        toseg_idx.index(list(dag_ds_subset.neighbors(xx))[0]) +
                        1)
                else:
                    # Outlets should be assigned zero
                    new_tosegment.append(0)

            # Renumber the hru_segments for the subset
            new_hru_segment = []

            for xx in toseg_idx:
                # if DAG_subds.neighbors(xx)[0] in toseg_idx:
                if xx in seg_to_hru:
                    for _ in seg_to_hru[xx]:
                        # The new indices should be 1-based from PRMS
                        new_hru_segment.append(toseg_idx.index(xx) + 1)

            # Append zeroes to new_hru_segment for each additional non-routed HRU
            # if len(hru_noroute) > 0:
            #     for xx in hru_noroute:
            #         if hru_segment[xx-1] == 0:
            #             new_hru_segment.append(0)

            bandit_log.info('Size of hru_segment for subset: {}'.format(
                len(new_hru_segment)))

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Subset hru_deplcrv
            hru_deplcrv = nhm_params.get('hru_deplcrv').data

            bandit_log.info('Size of NHM hru_deplcrv: {}'.format(
                len(hru_deplcrv)))

            # Get subset of hru_deplcrv using hru_order
            # A single snarea_curve can be referenced by multiple HRUs
            hru_deplcrv_subset = np.array(hru_deplcrv)[
                tuple(hru_order_subset0), ]
            uniq_deplcrv = list(set(hru_deplcrv_subset))
            uniq_deplcrv0 = [xx - 1 for xx in uniq_deplcrv]

            # Create new hru_deplcrv and renumber
            new_hru_deplcrv = [
                uniq_deplcrv.index(cc) + 1 for cc in hru_deplcrv_subset
            ]
            bandit_log.info('Size of hru_deplcrv for subset: {}'.format(
                len(new_hru_deplcrv)))

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Subset poi_gage_segment
            new_poi_gage_segment = []
            new_poi_gage_id = []
            new_poi_type = []

            if nhm_params.exists('poi_gage_segment'):
                poi_gage_segment = nhm_params.get('poi_gage_segment').tolist()
                bandit_log.info('Size of NHM poi_gage_segment: {}'.format(
                    len(poi_gage_segment)))

                poi_gage_id = nhm_params.get('poi_gage_id').data
                poi_type = nhm_params.get('poi_type').data

                # We want to get the indices of the poi_gage_segments that match the
                # segments that are part of the subset. We can then use these
                # indices to subset poi_gage_id and poi_type.
                # The poi_gage_segment will need to be renumbered for the subset of segments.

                # To subset poi_gage_segment we have to lookup each segment in the subset

                # Reset the cutoff list
                uscutoff_seg = []

                # for ss in uniq_seg_us:
                try:
                    # networkx 1.x
                    for ss in nx.nodes_iter(dag_ds_subset):
                        if ss in poi_gage_segment:
                            new_poi_gage_segment.append(
                                toseg_idx.index(ss) + 1)
                            new_poi_gage_id.append(
                                poi_gage_id[poi_gage_segment.index(ss)])
                            new_poi_type.append(
                                poi_type[poi_gage_segment.index(ss)])
                except AttributeError:
                    # networkx 2.x
                    for ss in dag_ds_subset.nodes:
                        if ss in poi_gage_segment:
                            new_poi_gage_segment.append(
                                toseg_idx.index(ss) + 1)
                            new_poi_gage_id.append(
                                poi_gage_id[poi_gage_segment.index(ss)])
                            new_poi_type.append(
                                poi_type[poi_gage_segment.index(ss)])

                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # Add any valid user-specified streamgage, nhm_seg pairs
                if addl_gages:
                    for ss, vv in iteritems(addl_gages):
                        if ss in new_poi_gage_id:
                            idx = new_poi_gage_id.index(ss)
                            bandit_log.warning(
                                'Existing NHM POI, {}, overridden on commandline (was {}, now {})'
                                .format(ss, new_poi_gage_segment[idx],
                                        toseg_idx.index(vv) + 1))
                            new_poi_gage_segment[idx] = toseg_idx.index(vv) + 1
                            new_poi_type[idx] = 0
                        elif toseg_idx.index(vv) + 1 in new_poi_gage_segment:
                            sidx = new_poi_gage_segment.index(
                                toseg_idx.index(vv) + 1)
                            bandit_log.warning(
                                'User-specified streamgage ({}) has same nhm_seg ({}) as existing POI ({}), replacing streamgage ID'
                                .format(ss,
                                        toseg_idx.index(vv) + 1,
                                        new_poi_gage_id[sidx]))
                            new_poi_gage_id[sidx] = ss
                            new_poi_type[sidx] = 0
                        elif vv not in seg_to_hru.keys():
                            bandit_log.warning(
                                'User-specified streamgage ({}) has nhm_seg={} which is not part of the model subset - Skipping.'
                                .format(ss, vv))
                        else:
                            new_poi_gage_id.append(ss)
                            new_poi_gage_segment.append(
                                toseg_idx.index(vv) + 1)
                            new_poi_type.append(0)
                            bandit_log.info(
                                'Added user-specified POI streamgage ({}) at nhm_seg={}'
                                .format(ss, vv))

            # ==================================================================
            # ==================================================================
            # Process the parameters and create a parameter file for the subset
            params = list(nhm_params.keys())

            # Remove the POI-related parameters if we have no POIs
            if len(new_poi_gage_segment) == 0:
                bandit_log.warning(
                    'No POI gages found for subset; removing POI-related parameters.'
                )

                for rp in ['poi_gage_id', 'poi_gage_segment', 'poi_type']:
                    # params.pop(rp, None)
                    try:
                        params.remove(rp)
                    except ValueError:
                        print('ERROR: unable to remove {}'.format(rp))
                        pass

            params.sort()

            dims = {}
            for kk in nhm_global_dimensions.values():
                dims[kk.name] = kk.size

            # Resize dimensions to the model subset
            orig_dims = dims.copy()  # need a copy since we modify dims
            for dd, dv in iteritems(orig_dims):
                # dimensions 'nmonths' and 'one' are never changed
                if dd in HRU_DIMS:
                    dims[dd] = len(hru_order_subset0)
                elif dd == 'nsegment':
                    dims[dd] = len(toseg_idx0)
                elif dd == 'ndeplval':
                    dims[dd] = len(uniq_deplcrv0) * 11
                    # if 'ndepl' not in dims:
                    dims['ndepl'] = len(uniq_deplcrv0)
                elif dd == 'npoigages':
                    dims[dd] = len(new_poi_gage_segment)

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Build a ParameterSet for output
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            new_ps = ParameterSet()

            for dd, dv in iteritems(dims):
                new_ps.dimensions.add(dd, dv)

                if dd == 'npoigages':
                    # 20170217 PAN: nobs is missing from the paramdb but is necessary
                    new_ps.dimensions.add('nobs', dv)

            new_params = list(required_params)

            # WARNING: 2019-04-23 PAN
            #          Very hacky way to remove parameters that shouldn't always get
            #          included. Need to figure out a better way.
            check_list = [
                'basin_solsta', 'gvr_hru_id', 'hru_solsta', 'humidity_percent',
                'irr_type', 'obsout_segment', 'rad_conv', 'rain_code',
                'hru_lon'
            ]

            for xx in check_list:
                if xx in new_params:
                    if xx in ['basin_solsta', 'hru_solsta', 'rad_conv']:
                        if not new_ps.dimensions.exists('nsol'):
                            new_params.remove(xx)
                        elif new_ps.dimensions.get('nsol') == 0:
                            new_params.remove(xx)
                    elif xx == 'humidity_percent':
                        if not new_ps.dimensions.exists('nhumid'):
                            new_params.remove(xx)
                        elif new_ps.dimensions.get('nhumid') == 0:
                            new_params.remove(xx)
                    elif xx == 'irr_type':
                        if not new_ps.dimensions.exists('nwateruse'):
                            new_params.remove(xx)
                        elif new_ps.dimensions.get('nwateruse') == 0:
                            new_params.remove(xx)
                    elif xx == 'gvr_hru_id':
                        if ctl.get('mapOutON_OFF').values == 0:
                            new_params.remove(xx)
                    elif xx in [
                            'hru_lat',
                            'hru_lon',
                    ]:
                        if not nhm_params.exists(xx):
                            new_params.remove(xx)

            new_params.sort()
            for pp in params:
                if pp in new_params or args.no_filter_params:
                    cparam = nhm_params.get(pp).tostructure()

                    new_ps.parameters.add(cparam['name'])

                    ndims = len(cparam['dimensions'])
                    if args.verbose:
                        sys.stdout.write(
                            '\r                                       ')
                        sys.stdout.write('\rProcessing {} '.format(
                            cparam['name']))
                        sys.stdout.flush()

                    # Get order of dimensions and total size for parameter
                    dim_order = [None] * ndims

                    for dd, dv in iteritems(cparam['dimensions']):
                        dim_order[dv['position']] = dd

                    for dd in dim_order:
                        # self.parameters.get(varname).dimensions.add(dd, self.dimensions.get(dd).size)
                        new_ps.parameters.get(cparam['name']).dimensions.add(
                            dd,
                            new_ps.dimensions.get(dd).size)

                        new_ps.parameters.get(
                            cparam['name']).datatype = cparam['datatype']

                    first_dimension = dim_order[0]

                    if ndims == 2:
                        second_dimension = dim_order[1]

                    # Write out the data for the parameter
                    if ndims == 1:
                        # 1D Parameters
                        if first_dimension == 'one':
                            outdata = np.array(cparam['data'])
                        elif first_dimension == 'nsegment':
                            if pp in ['tosegment']:
                                outdata = np.array(new_tosegment)
                            else:
                                outdata = np.array(
                                    cparam['data'])[tuple(toseg_idx0), ]
                        elif first_dimension == 'ndeplval':
                            # This is really a 2D parameter in disguise; however, unlike
                            # other 2D arrays it is stored in C-order
                            outdata = np.array(cparam['data']).reshape(
                                (-1, 11))[tuple(uniq_deplcrv0), :]
                        elif first_dimension == 'npoigages':
                            if pp == 'poi_gage_segment':
                                outdata = np.array(new_poi_gage_segment)
                            elif pp == 'poi_gage_id':
                                outdata = np.array(new_poi_gage_id)
                            elif pp == 'poi_type':
                                outdata = np.array(new_poi_type)
                            else:
                                bandit_log.error(
                                    'Unknown parameter, {}, with dimensions {}'.
                                    format(pp, first_dimension))
                        elif first_dimension in HRU_DIMS:
                            if pp == 'hru_deplcrv':
                                outdata = np.array(new_hru_deplcrv)
                            elif pp == 'hru_segment':
                                outdata = np.array(new_hru_segment)
                            else:
                                outdata = np.array(
                                    cparam['data'])[tuple(hru_order_subset0), ]
                        else:
                            bandit_log.error(
                                'No rules to handle dimension {}'.format(
                                    first_dimension))
                    elif ndims == 2:
                        # 2D Parameters
                        outdata = np.array(cparam['data']).reshape(
                            (-1, dims[second_dimension]), order='F')

                        if first_dimension == 'nsegment':
                            outdata = outdata[tuple(toseg_idx0), :]
                        elif first_dimension in HRU_DIMS:
                            outdata = outdata[tuple(hru_order_subset0), :]
                        else:
                            bandit_log.error(
                                'No rules to handle 2D parameter, {}, which contains dimension {}'
                                .format(pp, first_dimension))

                    # Convert outdata to a list for writing
                    if first_dimension == 'ndeplval':
                        outlist = outdata.ravel().tolist()
                    else:
                        outlist = outdata.ravel(order='F').tolist()

                    new_ps.parameters.get(cparam['name']).data = outlist

            # Write the new parameter file
            header = [
                'Written by Bandit version {}'.format(__version__),
                'NhmParamDb revision: {}'.format(nhmparamdb_revision)
            ]
            if args.param_netcdf:
                base_filename = os.path.splitext(param_filename)[0]
                param_filename = '{}.nc'.format(base_filename)
                new_ps.write_netcdf('{}/{}'.format(sg_dir, param_filename))
            else:
                new_ps.write_parameter_file('{}/{}'.format(
                    sg_dir, param_filename),
                                            header=header)

            ctl.get('param_file').values = param_filename

            if args.verbose:
                sys.stdout.write('\n')
                # sys.stdout.write('\r                                       ')
                # sys.stdout.write('\r\tParameter file written: {}\n'.format('{}/{}'.format(outdir, param_filename)))
                sys.stdout.flush()

            # 2019-09-16 PAN: Nasty hack to handle parameter databases that may not have
            #                 a one-to-one match between index value and nhm_id.
            cparam = nhm_params.get('nhm_id').tostructure()
            hru_order_subset_nhm_id = np.array(
                cparam['data'])[tuple(hru_order_subset0), ].ravel(
                    order='F').tolist()

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Write CBH files
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            if output_cbh:
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # Subset the cbh files for the selected HRUs
                if len(hru_order_subset) > 0:
                    if args.verbose:
                        print('Processing CBH files')

                    if os.path.splitext(cbh_dir)[1] == '.nc':
                        cbh_hdl = CbhNetcdf(src_path=cbh_dir,
                                            st_date=st_date,
                                            en_date=en_date,
                                            nhm_hrus=hru_order_subset_nhm_id)
                        # nhm_hrus=hru_order_subset)
                    else:
                        # Subset the hru_nhm_to_local mapping
                        # TODO: This section will not work with the monolithic paramdb - remove
                        hru_order_ss = OrderedDict()
                        for kk in hru_order_subset:
                            hru_order_ss[kk] = hru_nhm_to_local[kk]

                        cbh_hdl = CbhAscii(src_path=cbh_dir,
                                           st_date=st_date,
                                           en_date=en_date,
                                           nhm_hrus=hru_order_subset,
                                           indices=hru_order_ss,
                                           mapping=hru_nhm_to_region)

                    if args.cbh_netcdf:
                        # Pull the filename prefix off of the first file found in the
                        # source netcdf CBH directory.
                        file_it = glob.iglob(cbh_dir)
                        cbh_prefix = os.path.basename(
                            next(file_it)).split('_')[0]

                        cbh_outfile = '{}/{}.nc'.format(outdir, cbh_prefix)
                        cbh_hdl.write_netcdf(cbh_outfile)
                        ctl.get('tmax_day').values = os.path.basename(
                            cbh_outfile)
                        ctl.get('tmin_day').values = os.path.basename(
                            cbh_outfile)
                        ctl.get('precip_day').values = os.path.basename(
                            cbh_outfile)
                    else:
                        cbh_hdl.write_ascii(pathname=sg_dir)
                    # bandit_log.info('{} written to: {}'.format(vv, '{}/{}.cbh'.format(outdir, vv)))
                else:
                    bandit_log.error('No HRUs associated with the segments')

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Write output variables
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # 2019-08-07 PAN: first prototype for extractions of output variables
            if include_model_output:
                if len(hru_order_subset) > 0:
                    try:
                        os.makedirs(f'{sg_dir}/model_output')
                        print(
                            'Creating directory model_output, for model output variables'
                        )
                    except OSError:
                        print(
                            'Using existing model_output directory for output variables'
                        )

                    for vv in output_vars:
                        if args.verbose:
                            sys.stdout.write(
                                '\r                                                  '
                            )
                            sys.stdout.write(
                                f'\rProcessing output variable: {vv} ')
                            sys.stdout.flush()

                        filename = f'{output_vars_dir}/{vv}.nc'

                        if vv[0:3] == 'seg':
                            mod_out = ModelOutput(filename=filename,
                                                  varname=vv,
                                                  startdate=st_date,
                                                  enddate=en_date,
                                                  nhm_segs=toseg_idx)
                        else:
                            mod_out = ModelOutput(
                                filename=filename,
                                varname=vv,
                                startdate=st_date,
                                enddate=en_date,
                                nhm_hrus=hru_order_subset_nhm_id)

                        mod_out.write_csv(f'{sg_dir}/model_output')
                    sys.stdout.write('\n')
                    sys.stdout.flush()

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Write dynamic parameters
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            if ctl.has_dynamic_parameters:
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # Add dynamic parameters
                for cparam in ctl.dynamic_parameters:
                    param_name = 'dyn_{}'.format(cparam)
                    input_file = '{}/{}.nc'.format(dyn_params_dir, param_name)
                    output_file = '{}/{}.param'.format(sg_dir, param_name)

                    if not os.path.exists(input_file):
                        bandit_log.warning(
                            'CONUS dynamic parameter file, {}, does not exist... skipping'
                            .format(input_file))
                    else:
                        if args.verbose:
                            print(
                                'Writing dynamic parameter {}'.format(cparam))

                        mydyn = dyn_params.DynamicParameters(
                            input_file, cparam, st_date, en_date,
                            hru_order_subset_nhm_id)
                        # mydyn = dyn_params.DynamicParameters(input_file, cparam, st_date, en_date, hru_order_subset)

                        mydyn.read_netcdf()
                        out_order = [kk for kk in hru_order_subset_nhm_id]
                        # out_order = [kk for kk in hru_order_subset]
                        for cc in ['day', 'month', 'year']:
                            out_order.insert(0, cc)

                        header = ' '.join(map(str, out_order))

                        # Output ASCII files
                        out_ascii = open(output_file, 'w')
                        out_ascii.write('{}\n'.format(cparam))
                        out_ascii.write('{}\n'.format(header))
                        out_ascii.write('####\n')
                        mydyn.data.to_csv(out_ascii,
                                          columns=out_order,
                                          na_rep='-999',
                                          sep=' ',
                                          index=False,
                                          header=False,
                                          encoding=None,
                                          chunksize=50)
                        out_ascii.close()

            # Write an updated control file to the output directory
            ctl.write('{}/{}.bandit'.format(sg_dir, control_filename))

            if output_streamflow:
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # Download the streamgage information from NWIS
                if args.verbose:
                    print(
                        'Downloading NWIS streamgage observations for {} stations'
                        .format(len(new_poi_gage_id)))

                streamflow = prms_nwis.NWIS(gage_ids=new_poi_gage_id,
                                            st_date=st_date,
                                            en_date=en_date,
                                            verbose=args.verbose)
                streamflow.get_daily_streamgage_observations()
                streamflow.write_prms_data(
                    filename='{}/{}'.format(sg_dir, obs_filename))

            # *******************************************
            # Create a shapefile of the selected HRUs
            if output_shapefiles:
                if args.verbose:
                    print('-' * 40)
                    print('Writing shapefiles for model subset')

                if not os.path.isdir(geo_file):
                    bandit_log.error(
                        'File geodatabase, {}, does not exist. Shapefiles will not be created'
                        .format(geo_file))
                else:
                    geo_shp = prms_geo.Geo(geo_file)

                    # Create GIS sub-directory if it doesn't already exist
                    gis_dir = '{}/GIS'.format(sg_dir)
                    try:
                        os.makedirs(gis_dir)
                    except OSError as exception:
                        if exception.errno != errno.EEXIST:
                            raise
                        else:
                            pass

                    # Output a shapefile of the selected HRUs
                    # print('\tHRUs')
                    # geo_shp.select_layer('nhruNationalIdentifier')
                    geo_shp.select_layer('nhru')
                    geo_shp.write_shapefile(
                        '{}/GIS/HRU_subset.shp'.format(sg_dir),
                        'hru_id_nat',
                        hru_order_subset_nhm_id,
                        included_fields=[
                            'nhm_id', 'model_idx', 'region', 'hru_id_nat'
                        ])

                    # geo_shp.write_shapefile3('{}/GIS/HRU_subset.gdb'.format(outdir), 'hru_id_nat', hru_order_subset)

                    # geo_shp.filter_by_attribute('hru_id_nat', hru_order_subset)
                    # geo_shp.write_shapefile2('{}/HRU_subset.shp'.format(outdir))
                    # geo_shp.write_kml('{}/HRU_subset.kml'.format(outdir))

                    # Output a shapefile of the selected stream segments
                    # print('\tSegments')
                    geo_shp.select_layer('nsegmentNationalIdentifier')
                    geo_shp.write_shapefile(
                        '{}/GIS/Segments_subset.shp'.format(sg_dir),
                        'seg_id_nat',
                        toseg_idx,
                        included_fields=['seg_id_nat', 'model_idx', 'region'])

                    # geo_shp.filter_by_attribute('seg_id_nat', uniq_seg_us)
                    # geo_shp.write_shapefile2('{}/Segments_subset.shp'.format(outdir))

                    del geo_shp

            break  # break out of while True loop

    bandit_log.info('========== END {} =========='.format(
        datetime.datetime.now().isoformat()))

    os.chdir(stdir)
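
Example #4 above relies on a `parse_gages()` helper to turn the `--add_gages` KEY=VALUE arguments into a dictionary; a hypothetical sketch consistent with how `addl_gages` is used (string gage IDs mapped to integer nhm_seg values):

def parse_gages(gage_args):
    """Parse 'gage_id=segment' strings into a {gage_id: nhm_seg} dict."""
    addl_gages = {}
    for item in gage_args:
        gage_id, _, seg = item.partition('=')
        addl_gages[gage_id.strip()] = int(seg)
    return addl_gages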
Example #5
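Shown without its module header; a plausible set of imports, with the Bandit-specific paths marked as assumptions:

import os
import queue
import sys
from collections import OrderedDict

from future.utils import iteritems    # assumed source of iteritems()

import bandit_cfg as bc               # assumed import path
from helpers import WorkerThread      # hypothetical path; ships with Bandit's batch tooling
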
def main():
    import argparse
    from distutils.spawn import find_executable

    # Command line arguments
    parser = argparse.ArgumentParser(description='Batch script for Bandit extractions')

    # parser.add_argument('-j', '--jobdir', help='Job directory to work in')
    parser.add_argument('-s', '--segoutlets', help='File containing segment outlets by location')
    parser.add_argument('-n', '--nrhrus', help='File containing non-routed HRUs by location')
    parser.add_argument('-p', '--prefix', help='Directory prefix to add')

    # parser.add_argument('--check_DAG', help='Verify the streamflow network', action='store_true')

    args = parser.parse_args()

    # Should be in the current job directory
    job_dir = os.getcwd()

    # Read the default configuration file
    config = bc.Cfg('{}/bandit.cfg'.format(job_dir))

    if args.segoutlets:
        seg_src = '{}/{}'.format(job_dir, args.segoutlets)
    else:
        print('ERROR: Must specify the segment outlets file.')
        exit(1)

    if args.nrhrus:
        nrhru_src = '{}/{}'.format(job_dir, args.nrhrus)
    else:
        nrhru_src = None

    # jobdir = '/media/scratch/PRMS/bandit/jobs/hw_jobs'
    # default_config_file = '{}/bandit.cfg'.format(jobdir)

    # cmd_bandit = '/media/scratch/PRMS/bandit/Bandit/bandit.py'
    cmd_bandit = find_executable('bandit_v2')

    if not cmd_bandit:
        print('ERROR: Unable to find the bandit_v2 executable')
        exit(1)

    seg_file = open(seg_src, 'r')

    # Skip the header information
    # NOTE: If file has no header the first entry will be skipped
    seg_file.readline()
    # seg_file.next()

    # First column is hwAreaId
    # Second and following columns are seg_id_nat
    segments_by_loc = OrderedDict()

    # Read the segment outlets by location
    for line in seg_file:
        cols = line.strip().replace(" ", "").split(',')
        try:
            # Assume first column is a number
            cols = [int(xx) for xx in cols]
            segments_by_loc[cols[0]] = cols[1:]
        except ValueError:
            # First column is probably a string
            segments_by_loc[cols[0]] = [int(xx) for xx in cols[1:]]

    if nrhru_src:
        nrhru_file = open(nrhru_src, 'r')
        nrhru_file.readline()
        # nrhru_file.next()

        noroute_hrus_by_loc = OrderedDict()

        # Read in the non-routed HRUs by location
        for line in nrhru_file:
            cols = line.strip().replace(" ", "").split(',')
            try:
                # Assume first column is a number
                cols = [int(xx) for xx in cols]
                noroute_hrus_by_loc[cols[0]] = cols[1:]
            except ValueError:
                # First column is probably a string
                noroute_hrus_by_loc[cols[0]] = [int(xx) for xx in cols[1:]]

    num_threads = 8

    # ****************************************************************************
    # Initialize the threads
    cmd_q = queue.Queue()
    result_q = queue.Queue()
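    # NOTE: WorkerThread (defined elsewhere in this module) is assumed to
    #       pull command strings from input_q, run them, and push
    #       (thread_name, command, returncode) tuples onto result_q.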

    # Create pool of threads
    pool = [WorkerThread(input_q=cmd_q, result_q=result_q) for __ in range(num_threads)]

    # Start the threads
    for thread in pool:
        try:
            thread.start()
        except (KeyboardInterrupt, SystemExit):
            # Shutdown the threads when the program is terminated
            print('Program terminated.')
            thread.join()
            sys.exit(1)

    # ### For each head_waters
    # - create directory hw_# (where # is the hwAreaId)
    # - copy default bandit.cfg into directory
    # - run bandit on the directory
    #

    if not os.path.exists(job_dir):
        try:
            os.makedirs(job_dir)
        except OSError as err:
            print("\tError creating directory: {}".format(err))
            exit(1)

    # st_dir = os.getcwd()
    os.chdir(job_dir)

    # Read the default configuration file
    # config = bc.Cfg(default_config_file)

    work_count = 0

    for kk, vv in iteritems(segments_by_loc):
        try:
            # Try integer-formatted output directories first
            if args.prefix:
                cdir = '{}{:04d}'.format(args.prefix, kk)
            else:
                cdir = '{:04d}'.format(kk)
        except ValueError:
            # Location ID is a string; keep the prefix if one was given
            if args.prefix:
                cdir = '{}{}'.format(args.prefix, kk)
            else:
                cdir = '{}'.format(kk)

        # Create the headwater directory if needed
        if not os.path.exists(cdir):
            try:
                os.makedirs(cdir)
            except OSError as err:
                print("\tError creating directory: {}".format(err))
                exit(1)

        # Update the outlets in the basin.cfg file and write into the headwater directory
        config.update_value('outlets', vv)

        if nrhru_src and kk in noroute_hrus_by_loc:
            config.update_value('hru_noroute', noroute_hrus_by_loc[kk])

        # TODO: This causes the control_filename to be rewritten in the parent
        #       directory; so this happens for each location. Need to fix.
        config.update_value('control_filename', '{}/control.default'.format(job_dir))
        config.update_value('output_dir', '{}/{}'.format(job_dir, cdir))
        config.write('{}/bandit.cfg'.format(cdir))

        # Run bandit
        # Add the command to queue for processing
        work_count += 1
        cmd = '{} -j {}/{}'.format(cmd_bandit, job_dir, cdir)
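        # e.g. '/usr/local/bin/bandit_v2 -j /path/to/jobs/hw0001' (hypothetical paths)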

        os.chdir(cdir)
        cmd_q.put(cmd)
        os.chdir(job_dir)

    print("work_count = {:d}".format(work_count))

    # Output results
    while work_count > 0:
        result = result_q.get()

        sys.stdout.write("\rwork_count: {:4d}".format(work_count))
        sys.stdout.flush()

    #     print "Thread %s return code = %d" % (result[0], result[2])
        work_count -= 1

        if result[2] != 0 and result[2] < 200:
            # An error occurred running the command.
            # Return codes greater than 200 are generated by bandit errors
            # that do not necessitate shutting down the entire job.
            print("\nThread %s return code = %d (%s)" % (result[0], result[2], result[1]))
            work_count = 0

    # Ask for the threads to die and wait for them to do it
    for thread in pool:
        thread.join()
Example #6
def main():
    parser = argparse.ArgumentParser(
        description='Create merged database from nhmparamdb for bandit')
    parser.add_argument('-c',
                        '--config',
                        help='Name of configuration file',
                        nargs='?',
                        default='bandit.cfg',
                        type=str)

    args = parser.parse_args()

    print('Creating merged database for bandit')
    print('Config file: {}'.format(args.config))

    config = bc.Cfg(args.config)

    # TODO: Automatically update the paramdb from git before creating merged params

    paramdb_dir = config.paramdb_dir
    merged_paramdb_dir = config.merged_paramdb_dir

    print('Input paramdb: {}'.format(paramdb_dir))
    print('Output merged database: {}'.format(merged_paramdb_dir))

    # check for / create output directory
    try:
        os.makedirs(merged_paramdb_dir)
        print('Creating directory for merged parameters: {}'.format(
            merged_paramdb_dir))
    except OSError:
        print("\tUsing existing directory for merged parameters: {}".format(
            merged_paramdb_dir))

    # Write the git revision number of the NhmParamDb repo to a file in the merged params directory
    with open('{}/00-REVISION'.format(merged_paramdb_dir), 'w') as fhdl:
        fhdl.write('{}\n'.format(git_version(paramdb_dir)))

    # Create NhmParamDb object and retrieve the parameters
    pdb = ParamDbRegion(paramdb_dir)
    param_info = pdb.available_parameters

    # =======================================================================
    # Process all the parameters, skipping special-handling cases
    for pp in param_info:
        sys.stdout.write('\r                                       ')
        sys.stdout.write('\rProcessing {}'.format(pp))
        sys.stdout.flush()

        cparam = pdb.parameters.get(pp)

        # write the serialized param to a file
        with open('{}/{}.msgpack'.format(merged_paramdb_dir, pp), 'wb') as ff:
            msgpack.dump(cparam.tostructure(), ff)
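        # A consumer can later restore the serialized parameter with, e.g.:
        #   with open(path, 'rb') as ff:
        #       param_struct = msgpack.load(ff)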

    # Write the parameters.xml and dimensions.xml files to the merged_db directory
    pdb.write_parameters_xml(merged_paramdb_dir)
    pdb.write_dimensions_xml(merged_paramdb_dir)

    # =======================================================================
    # Lastly there are a few non-parameter mapping variables that are needed
    # during the checkout process. It's easier/faster to create them once
    # here rather than create them on the fly during checkout.

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Process nhm_seg related mappings
    # pp = 'nhm_seg'
    sys.stdout.write('\r                                       ')
    sys.stdout.write('\rProcessing segment mappings')
    sys.stdout.flush()

    # write the serialized segment mappings to a file
    # with open('{}/segment_nhm_to_local.msgpack'.format(merged_paramdb_dir), 'wb') as ff:
    #     msgpack.dump(segment_nhm_to_local, ff)

    with open('{}/segment_nhm_to_region.msgpack'.format(merged_paramdb_dir),
              'wb') as ff:
        msgpack.dump(pdb.segment_nhm_to_region, ff)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Process nhm_id related mappings
    sys.stdout.write('\r                                       ')
    sys.stdout.write('\rProcessing hru mappings')
    sys.stdout.flush()

    # write the serialized segment mappings to a file
    with open('{}/hru_nhm_to_local.msgpack'.format(merged_paramdb_dir),
              'wb') as ff:
        msgpack.dump(pdb.hru_nhm_to_local, ff)

    with open('{}/hru_nhm_to_region.msgpack'.format(merged_paramdb_dir),
              'wb') as ff:
        msgpack.dump(pdb.hru_nhm_to_region, ff)
Example #7
def main():
    # Command line arguments
    parser = argparse.ArgumentParser(
        description='Extract model subsets from the National Hydrologic Model')
    parser.add_argument('-O',
                        '--output_dir',
                        help='Output directory for subset')
    parser.add_argument('-p',
                        '--param_filename',
                        help='Name of output parameter file')
    parser.add_argument('-s',
                        '--streamflow_filename',
                        help='Name of streamflow data file')
    parser.add_argument('-P',
                        '--paramdb_dir',
                        help='Location of parameter database')
    parser.add_argument('-M',
                        '--merged_paramdb_dir',
                        help='Location of merged parameter database')
    parser.add_argument('-C', '--cbh_dir', help='Location of CBH files')
    parser.add_argument('-g',
                        '--geodatabase_filename',
                        help='Full path to NHM geodatabase')
    parser.add_argument('-j', '--job', help='Job directory to work in')
    parser.add_argument('-v',
                        '--verbose',
                        help='Output additional information',
                        action='store_true')
    parser.add_argument('--check_DAG',
                        help='Verify the streamflow network',
                        action='store_true')
    parser.add_argument('--output_cbh',
                        help='Output CBH files for subset',
                        action='store_true')
    parser.add_argument('--output_shapefiles',
                        help='Output shapefiles for subset',
                        action='store_true')
    parser.add_argument('--output_streamflow',
                        help='Output streamflows for subset',
                        action='store_true')
    parser.add_argument('--cbh_netcdf',
                        help='Enable netCDF output for CBH files',
                        action='store_true')
    parser.add_argument('--param_netcdf',
                        help='Enable netCDF output for parameter file',
                        action='store_true')
    parser.add_argument(
        '--add_gages',
        metavar="KEY=VALUE",
        nargs='+',
        help='Add arbitrary streamgages to POIs of form gage_id=segment. '
             'Segment must exist in the model subset. Additional streamgages '
             'are marked as poi_type=0.')
    parser.add_argument(
        '--no_filter_params',
        help='Output all parameters regardless of modules selected',
        action='store_true')
    parser.add_argument(
        '--keep_hru_order',
        help='Keep HRUs in the relative order they occur in the paramdb',
        action='store_true')
    args = parser.parse_args()

    stdir = os.getcwd()

    if args.job:
        if os.path.exists(args.job):
            # Change into job directory before running extraction
            os.chdir(args.job)
            # print('Working in directory: {}'.format(args.job))
        else:
            print('ERROR: Invalid jobs directory: {}'.format(args.job))
            exit(-1)

    bandit_log.info('========== START {} =========='.format(
        datetime.datetime.now().isoformat()))

    addl_gages = None
    if args.add_gages:
        addl_gages = parse_gages(args.add_gages)
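        # addl_gages is assumed to map gage IDs to NHM segment IDs,
        # e.g. {'06469400': 31126} (hypothetical values)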
        bandit_log.info('Additional streamgages specified on command line')

    config = bc.Cfg('bandit.cfg')

    # Override configuration variables with any command line parameters
    for kk, vv in args.__dict__.items():
        if kk not in [
                'job', 'verbose', 'cbh_netcdf', 'add_gages', 'param_netcdf',
                'no_filter_params', 'keep_hru_order'
        ]:
            if vv:
                bandit_log.info(
                    'Overriding configuration for {} with {}'.format(kk, vv))
                config.update_value(kk, vv)

    # Where to output the subset
    outdir = config.output_dir

    # The control file to use
    control_filename = config.control_filename

    # What to name the output parameter file
    param_filename = config.param_filename

    # Location of the NHM parameter database
    paramdb_dir = config.paramdb_dir

    # Location of the merged parameter database
    merged_paramdb_dir = config.merged_paramdb_dir

    # List of outlets
    dsmost_seg = config.outlets

    # List of upstream cutoffs
    uscutoff_seg = config.cutoffs

    # List of additional HRUs (have no route to segment within subset)
    hru_noroute = config.hru_noroute

    # List of output variables to subset
    try:
        include_model_output = config.include_model_output
        output_vars = config.output_vars
        output_vars_dir = config.output_vars_dir
    except KeyError:
        include_model_output = False
        output_vars = []
        output_vars_dir = ''

    # Control what is checked and output for subset
    check_dag = config.check_DAG

    try:
        output_cbh = config.output_cbh

        # Location of the NHM CBH files
        cbh_dir = config.cbh_dir
    except KeyError:
        output_cbh = False
        cbh_dir = ''

    try:
        output_streamflow = config.output_streamflow

        # What to name the streamflow output file
        obs_filename = config.streamflow_filename
    except KeyError:
        output_streamflow = False
        obs_filename = ''

    try:
        output_shapefiles = config.output_shapefiles

        # Full path and filename to the geodatabase to use for outputting shapefile subsets
        geo_file = config.geodatabase_filename
    except KeyError:
        output_shapefiles = False
        geo_file = ''

    # Load the control file
    ctl = ControlFile(control_filename)

    dyn_params_dir = ''
    if ctl.has_dynamic_parameters:
        if config.dyn_params_dir:
            if os.path.exists(config.dyn_params_dir):
                dyn_params_dir = config.dyn_params_dir
            else:
                bandit_log.error('dyn_params_dir: {}, does not exist.'.format(
                    config.dyn_params_dir))
                exit(2)
        else:
            bandit_log.error(
                'Control file has dynamic parameters but dyn_params_dir is not specified in the config file'
            )
            exit(2)

    # Load master list of valid parameters
    vpdb = ValidParams()

    # Build list of parameters required for the selected control file modules
    required_params = vpdb.get_params_for_modules(
        modules=list(ctl.modules.values()))

    # Date range for pulling NWIS streamgage observations
    if isinstance(config.start_date, datetime.date):
        st_date = config.start_date
    else:
        st_date = datetime.datetime(
            *[int(x) for x in re.split('[- :]', config.start_date)])
        # st_date = datetime.datetime(*[int(x) for x in re.split('-| |:', config.start_date)])
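        # e.g. a (hypothetical) start_date of '1980-01-01' becomes
        # datetime.datetime(1980, 1, 1)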

    if isinstance(config.end_date, datetime.date):
        en_date = config.end_date
    else:
        en_date = datetime.datetime(
            *[int(x) for x in re.split('[- :]', config.end_date)])
        # en_date = datetime.datetime(*[int(x) for x in re.split('-| |:', config.end_date)])

    # ===============================================================
    # params_file = '{}/{}'.format(merged_paramdb_dir, PARAMETERS_XML)

    # Output revision of NhmParamDb and the revision used by merged paramdb
    nhmparamdb_revision = git_version(paramdb_dir)
    bandit_log.info('Parameters based on NhmParamDb revision: {}'.format(
        nhmparamdb_revision))

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Read hru_nhm_to_local and hru_nhm_to_region
    # Create segment_nhm_to_local and segment_nhm_to_region

    # TODO: since hru_nhm_to_region and hru_nhm_to_local are only needed for
    #       CBH files we should 'soft-fail' if the files are missing, output
    #       a warning, and turn off CBH output if it was selected.
    # hru_nhm_to_region = get_parameter('{}/hru_nhm_to_region.msgpack'.format(cbh_dir))
    # hru_nhm_to_local = get_parameter('{}/hru_nhm_to_local.msgpack'.format(cbh_dir))

    # Load the NHMparamdb
    print('Loading NHM ParamDb')
    pdb = ParamDb(merged_paramdb_dir)
    nhm_params = pdb.parameters
    nhm_global_dimensions = pdb.dimensions

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Get tosegment_nhm
    # NOTE: tosegment is now tosegment_nhm and the regional tosegment is gone.
    # Convert to list for fastest access to array
    tosegment = nhm_params.get('tosegment_nhm').tolist()
    nhm_seg = nhm_params.get('nhm_seg').tolist()

    if args.verbose:
        print('Generating stream network from tosegment_nhm')

    # First check if any of the requested stream segments exist in the NHM.
    # An intersection of 0 elements can occur when all stream segments are
    # not included in the NHM (e.g. segments in Alaska).
    # NOTE: It's possible to have a stream segment that does not exist in
    #       tosegment but does exist in nhm_seg (e.g. standalone segment). So
    #       we use nhm_seg to verify at least one of the given segment(s) exist.
    if dsmost_seg and len(set(dsmost_seg).intersection(nhm_seg)) == 0:
        bandit_log.error(
            'None of the requested stream segments exist in the NHM paramDb')
        exit(200)

    # Build the stream network
    dag_ds = generate_stream_network(tosegment, nhm_seg)

    if check_dag:
        if not nx.is_directed_acyclic_graph(dag_ds):
            bandit_log.error('Cycles and/or loops found in stream network')

            for xx in nx.simple_cycles(dag_ds):
                bandit_log.error('Cycle found for segment {}'.format(xx))

    if args.verbose:
        print('\tExtracting model subset')

    dag_ds_subset = subset_stream_network(dag_ds, uscutoff_seg, dsmost_seg)

    # Create list of toseg ids for the model subset
    try:
        # networkx 1.x
        toseg_idx = list(set(xx[0] for xx in dag_ds_subset.edges_iter()))
    except AttributeError:
        # networkx 2.x
        toseg_idx = list(set(xx[0] for xx in dag_ds_subset.edges))

    bandit_log.info('Number of segments in subset: {}'.format(len(toseg_idx)))

    # Use the mapping to create subsets of nhm_seg, tosegment_nhm, and tosegment
    # NOTE: toseg_idx and new_nhm_seg are the same thing
    new_nhm_seg = [ee[0] for ee in dag_ds_subset.edges]

    # Using a dictionary mapping nhm_seg to 1-based index for speed
    new_nhm_seg_to_idx1 = OrderedDict(
        (ss, ii + 1) for ii, ss in enumerate(new_nhm_seg))
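    # e.g. if new_nhm_seg were [30114, 30115, 30118] (hypothetical IDs), the
    # mapping would be {30114: 1, 30115: 2, 30118: 3}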

    # Generate the renumbered local tosegments (1-based with zero being an outlet)
    new_tosegment = [
        new_nhm_seg_to_idx1[ee[1]] if ee[1] in new_nhm_seg_to_idx1 else 0
        for ee in dag_ds_subset.edges
    ]

    # NOTE: With monolithic nhmParamDb files hru_segment becomes hru_segment_nhm and the regional hru_segments are gone.
    # 2019-09-16 PAN: This initially assumed hru_segment in the monolithic paramdb was ALWAYS
    #                 ordered 1..nhru. This is not always the case so the nhm_id parameter
    #                 needs to be loaded and used to map the nhm HRU ids to their
    #                 respective indices.
    hru_segment = nhm_params.get('hru_segment_nhm').tolist()
    nhm_id = nhm_params.get('nhm_id').tolist()
    nhm_id_to_idx = nhm_params.get('nhm_id').index_map
    bandit_log.info('Number of NHM hru_segment entries: {}'.format(
        len(hru_segment)))

    # Create a dictionary mapping hru_segment segments to hru_segment 1-based indices filtered by
    # new_nhm_seg and hru_noroute.
    seg_to_hru = OrderedDict()
    hru_to_seg = OrderedDict()

    for ii, vv in enumerate(hru_segment):
        # Contains both new_nhm_seg values and non-routed HRU values
        # keys are 1-based, values in arrays are 1-based
        if vv in new_nhm_seg:
            hid = nhm_id[ii]
            seg_to_hru.setdefault(vv, []).append(hid)
            hru_to_seg[hid] = vv
        elif nhm_id[ii] in hru_noroute:
            if vv != 0:
                err_txt = 'User-supplied non-routed HRU {} routes to stream segment {} - Skipping.'
                bandit_log.error(err_txt.format(nhm_id[ii], vv))
            else:
                hid = nhm_id[ii]
                seg_to_hru.setdefault(vv, []).append(hid)
                hru_to_seg[hid] = vv
    # print('{0} seg_to_hru {0}'.format('-'*15))
    # print(seg_to_hru)
    # print('{0} hru_to_seg {0}'.format('-'*15))
    # print(hru_to_seg)

    # HRU-related parameters can either be output in the legacy, segment-oriented
    # order or output while maintaining their relative order from the parameter database.
    if args.keep_hru_order:
        hru_order_subset = list(hru_to_seg.keys())

        new_hru_segment = [
            new_nhm_seg_to_idx1[kk]
            if kk in new_nhm_seg else 0 if kk == 0 else -1
            for kk in hru_to_seg.values()
        ]
    else:
        # Get NHM HRU ids ordered by the segments in the model subset - entries are 1-based
        hru_order_subset = []
        for xx in new_nhm_seg:
            if xx in seg_to_hru:
                for yy in seg_to_hru[xx]:
                    hru_order_subset.append(yy)
            else:
                bandit_log.warning(
                    'Stream segment {} has no HRUs connected to it.'.format(
                        xx))

        # Append the additional non-routed HRUs to the list
        if len(hru_noroute) > 0:
            for xx in hru_noroute:
                if hru_segment[nhm_id_to_idx[xx]] == 0:
                    bandit_log.info(
                        'User-supplied HRU {} is not connected to any stream segment'
                        .format(xx))
                    hru_order_subset.append(xx)
                else:
                    err_txt = 'User-supplied HRU {} routes to stream segment {} - Skipping.'
                    bandit_log.error(
                        err_txt.format(xx, hru_segment[nhm_id_to_idx[xx]]))

        # Renumber the hru_segments for the subset
        new_hru_segment = []

        for xx in new_nhm_seg:
            if xx in seg_to_hru:
                for _ in seg_to_hru[xx]:
                    # The new indices should be 1-based from PRMS
                    new_hru_segment.append(new_nhm_seg_to_idx1[xx])

        # Append zeroes to new_hru_segment for each additional non-routed HRU
        if len(hru_noroute) > 0:
            for xx in hru_noroute:
                if hru_segment[nhm_id_to_idx[xx]] == 0:
                    new_hru_segment.append(0)

    hru_order_subset0 = [nhm_id_to_idx[xx] for xx in hru_order_subset]
    bandit_log.info('Number of HRUs in subset: {}'.format(
        len(hru_order_subset)))
    bandit_log.info('Size of hru_segment for subset: {}'.format(
        len(new_hru_segment)))

    # Use hru_order_subset to pull selected indices for parameters with nhru dimensions
    # hru_order_subset contains the in-order indices for the subset of hru_segments
    # toseg_idx contains the in-order indices for the subset of tosegments

    # ==========================================================================
    # Get subset of hru_deplcrv using hru_order
    # A single snarea_curve can be referenced by multiple HRUs
    hru_deplcrv_subset = nhm_params.get_subset('hru_deplcrv', hru_order_subset)

    uniq_deplcrv = list(set(hru_deplcrv_subset))
    uniq_deplcrv0 = [xx - 1 for xx in uniq_deplcrv]

    uniq_dict = {}
    for ii, xx in enumerate(uniq_deplcrv):
        uniq_dict[xx] = ii + 1

    # Create new hru_deplcrv and renumber
    new_hru_deplcrv = [uniq_dict[xx] for xx in hru_deplcrv_subset]
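    # e.g. a subset referencing curves [120, 120, 457] (hypothetical IDs) would
    # be renumbered to [1, 1, 2] once 120 and 457 map to 1 and 2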
    bandit_log.info('Size of hru_deplcrv for subset: {}'.format(
        len(new_hru_deplcrv)))

    # @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Subset poi_gage_segment
    new_poi_gage_segment = []
    new_poi_gage_id = []
    new_poi_type = []

    if nhm_params.exists('poi_gage_segment'):
        poi_gage_segment = nhm_params.get('poi_gage_segment').tolist()
        bandit_log.info('Size of NHM poi_gage_segment: {}'.format(
            len(poi_gage_segment)))

        poi_gage_id = nhm_params.get('poi_gage_id').tolist()
        poi_type = nhm_params.get('poi_type').tolist()

        # We want to get the indices of the poi_gage_segments that match the
        # segments that are part of the subset. We can then use these
        # indices to subset poi_gage_id and poi_type.
        # The poi_gage_segment will need to be renumbered for the subset of segments.

        # To subset poi_gage_segment we have to lookup each segment in the subset
        nhm_seg_dict = nhm_params.get('nhm_seg').index_map
        poi_gage_dict = nhm_params.get('poi_gage_segment').index_map

        for ss in new_nhm_seg:
            sidx = nhm_seg_dict[ss] + 1
            if sidx in poi_gage_segment:
                # print('   {}'.format(poi_gage_segment.index(sidx)))
                new_poi_gage_segment.append(new_nhm_seg_to_idx1[sidx])
                new_poi_gage_id.append(poi_gage_id[poi_gage_dict[sidx]])
                new_poi_type.append(poi_type[poi_gage_dict[sidx]])

        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Add any valid user-specified streamgage, nhm_seg pairs
        if addl_gages:
            for ss, vv in addl_gages.items():
                if ss in new_poi_gage_id:
                    idx = new_poi_gage_id.index(ss)
                    warn_txt = 'Existing NHM POI, {}, overridden on commandline (was {}, now {})'
                    bandit_log.warning(
                        warn_txt.format(ss, new_poi_gage_segment[idx],
                                        new_nhm_seg_to_idx1[vv]))
                    new_poi_gage_segment[idx] = new_nhm_seg_to_idx1[vv]
                    new_poi_type[idx] = 0
                elif new_nhm_seg_to_idx1[vv] in new_poi_gage_segment:
                    sidx = new_poi_gage_segment.index(new_nhm_seg_to_idx1[vv])
                    warn_txt = 'User-specified streamgage ({}) has same nhm_seg ({}) as existing POI ({}), replacing streamgage ID'
                    bandit_log.warning(
                        warn_txt.format(ss, new_nhm_seg_to_idx1[vv],
                                        new_poi_gage_id[sidx]))
                    new_poi_gage_id[sidx] = ss
                    new_poi_type[sidx] = 0
                elif vv not in seg_to_hru.keys():
                    warn_txt = 'User-specified streamgage ({}) has nhm_seg={} which is not part of the model subset - Skipping.'
                    bandit_log.warning(warn_txt.format(ss, vv))
                else:
                    new_poi_gage_id.append(ss)
                    new_poi_gage_segment.append(new_nhm_seg_to_idx1[vv])
                    new_poi_type.append(0)
                    bandit_log.info(
                        'Added user-specified POI streamgage ({}) at nhm_seg={}'
                        .format(ss, vv))

    # ==================================================================
    # ==================================================================
    # Process the parameters and create a parameter file for the subset
    params = list(nhm_params.keys())

    # Remove the POI-related parameters if we have no POIs
    if len(new_poi_gage_segment) == 0:
        bandit_log.warning(
            'No POI gages found for subset; removing POI-related parameters.')

        for rp in ['poi_gage_id', 'poi_gage_segment', 'poi_type']:
            if rp in params:
                params.remove(rp)

    params.sort()

    dims = {}
    for kk in nhm_global_dimensions.values():
        dims[kk.name] = kk.size

    # Resize dimensions to the model subset
    orig_dims = dims.copy()  # need a copy since we modify dims
    for dd, dv in orig_dims.items():
        # dimensions 'nmonths' and 'one' are never changed
        if dd in HRU_DIMS:
            dims[dd] = len(hru_order_subset0)
        elif dd == 'nsegment':
            dims[dd] = len(new_nhm_seg)
        elif dd == 'ndeplval':
            dims[dd] = len(uniq_deplcrv0) * 11
            # if 'ndepl' not in dims:
            dims['ndepl'] = len(uniq_deplcrv0)
        elif dd == 'npoigages':
            dims[dd] = len(new_poi_gage_segment)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Build a ParameterSet for output
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    new_ps = ParameterSet()

    for dd, dv in dims.items():
        new_ps.dimensions.add(dd, dv)

        if dd == 'npoigages':
            # 20170217 PAN: nobs is missing from the paramdb but is necessary
            new_ps.dimensions.add('nobs', dv)

    new_params = list(required_params)

    # WARNING: 2019-04-23 PAN
    #          Very hacky way to remove parameters that shouldn't always get
    #          included. Need to figure out a better way.
    check_list = [
        'basin_solsta', 'gvr_hru_id', 'hru_solsta', 'humidity_percent',
        'irr_type', 'obsout_segment', 'rad_conv', 'rain_code', 'hru_lon'
    ]

    for xx in check_list:
        if xx in new_params:
            if xx in ['basin_solsta', 'hru_solsta', 'rad_conv']:
                if not new_ps.dimensions.exists('nsol'):
                    new_params.remove(xx)
                elif new_ps.dimensions.get('nsol') == 0:
                    new_params.remove(xx)
            elif xx == 'humidity_percent':
                if not new_ps.dimensions.exists('nhumid'):
                    new_params.remove(xx)
                elif new_ps.dimensions.get('nhumid') == 0:
                    new_params.remove(xx)
            elif xx == 'irr_type':
                if not new_ps.dimensions.exists('nwateruse'):
                    new_params.remove(xx)
                elif new_ps.dimensions.get('nwateruse') == 0:
                    new_params.remove(xx)
            elif xx == 'gvr_hru_id':
                if ctl.get('mapOutON_OFF').values == 0:
                    new_params.remove(xx)
            elif xx in [
                    'hru_lat',
                    'hru_lon',
            ]:
                if not nhm_params.exists(xx):
                    new_params.remove(xx)

    new_params.sort()
    for pp in params:
        if pp in new_params or args.no_filter_params:
            src_param = nhm_params.get(pp)

            new_ps.parameters.add(src_param.name)

            ndims = src_param.ndims

            if args.verbose:
                sys.stdout.write('\r                                       ')
                sys.stdout.write('\rProcessing {} '.format(src_param.name))
                sys.stdout.flush()

            dim_order = list(src_param.dimensions.keys())

            for dd in src_param.dimensions.keys():
                new_ps.parameters.get(src_param.name).dimensions.add(
                    dd,
                    new_ps.dimensions.get(dd).size)

            new_ps.parameters.get(src_param.name).datatype = src_param.datatype

            first_dimension = dim_order[0]
            outdata = None

            # Write out the data for the parameter
            if ndims == 1:
                # 1D Parameters
                if first_dimension == 'one':
                    outdata = src_param.data
                elif first_dimension == 'nsegment':
                    if pp in ['tosegment']:
                        outdata = np.array(new_tosegment)
                    else:
                        outdata = nhm_params.get_subset(pp, new_nhm_seg)
                elif first_dimension == 'ndeplval':
                    # This is really a 2D parameter in disguise; however, it is
                    # stored in C order, unlike other 2D arrays
                    outdata = src_param.data.reshape(
                        (-1, 11))[tuple(uniq_deplcrv0), :].reshape((-1))
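                    # i.e. view the data as (ndepl, 11), keep only the rows for
                    # the unique depletion curves in this subset, then flatten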
                elif first_dimension == 'npoigages':
                    if pp == 'poi_gage_segment':
                        outdata = np.array(new_poi_gage_segment)
                    elif pp == 'poi_gage_id':
                        outdata = np.array(new_poi_gage_id)
                    elif pp == 'poi_type':
                        outdata = np.array(new_poi_type)
                    else:
                        bandit_log.error(
                            'Unknown parameter, {}, with dimensions {}'.format(
                                pp, first_dimension))
                elif first_dimension in HRU_DIMS:
                    if pp == 'hru_deplcrv':
                        outdata = np.array(new_hru_deplcrv)
                    elif pp == 'hru_segment':
                        outdata = np.array(new_hru_segment)
                    else:
                        outdata = nhm_params.get_subset(pp, hru_order_subset)
                else:
                    bandit_log.error('No rules to handle dimension {}'.format(
                        first_dimension))
            elif ndims == 2:
                # 2D Parameters
                if first_dimension == 'nsegment':
                    outdata = nhm_params.get_subset(pp, new_nhm_seg)
                elif first_dimension in HRU_DIMS:
                    outdata = nhm_params.get_subset(pp, hru_order_subset)
                else:
                    err_txt = 'No rules to handle 2D parameter, {}, which contains dimension {}'
                    bandit_log.error(err_txt.format(pp, first_dimension))

            new_ps.parameters.get(src_param.name).data = outdata

    # Write the new parameter file
    header = [
        'Written by Bandit version {}'.format(__version__),
        'NhmParamDb revision: {}'.format(nhmparamdb_revision)
    ]
    if args.param_netcdf:
        base_filename = os.path.splitext(param_filename)[0]
        param_filename = '{}.nc'.format(base_filename)
        new_ps.write_netcdf('{}/{}'.format(outdir, param_filename))
    else:
        new_ps.write_parameter_file('{}/{}'.format(outdir, param_filename),
                                    header=header)

    ctl.get('param_file').values = param_filename

    if args.verbose:
        sys.stdout.write('\n')
    #     sys.stdout.write('\r                                       ')
    #     sys.stdout.write('\r\tParameter file written: {}\n'.format('{}/{}'.format(outdir, param_filename)))
    sys.stdout.flush()

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Write CBH files
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if output_cbh:
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Subset the cbh files for the selected HRUs
        if len(hru_order_subset) > 0:
            if args.verbose:
                print('Processing CBH files')

            if os.path.splitext(cbh_dir)[1] == '.nc':
                cbh_hdl = CbhNetcdf(src_path=cbh_dir,
                                    st_date=st_date,
                                    en_date=en_date,
                                    nhm_hrus=hru_order_subset)
            else:
                # Subset the hru_nhm_to_local mapping
                # TODO: This section will not work with the monolithic paramdb
                #       (hru_nhm_to_local and hru_nhm_to_region are never
                #       loaded above; see the earlier TODO) - remove
                hru_order_ss = OrderedDict()
                for kk in hru_order_subset:
                    hru_order_ss[kk] = hru_nhm_to_local[kk]

                cbh_hdl = CbhAscii(src_path=cbh_dir,
                                   st_date=st_date,
                                   en_date=en_date,
                                   nhm_hrus=hru_order_subset,
                                   indices=hru_order_ss,
                                   mapping=hru_nhm_to_region)

            if args.cbh_netcdf:
                # Pull the filename prefix off of the first file found in the
                # source netcdf CBH directory.
                file_it = glob.iglob(cbh_dir)
                cbh_prefix = os.path.basename(next(file_it)).split('_')[0]

                cbh_outfile = '{}/{}.nc'.format(outdir, cbh_prefix)
                cbh_hdl.write_netcdf(cbh_outfile)
                ctl.get('tmax_day').values = os.path.basename(cbh_outfile)
                ctl.get('tmin_day').values = os.path.basename(cbh_outfile)
                ctl.get('precip_day').values = os.path.basename(cbh_outfile)
            else:
                cbh_hdl.write_ascii()
            # bandit_log.info('{} written to: {}'.format(vv, '{}/{}.cbh'.format(outdir, vv)))
        else:
            bandit_log.error('No HRUs associated with the segments')

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Write output variables
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # 2019-08-07 PAN: first prototype for extractions of output variables
    if include_model_output:
        if len(hru_order_subset) > 0:
            try:
                os.makedirs(f'{outdir}/model_output')
                print('Creating directory model_output for model output variables')
            except OSError:
                print('Using existing model_output directory for output variables')

            for vv in output_vars:
                if args.verbose:
                    sys.stdout.write(
                        '\r                                                  ')
                    sys.stdout.write(f'\rProcessing output variable: {vv} ')
                    sys.stdout.flush()

                filename = f'{output_vars_dir}/{vv}.nc'
                mod_out = ModelOutput(filename=filename,
                                      varname=vv,
                                      startdate=st_date,
                                      enddate=en_date,
                                      nhm_hrus=hru_order_subset)
                mod_out.write_csv(f'{outdir}/model_output')

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Write dynamic parameters
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if ctl.has_dynamic_parameters:
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Add dynamic parameters
        for cparam in ctl.dynamic_parameters:
            param_name = 'dyn_{}'.format(cparam)
            input_file = '{}/{}.nc'.format(dyn_params_dir, param_name)
            output_file = '{}/{}.param'.format(outdir, param_name)

            if not os.path.exists(input_file):
                warn_txt = 'WARNING: CONUS dynamic parameter file: {}, does not exist... skipping'
                bandit_log.warning(warn_txt.format(input_file))
            else:
                if args.verbose:
                    print('Writing dynamic parameter {}'.format(cparam))

                mydyn = dyn_params.DynamicParameters(input_file, cparam,
                                                     st_date, en_date,
                                                     hru_order_subset)
                # mydyn = dyn_params.DynamicParameters(input_file, cparam, st_date, en_date, hru_order_subset)

                mydyn.read_netcdf()
                out_order = list(hru_order_subset)
                for cc in ['day', 'month', 'year']:
                    out_order.insert(0, cc)

                header = ' '.join(map(str, out_order))

                # Output ASCII files
                out_ascii = open(output_file, 'w')
                out_ascii.write('{}\n'.format(cparam))
                out_ascii.write('{}\n'.format(header))
                out_ascii.write('####\n')
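                # Resulting file layout: parameter name, space-delimited HRU
                # header, a '####' separator, then one row per time step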
                mydyn.data.to_csv(out_ascii,
                                  columns=out_order,
                                  na_rep='-999',
                                  sep=' ',
                                  index=False,
                                  header=False,
                                  encoding=None,
                                  chunksize=50)
                out_ascii.close()

    # Write an updated control file alongside the original control file
    ctl.write('{}.bandit'.format(control_filename))

    if output_streamflow:
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Download the streamgage information from NWIS
        if args.verbose:
            print('Downloading NWIS streamgage observations for {} stations'.
                  format(len(new_poi_gage_id)))

        streamflow = prms_nwis.NWIS(gage_ids=new_poi_gage_id,
                                    st_date=st_date,
                                    en_date=en_date,
                                    verbose=args.verbose)
        streamflow.get_daily_streamgage_observations()
        streamflow.write_prms_data(
            filename='{}/{}'.format(outdir, obs_filename))

    # *******************************************
    # Create a shapefile of the selected HRUs
    if output_shapefiles:
        if args.verbose:
            print('-' * 40)
            print('Writing shapefiles for model subset')

        if not os.path.isdir(geo_file):
            bandit_log.error(
                'File geodatabase, {}, does not exist. Shapefiles will not be created'
                .format(geo_file))
        else:
            geo_shp = prms_geo.Geo(geo_file)

            # Create GIS sub-directory if it doesn't already exist
            gis_dir = '{}/GIS'.format(outdir)
            try:
                os.makedirs(gis_dir)
            except OSError as exception:
                if exception.errno != errno.EEXIST:
                    raise

            # Output a shapefile of the selected HRUs
            # print('\tHRUs')
            # geo_shp.select_layer('nhruNationalIdentifier')
            geo_shp.select_layer('nhru')
            # geo_shp.write_shapefile('{}/GIS/HRU_subset.shp'.format(outdir), 'hru_id_nat', hru_order_subset,
            geo_shp.write_shapefile('{}/GIS/HRU_subset.shp'.format(outdir),
                                    'hru_id_nat',
                                    hru_order_subset,
                                    included_fields=[
                                        'nhm_id', 'model_idx', 'region',
                                        'hru_id_nat'
                                    ])

            # geo_shp.write_shapefile3('{}/GIS/HRU_subset.gdb'.format(outdir), 'hru_id_nat', hru_order_subset)

            # geo_shp.filter_by_attribute('hru_id_nat', hru_order_subset)
            # geo_shp.write_shapefile2('{}/HRU_subset.shp'.format(outdir))
            # geo_shp.write_kml('{}/HRU_subset.kml'.format(outdir))

            # Output a shapefile of the selected stream segments
            # print('\tSegments')
            geo_shp.select_layer('nsegmentNationalIdentifier')
            geo_shp.write_shapefile(
                '{}/GIS/Segments_subset.shp'.format(outdir),
                'seg_id_nat',
                new_nhm_seg,
                included_fields=['seg_id_nat', 'model_idx', 'region'])

            # geo_shp.filter_by_attribute('seg_id_nat', uniq_seg_us)
            # geo_shp.write_shapefile2('{}/Segments_subset.shp'.format(outdir))

            del geo_shp

    bandit_log.info('========== END {} =========='.format(
        datetime.datetime.now().isoformat()))

    os.chdir(stdir)