示例#1
0
def get_fm_file(file_name, download_if_missing=False):
    """Read a facilitymatcher file, obtaining it first if not found locally.

    When the file cannot be loaded locally it is downloaded from the remote
    server (if ``download_if_missing`` is True). If it still cannot be
    loaded, it is generated by the matching writer function.

    :param file_name: str, can be 'FacilityMatchList_forStEWI' or
        'FRS_NAICSforStEWI'
    :param download_if_missing: bool, if True will attempt to load from
        remote server prior to generating if file not found locally
    :return: DataFrame with the 'FRS_ID', 'FacilityID' and 'NAICS' columns
        (where present) cast to str, or None if the file could not be
        loaded after the download/generation attempts
    """
    file_meta = set_facilitymatcher_meta(file_name, category='')
    df = load_preprocessed_output(file_meta, paths)

    # First fallback: remote download, only when the caller asked for it.
    if df is None and download_if_missing:
        log.info(f'{file_name} not found in {output_dir}, '
                 'downloading from remote source')
        download_from_remote(file_meta, paths)
        df = load_preprocessed_output(file_meta, paths)

    # Second fallback: generate the file locally. This also covers a
    # requested download that did not produce a loadable file.
    if df is None:
        log.info(f'{file_name} not found in {output_dir}, '
                 'writing facility matches to file')
        if file_name == 'FacilityMatchList_forStEWI':
            write_fm.write_facility_matches()
        elif file_name == 'FRS_NAICSforStEWI':
            write_naics.write_NAICS_matches()
        df = load_preprocessed_output(file_meta, paths)

    # Nothing worked: report it instead of failing on `k in df` below.
    if df is None:
        log.error(f'{file_name} could not be loaded, downloaded or generated')
        return None

    # Identifier columns must be strings.
    col_dict = {"FRS_ID": "str",
                "FacilityID": "str",
                "NAICS": "str"}
    for k, v in col_dict.items():
        if k in df:
            df[k] = df[k].astype(v)
    return df
示例#2
0
def standardize_output(year, source='Point'):
    """Read and parse NEI data.

    Each file listed for ``year`` in the configuration is read (after
    downloading it if it is missing from OUTPUT_PATH), the results are
    combined into one DataFrame, flow amounts are converted and data
    quality information is attached.

    :param year: str, Year of NEI dataset
    :param source: str, value written to the 'Source' column. For 'Point'
        (default) the 'ReliabilityScore' codes are mapped to
        'DataReliability' via the NEI reliability table and 'Compartment'
        is set to 'air'; for any other value 'DataReliability' is set to 3.
    :returns nei: DataFrame of parsed NEI data.
    """
    # Read every NEI file, then concatenate once. Concatenating inside the
    # loop re-copies all previously read rows on each iteration and depends
    # on how pandas treats the initial empty frame.
    frames = []
    record_count = 0
    for file in _config[year]['file_name']:
        filename = OUTPUT_PATH.joinpath(file)
        if not filename.is_file():
            log.info(f'{file} not found in {OUTPUT_PATH}, '
                     'downloading source data')
            # download source file and metadata
            file_meta = set_stewi_meta(strip_file_extension(file))
            file_meta.category = EXT_DIR
            file_meta.tool = file_meta.tool.lower()
            download_from_remote(file_meta, paths)
        log.info(f'reading NEI data from {filename}')
        frame = read_data(year, filename)
        frames.append(frame)
        record_count += len(frame)
        log.debug(f'{record_count} records')
    # An empty file list yields an empty DataFrame, as before.
    nei = pd.concat(frames) if frames else pd.DataFrame()

    # convert TON to KG
    nei['FlowAmount'] = nei['FlowAmount'] * USton_kg

    log.info('adding Data Quality information')
    if source == 'Point':
        nei_reliability_table = get_reliability_table_for_source('NEI')
        # Both merge keys are cast to float so the codes compare equal.
        nei_reliability_table['Code'] = nei_reliability_table['Code'].astype(
            float)
        nei['ReliabilityScore'] = nei['ReliabilityScore'].astype(float)
        nei = nei.merge(nei_reliability_table,
                        left_on='ReliabilityScore',
                        right_on='Code',
                        how='left')
        nei['DataReliability'] = nei['DQI Reliability Score']
        # drop Code and DQI Reliability Score columns
        nei = nei.drop(
            columns=['Code', 'DQI Reliability Score', 'ReliabilityScore'])

        nei['Compartment'] = 'air'
        # Disabled: modify compartment based on stack height (ft)
        # nei.loc[nei['StackHeight'] < 32, 'Compartment'] = 'air/ground'
        # nei.loc[(nei['StackHeight'] >= 32) & (nei['StackHeight'] < 164),
        #         'Compartment'] = 'air/low'
        # nei.loc[(nei['StackHeight'] >= 164) & (nei['StackHeight'] < 492),
        #         'Compartment'] = 'air/high'
        # nei.loc[nei['StackHeight'] >= 492, 'Compartment'] = 'air/very high'
    else:
        nei['DataReliability'] = 3
    # add Source column
    nei['Source'] = source
    nei.reset_index(drop=True, inplace=True)
    return nei
示例#3
0
def getFlowByActivity(datasource,
                      year,
                      flowclass=None,
                      geographic_level=None,
                      download_if_missing=DEFAULT_DOWNLOAD_IF_MISSING):
    """
    Retrieves stored data in the FlowByActivity format.

    The FBA is loaded locally; if it is missing it is downloaded from the
    remote server (when ``download_if_missing`` is True) and, failing that,
    generated. The optional filters are applied to the loaded data.

    :param datasource: str, the code of the datasource.
    :param year: int, a year, e.g. 2012
    :param flowclass: str, a 'Class' of the flow. Optional. E.g. 'Water'
    :param geographic_level: str, a geographic level of the data.
                             Optional. E.g. 'national', 'state', 'county'.
    :param download_if_missing: bool, if True will attempt to load from remote server
        prior to generating if file not found locally
    :return: a pandas DataFrame in FlowByActivity format, or None if the
        FBA could not be loaded, downloaded or generated
    """
    from esupy.processed_data_mgmt import download_from_remote
    # Set fba metadata
    name = flowsa.flowbyactivity.set_fba_name(datasource, year)
    fba_meta = set_fb_meta(name, "FlowByActivity")

    # Try to load a local version of fba
    fba = load_preprocessed_output(fba_meta, paths)

    # Remote download
    if fba is None and download_if_missing:
        log.info('%s %s not found in %s, downloading from remote source',
                 datasource, str(year), fbaoutputpath)
        download_from_remote(fba_meta, paths)
        fba = load_preprocessed_output(fba_meta, paths)

    # Generate the fba if it is still missing, then load it
    if fba is None:
        log.info('%s %s not found in %s, running functions to generate FBA',
                 datasource, str(year), fbaoutputpath)
        flowsa.flowbyactivity.main(year=year, source=datasource)
        fba = load_preprocessed_output(fba_meta, paths)

    # Stop here on failure: the filters below would otherwise subscript None.
    if fba is None:
        log.error('getFlowByActivity failed, FBA not found')
        return None
    log.info('Loaded %s %s from %s', datasource, str(year), fbaoutputpath)

    # Address optional parameters
    if flowclass is not None:
        fba = fba[fba['Class'] == flowclass]
    # if geographic level specified, only load rows in geo level
    if geographic_level is not None:
        fba = filter_by_geoscale(fba, geographic_level)
    return fba
示例#4
0
def getFlowBySector(methodname,
                    download_FBAs_if_missing=DEFAULT_DOWNLOAD_IF_MISSING,
                    download_FBS_if_missing=DEFAULT_DOWNLOAD_IF_MISSING):
    """
    Return the stored FlowBySector output for a method, obtaining it first
    (by remote download and/or generation) when it is not available locally.
    :param methodname: string, Name of an available method for the given class
    :param download_FBAs_if_missing: bool, if True will attempt to load FBAS
        used in generating the FBS from remote server prior to generating if
        file not found locally
    :param download_FBS_if_missing: bool, if True will attempt to load from
        remote server prior to generating if file not found locally
    :return: dataframe in flow by sector format; None if the FBS could not
        be loaded by any of the above routes
    """
    meta = set_fb_meta(methodname, "FlowBySector")
    result = load_preprocessed_output(meta, paths)

    if result is None:
        # No local copy: optionally fetch the FBS parquet (and its log file,
        # placed in the 'Log' subdirectory) from the remote server.
        if download_FBS_if_missing:
            log.info('%s not found in %s, downloading from remote source',
                     methodname, fbsoutputpath)
            download_from_remote(meta,
                                 paths,
                                 subdirectory_dict={'.log': 'Log'})
            result = load_preprocessed_output(meta, paths)

        # Download not requested or unsuccessful: build the FBS, passing
        # along whether required FBAs may be downloaded.
        if result is None:
            log.info('%s not found in %s, running functions to generate FBS',
                     methodname, fbsoutputpath)
            flowsa.flowbysector.main(
                method=methodname,
                download_FBAs_if_missing=download_FBAs_if_missing)
            result = load_preprocessed_output(meta, paths)

    # Every route failed.
    if result is None:
        log.error('getFlowBySector failed, FBS not found')
        return result

    # One of the routes successfully loaded the FBS.
    log.info('Loaded %s from %s', methodname, fbsoutputpath)
    return result
示例#5
0
def read_inventory(inventory_acronym, year, f, download_if_missing=False):
    """Load a stored inventory, downloading or generating it when absent.

    :param inventory_acronym: like 'TRI'
    :param year: year as number like 2010
    :param f: object of class StewiFormat
    :param download_if_missing: bool, if True will attempt to load from
        remote server prior to generating if file not found locally
    :return: dataframe of stored inventory with columns cast to the types
        given by ``f.field_types()``; None if it could not be loaded
    """
    meta = set_stewi_meta('_'.join((inventory_acronym, str(year))), str(f))
    inventory = load_preprocessed_output(meta, paths)
    method_path = '/'.join((paths.local_path, meta.category))

    if inventory is None:
        log.info(f'{meta.name_data} not found in {method_path}')
        if not download_if_missing:
            log.info('requested inventory does not exist in local directory, '
                     'it will be generated...')
            generate_inventory(inventory_acronym, year)
        else:
            # lower case for remote access
            meta.tool = meta.tool.lower()
            download_from_remote(meta, paths)
            # The metadata file is fetched separately: same name, no
            # category, json extension.
            metadata_meta = copy.copy(meta)
            metadata_meta.category = ''
            metadata_meta.ext = 'json'
            download_from_remote(metadata_meta, paths)
        inventory = load_preprocessed_output(meta, paths)
        if inventory is None:
            log.error('error generating inventory')
            return inventory

    log.info(f'loaded {meta.name_data} from {method_path}')
    # Ensure dtypes, restricted to the fields present in the inventory.
    present = list(inventory)
    dtypes = {}
    for column, dtype in f.field_types().items():
        if column in present:
            dtypes[column] = dtype
    return inventory.astype(dtypes)
示例#6
0
def getFlowBySector(methodname,
                    download_if_missing=DEFAULT_DOWNLOAD_IF_MISSING):
    """
    Return the stored FlowBySector output for a method; when it is not
    available locally it is downloaded (if allowed) or generated, then loaded.
    :param methodname: string, Name of an available method for the given class
    :param download_if_missing: bool, if True will attempt to load from remote server
        prior to generating if file not found locally
    :return: dataframe in flow by sector format; None if the FBS could not
        be loaded, downloaded or generated
    """
    from esupy.processed_data_mgmt import download_from_remote

    meta = set_fb_meta(methodname, "FlowBySector")
    flow_by_sector = load_preprocessed_output(meta, paths)

    if flow_by_sector is None:
        # Remote download of the FBS parquet; the '.log' file goes to 'Log'
        if download_if_missing:
            log.info('%s not found in %s, downloading from remote source',
                     methodname, fbsoutputpath)
            log_subdirs = {'.log': 'Log'}
            download_from_remote(meta, paths, subdirectory_dict=log_subdirs)
            flow_by_sector = load_preprocessed_output(meta, paths)

        # Still no FBS (download not requested or unsuccessful): generate
        # the FBS and load it
        if flow_by_sector is None:
            log.info('%s not found in %s, running functions to generate FBS',
                     methodname, fbsoutputpath)
            flowsa.flowbysector.main(method=methodname)
            flow_by_sector = load_preprocessed_output(meta, paths)

    if flow_by_sector is None:
        log.error('getFlowBySector failed, FBS not found')
        return flow_by_sector

    log.info('Loaded %s from %s', methodname, fbsoutputpath)
    return flow_by_sector
示例#7
0
def download_stewicombo_from_remote(name):
    """Build the stewicombo metadata for a file and download it via esupy.

    :param name: name of the file to download; passed to
        ``set_stewicombo_meta`` (with an empty category) and reported in
        the log message
    """
    file_meta = set_stewicombo_meta(name, category='')
    log.info(f'attempting download of {name} from {paths.remote_path}')
    download_from_remote(file_meta, paths)