Example #1
def write_dataset(self, dataset, attributes, out_storage):
    with block("Exporting %s dataset" % dataset.get_dataset_name()):
        if isinstance(out_storage, csv_storage):
            # Collect the requested attributes, keyed by their short alias.
            data = {}
            for attr in attributes:
                data[VariableName(attr).get_alias()] = dataset[attr]
            dataframe = DataFrame(data)
            output_loc = out_storage.get_storage_location()
            if not os.path.exists(output_loc):
                os.makedirs(output_loc)
            filename = dataset.out_table_name_default + '.csv'
            filepath = os.path.join(output_loc, filename)
            logger.log_status("export to %s" % filepath)
            # 'columns' replaces the deprecated 'cols' keyword of DataFrame.to_csv.
            dataframe.to_csv(filepath, columns=list(data.keys()), index=False)
        else:
            # Non-CSV storages know how to write Opus datasets directly.
            dataset.write_dataset(attributes=attributes,
                                  out_storage=out_storage)
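
The core pattern here — collect named arrays into a dict, wrap them in a DataFrame, and write with an explicit column order — also works standalone. A minimal sketch with made-up data (the path and column names are illustrative, not from the original code):

import os
import numpy as np
from pandas import DataFrame

data = {'zone_id': np.arange(1, 4), 'jobs': np.array([120, 45, 300])}
out_dir = '/tmp/export_demo'  # hypothetical output location
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

df = DataFrame(data)
# Explicit column order; index=False keeps the pandas row index out of the file.
df.to_csv(os.path.join(out_dir, 'zones.csv'),
          columns=['zone_id', 'jobs'], index=False)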
Example #2
def import_travel_model_data(config, year):

    cache_directory = config['cache_directory']
    simulation_state = SimulationState()
    simulation_state.set_current_time(year)
    simulation_state.set_cache_directory(cache_directory)
    # Travel model output for year N is cached for simulation year N+1.
    out_store = AttributeCache().get_flt_storage_for_year(year + 1)
    out_store_loc = out_store.get_storage_location()

    tm_config = config['travel_model_configuration']
    data_to_import = tm_config['tm_to_urbansim_variable_mapping']
    base_dir = mtc_common.tm_get_base_dir(config)
    data_dir = tm_config[year]['data_dir']

    for dataset_name, skim_file in data_to_import.items():
        skim_file = os.path.join(base_dir, data_dir, skim_file)
        data = read_csv(skim_file, header=0)

        with block("Caching {} to {}".format(dataset_name, out_store_loc)):
            logger.log_status("Source file {}".format(skim_file))
            # Convert the skim table into an Opus dataset cached under out_store.
            opus_ds = to_opus_dataset(data, out_store, dataset_name)
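
For context, the loop above expects tm_to_urbansim_variable_mapping to map each dataset name to a skim CSV path relative to the travel model data directory, and the travel model configuration to carry a per-year data_dir. A hypothetical configuration fragment (all file and directory names invented):

tm_config = {
    'tm_to_urbansim_variable_mapping': {
        'travel_data': 'skims/travel_data.csv',  # hypothetical
        'zone': 'zonal/zone_data.csv',           # hypothetical
    },
    2010: {'data_dir': 'runs/2010'},             # hypothetical
}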
Example #3
from pandas import DataFrame, merge

# Helper converting an Opus dataset to a pandas DataFrame (the def line is
# missing from the snippet; the name to_dataframe is assumed for illustration).
def to_dataframe(opus_dataset):
    df = {}
    for attr in opus_dataset.get_known_attribute_names():
        df[attr] = opus_dataset[attr]

    df = DataFrame(df)
    return df

# Build plain dict-of-arrays copies of both datasets for the pandas runs
# (the same conversion the helper above performs, inlined).
units_df = {}
bldg_df = {}
for attr in units.get_known_attribute_names():
    units_df[attr] = units[attr]
for attr in bldg.get_known_attribute_names():
    bldg_df[attr] = bldg[attr]

with block('opus join'):
    results_opus = units.compute_variables(
        'residential_unit.disaggregate(building.building_type_id)',
        dataset_pool=dataset_pool)

units_df = DataFrame(units_df)
bldg_df = DataFrame(bldg_df)
with block('pandas join without index'):
    units_merged1 = merge(units_df, bldg_df[['building_id', 'building_type_id']],
                          on='building_id', sort=False, how='left')
    # Unmatched units come back as NaN; fill with -1 to match Opus conventions.
    results_df1 = units_merged1['building_type_id'].fillna(-1)

# Move building_id into the index so join() can use an index lookup.
bldg_df.set_index('building_id', inplace=True)
with block('pandas join with index'):
    units_merged2 = units_df.join(bldg_df['building_type_id'], on='building_id', how='left')
    results_df2 = units_merged2['building_type_id']
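
Both pandas variants produce the same column; setting building_id as the index lets join() resolve matches through an index lookup rather than a column-on-column merge, which is typically faster on large tables. A self-contained toy version of the same comparison (all values invented):

from pandas import DataFrame, merge

units_df = DataFrame({'unit_id': [1, 2, 3], 'building_id': [10, 10, 99]})
bldg_df = DataFrame({'building_id': [10, 20], 'building_type_id': [4, 7]})

# Column-on-column merge; the unmatched building 99 yields NaN, filled with -1.
merged = merge(units_df, bldg_df, on='building_id', how='left', sort=False)
result1 = merged['building_type_id'].fillna(-1)

# Index join: same answer via an index lookup on bldg_df.
joined = units_df.join(bldg_df.set_index('building_id')['building_type_id'],
                       on='building_id', how='left')
result2 = joined['building_type_id'].fillna(-1)

assert (result1 == result2).all()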
Example #4
    def run(self, year=None, years_to_run=(), configuration=None):
        if year not in years_to_run or self.data_to_export is None:
            return

        cache_directory = configuration['cache_directory']
        simulation_state = SimulationState()
        simulation_state.set_cache_directory(cache_directory)
        simulation_state.set_current_time(year)
        attribute_cache = AttributeCache()
        package_order = configuration['dataset_pool_configuration'].package_order
        dataset_pool = SessionConfiguration(new_instance=True,
                                            package_order=package_order,
                                            in_storage=attribute_cache
                                            ).get_dataset_pool()
        out_dir = os.path.join(cache_directory, "mtc_data")

        out_storage = csv_storage(storage_location=out_dir)

        # Adjust the age distribution per ABAG/MTC's specifications
        age_control_dir = os.path.join(paths.OPUS_DATA_PATH, configuration['project_name'], "ageControl")
        age_control_storage = csv_storage(storage_location=age_control_dir)
        # Use the tazData control file whose year is closest to the simulated year.
        years = np.array([int(os.path.basename(x).replace("tazData", "").replace(".csv", ""))
                          for x in glob.glob(os.path.join(age_control_dir, "tazData*.csv"))])
        closest_year = years[np.argmin(np.abs(years - year))]
        if closest_year != year:
            logger.log_warning("Could not find age control data for " + str(year) +
                               ".  Choosing nearest year " + str(closest_year) + ".")

        age_control_table = age_control_storage.load_table("tazData" + str(closest_year), lowercase=False)

        # Calculate the ABAG target shares of persons by age category
        age_categories = ['AGE0004', 'AGE0519', 'AGE2044', 'AGE4564', 'AGE65P']
        age_category_sums = dict((k, age_control_table[k].sum()) for k in age_categories)
        total = float(sum(age_category_sums.values()))
        abag_age_category_shares = dict((k, age_category_sums[k]/total) for k in age_categories)

        for data_fname, variable_mapping in self.data_to_export.items():

            # flip_urbansim_to_tm_variable_mappling is a module-level flag
            # defined elsewhere in the original module; it selects the
            # direction of the variable mapping.
            if not flip_urbansim_to_tm_variable_mappling:
                col_names = list(variable_mapping.values())
                variables_aliases = ["=".join(mapping[::-1])
                                     for mapping in variable_mapping.items()]
            else:
                col_names = list(variable_mapping.keys())
                variables_aliases = ["=".join(mapping)
                                     for mapping in variable_mapping.items()]

            dataset_name = VariableName(variables_aliases[0]).get_dataset_name()
            dataset = dataset_pool.get_dataset(dataset_name)
            dataset.compute_variables(variables_aliases)

            if data_fname == "ABAGData":
                logger.log_status("Adjusting ABAGData to match age controls")
                age_category_sums = dict((k, dataset[k].sum()) for k in age_categories)
                total = float(sum(age_category_sums.values()))
                us_age_category_shares = dict((k, age_category_sums[k]/total) for k in age_categories)
                adjustments = dict((k, abag_age_category_shares[k]/us_age_category_shares[k]) for k in age_categories)
                diff = np.zeros(dataset.n)
                for k in age_categories:
                    before = dataset[k]
                    # Scale each age column toward the control shares, rounding
                    # to whole persons, and track the rounding drift.
                    dataset[k] = np.array([round(v * adjustments[k])
                                           for v in dataset.get_attribute(k)])
                    diff += (dataset[k] - before)
                dataset["TOTPOP"] += diff
                dataset["HHPOP"] += diff
                logger.log_status("NOTE: Adjusted total population by %d (%2.3f%%) due to rounding error." %
                                  (int(diff.sum()), diff.sum()*100/total))

            org_fname = os.path.join(out_dir, "%s.computed.csv" % data_fname)
            new_fname = os.path.join(out_dir, "%s%s.csv" % (year, data_fname))
            block_msg = "Writing {} for travel model to {}".format(data_fname,
                                                                   new_fname)
            with block(block_msg):
                dataset.write_dataset(attributes=col_names,
                                      out_storage=out_storage,
                                      out_table_name=data_fname)
                # Rename the output and strip Opus type suffixes (e.g. ":i4")
                # from the CSV header so the travel model can read it.
                shutil.move(org_fname, new_fname)
                os.system("sed -i 's/:[a-z][0-9]//g' %s" % new_fname)
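
The age adjustment above is a share-matching rescale: each age column is multiplied by target_share / current_share, values are rounded to whole persons, and the accumulated rounding drift is folded back into the population totals. A self-contained sketch with invented numbers (two categories and two zones, for brevity):

import numpy as np

age_categories = ['AGE0004', 'AGE0519']
target_shares = {'AGE0004': 0.30, 'AGE0519': 0.70}  # hypothetical controls

data = {'AGE0004': np.array([40.0, 10.0]),
        'AGE0519': np.array([60.0, 90.0])}
totpop = data['AGE0004'] + data['AGE0519']

total = float(sum(col.sum() for col in data.values()))
current_shares = {k: data[k].sum() / total for k in age_categories}
adjustments = {k: target_shares[k] / current_shares[k] for k in age_categories}

diff = np.zeros(2)
for k in age_categories:
    before = data[k]
    data[k] = np.array([round(v * adjustments[k]) for v in data[k]])
    diff += data[k] - before
totpop += diff  # keep totals consistent with the rounded columns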