def test_mnl_estimation(obs, alts):
    """
    Confirm that estimated params from the new interface match
    urbansim.urbanchoice. Only runs if the urbansim package has been
    installed.

    Parameters
    ----------
    obs, alts
        Passed to `MergedChoiceTable` as the observations and alternatives.

    """
    # Only a failed import means "urbansim is not installed". Any other
    # exception should surface instead of silently skipping the comparison.
    try:
        from urbansim.urbanchoice.mnl import mnl_estimate
    except ImportError:
        print(
            "Comparison of MNL estimation results skipped because urbansim is not installed"
        )
        return

    model_expression = 'obsval + altval - 1'
    mct = MergedChoiceTable(obs, alts, 'choice')

    # new interface
    m = MultinomialLogit(mct, model_expression)
    r = m.fit().get_raw_results()

    # old interface
    dm = dmatrix(model_expression, mct.to_frame())
    # NOTE(review): the (100, 5) shape presumably mirrors the fixture sizes
    # (100 observations x 5 alternatives) -- confirm against the fixtures.
    chosen = np.reshape(mct.to_frame()[mct.choice_col].values, (100, 5))
    log_lik, fit = mnl_estimate(np.array(dm), chosen, numalts=5)

    # Every log-likelihood entry from the old interface must match the new one
    for k, v in log_lik.items():
        assert v == pytest.approx(r['log_likelihood'][k], 0.00001)

    assert_frame_equal(
        fit, r['fit_parameters'][['Coefficient', 'Std. Error', 'T-Score']])
def test_mnl_estimation(obs, alts):
    """
    Confirm that estimated params from the new interface match
    urbansim.urbanchoice. Only runs if the urbansim package has been
    installed.

    Parameters
    ----------
    obs, alts
        Passed to `MergedChoiceTable` as the observations and alternatives.

    """
    try:
        from urbansim.urbanchoice.mnl import mnl_estimate
    except ImportError:
        # Catch only the missing-package case; a bare `except` would also hide
        # unrelated failures (and KeyboardInterrupt) behind a "skipped" message.
        print("Comparison of MNL estimation results skipped because urbansim is not installed")
        return

    model_expression = 'obsval + altval - 1'
    mct = MergedChoiceTable(obs, alts, 'choice')

    # new interface
    m = MultinomialLogit(mct, model_expression)
    r = m.fit().get_raw_results()

    # old interface
    dm = dmatrix(model_expression, mct.to_frame())
    # NOTE(review): (100, 5) is hard-coded; presumably the fixture dimensions.
    chosen = np.reshape(mct.to_frame()[mct.choice_col].values, (100, 5))
    log_lik, fit = mnl_estimate(np.array(dm), chosen, numalts=5)

    for k, v in log_lik.items():
        assert v == pytest.approx(r['log_likelihood'][k], 0.00001)

    assert_frame_equal(
        fit, r['fit_parameters'][['Coefficient', 'Std. Error', 'T-Score']])
def test_mnl_prediction(obs, alts):
    """
    Confirm that fitted probabilities in the new codebase match
    urbansim.urbanchoice. Only runs if the urbansim package has been
    installed.

    Parameters
    ----------
    obs, alts
        Passed to `MergedChoiceTable` as the observations and alternatives.

    """
    # Skip only when the optional dependency is genuinely missing; other
    # exceptions should not be swallowed.
    try:
        from urbansim.urbanchoice.mnl import mnl_simulate
    except ImportError:
        print("Comparison of MNL simulation results skipped because urbansim is not installed")
        return

    # produce a fitted model
    mct = MergedChoiceTable(obs, alts, 'choice', 5)
    m = MultinomialLogit(mct, model_expression='obsval + altval - 1')
    results = m.fit()

    # get predicted probabilities using choicemodels
    probs1 = results.probabilities(mct)

    # compare to probabilities from urbansim.urbanchoice
    dm = dmatrix(results.model_expression, data=mct.to_frame(),
                 return_type='dataframe')

    probs = mnl_simulate(data=dm, coeff=results.fitted_parameters,
                         numalts=mct.sample_size, returnprobs=True)

    # Attach the flattened probabilities to the merged table so the series
    # carries the same index as `probs1`
    df = mct.to_frame()
    df['prob'] = probs.flatten()
    probs2 = df.prob

    pd.testing.assert_series_equal(probs1, probs2)
def mct(obs, alts, intx_ops=None):
    """
    Build a MergedChoiceTable for `obs` and `alts`, optionally post-processed
    by the enclosing object's interaction operations.

    Reads `self` and `interaction_terms` from the enclosing scope.

    Parameters
    ----------
    obs, alts
        Forwarded to `MergedChoiceTable`.
    intx_ops : optional
        When truthy, the table is passed through `self.perform_mct_intx_ops`
        and its `sample_size` is reset to `self.alt_sample_size`.

    Returns
    -------
    MergedChoiceTable

    """
    table = MergedChoiceTable(
        obs, alts,
        sample_size=self.alt_sample_size,
        interaction_terms=interaction_terms)

    # Nothing more to do without interaction operations
    if not intx_ops:
        return table

    table = self.perform_mct_intx_ops(table)
    table.sample_size = self.alt_sample_size
    return table
def test_no_choosers(obs, alts):
    """
    An empty observations table should yield an empty merged choice table.

    """
    empty_obs = pd.DataFrame()
    merged_df = MergedChoiceTable(empty_obs, alts).to_frame()
    assert len(merged_df) == 0
def test_no_alternatives(obs, alts):
    """
    An empty alternatives table should yield an empty merged choice table.

    """
    empty_alts = pd.DataFrame()
    merged_df = MergedChoiceTable(obs, empty_alts).to_frame()
    assert len(merged_df) == 0
def test_mnl(obs, alts):
    """
    Smoke test: MNL estimation with the native estimator runs to completion
    and its result can be printed.

    """
    merged = MergedChoiceTable(obs, alts, 'choice')
    model = MultinomialLogit(merged, 'obsval + altval - 1')
    fitted = model.fit()
    print(fitted)
def test_join_key_name_conflict(obs, alts):
    """
    The join key is the one permitted duplicate column name: the
    chosen_alternatives column in the observations may share its name with
    the index of the alternatives. Constructing the table must not raise.

    """
    join_key = alts.index.name

    # Mirror the chosen alternative into a column named after the alts index
    obs[join_key] = obs.choice

    MergedChoiceTable(obs, alts, chosen_alternatives=join_key)
def test_dupe_column(obs, alts):
    """
    Duplicate column names should raise an error.

    The test fails if `MergedChoiceTable` accepts the conflicting tables
    without raising `ValueError`.

    """
    obs['save_the_whales'] = None
    alts['save_the_whales'] = None

    try:
        MergedChoiceTable(obs, alts)
    except ValueError as e:
        print(e)
    else:
        # Without this branch the test would pass even when no error is raised
        raise AssertionError(
            "MergedChoiceTable did not raise ValueError for a duplicate column")
def test_mnl_estimation(obs, alts):
    """
    Check that parameters estimated through the new interface agree with
    those from urbansim.urbanchoice.

    """
    expression = 'obsval + altval - 1'
    merged = MergedChoiceTable(obs, alts, 'choice')

    # new interface
    raw = MultinomialLogit(merged, expression).fit().get_raw_results()

    # old interface
    design = dmatrix(expression, merged.to_frame())
    chosen_flags = merged.to_frame()[merged.choice_col].values
    chosen = np.reshape(chosen_flags, (100, 5))
    log_lik, fit = mnl_estimate(np.array(design), chosen, numalts=5)

    for key, old_value in log_lik.items():
        assert old_value == pytest.approx(raw['log_likelihood'][key], 0.00001)

    compared_cols = ['Coefficient', 'Std. Error', 'T-Score']
    assert_frame_equal(fit, raw['fit_parameters'][compared_cols])
def test_multiple_dupe_columns(obs, alts):
    """
    Duplicate column names should raise an error. This covers the case of
    multiple columns, and the case of an index conflicting with a non-index.

    The test fails if `MergedChoiceTable` accepts the conflicting tables
    without raising `ValueError`.

    """
    obs['save_the_whales'] = None
    alts['save_the_whales'] = None
    # Column in alts that collides with the name of the obs index
    alts[obs.index.name] = None

    try:
        MergedChoiceTable(obs, alts)
    except ValueError as e:
        print(e)
    else:
        # Without this branch the test would pass even when no error is raised
        raise AssertionError(
            "MergedChoiceTable did not raise ValueError for duplicate columns")
def test_mnl_prediction(obs, alts):
    """
    Confirm that fitted probabilities in the new codebase match
    urbansim.urbanchoice. Only runs if the urbansim package has been
    installed.

    Parameters
    ----------
    obs, alts
        Passed to `MergedChoiceTable` as the observations and alternatives.

    """
    try:
        from urbansim.urbanchoice.mnl import mnl_simulate
    except ImportError:
        # Narrowed from a bare `except` so that only a missing urbansim
        # package triggers the skip; other errors propagate.
        print(
            "Comparison of MNL simulation results skipped because urbansim is not installed"
        )
        return

    # produce a fitted model
    mct = MergedChoiceTable(obs, alts, 'choice', 5)
    m = MultinomialLogit(mct, model_expression='obsval + altval - 1')
    results = m.fit()

    # get predicted probabilities using choicemodels
    probs1 = results.probabilities(mct)

    # compare to probabilities from urbansim.urbanchoice
    dm = dmatrix(results.model_expression,
                 data=mct.to_frame(),
                 return_type='dataframe')

    probs = mnl_simulate(data=dm,
                         coeff=results.fitted_parameters,
                         numalts=mct.sample_size,
                         returnprobs=True)

    # Put the flattened probabilities on the merged table's index before
    # comparing them as a Series
    df = mct.to_frame()
    df['prob'] = probs.flatten()
    probs2 = df.prob

    pd.testing.assert_series_equal(probs1, probs2)
def fit(self):
    """
    Estimate the model, then store and print the results.

    Estimation is done by the ChoiceModels engine (originally from UrbanSim
    MNL). `fit()` may be called repeatedly; nothing is saved with Orca or
    ModelManager until `register()` is run.

    Sets `self.name`, `self.summary_table` and `self.fitted_parameters`.

    """
    # TO DO - update choicemodels to accept a column name for chosen alts
    choosers_df = self._get_df(tables=self.choosers,
                               filters=self.chooser_filters)
    chosen = choosers_df[self.choice_column]

    alts_df = self._get_df(tables=self.alternatives,
                           filters=self.alt_filters)

    merged = MergedChoiceTable(observations=choosers_df,
                               alternatives=alts_df,
                               chosen_alternatives=chosen,
                               sample_size=self._get_alt_sample_size())

    estimator = MultinomialLogit(data=merged.to_frame(),
                                 observation_id_col=merged.observation_id_col,
                                 choice_col=merged.choice_col,
                                 model_expression=self.model_expression)
    results = estimator.fit()

    self.name = self._generate_name()
    self.summary_table = str(results)
    print(self.summary_table)

    # For now, just save the summary table and fitted parameters
    fit_params = results.get_raw_results()['fit_parameters']
    self.fitted_parameters = fit_params['Coefficient'].tolist()
def test_mnl_prediction(obs, alts):
    """
    Check that fitted probabilities from the new codebase agree with those
    from urbansim.urbanchoice.

    """
    # produce a fitted model
    merged = MergedChoiceTable(obs, alts, 'choice', 5)
    fitted = MultinomialLogit(
        merged, model_expression='obsval + altval - 1').fit()

    # predicted probabilities from choicemodels
    new_probs = fitted.probabilities(merged)

    # predicted probabilities from urbansim.urbanchoice
    design = dmatrix(fitted.model_expression,
                     data=merged.to_frame(),
                     return_type='dataframe')
    raw_probs = mnl_simulate(data=design,
                             coeff=fitted.fitted_parameters,
                             numalts=merged.sample_size,
                             returnprobs=True)

    frame = merged.to_frame()
    frame['prob'] = raw_probs.flatten()

    pd.testing.assert_series_equal(new_probs, frame.prob)
def run(self):
    """
    Run the model step: simulate choices and write them to a column.

    Predicted probabilities and simulated choices come from ChoiceModels.
    For now the choices are unconstrained (any number of choosers can select
    the same alternative).

    Results are kept on the object for interactive use -- `probabilities`
    (pd.DataFrame) and `choices` (pd.Series) -- but are not persisted in the
    dictionary representation of the model step.

    """
    choosers_df = self._get_df(tables=self.out_choosers,
                               fallback_tables=self.choosers,
                               filters=self.out_chooser_filters)

    alts_df = self._get_df(tables=self.out_alternatives,
                           fallback_tables=self.alternatives,
                           filters=self.out_alt_filters)

    sample_size = self._get_alt_sample_size()

    merged = MergedChoiceTable(observations=choosers_df,
                               alternatives=alts_df,
                               sample_size=sample_size)
    merged_df = merged.to_frame()

    # Data columns need to align with the coefficients
    design = patsy.dmatrix(self.model_expression,
                           data=merged_df,
                           return_type='dataframe')

    # Get probabilities and choices
    simulate_args = dict(data=design,
                         coeff=self.fitted_parameters,
                         numalts=sample_size)
    probs = mnl.mnl_simulate(returnprobs=True, **simulate_args)

    # TO DO - this ends up recalculating the probabilities because there's not
    # currently a code path to get both at once - fix this)
    choice_positions = mnl.mnl_simulate(returnprobs=False, **simulate_args)

    alt_ids = merged_df[merged.alternative_id_col].tolist()
    choices = self._get_chosen_ids(alt_ids, choice_positions)

    # Save results to the class object (via df to include indexes)
    merged_df['probability'] = np.reshape(probs, (probs.size, 1))
    report_cols = [merged.observation_id_col,
                   merged.alternative_id_col,
                   'probability']
    self.probabilities = merged_df[report_cols]

    choosers_df['choice'] = choices
    self.choices = choosers_df.choice

    # Update Orca
    table_name = self.choosers if self.out_choosers is None else self.out_choosers
    target = orca.get_table(table_name)

    column = self.choice_column if self.out_column is None else self.out_column

    target.update_col_from_series(column, choosers_df.choice, cast=True)

    # Print a message about limited usage
    print(
        "Warning: choices are unconstrained; additional functionality in progress"
    )
def fitted_model(obs, alts):
    """
    Fit a MultinomialLogit on a merged choice table built from `obs` and
    `alts` (choice column 'choice', sample_size=5) and return the fit result.

    """
    merged = MergedChoiceTable(obs, alts, 'choice', sample_size=5)
    logit = MultinomialLogit(merged, model_expression='obsval + altval - 1')
    fitted = logit.fit()
    return fitted
def mct_callable(obs, alts):
    """
    Return a MergedChoiceTable for `obs` and `alts` built with sample_size=10.

    """
    table = MergedChoiceTable(obs, alts, sample_size=10)
    return table
def mct(obs, alts):
    """
    Build a MergedChoiceTable for `obs` and `alts`.

    The sample size (`self.alt_sample_size`) and `interaction_terms` are read
    from the enclosing scope.

    """
    options = {
        'sample_size': self.alt_sample_size,
        'interaction_terms': interaction_terms,
    }
    return MergedChoiceTable(obs, alts, **options)
def fit(self, mct=None):
    """
    Estimate the model, then store and print the results.

    Estimation is done by the ChoiceModels engine (originally from UrbanSim
    MNL). `fit()` may be called repeatedly; nothing is saved with Orca or
    ModelManager until `register()` is run.

    The merged choice table used for estimation is kept on the object as
    `mergedchoicetable` (choicemodels.tools.MergedChoiceTable) for
    diagnostics.

    Parameters
    ----------
    mct : choicemodels.tools.MergedChoiceTable, optional
        Temporary backdoor for supplying a more complicated choice table than
        the template can generate itself, e.g. one with sampling weights or
        interaction terms. When given, the chooser filters are applied to it
        before estimation.

    Returns
    -------
    None

    """
    check_choicemodels_version()
    from choicemodels import MultinomialLogit
    from choicemodels.tools import MergedChoiceTable

    if mct is None:
        # Assemble the choice table from the configured tables
        observations = get_data(tables=self.choosers,
                                filters=self.chooser_filters,
                                model_expression=self.model_expression,
                                extra_columns=self.choice_column)

        if self.chooser_sample_size is not None:
            observations = observations.sample(self.chooser_sample_size)

        alternatives = get_data(tables=self.alternatives,
                                filters=self.alt_filters,
                                model_expression=self.model_expression)

        mct = MergedChoiceTable(observations=observations,
                                alternatives=alternatives,
                                chosen_alternatives=self.choice_column,
                                sample_size=self.alt_sample_size)
    else:
        # Filter the supplied table on its flattened form, then restore the
        # original index before rebuilding the MergedChoiceTable
        supplied_df = mct.to_frame()
        index_names = supplied_df.index.names
        filtered = apply_filter_query(supplied_df.reset_index(),
                                      self.chooser_filters)
        mct = MergedChoiceTable.from_df(filtered.set_index(index_names))

    estimator = MultinomialLogit(data=mct,
                                 model_expression=self.model_expression)
    results = estimator.fit()

    self.name = self._generate_name()
    self.summary_table = str(results)
    print(self.summary_table)

    fit_params = results.get_raw_results()['fit_parameters']
    self.fitted_parameters = fit_params['Coefficient'].tolist()
    self.model = results

    # Save merged choice table to the class object for diagnostics
    self.mergedchoicetable = mct
def perform_mct_intx_ops(self, mct, nan_handling='zero'):
    """
    Method to dynamically update a MergedChoiceTable object according to
    a pre-defined set of operations specified in the model .yaml config.
    Operations are performed sequentially as follows: 1) Pandas merges
    with other Orca tables; 2) Pandas group-by aggregations; 3) rename
    existing columns; 4) create new columns via Pandas `eval()`.

    The operations are read from `self.mct_intx_ops`.

    Parameters
    ----------
    mct : choicemodels.tools.MergedChoiceTable
    nan_handling : str
        Either 'zero' or 'drop', where the former will replace all NaN's
        and None's with 0 integers and the latter will drop all rows with
        any NaN or Null values. Any other value leaves NaN's untouched.

    Returns
    -------
    MergedChoiceTable

    Raises
    ------
    KeyError
        If the 'mct_index' column is neither the index nor a column of the
        intermediate table at the start of a merge.

    """
    intx_ops = self.mct_intx_ops
    mct_df = mct.to_frame()
    og_mct_index = mct_df.index.names
    mct_df.reset_index(inplace=True)
    # Positional row id used to re-attach the interaction results later
    mct_df.index.name = 'mct_index'

    # merges
    intx_df = mct_df.copy()
    for merge_args in intx_ops.get('successive_merges', []):

        # make sure mct index is preserved during merge
        left_cols = merge_args.get('mct_cols', intx_df.columns)
        left_idx = merge_args.get('left_index', False)

        # When the index column has to be added, build a NEW list instead of
        # using `+=`: in-place addition would mutate the list stored in the
        # config, and on a pandas Index it concatenates the name onto every
        # label element-wise rather than appending it.
        if intx_df.index.name == mct_df.index.name:
            if not left_idx:
                intx_df.reset_index(inplace=True)
                if mct_df.index.name not in left_cols:
                    left_cols = list(left_cols) + [mct_df.index.name]
        elif mct_df.index.name in intx_df.columns:
            if mct_df.index.name not in left_cols:
                left_cols = list(left_cols) + [mct_df.index.name]
        else:
            raise KeyError(
                'Column {0} must be preserved in intx ops!'.format(
                    mct_df.index.name))

        left = intx_df[left_cols]

        right = get_data(
            merge_args['right_table'],
            extra_columns=merge_args.get('right_cols', None))

        intx_df = pd.merge(
            left, right,
            how=merge_args.get('how', 'inner'),
            on=merge_args.get('on_cols', None),
            left_on=merge_args.get('left_on', None),
            right_on=merge_args.get('right_on', None),
            left_index=left_idx,
            right_index=merge_args.get('right_index', False),
            suffixes=merge_args.get('suffixes', ('_x', '_y')))

    # aggs
    aggs = intx_ops.get('aggregations', False)
    if aggs:
        intx_df = intx_df.groupby('mct_index').agg(aggs)

    # rename cols
    if intx_ops.get('rename_cols', False):
        intx_df = intx_df.rename(columns=intx_ops['rename_cols'])

    # update mct
    mct_df = pd.merge(mct_df, intx_df, on='mct_index')

    # create new cols from expressions
    for eval_op in intx_ops.get('sequential_eval_ops', []):
        new_col = eval_op['name']
        expr = eval_op['expr']
        engine = eval_op.get('engine', 'numexpr')
        mct_df[new_col] = mct_df.eval(expr, engine=engine)

    # restore original mct index
    mct_df.set_index(og_mct_index, inplace=True)

    # handle NaNs and Nones
    if mct_df.isna().values.any():
        if nan_handling == 'zero':
            print("Replacing MCT None's and NaN's with 0")
            mct_df = mct_df.fillna(0)
        elif nan_handling == 'drop':
            print("Dropping rows with None's/NaN's from MCT")
            mct_df = mct_df.dropna(axis=0)

    return MergedChoiceTable.from_df(mct_df)
from choicemodels.tools import MergedChoiceTable from collections import OrderedDict tracts = pd.read_csv('../data/tracts.csv').set_index('full_tract_id') trips = pd.read_csv('../data/trips.csv').set_index('place_id') pd.set_option('display.float_format', lambda x: '%.3f' % x) choosers = trips.loc[np.random.choice(trips.index, 500, replace=False)] choosers = choosers.loc[choosers.trip_distance_miles.notnull()] numalts = 10 merged = MergedChoiceTable(observations = choosers, alternatives = tracts, chosen_alternatives = choosers.full_tract_id, sample_size = numalts) model_expression = "home_density + work_density + school_density" model = MultinomialLogit(merged.to_frame(), merged.observation_id_col, merged.choice_col, model_expression) results = model.fit() results.report_fit() """ model_expression = OrderedDict([('home_density', 'all_same'),
def fit(self, mct=None):
    """
    Estimate the model, then store and print the results.

    Estimation is done by the ChoiceModels engine (originally from UrbanSim
    MNL). `fit()` may be called repeatedly; nothing is saved with Orca or
    ModelManager until `register()` is run.

    The merged choice table used for estimation is kept on the object as
    `mergedchoicetable` (choicemodels.tools.MergedChoiceTable) for
    diagnostics.

    Parameters
    ----------
    mct : choicemodels.tools.MergedChoiceTable, optional
        Temporary backdoor for supplying a more complicated merged choice
        table than the template can generate itself, e.g. one with sampling
        weights or interaction terms. This works for estimation but is not
        yet hooked up to the prediction functionality.

    Returns
    -------
    None

    """
    if mct is None:
        # TO DO - update choicemodels to accept a column name for chosen alts
        choosers_df = self._get_df(tables=self.choosers,
                                   filters=self.chooser_filters)

        if self.chooser_sample_size is not None:
            choosers_df = choosers_df.sample(self.chooser_sample_size)

        chosen = choosers_df[self.choice_column]

        alts_df = self._get_df(tables=self.alternatives,
                               filters=self.alt_filters)

        data = MergedChoiceTable(observations=choosers_df,
                                 alternatives=alts_df,
                                 chosen_alternatives=chosen,
                                 sample_size=self._get_alt_sample_size())
    else:
        # Caller supplied a ready-made table; use it unchanged
        data = mct

    estimator = MultinomialLogit(data=data.to_frame(),
                                 observation_id_col=data.observation_id_col,
                                 choice_col=data.choice_col,
                                 model_expression=self.model_expression)
    results = estimator.fit()

    self.name = self._generate_name()
    self.summary_table = str(results)
    print(self.summary_table)

    # For now, just save the summary table and fitted parameters
    fit_params = results.get_raw_results()['fit_parameters']
    self.fitted_parameters = fit_params['Coefficient'].tolist()

    # Save merged choice table to the class object for diagnostic use
    self.mergedchoicetable = data