def test_balance_multi_trust_initial(self):
    """Check that a low gamma keeps the balanced weights near the initial ones.

    Uses the "inconsistent" mock fixture with ``gamma = 1`` and asserts that
    the solver output is within 5% (relative) of the initial weights ``w``.
    """
    hh_table, A, w, mu, _ = self._mock_list_inconsistent()

    # First row of (1x1 ones) . A, wrapped as a matrix.
    # np.asmatrix replaces the np.mat alias, which NumPy 2.0 removed.
    B = np.asmatrix(np.dot(np.ones((1, 1)), A)[0])
    gamma = 1.

    # NOTE(review): the other balance_multi_cvx tests in this file unpack
    # three return values (``hh_weights, _, _``); this call binds the result
    # directly. Confirm which return shape the solver currently has.
    hh_weights = listbalancer.balance_multi_cvx(
        hh_table, A, B, w, gamma * mu.T
    )

    np.testing.assert_allclose(hh_weights, w, rtol=0.05, atol=0)
def _allocate_households(households, persons, tract_controls):
    """Balance household weights against tract controls and attach the results.

    Args:
        households: table of households; must contain the
            ``inputs.HOUSEHOLD_WEIGHT`` column, the allocation input columns
            and the derived bin columns selected below.
        persons: returned unchanged.
        tract_controls: object whose ``data`` attribute holds one row per
            tract with the same bin columns plus a ``'TRACTCE'`` column.

    Returns:
        A ``(households_extend, persons)`` tuple, where ``households_extend``
        is ``households`` repeated once per tract with the
        ``inputs.COUNT`` and ``inputs.TRACT`` columns filled in.
    """
    # Only take nonzero weights
    households = households[households[inputs.HOUSEHOLD_WEIGHT.name] > 0]

    # Initial weights from PUMS.
    # ``.values`` replaces ``.as_matrix()``, which pandas 1.0 removed.
    w = households[inputs.HOUSEHOLD_WEIGHT.name].values.T

    allocation_inputs = [inputs.NUM_PEOPLE, inputs.NUM_VEHICLES]  # Hard-coded for now

    # Prepend column name to bin name to prevent bin collision
    hh_columns = []
    for a_input in allocation_inputs:
        subset_values = households[a_input.name].unique().tolist()
        hh_columns += HouseholdAllocator._str_broadcast(a_input.name, subset_values)

    hh_columns = HouseholdAllocator._filter_sparse_columns(households, hh_columns)

    hh_table = households[hh_columns].values

    A = tract_controls.data[hh_columns].values
    n_tracts, n_controls = A.shape
    n_samples = len(households.index.values)

    # Control importance weights
    # < 1 means not important (thus relaxing the constraint in the solver)
    # np.asmatrix replaces the np.mat alias, which NumPy 2.0 removed.
    mu = np.asmatrix([1] * n_controls)

    w_extend = np.tile(w, (n_tracts, 1))
    mu_extend = np.asmatrix(np.tile(mu, (n_tracts, 1)))
    B = np.asmatrix(np.dot(np.ones((1, n_tracts)), A)[0])

    # Our trade-off coefficient gamma
    # Low values (~1) mean we trust our initial weights, high values
    # (~10000) mean want to fit the marginals.
    gamma = 100.

    # Meta-balancing coefficient
    meta_gamma = 100.

    # NOTE(review): other call sites in this file unpack three return values
    # from balance_multi_cvx; this one binds the result directly. Confirm
    # which return shape the solver currently has.
    hh_weights = balance_multi_cvx(
        hh_table, A, B, w_extend, gamma * mu_extend.T, meta_gamma
    )

    # We're running discretization independently for each tract
    tract_ids = tract_controls.data['TRACTCE'].values
    sample_weights_int = hh_weights.astype(int)
    discretized_hh_weights = discretize_multi_weights(hh_table, hh_weights)
    total_weights = sample_weights_int + discretized_hh_weights

    # Extend households and add the weights and ids
    households_extend = pandas.concat([households] * n_tracts)
    households_extend[inputs.COUNT.name] = total_weights.flatten().T
    # Each tract id is repeated n_samples times; presumably this matches the
    # tract-major order of the flattened weights -- TODO confirm.
    tracts = np.repeat(tract_ids, n_samples)
    households_extend[inputs.TRACT.name] = tracts

    return households_extend, persons
def test_balance_multi_trust_controls(self):
    """Check that a very high gamma drives the weights to the expected fit.

    Uses the "consistent" mock fixture with ``gamma = 100000`` and asserts
    that the solver weights are within 5% (relative) of the transposed
    expected weights.
    """
    hh_table, A, w, mu, expected_weights = self._mock_list_consistent()

    # First row of (1x1 ones) . A, wrapped as a matrix.
    # np.asmatrix replaces the np.mat alias, which NumPy 2.0 removed.
    B = np.asmatrix(np.dot(np.ones((1, 1)), A)[0])
    gamma = 100000.

    # Only the first of the three returned values is checked here.
    hh_weights, _, _ = listbalancer.balance_multi_cvx(
        hh_table, A, B, w, gamma * mu.T
    )

    np.testing.assert_allclose(
        hh_weights, expected_weights.T, rtol=0.05, atol=0)
def test_balance_multi_zero_marginal(self):
    """Check the solver output on the "infeasible marginal" mock fixture.

    Runs with ``gamma = 10000`` and asserts that the returned weights are
    within 5% (relative) of the fixture's expected weights.
    """
    hh_table, A, w, mu, expected_weights = (
        self._mock_list_infeasible_marginal()
    )

    n_tracts = A.shape[0]
    # First row of (1 x n_tracts ones) . A, i.e. the column sums of A.
    # np.asmatrix replaces the np.mat alias, which NumPy 2.0 removed.
    B = np.asmatrix(np.dot(np.ones((1, n_tracts)), A)[0])
    gamma = 10000.

    # Only the first of the three returned values is checked here.
    hh_weights, _, _ = listbalancer.balance_multi_cvx(
        hh_table, A, B, w, gamma * mu.T
    )

    np.testing.assert_allclose(
        hh_weights, expected_weights, rtol=0.05, atol=0)
def _allocate_households(households, persons, tract_controls):
    """Balance household weights against tract controls and attach the results.

    Args:
        households: table of households; must contain the
            ``inputs.HOUSEHOLD_WEIGHT`` column and the ``'1'``, ``'2'``,
            ``'3'`` and ``'4+'`` columns.
        persons: returned unchanged.
        tract_controls: object whose ``data`` attribute holds one row per
            tract with the same four columns plus a ``'TRACTCE'`` column.

    Returns:
        A ``(households_extend, persons)`` tuple, where ``households_extend``
        is ``households`` repeated once per tract with ``'count'`` and
        ``'tract'`` columns filled in.
    """
    # Only take nonzero weights
    households = households[households[inputs.HOUSEHOLD_WEIGHT.name] > 0]

    # Initial weights from PUMS.
    # ``.values`` replaces ``.as_matrix()``, which pandas 1.0 removed.
    w = households[inputs.HOUSEHOLD_WEIGHT.name].values.T

    hh_columns = ['1', '2', '3', '4+']
    hh_table = households[hh_columns].values

    A = tract_controls.data[hh_columns].values
    n_tracts, n_controls = A.shape
    n_samples = len(households.index.values)

    # Control importance weights
    # < 1 means not important (thus relaxing the constraint in the solver)
    # np.asmatrix replaces the np.mat alias, which NumPy 2.0 removed.
    mu = np.asmatrix([1] * n_controls)

    w_extend = np.tile(w, (n_tracts, 1))
    mu_extend = np.asmatrix(np.tile(mu, (n_tracts, 1)))
    B = np.asmatrix(np.dot(np.ones((1, n_tracts)), A)[0])

    # Our trade-off coefficient gamma
    # Low values (~1) mean we trust our initial weights, high values
    # (~10000) mean want to fit the marginals.
    gamma = 100.

    # Meta-balancing coefficient
    meta_gamma = 100.

    # Only the first of the three returned values is used below.
    hh_weights, _, _ = balance_multi_cvx(
        hh_table, A, B, w_extend, gamma * mu_extend.T, meta_gamma
    )

    # We're running discretization independently for each tract
    tract_ids = tract_controls.data['TRACTCE'].values
    sample_weights_int = hh_weights.astype(int)
    discretized_hh_weights = discretize_multi_weights(hh_table, hh_weights)
    total_weights = sample_weights_int + discretized_hh_weights

    # Extend households and add the weights and ids
    households_extend = pandas.concat([households] * n_tracts)
    households_extend['count'] = total_weights.flatten().T
    # Each tract id is repeated n_samples times; presumably this matches the
    # tract-major order of the flattened weights -- TODO confirm.
    tracts = np.repeat(tract_ids, n_samples)
    households_extend['tract'] = tracts

    return households_extend, persons
def test_balance_multi_cvx_infeasible(self):
    """Check the multi-tract solver on the "infeasible" fixture tiled 10 times.

    The single-tract fixture (controls, initial weights, expected weights and
    importance weights) is stacked ``n_tracts`` times, balanced with
    ``gamma = 10`` and ``meta_gamma = 1000``, and the returned weights must
    be within 1% (relative) of the tiled expected weights.
    """
    hh_table, A, w, mu, expected_weights = self._mock_list_infeasible()

    # Extend the data
    # np.asmatrix replaces the np.mat alias, which NumPy 2.0 removed.
    n_tracts = 10
    A_extend = np.asmatrix(np.tile(A, (n_tracts, 1)))
    w_extend = np.asmatrix(np.tile(w, (n_tracts, 1)))
    expected_weights_extend = np.asmatrix(
        np.tile(expected_weights, (n_tracts, 1)))
    mu_extend = np.asmatrix(np.tile(mu, (n_tracts, 1)))

    # First row of (1 x n_tracts ones) . A_extend, i.e. its column sums.
    B = np.asmatrix(np.dot(np.ones((1, n_tracts)), A_extend)[0])

    gamma = 10.
    meta_gamma = 1000.

    # Only the first of the three returned values is checked here.
    hh_weights, _, _ = listbalancer.balance_multi_cvx(
        hh_table, A_extend, B, w_extend, gamma * mu_extend.T, meta_gamma
    )

    np.testing.assert_allclose(
        hh_weights, expected_weights_extend, rtol=0.01, atol=0)