Пример #1
0
    def _allocate_households(households, persons, tract_controls):
        """Allocate weighted household samples to tracts with integer counts.

        Households with a non-positive weight are dropped, the remaining
        weights are balanced against the per-tract controls with
        ``balance_multi_cvx``, and the balanced weights are turned into
        integer counts. The household table is then repeated once per tract
        and annotated with the count and the tract id.

        Args:
            households: DataFrame of household samples. Must contain the
                ``inputs.HOUSEHOLD_WEIGHT`` column, the columns named by
                ``inputs.NUM_PEOPLE`` / ``inputs.NUM_VEHICLES``, and every
                control column selected below.
            persons: Passed through and returned unchanged.
            tract_controls: Object whose ``data`` DataFrame holds one row per
                tract with the control columns and a ``'TRACTCE'`` column.

        Returns:
            Tuple ``(households_extend, persons)`` where ``households_extend``
            is the filtered household table concatenated ``n_tracts`` times
            with ``inputs.COUNT`` and ``inputs.TRACT`` columns added.
        """
        # Only take nonzero weights
        weight_col = inputs.HOUSEHOLD_WEIGHT.name
        households = households[households[weight_col] > 0]

        # Initial weights from PUMS.
        # `.values` replaces `as_matrix()`, which was removed in pandas 1.0.
        w = households[weight_col].values.T

        allocation_inputs = [inputs.NUM_PEOPLE, inputs.NUM_VEHICLES]  # Hard-coded for now
        # Prepend column name to bin name to prevent bin collision
        hh_columns = []
        for a_input in allocation_inputs:
            subset_values = households[a_input.name].unique().tolist()
            hh_columns += HouseholdAllocator._str_broadcast(a_input.name, subset_values)

        hh_columns = HouseholdAllocator._filter_sparse_columns(households, hh_columns)

        hh_table = households[hh_columns].values

        # One row of controls per tract, one column per control.
        A = tract_controls.data[hh_columns].values
        n_tracts, n_controls = A.shape
        n_samples = len(households.index)

        # Control importance weights
        # < 1 means not important (thus relaxing the constraint in the solver)
        # `np.asmatrix` is used because the `np.mat` alias is gone in NumPy 2.0.
        mu = np.asmatrix([1] * n_controls)

        # Repeat the initial weights and the importance weights once per tract.
        w_extend = np.tile(w, (n_tracts, 1))
        mu_extend = np.asmatrix(np.tile(mu, (n_tracts, 1)))
        # Column sums of A: each control totalled over all tracts.
        B = np.asmatrix(np.dot(np.ones((1, n_tracts)), A)[0])

        # Our trade-off coefficient gamma
        # Low values (~1) mean we trust our initial weights, high values
        # (~10000) mean want to fit the marginals.
        gamma = 100.

        # Meta-balancing coefficient
        meta_gamma = 100.

        hh_weights = balance_multi_cvx(
            hh_table, A, B, w_extend, gamma * mu_extend.T, meta_gamma
        )

        # We're running discretization independently for each tract
        tract_ids = tract_controls.data['TRACTCE'].values
        # Truncated weights plus whatever discretize_multi_weights returns.
        # NOTE(review): presumably that is a 0/1 allocation of the fractional
        # remainders - confirm against discretize_multi_weights.
        sample_weights_int = hh_weights.astype(int)
        discretized_hh_weights = discretize_multi_weights(hh_table, hh_weights)
        total_weights = sample_weights_int + discretized_hh_weights

        # Extend households and add the weights and ids
        # NOTE(review): the flattened weights are assumed to be ordered
        # tract-major (n_tracts x n_samples) so that they line up with the
        # concatenated frame and the repeated tract ids - confirm.
        households_extend = pandas.concat([households] * n_tracts)
        households_extend[inputs.COUNT.name] = total_weights.flatten().T
        tracts = np.repeat(tract_ids, n_samples)
        households_extend[inputs.TRACT.name] = tracts

        return households_extend, persons
Пример #2
0
    def _allocate_households(households, persons, tract_controls):
        """Allocate weighted household samples to tracts with integer counts.

        Households with a non-positive weight are dropped, the remaining
        weights are balanced against the per-tract household-size controls
        (columns ``'1'``, ``'2'``, ``'3'``, ``'4+'``) with
        ``balance_multi_cvx``, and the balanced weights are turned into
        integer counts. The household table is then repeated once per tract
        and annotated with the count and the tract id.

        Args:
            households: DataFrame of household samples. Must contain the
                ``inputs.HOUSEHOLD_WEIGHT`` column and the four control
                columns listed above.
            persons: Passed through and returned unchanged.
            tract_controls: Object whose ``data`` DataFrame holds one row per
                tract with the control columns and a ``'TRACTCE'`` column.

        Returns:
            Tuple ``(households_extend, persons)`` where ``households_extend``
            is the filtered household table concatenated ``n_tracts`` times
            with ``'count'`` and ``'tract'`` columns added.
        """
        # Only take nonzero weights
        weight_col = inputs.HOUSEHOLD_WEIGHT.name
        households = households[households[weight_col] > 0]

        # Initial weights from PUMS.
        # `.values` replaces `as_matrix()`, which was removed in pandas 1.0.
        w = households[weight_col].values.T

        hh_columns = ['1', '2', '3', '4+']

        hh_table = households[hh_columns].values

        # One row of controls per tract, one column per control.
        A = tract_controls.data[hh_columns].values
        n_tracts, n_controls = A.shape
        n_samples = len(households.index)

        # Control importance weights
        # < 1 means not important (thus relaxing the constraint in the solver)
        # `np.asmatrix` is used because the `np.mat` alias is gone in NumPy 2.0.
        mu = np.asmatrix([1] * n_controls)

        # Repeat the initial weights and the importance weights once per tract.
        w_extend = np.tile(w, (n_tracts, 1))
        mu_extend = np.asmatrix(np.tile(mu, (n_tracts, 1)))
        # Column sums of A: each control totalled over all tracts.
        B = np.asmatrix(np.dot(np.ones((1, n_tracts)), A)[0])

        # Our trade-off coefficient gamma
        # Low values (~1) mean we trust our initial weights, high values
        # (~10000) mean want to fit the marginals.
        gamma = 100.

        # Meta-balancing coefficient
        meta_gamma = 100.

        # Only the balanced weights are used; the two extra results the
        # solver returns are ignored here.
        hh_weights, _z, _q = balance_multi_cvx(hh_table, A, B, w_extend,
                                               gamma * mu_extend.T, meta_gamma)

        # We're running discretization independently for each tract
        tract_ids = tract_controls.data['TRACTCE'].values
        # Truncated weights plus whatever discretize_multi_weights returns.
        # NOTE(review): presumably that is a 0/1 allocation of the fractional
        # remainders - confirm against discretize_multi_weights.
        sample_weights_int = hh_weights.astype(int)
        discretized_hh_weights = discretize_multi_weights(hh_table, hh_weights)
        total_weights = sample_weights_int + discretized_hh_weights

        # Extend households and add the weights and ids
        # NOTE(review): the flattened weights are assumed to be ordered
        # tract-major (n_tracts x n_samples) so that they line up with the
        # concatenated frame and the repeated tract ids - confirm.
        households_extend = pandas.concat([households] * n_tracts)
        households_extend['count'] = total_weights.flatten().T
        tracts = np.repeat(tract_ids, n_samples)
        households_extend['tract'] = tracts

        return households_extend, persons
Пример #3
0
 def test_discretize_multi_zero_weights(self):
     """Check discretize_multi_weights against the zeroed-weights fixture.

     The fixture from ``_mock_hh_weights_zeroed`` supplies the household
     table, the weights, and the discretized result that is expected back.
     """
     fixture = self._mock_hh_weights_zeroed()
     table, weights, expected = fixture

     actual = listbalancer.discretize_multi_weights(table, weights)

     np.testing.assert_array_equal(actual, expected)