def test_write_to_file(self):
    '''Population.write hands each output path to the matching frame's to_csv.'''
    # Mocks stand in for the persons/households tables so nothing hits disk.
    mock_persons, mock_households = MagicMock(), MagicMock()

    Population(mock_persons, mock_households).write(
        persons_outfile='persons_file',
        households_outfile='households_file',
    )

    # Each table must be written exactly once, to its own destination.
    mock_persons.to_csv.assert_called_once_with('persons_file')
    mock_households.to_csv.assert_called_once_with('households_file')
def test_read_from_file(self):
    '''Population.from_csvs reads both files through pandas.read_csv.

    pandas.read_csv is patched with a mock returning an empty DataFrame, so
    no real file is opened; the test only checks the returned type and that
    each path was passed to read_csv.
    '''
    read_csv = MagicMock(return_value=pandas.DataFrame())
    with patch('pandas.read_csv', read_csv):
        population = Population.from_csvs('persons_file', 'households_file')

    # Use a unittest assertion rather than a bare `assert type(...) == ...`:
    # it is not stripped under `python -O` and reports a useful failure message.
    self.assertIsInstance(population, Population)
    # The order in which the two files are read is not part of the contract.
    read_csv.assert_any_call('households_file')
    read_csv.assert_any_call('persons_file')
def test_generate_households_simple(self):
    '''Population.generate queries the household model and returns its rows.'''
    num_people = inputs.NUM_PEOPLE.name
    # The mocked model yields two '6+' households.
    mock_household_model = self._mock_model(
        [num_people],
        generated=[('6+', ), ('6+', )],
    )
    mock_allocator = self._mock_allocated()

    # The person model is irrelevant to this test, so a bare mock is enough.
    result = Population.generate(mock_allocator, MagicMock(), mock_household_model)

    # Expected bucket/evidence/count presumably follow from what
    # _mock_allocated sets up -- helper is defined outside this block.
    expected_evidence = ((num_people, '6+'), )
    mock_household_model.generate.assert_called_with(
        'one_bucket', expected_evidence, count=2)
    self.assertIn(num_people, result.generated_households)
    self._check_household_output(result.generated_households)
def test_generate_persons_simple(self):
    '''Population.generate queries the person model and returns its rows.'''
    age, sex = inputs.AGE.name, inputs.SEX.name
    # Returns two people regardless of count passed in
    generated_people = [('35-64', 'F'), ('35-64', 'F')]
    mock_person_model = self._mock_model([age, sex], generated=generated_people)
    mock_allocator = self._mock_allocated()

    # The household model is irrelevant to this test, so a bare mock is enough.
    result = Population.generate(mock_allocator, mock_person_model, MagicMock())

    # Expected bucket/evidence/count presumably follow from what
    # _mock_allocated sets up -- helper is defined outside this block.
    expected_evidence = ((age, '35-64'), (sex, 'M'))
    mock_person_model.generate.assert_called_with(
        'one_bucket', expected_evidence, count=2)
    self._check_person_output(result.generated_people)
def generate_synthetic_people_and_households(state_id, puma_id, output_dir, allocator,
                                             person_model, household_model):
    '''Generate a synthetic population and write it out as two CSV files.

    Builds a Population with Population.generate from the allocator and the
    two models, then writes the people and the households to separate files
    in output_dir. File names come from FILE_PATTERN formatted with state_id,
    puma_id and the suffix 'people.csv' or 'households.csv'.

    Args:
        state_id: 2-digit state fips code
        puma_id: 5-digit puma code
        output_dir: directory in which both output files are placed
        allocator: household allocator passed to Population.generate; PUMS
            households allocated as best as possible based on marginal census
            (currently tract) data using a cvx-solver
        person_model: bayesian model describing the discretized pums fields'
            relation to one another
        household_model: same as person_model but for households
    '''
    synthetic_population = Population.generate(
        household_allocator=allocator,
        person_model=person_model,
        household_model=household_model,
    )

    people_path = os.path.join(
        output_dir, FILE_PATTERN.format(state_id, puma_id, 'people.csv'))
    households_path = os.path.join(
        output_dir, FILE_PATTERN.format(state_id, puma_id, 'households.csv'))

    # People path first, households path second, as Population.write expects.
    synthetic_population.write(people_path, households_path)