def test__get_missing_valid_rows_excess_rows(self):
    """Surplus rows are trimmed so that only ``num_rows`` rows come back.

    Two previously valid rows plus four synthesized rows (none flagged to be
    dropped) exceed the five rows requested, so the test expects zero missing
    rows and a frame indexed ``0..4``.
    """
    # Setup
    navigator = MagicMock(spec=DataNavigator)
    model_builder = MagicMock(spec=Modeler)
    sampler = Sampler(navigator, model_builder)

    column_names = list('AB')
    synthesized_index = range(3, 7)
    synthesized = pd.DataFrame(columns=column_names, index=synthesized_index)
    drop_indices = pd.Series(False, index=synthesized_index)
    valid_rows = pd.DataFrame(columns=column_names, index=range(2))
    num_rows = 5

    # Run
    missing_rows, valid_rows = sampler._get_missing_valid_rows(
        synthesized, drop_indices, valid_rows, num_rows)

    # Check
    expected_rows = pd.DataFrame(columns=list('AB'), index=range(5))
    assert missing_rows == 0
    assert valid_rows.equals(expected_rows)

    # Neither collaborator should have been touched.
    navigator.assert_not_called()
    assert navigator.method_calls == []
    model_builder.assert_not_called()
    assert model_builder.method_calls == []
def fit(self, metadata, tables=None, root_path=None):
    """Fit this SDV instance to the dataset data.

    The metadata is wrapped in a ``Metadata`` instance when needed and
    validated against ``tables``; afterwards a ``Modeler`` is built and run
    over the tables, and a ``Sampler`` is created from the resulting models.

    Args:
        metadata (dict, str or Metadata):
            Metadata dict, path to the metadata JSON file or Metadata
            instance itself.
        tables (dict):
            Dictionary with the table names as key and ``pandas.DataFrame``
            instances as values. If ``None`` is given, the tables will be
            loaded from the paths indicated in ``metadata``.
            Defaults to ``None``.
        root_path (str or None):
            Path to the dataset directory. If ``None`` and metadata is a
            path, the metadata location is used. If ``None`` and metadata
            is a dict, the current working directory is used.
    """
    # Only raw metadata (dict or path) needs wrapping; instances are reused.
    if not isinstance(metadata, Metadata):
        metadata = Metadata(metadata, root_path)

    self.metadata = metadata
    self.metadata.validate(tables)

    self.modeler = Modeler(self.metadata, self.model, self.model_kwargs)
    self.modeler.model_database(tables)

    self.sampler = Sampler(
        self.metadata,
        self.modeler.models,
        self.model,
        self.model_kwargs,
    )
def test__unflatten_dict_child_name(self):
    """_unflatten_dict keeps child table names together as a single key.

    The navigator reports ``CHILD_TABLE`` as a child of the given table, and
    the keys containing ``__CHILD_TABLE`` are expected not to be split on the
    double underscores that belong to the child name.
    """
    # Setup
    navigator = MagicMock()
    navigator.get_children.return_value = ['CHILD_TABLE']
    model_builder = MagicMock()
    sampler = Sampler(navigator, model_builder)

    table_name = 'TABLE_NAME'
    flat = {
        'first_key__a': 1,
        'first_key____CHILD_TABLE__model_param': 0,
        'distribs____CHILD_TABLE__distribs__UNIT_PRICE__std__mean': 0
    }

    nested_first_key = {
        'a': 1,
        '__CHILD_TABLE': {
            'model_param': 0
        }
    }
    nested_distribs = {
        '__CHILD_TABLE__distribs__UNIT_PRICE__std': {
            'mean': 0
        }
    }
    expected_result = {
        'first_key': nested_first_key,
        'distribs': nested_distribs
    }

    # Run
    result = sampler._unflatten_dict(flat, table_name)

    # Check
    assert result == expected_result
    model_builder.assert_not_called()
    navigator.get_children.assert_called_once_with('TABLE_NAME')
def test_sample_all(self, rows_mock, child_mock, reset_mock, concat_mock):
    """sample_all samples every parentless table row by row.

    Only ``TABLE_A`` is reported as having no parents, so the test expects
    five single-row sampling calls for it, five child sampling calls, and the
    concatenated result to be passed through the reset mock.
    """
    # Setup
    navigator = MagicMock()
    navigator.tables = ['TABLE_A', 'TABLE_B']
    # Truthy (i.e. "has parents") for everything except TABLE_A.
    navigator.get_parents.side_effect = lambda x: x != 'TABLE_A'
    model_builder = MagicMock()
    sampler = Sampler(navigator, model_builder)

    def fake_dataframe(name, number):
        records = [{name: 0} for _ in range(number)]
        return pd.DataFrame(records, index=[0] * number)

    rows_mock.side_effect = fake_dataframe
    concat_mock.return_value = 'concatenated_dataframe'

    expected_get_parents_call_list = [
        (('TABLE_A',), {}),
        (('TABLE_B',), {}),
    ]
    expected_rows_mock_call_list = [(('TABLE_A', 1), {})] * 5

    # Run
    result = sampler.sample_all(num_rows=5)

    # Check
    assert navigator.get_parents.call_args_list == expected_get_parents_call_list
    assert result == reset_mock.return_value
    assert rows_mock.call_args_list == expected_rows_mock_call_list
    assert child_mock.call_count == 5
    reset_mock.assert_called_once_with({'TABLE_A': 'concatenated_dataframe'})
def test__sample_model(self, qualified_mock):
    """_sample_model wraps the model's samples in a DataFrame.

    For a model whose qualified name is a generic one, the test expects a
    single ``model.sample(num_rows)`` call whose output is labelled with the
    given columns.
    """
    # Setup
    navigator = MagicMock(spec=DataNavigator)
    model_builder = MagicMock(spec=Modeler)
    sampler = Sampler(navigator, model_builder)

    num_rows = 3
    columns = list('ABC')
    values = np.array([
        [1, 1, 1],
        [2, 2, 2],
        [3, 3, 3]
    ])

    model = MagicMock()
    model.sample.return_value = values
    qualified_mock.return_value = 'package.module.full_qualified_name'

    expected_result = pd.DataFrame(values, columns=columns)

    # Run
    result = sampler._sample_model(model, num_rows, columns)

    # Check
    assert result.equals(expected_result)
    qualified_mock.assert_called_once_with(model)
    model.sample.assert_called_once_with(3)
def test__unflatten_dict_respect_covariance_matrix(self):
    """_unflatten_dict rebuilds a flattened covariance into a square matrix.

    A 40x40 matrix flattened into ``covariance__<i>__<j>`` keys is expected to
    come back as a nested list under the ``covariance`` key.
    """
    # Setup
    navigator = MagicMock()
    model_builder = MagicMock()
    sampler = Sampler(navigator, model_builder)

    size = 40

    def fake_values(i, j):
        return '{}, {}'.format(i, j)

    matrix = np.array([
        [fake_values(i, j) for j in range(size)]
        for i in range(size)
    ])
    expected_result = {'covariance': matrix.tolist()}

    flat = {
        'covariance__{}__{}'.format(i, j): fake_values(i, j)
        for i in range(size)
        for j in range(size)
    }

    # Run
    result = sampler._unflatten_dict(flat)

    # Check
    assert result == expected_result
def test_sample_table(self, rows_mock):
    """sample_table delegates to the patched row sampler for the whole table.

    The number of rows requested is taken from the table's ``data.shape``,
    and the entry for the table is extracted from the returned dict.
    """
    # Setup
    table_name = 'table'
    reset_primary_keys = False

    # The mocked table reports the placeholder shape ('rows', 'columns').
    table_mock = MagicMock(**{'data.shape': ('rows', 'columns')})
    navigator = MagicMock(spec=DataNavigator)
    navigator.tables = {'table': table_mock}
    model_builder = MagicMock(spec=Modeler)
    sampler = Sampler(data_navigator=navigator, modeler=model_builder)

    rows_mock.return_value = {'table': 'samples'}
    expected_result = 'samples'

    # Run
    result = sampler.sample_table(
        table_name, reset_primary_keys=reset_primary_keys)

    # Check
    assert result == expected_result
    rows_mock.assert_called_once_with(
        sampler,
        'table',
        'rows',
        sample_children=False,
        reset_primary_keys=False,
    )
def test__unflatten_dict(self):
    """_unflatten_dict turns double-underscore keys into nested dicts."""
    # Setup
    navigator = MagicMock()
    model_builder = MagicMock()
    sampler = Sampler(navigator, model_builder)

    flat = {
        'a__first_key__a': 1,
        'a__first_key__b': 2,
        'b__second_key__x': 0
    }

    expected_result = {
        'a': {'first_key': {'a': 1, 'b': 2}},
        'b': {'second_key': {'x': 0}},
    }

    # Run
    result = sampler._unflatten_dict(flat)

    # Check
    assert result == expected_result
    navigator.assert_not_called()
    model_builder.assert_not_called()
def test__unflatten_dict_child_name(self):
    """_unflatten_dict keeps ``__CHILD_TABLE`` style names as a single key.

    Ordinary keys are nested level by level, while the portions naming a
    child table are expected to stay unsplit.
    """
    # Setup
    navigator = MagicMock()
    model_builder = MagicMock()
    sampler = Sampler(navigator, model_builder)

    flat = {
        'first_key__a__b': 1,
        'first_key____CHILD_TABLE__model_param': 0,
        'distribs____CHILD_TABLE__distribs__UNIT_PRICE__std__mean': 0
    }

    nested_first_key = {
        'a': {
            'b': 1
        },
        '__CHILD_TABLE': {
            'model_param': 0
        }
    }
    nested_distribs = {
        '__CHILD_TABLE__distribs__UNIT_PRICE__std': {
            'mean': 0
        }
    }
    expected_result = {
        'first_key': nested_first_key,
        'distribs': nested_distribs
    }

    # Run
    result = sampler._unflatten_dict(flat)

    # Check
    assert result == expected_result
    model_builder.assert_not_called()
    navigator.assert_not_called()
def test__sample_model_vine(self, qualified_mock):
    """_sample_model samples vine copulas with repeated ``sample`` calls.

    When the qualified name is the VineCopula one, the test expects
    ``model.sample`` to be invoked three times, each returned array becoming
    one row of the resulting DataFrame.
    """
    # Setup
    navigator = MagicMock(spec=DataNavigator)
    model_builder = MagicMock(spec=Modeler)
    sampler = Sampler(navigator, model_builder)

    num_rows = 3
    columns = list('ABC')
    values = [
        np.array([1, 1, 1]),
        np.array([2, 2, 2]),
        np.array([3, 3, 3])
    ]

    model = MagicMock()
    # Each successive call hands back the next array.
    model.sample.side_effect = values
    qualified_mock.return_value = 'copulas.multivariate.vine.VineCopula'

    expected_result = pd.DataFrame(values, columns=columns)
    expected_sample_calls = [((3, ), )] * 3

    # Run
    result = sampler._sample_model(model, num_rows, columns)

    # Check
    assert result.equals(expected_result)
    qualified_mock.assert_called_once_with(model)
    assert model.sample.call_args_list == expected_sample_calls
def test_sample_all(self, rows_mock):
    """sample_all samples only the tables without parents.

    The patched row sampler records its table in the ``sampled_data`` dict it
    receives, and that dict is what the test expects ``sample_all`` to return.
    """
    # Setup
    navigator = MagicMock()
    navigator.tables = ['TABLE_A', 'TABLE_B']
    # Truthy (i.e. "has parents") for everything except TABLE_A.
    navigator.get_parents.side_effect = lambda x: x != 'TABLE_A'
    model_builder = MagicMock()
    sampler = Sampler(navigator, model_builder)

    def fake_dataframe(*args, **kwargs):
        # args[1] is the table name; args[0] is the sampler itself.
        table = args[1]
        kwargs['sampled_data'][table] = 'sampled_data'

    rows_mock.side_effect = fake_dataframe

    expected_get_parents_call_list = [
        (('TABLE_A', ), {}),
        (('TABLE_B', ), {}),
    ]
    expected_result = {'TABLE_A': 'sampled_data'}

    # Run
    result = sampler.sample_all(num_rows=5)

    # Check
    assert result == expected_result
    assert navigator.get_parents.call_args_list == expected_get_parents_call_list
    rows_mock.assert_called_once_with(
        sampler, 'TABLE_A', 5, sampled_data={'TABLE_A': 'sampled_data'})
def test__get_missing_valid_rows(self):
    """_get_missing_valid_rows returns a row deficit and the merged rows.

    The returned dataframe holds ``valid_rows`` concatenated with the
    synthesized rows, with the index reset. The returned integer is the
    difference between ``num_rows`` and the length of that dataframe: here
    two valid rows plus two synthesized rows leave one of five missing.
    """
    # Setup
    navigator = MagicMock(spec=DataNavigator)
    model_builder = MagicMock(spec=Modeler)
    sampler = Sampler(navigator, model_builder)

    column_names = list('AB')
    synthesized_index = range(3, 5)
    synthesized = pd.DataFrame(columns=column_names, index=synthesized_index)
    drop_indices = pd.Series(False, index=synthesized_index)
    valid_rows = pd.DataFrame(columns=column_names, index=range(2))
    num_rows = 5

    # Run
    missing_rows, valid_rows = sampler._get_missing_valid_rows(
        synthesized, drop_indices, valid_rows, num_rows)

    # Check
    expected_rows = pd.DataFrame(columns=list('AB'), index=[0, 1, 2, 3])
    assert missing_rows == 1
    assert valid_rows.equals(expected_rows)

    # Neither collaborator should have been touched.
    navigator.assert_not_called()
    assert navigator.method_calls == []
    model_builder.assert_not_called()
    assert model_builder.method_calls == []
def test_sample_rows_parent_table(self, primary_mock, parent_mock, sample_mock,
                                  update_mock, trans_mock):
    """sample_rows uses ``modeler.models`` when the table has no parents.

    The parent lookup is patched to return ``None``, so the model stored for
    the table in the modeler is the one expected to be sampled from.
    """
    # Setup
    navigator = MagicMock(spec=DataNavigator)
    model_builder = MagicMock(spec=Modeler)
    model_builder.models = {'parent_table': 'model for parent table'}
    sampler = Sampler(data_navigator=navigator, modeler=model_builder)

    sampled_frame = pd.DataFrame()
    primary_mock.return_value = ('primary_key', pd.Series(range(5)))
    parent_mock.return_value = None
    sample_mock.return_value = sampled_frame
    update_mock.return_value = {'table_name': 'samples'}
    trans_mock.return_value = 'transformed rows'

    expected_result = {'parent_table': 'transformed rows'}

    # Run
    result = sampler.sample_rows('parent_table', 5)

    # Check
    assert result == expected_result
    assert sampler.sampled == {'table_name': 'samples'}

    primary_mock.assert_called_once_with(sampler, 'parent_table', 5)
    parent_mock.assert_called_once_with(sampler, 'parent_table')
    sample_mock.assert_called_once_with(
        sampler, 'model for parent table', 5, 'parent_table')

    expected_sample_info = ('primary_key', sampled_frame)
    update_mock.assert_called_once_with({}, 'parent_table', expected_sample_info)
    trans_mock.assert_called_once_with(sampler, sampled_frame, 'parent_table')
def test__get_model(self):
    """_get_model rebuilds a model from an unflattened extension.

    The extension is unflattened, merged with ``fitted`` and the table
    model's ``distribution``, passed through the Gaussian copula unflattening
    and finally handed to ``table_model.from_dict``.
    """
    # Setup
    gaussian_name = 'copulas.multivariate.gaussian.GaussianMultivariate'

    sampler = Mock(spec=Sampler)
    sampler._unflatten_dict.return_value = {'unflatten': 'dict'}
    sampler._unflatten_gaussian_copula.return_value = {'unflatten': 'gaussian'}

    table_model = Mock()
    table_model.to_dict.return_value = {'distribution': gaussian_name}

    # Run
    extension = {'extension': 'dict'}
    Sampler._get_model(sampler, extension, table_model)

    # Asserts
    sampler._unflatten_dict.assert_called_once_with({'extension': 'dict'})

    expected_unflatten_gaussian_call = {
        'unflatten': 'dict',
        'fitted': True,
        'distribution': 'copulas.multivariate.gaussian.GaussianMultivariate'
    }
    sampler._unflatten_gaussian_copula.assert_called_once_with(
        expected_unflatten_gaussian_call)

    table_model.from_dict.assert_called_once_with({'unflatten': 'gaussian'})
def test_model_database_vine_modeler_single_table(self):
    """model_database and sample_all work with a VineCopula on one table.

    A single table without parents or children is modeled with a real
    ``Modeler`` configured with ``VineCopula``; the table is expected to show
    up both in the fitted models and in the sampled output.
    """
    # Setup
    data_navigator = MagicMock(spec=DataNavigator)
    modeler = Modeler(data_navigator=data_navigator, model=VineCopula)

    # Setup - Mock
    raw_data = pd.DataFrame({
        'column_A': list('abdc'),
        'column_B': range(4)
    })
    transformed = pd.DataFrame({
        'column_A': [0.1, 0.2, 0.5, 1.0],
        'column_B': range(4)
    })
    table_meta = {
        'name': 'table_name',
        'fields': {
            'column_A': {
                'name': 'A',
                'type': 'categorical'
            },
            'column_B': {
                'name': 'B',
                'type': 'number',
                'subtype': 'integer'
            }
        }
    }

    data_navigator.tables = {'table_name': Table(raw_data, table_meta)}
    data_navigator.get_parents.return_value = set()
    data_navigator.get_children.return_value = set()
    data_navigator.transformed_data = {'table_name': transformed}
    data_navigator.meta = {'tables': [{'name': table_meta}]}
    data_navigator.ht = MagicMock()
    data_navigator.ht.transformers = {
        ('table_name', 'column_A'): None,
        ('table_name', 'column_B'): None
    }

    # Run
    modeler.model_database()

    # Check
    assert 'table_name' in modeler.models

    sampler = Sampler(data_navigator, modeler)
    samples = sampler.sample_all()
    assert 'table_name' in samples
def test__sample_children(self):
    """_sample_children samples every child table once per parent row.

    With three child tables and three parent rows, ``_sample_table`` is
    expected to be called nine times: for each child (in the order reported
    by the metadata) once for every row of the sampled parent table.
    """
    # Setup
    children = ['child A', 'child B', 'child C']
    sampler = Mock(spec=Sampler)
    sampler.metadata.get_children.return_value = children

    # Run
    sampled = {'test': pd.DataFrame({'field': [11, 22, 33]})}
    Sampler._sample_children(sampler, 'test', sampled)

    # Asserts
    sampler.metadata.get_children.assert_called_once_with('test')

    # One Series per parent row, named after the row's position in the frame.
    parent_rows = [
        pd.Series([value], index=['field'], name=position)
        for position, value in enumerate([11, 22, 33])
    ]
    expected_calls = [
        [child, 'test', parent_row, sampled]
        for child in children
        for parent_row in parent_rows
    ]

    actual_calls = sampler._sample_table.call_args_list

    # zip() stops at the shorter sequence, so without this check the loop
    # below would pass even if _sample_table were never called.
    assert len(actual_calls) == len(expected_calls)

    for result_call, expected_call in zip(actual_calls, expected_calls):
        call_args = result_call[0]
        assert call_args[0] == expected_call[0]
        assert call_args[1] == expected_call[1]
        assert call_args[3] == expected_call[3]
        # Series cannot be compared with ``==`` inside an assert.
        pd.testing.assert_series_equal(call_args[2], expected_call[2])
def test__unflatten_gaussian_copula_negative_std(self):
    """_unflatten_gaussian_copula maps zero or negative std to positive values.

    In this fixture a ``std`` of ``0`` is expected to become ``1`` and a
    ``std`` of ``-1`` to become ``np.exp(-1)``; the triangular covariance is
    expected to be completed into a square matrix.
    """
    # Setup
    navigator = MagicMock()
    model_builder = MagicMock()
    model_builder.model_kwargs = {'distribution': 'distribution_name'}
    sampler = Sampler(navigator, model_builder)

    model_parameters = {
        'some': 'key',
        'covariance': [
            [1],
            [0, 1]
        ],
        'distribs': {
            0: {'first': 'distribution', 'std': 0},
            1: {'second': 'distribution', 'std': -1}
        }
    }

    expected_first = {
        'type': 'distribution_name',
        'fitted': True,
        'first': 'distribution',
        'std': 1
    }
    expected_second = {
        'type': 'distribution_name',
        'fitted': True,
        'second': 'distribution',
        'std': np.exp(-1)
    }
    expected_result = {
        'some': 'key',
        'distribution': 'distribution_name',
        'covariance': [
            [1, 0],
            [0, 1]
        ],
        'distribs': {0: expected_first, 1: expected_second}
    }

    # Run
    result = sampler._unflatten_gaussian_copula(model_parameters)

    # Check
    assert result == expected_result
    navigator.assert_not_called()
    model_builder.assert_not_called()
def test__unflatten_gaussian_copula(self):
    """_unflatten_gaussian_copula adds distribution, type and fitted entries.

    The distribution name comes from ``modeler.model_kwargs``; each entry in
    ``distribs`` is expected to gain ``type`` and ``fitted`` keys, and the
    triangular covariance to be completed into a square matrix.
    """
    # Setup
    navigator = MagicMock()
    model_builder = MagicMock()
    model_builder.model_kwargs = {'distribution': 'distribution_name'}
    sampler = Sampler(navigator, model_builder)

    model_parameters = {
        'some': 'key',
        'covariance': [
            [1],
            [0, 1]
        ],
        'distribs': {
            0: {'first': 'distribution', 'std': 0},
            1: {'second': 'distribution', 'std': 0}
        }
    }

    expected_first = {
        'type': 'distribution_name',
        'fitted': True,
        'first': 'distribution',
        'std': 1
    }
    expected_second = {
        'type': 'distribution_name',
        'fitted': True,
        'second': 'distribution',
        'std': 1
    }
    expected_result = {
        'some': 'key',
        'distribution': 'distribution_name',
        'covariance': [
            [1, 0],
            [0, 1]
        ],
        'distribs': {0: expected_first, 1: expected_second}
    }

    # Run
    result = sampler._unflatten_gaussian_copula(model_parameters)

    # Check
    assert result == expected_result
    navigator.assert_not_called()
    model_builder.assert_not_called()
def fit(self):
    """Load the dataset, transform it, model it and prepare a sampler.

    The data is loaded through a ``CSVDataLoader`` built from
    ``self.meta_file_name``. The resulting navigator, modeler and sampler are
    stored on the instance as ``dn``, ``modeler`` and ``sampler``.
    """
    self.dn = CSVDataLoader(self.meta_file_name).load_data()

    # The data has to be transformed before the modeler consumes it.
    self.dn.transform_data()

    self.modeler = Modeler(self.dn)
    self.modeler.model_database()

    self.sampler = Sampler(self.dn, self.modeler)
def test_sample_no_sample_children(self):
    """Sampler.sample runs for a parentless table with sample_children=False.

    This is a smoke test: it only checks that the call completes without
    raising; no assertions are made on the outcome.
    """
    # Setup
    sampler = Mock(spec=Sampler)
    sampler.models = {'test': 'model'}
    sampler.metadata.get_parents.return_value = None

    # Run
    Sampler.sample(sampler, 'test', 5, sample_children=False)
def test__unflatten_dict_raises_error_column_index(self):
    """_unflatten_dict raises ValueError for the key ``foo__1__0``."""
    # Setup
    sampler = Mock(spec=Sampler)
    flat = {'foo__1__0': 'some value'}

    # Run
    with pytest.raises(ValueError):
        Sampler._unflatten_dict(sampler, flat)
def test_sample_no_sample_children(self):
    """Sampler.sample transforms the sampled rows when children are skipped.

    For a table without parents and ``sample_children=False``, whatever
    ``_sample_rows`` returns is expected to be passed, together with the table
    name, to ``_transform_synthesized_rows``.
    """
    # Setup
    table_name = 'test'
    sampler = Mock(spec=Sampler)
    sampler.models = {'test': 'model'}
    sampler.metadata.get_parents.return_value = None

    # Run
    Sampler.sample(sampler, table_name, 5, sample_children=False)

    # Asserts
    sampled_rows = sampler._sample_rows.return_value
    sampler._transform_synthesized_rows.assert_called_once_with(
        sampled_rows, 'test')
def test__sample_with_previous(self):
    """_sample appends new rows to previously sampled data for the table.

    ``sampled`` already holds a frame for ``DEMO``; the two freshly sampled
    rows are expected to be appended to it, carrying the parent's id in the
    ``foreign_key`` column, before the children are sampled.
    """
    # Setup
    get_extension_mock = Mock()
    get_extension_mock.return_value = {'child_rows': 0.999}

    get_model_mock = Mock()
    get_model_mock.return_value = None

    sample_valid_rows_mock = Mock()
    sample_valid_rows_mock.return_value = pd.DataFrame({'foo': [0, 1]})

    sample_children_mock = Mock()

    dn_mock = Mock()
    dn_mock.foreign_keys = {
        ('DEMO', 'p_name'): ('parent_id', 'foreign_key')
    }

    # Run
    sampler_mock = Mock()
    sampler_mock._get_extension = get_extension_mock
    sampler_mock._get_model = get_model_mock
    sampler_mock._sample_valid_rows = sample_valid_rows_mock
    sampler_mock._sample_children = sample_children_mock
    sampler_mock.dn = dn_mock

    table_name = 'DEMO'
    parent_name = 'p_name'
    parent_row = {'parent_id': 'foo'}
    sampled = {'DEMO': pd.DataFrame({'bar': [1, 2]})}

    Sampler._sample(sampler_mock, table_name, parent_name, parent_row, sampled)

    # Asserts
    # ``np.nan`` is used instead of the ``np.NaN`` alias, which was removed
    # in NumPy 2.0; ``np.nan`` is available in every NumPy version.
    exp_dataframe_sampled = pd.DataFrame({
        'bar': [1, 2, np.nan, np.nan],
        'foo': [np.nan, np.nan, 0, 1],
        'foreign_key': [np.nan, np.nan, 'foo', 'foo']
    })

    args_sample_children, _ = sample_children_mock.call_args
    exp_arg_table_name, exp_arg_sampled = args_sample_children

    get_extension_mock.assert_called_once_with({'parent_id': 'foo'}, 'DEMO', 'p_name')
    get_model_mock.assert_called_once_with({'child_rows': 0.999})
    sample_valid_rows_mock.assert_called_once_with(None, 1, 'DEMO')

    assert exp_arg_table_name == 'DEMO'
    pd.testing.assert_frame_equal(exp_arg_sampled['DEMO'], exp_dataframe_sampled)
def test__reset_primary_keys_generators(self):
    """_reset_primary_keys_generators replaces both key stores with empty dicts."""
    # Run
    sampler = Mock()
    sampler.primary_key = 'something'
    sampler.remaining_primary_key = 'else'

    Sampler._reset_primary_keys_generators(sampler)

    # Asserts
    assert sampler.primary_key == {}
    assert sampler.remaining_primary_key == {}
def test_sample_rows_children_table(self, primary_mock, parent_mock, model_mock,
                                    extension_mock, sample_mock, update_mock,
                                    trans_mock):
    """sample_rows builds the model from an extension when a parent exists.

    The parent lookup is patched to return a parent table; the extension
    obtained for it is expected to be turned into the model that is sampled.
    """
    # Setup
    navigator = MagicMock(spec=DataNavigator)
    navigator.foreign_keys = {
        ('child_table', 'parent_name'): ('parent_pk', 'child_fk')
    }
    model_builder = MagicMock(spec=Modeler)
    sampler = Sampler(data_navigator=navigator, modeler=model_builder)

    parent_rows = pd.DataFrame({'foreign_key': [0, 1, 2]})
    sampled_frame = pd.DataFrame()

    primary_mock.return_value = ('primary_key', pd.Series(range(5)))
    parent_mock.return_value = ('parent_name', 'foreign_key', parent_rows)
    extension_mock.return_value = 'extension'
    model_mock.return_value = 'model from extension'
    sample_mock.return_value = sampled_frame
    update_mock.return_value = {'table_name': 'samples'}
    trans_mock.return_value = 'transformed_rows'

    expected_result = {'child_table': 'transformed_rows'}

    # Run
    result = sampler.sample_rows('child_table', 5)

    # Check
    assert result == expected_result
    assert sampler.sampled == {'table_name': 'samples'}

    primary_mock.assert_called_once_with(sampler, 'child_table', 5)
    parent_mock.assert_called_once_with(sampler, 'child_table')
    sample_mock.assert_called_once_with(
        sampler, 'model from extension', 5, 'child_table')

    expected_sample_info = ('primary_key', sampled_frame)
    update_mock.assert_called_once_with({}, 'child_table', expected_sample_info)
    trans_mock.assert_called_once_with(sampler, sampled_frame, 'child_table')

    # The extension call receives a DataFrame, so its arguments are checked
    # one by one instead of through assert_called_once_with.
    call_args_list = extension_mock.call_args_list
    assert len(call_args_list) == 1

    args, kwargs = call_args_list[0]
    assert kwargs == {}
    assert len(args) == 4
    assert args[0] == sampler
    assert args[1].equals(pd.DataFrame({'foreign_key': [0]}))
    assert args[2] == 'child_table'
    assert args[3] == 'parent_name'

    model_mock.assert_called_once_with(sampler, 'extension')
def test__fill_text_columns(self):
    """_fill_text_columns requests one row of the table referenced by an id.

    The ``DEMO`` table metadata declares an id field with a ``ref`` to
    ``table_ref``, an id field with a regex and a text field with a regex;
    only the referenced table is expected to be sampled, exactly once.
    """
    # Setup
    demo_fields = {
        'a_field': {
            'name': 'a_field',
            'type': 'id',
            'ref': {
                'table': 'table_ref',
                'field': 'table_ref_id'
            }
        },
        'b_field': {
            'name': 'b_field',
            'type': 'id',
            'regex': '^[0-9]{10}$'
        },
        'c_field': {
            'name': 'c_field',
            'type': 'text',
            'regex': '^[a-z]{10}$'
        }
    }

    data_navigator_mock = Mock()
    data_navigator_mock.tables = {
        'DEMO': Table(pd.DataFrame(), {'fields': demo_fields})
    }

    sample_rows_mock = Mock()
    sample_rows_mock.return_value = {
        'table_ref_id': {
            'name': 'table_ref_id'
        }
    }

    # Run
    sampler_mock = Mock()
    sampler_mock.dn = data_navigator_mock
    sampler_mock.sample_rows = sample_rows_mock

    row = pd.DataFrame({'c_field': ['foo', 'bar', 'tar']})
    labels = ['a_field', 'b_field', 'c_field']
    table_name = 'DEMO'

    Sampler._fill_text_columns(sampler_mock, row, labels, table_name)

    # Asserts
    sample_rows_mock.assert_called_once_with('table_ref', 1)
def test__find_parent_id_all_singlar_matrix(self, choice_mock):
    """With every likelihood missing, the weights come from ``num_rows``.

    The weights passed to the patched choice function are expected to be
    ``num_rows`` normalized to sum to one.
    """
    likelihoods = pd.Series([None, None, None, None])
    num_rows = pd.Series([1, 2, 3, 4])

    Sampler._find_parent_id(likelihoods, num_rows)

    expected_weights = np.array([1 / 10, 2 / 10, 3 / 10, 4 / 10])

    assert choice_mock.call_count == 1
    choice_args, choice_kwargs = choice_mock.call_args
    assert list(choice_args[0]) == list(likelihoods.index)
    np.testing.assert_array_equal(choice_kwargs['p'], expected_weights)
def test__find_parent_id_all_0_or_singlar_matrix(self, choice_mock):
    """With only zero or missing likelihoods, gaps are filled from ``num_rows``.

    Zero likelihoods are expected to keep a zero weight, while the missing
    ones take their ``num_rows`` value before normalization.
    """
    likelihoods = pd.Series([0, None, 0, None])
    num_rows = pd.Series([1, 2, 3, 4])

    Sampler._find_parent_id(likelihoods, num_rows)

    expected_weights = np.array([0, 2 / 6, 0, 4 / 6])

    assert choice_mock.call_count == 1
    choice_args, choice_kwargs = choice_mock.call_args
    assert list(choice_args[0]) == list(likelihoods.index)
    np.testing.assert_array_equal(choice_kwargs['p'], expected_weights)
def test__find_parent_id_some_good(self, choice_mock):
    """With some valid likelihoods, only the missing ones are filled in.

    The expected weights are ``[0.5, 1, 1.5, 1]`` normalized to sum to one,
    i.e. the valid likelihoods are kept and each gap contributes ``1``.
    """
    likelihoods = pd.Series([0.5, None, 1.5, None])
    num_rows = pd.Series([1, 2, 3, 4])

    Sampler._find_parent_id(likelihoods, num_rows)

    expected_weights = np.array([0.5 / 4, 1 / 4, 1.5 / 4, 1 / 4])

    assert choice_mock.call_count == 1
    choice_args, choice_kwargs = choice_mock.call_args
    assert list(choice_args[0]) == list(likelihoods.index)
    np.testing.assert_array_equal(choice_kwargs['p'], expected_weights)
def test__find_parent_id_all_good(self, choice_mock):
    """With every likelihood valid, the likelihoods are only normalized.

    ``num_rows`` is not expected to influence the weights in this case.
    """
    likelihoods = pd.Series([0.5, 1, 1.5, 2])
    num_rows = pd.Series([1, 2, 3, 4])

    Sampler._find_parent_id(likelihoods, num_rows)

    expected_weights = np.array([0.5 / 5, 1 / 5, 1.5 / 5, 2 / 5])

    assert choice_mock.call_count == 1
    choice_args, choice_kwargs = choice_mock.call_args
    assert list(choice_args[0]) == list(likelihoods.index)
    np.testing.assert_array_equal(choice_kwargs['p'], expected_weights)