def test__get_missing_valid_rows_excess_rows(self):
    """Surplus rows are trimmed so the result holds exactly ``num_rows`` rows.

    Four synthesized rows (none flagged for dropping) are combined with two
    already-valid rows while only five rows are requested. The expected
    outcome is zero missing rows and a five-row frame indexed 0..4.
    """
    # Setup
    navigator_mock = MagicMock(spec=DataNavigator)
    modeler_mock = MagicMock(spec=Modeler)
    sampler = Sampler(navigator_mock, modeler_mock)

    column_names = list('AB')
    synthesized_index = range(3, 7)
    synthesized = pd.DataFrame(columns=column_names, index=synthesized_index)
    drop_indices = pd.Series(False, index=synthesized_index)
    previous_valid = pd.DataFrame(columns=column_names, index=range(2))
    num_rows = 5

    # Run
    missing_rows, returned_rows = sampler._get_missing_valid_rows(
        synthesized, drop_indices, previous_valid, num_rows)

    # Check
    expected_rows = pd.DataFrame(columns=column_names, index=range(5))
    assert missing_rows == 0
    assert returned_rows.equals(expected_rows)

    # Neither collaborator should have been touched by the helper.
    for collaborator in (navigator_mock, modeler_mock):
        collaborator.assert_not_called()
        assert collaborator.method_calls == []
def test__get_missing_valid_rows(self):
    """_get_missing_valid_rows returns an integer and a dataframe.

    The dataframe is expected to contain ``valid_rows`` concatenated with
    ``synthesized``, with the index reset. The integer is the difference
    between ``num_rows`` and the number of rows in the returned dataframe.

    Here two synthesized rows (none flagged for dropping) plus two valid rows
    are supplied for a request of five, so one row is still missing.
    """
    # Setup
    navigator_mock = MagicMock(spec=DataNavigator)
    modeler_mock = MagicMock(spec=Modeler)
    sampler = Sampler(navigator_mock, modeler_mock)

    column_names = list('AB')
    synthesized_index = range(3, 5)
    synthesized = pd.DataFrame(columns=column_names, index=synthesized_index)
    drop_indices = pd.Series(False, index=synthesized_index)
    previous_valid = pd.DataFrame(columns=column_names, index=range(2))
    num_rows = 5

    # Run
    missing_rows, returned_rows = sampler._get_missing_valid_rows(
        synthesized, drop_indices, previous_valid, num_rows)

    # Check
    expected_rows = pd.DataFrame(columns=column_names, index=[0, 1, 2, 3])
    assert missing_rows == 1
    assert returned_rows.equals(expected_rows)

    # Neither collaborator should have been touched by the helper.
    for collaborator in (navigator_mock, modeler_mock):
        collaborator.assert_not_called()
        assert collaborator.method_calls == []