def test_run_1(self):
    # Same expectations as test_run_2, except that _do_multiprocessing() is
    # swapped for a MagicMock that hands back canned per-input counts.
    values = [1, 2, 1, 1, 3, 1, 4, 5, 3, 2, 1]
    in_a = Series(values)
    in_b = Series(values)
    in_c = Series(values)
    in_series = [in_a, in_b, in_c]

    # every input holds the same values, so every per-input count is the same
    counts = {1: 5, 2: 2, 3: 2, 4: 1, 5: 1}
    expected = DataFrame(
        {
            0: Series(counts),
            1: Series(counts),
            2: Series(counts),
            u"all": Series({1: 15, 2: 6, 3: 6, 4: 3, 5: 3}),
        }
    )

    exp = FrequencyExperimenter(in_series)
    canned_results = [(position, Series(counts)) for position in range(3)]
    exp._do_multiprocessing = mock.MagicMock(return_value=canned_results)

    actual = exp.run()

    # the mock must have been handed exactly the Series objects built above
    exp._do_multiprocessing.assert_called_once_with(
        experimenter_func, [[(0, in_a)], [(1, in_b)], [(2, in_c)]]
    )
    self.assertEqual(len(expected.columns), len(actual.columns))
    for label in expected.columns:
        wanted = expected.loc[:, label]
        found = actual.loc[:, label]
        self.assertSequenceEqual(list(wanted.index), list(found.index))
        self.assertSequenceEqual(list(wanted.values), list(found.values))
def test_run_2(self):
    """Two DataFrames as input; the 'column' setting keeps only 'a' in each."""
    in_df = [pandas.DataFrame({'a': self.in_a, 'b': self.in_b}) for _ in range(2)]

    actual = FrequencyExperimenter(in_df, {'column': 'a'}).run()

    self.assertEqual(2, len(actual))
    # unittest calls the order-insensitive comparison assertItemsEqual on
    # Python 2 and assertCountEqual on Python 3
    assert_same_items = self.assertItemsEqual if six.PY2 else self.assertCountEqual
    for result in actual:
        freqs = result['frequency.FrequencyExperimenter']
        self.assertEqual(1, len(freqs.columns))
        assert_same_items(list(self.freq_a.index), list(freqs['a'].index))
        for key in self.freq_a.index:
            self.assertEqual(self.freq_a[key], freqs['a'][key])
def test_run_3(self):
    """two DataFrame, 'column' breaks a MultiIndex

    Both inputs carry two-level column labels. With the 'column' setting at
    'one', the first result is expected to hold columns 'a' and 'b' and the
    second to hold columns 'c' and 'd'. Every column is compared against its
    own expected frequency Series (freq_a .. freq_d) on Python 2 and
    Python 3 alike.
    """
    in_series = [self.in_a, self.in_b, self.in_c, self.in_d]
    in_df = [
        pandas.DataFrame(in_series, index=[['one', 'one', 'two', 'two'], ['a', 'b', 'c', 'd']]).T,
        pandas.DataFrame(in_series, index=[['two', 'two', 'one', 'one'], ['a', 'b', 'c', 'd']]).T,
    ]

    actual = FrequencyExperimenter(in_df, {'column': 'one'}).run()

    self.assertEqual(2, len(actual))
    left_df = actual[0]['frequency.FrequencyExperimenter']
    right_df = actual[1]['frequency.FrequencyExperimenter']
    self.assertEqual(2, len(left_df.columns))
    self.assertEqual(2, len(right_df.columns))

    # unittest calls the order-insensitive comparison assertItemsEqual on
    # Python 2 and assertCountEqual on Python 3
    assert_same_items = self.assertItemsEqual if six.PY2 else self.assertCountEqual

    # (expected frequencies, result frame, column label, may contain NaN)
    checks = [
        (self.freq_a, left_df, 'a', False),
        (self.freq_b, left_df, 'b', False),
        (self.freq_c, right_df, 'c', False),
        (self.freq_d, right_df, 'd', True),  # the only column with NaN
    ]
    for expected, frame, column, may_have_nan in checks:
        observed = frame[column]
        assert_same_items(list(expected.index), list(observed.index))
        for each in expected.index:
            # NaN never compares equal to itself, so it needs its own check
            if may_have_nan and numpy.isnan(expected[each]):  # pylint: disable=no-member
                self.assertTrue(numpy.isnan(observed[each]))  # pylint: disable=no-member
            else:
                self.assertEqual(expected[each], observed[each])
def test_run_2(self):
    # Same expectations as test_run_1, but the real _do_multiprocessing()
    # runs here instead of a MagicMock.
    values = [1, 2, 1, 1, 3, 1, 4, 5, 3, 2, 1]
    in_a = Series(values)
    in_b = Series(values)
    in_c = Series(values)
    in_series = [in_a, in_b, in_c]

    # every input holds the same values, so every per-input count is the same
    counts = {1: 5, 2: 2, 3: 2, 4: 1, 5: 1}
    expected = DataFrame(
        {
            0: Series(counts),
            1: Series(counts),
            2: Series(counts),
            u"all": Series({1: 15, 2: 6, 3: 6, 4: 3, 5: 3}),
        }
    )

    actual = FrequencyExperimenter(in_series).run()

    self.assertEqual(len(expected.columns), len(actual.columns))
    for label in expected.columns:
        wanted = expected.loc[:, label]
        found = actual.loc[:, label]
        self.assertSequenceEqual(list(wanted.index), list(found.index))
        self.assertSequenceEqual(list(wanted.values), list(found.values))
def test_run_4(self):
    # Same data as test_run_3, but the inputs arrive in a dict, so the
    # result columns are labelled by the dict's keys.
    in_series = {
        u"hello": Series([1, 2, 1, 1, 3, 1, 4, 5, 3, 2, 1]),
        u"zello": Series([1, 2, 1, 1, 3, 1, 4, 5, 3, 2, 1, 4, 4, 3, 5, 1, 1, 1]),
        u"jello": Series([1, 2, 1, 1, 3, 1, 3, 2, 1]),
    }
    expected = DataFrame(
        {
            u"hello": Series({1: 5, 2: 2, 3: 2, 4: 1, 5: 1}),
            u"zello": Series({1: 8, 2: 2, 3: 3, 4: 3, 5: 2}),
            u"jello": Series({1: 5, 2: 2, 3: 2}),
            u"all": Series({1: 18, 2: 6, 3: 7, 4: 4, 5: 3}),
        }
    )

    actual = FrequencyExperimenter(in_series).run()

    # NaN != NaN in numpy, so swap missing cells for a sentinel that cannot
    # be mistaken for a real count before comparing
    nan_stand_in = 4000
    actual = actual.fillna(value=nan_stand_in)
    expected = expected.fillna(value=nan_stand_in)

    self.assertEqual(len(expected.columns), len(actual.columns))
    for label in expected.columns:
        wanted = expected.loc[:, label]
        found = actual.loc[:, label]
        self.assertSequenceEqual(list(wanted.index), list(found.index))
        self.assertSequenceEqual(list(wanted.values), list(found.values))
def test_run_3(self):
    # More complicated arithmetic: the inputs differ in length and content,
    # so the shortest one leaves NaN cells in the combined table.
    in_series = [
        Series([1, 2, 1, 1, 3, 1, 4, 5, 3, 2, 1]),
        Series([1, 2, 1, 1, 3, 1, 4, 5, 3, 2, 1, 4, 4, 3, 5, 1, 1, 1]),
        Series([1, 2, 1, 1, 3, 1, 3, 2, 1]),
    ]
    expected = DataFrame(
        {
            0: Series({1: 5, 2: 2, 3: 2, 4: 1, 5: 1}),
            1: Series({1: 8, 2: 2, 3: 3, 4: 3, 5: 2}),
            2: Series({1: 5, 2: 2, 3: 2}),
            u"all": Series({1: 18, 2: 6, 3: 7, 4: 4, 5: 3}),
        }
    )

    actual = FrequencyExperimenter(in_series).run()

    # NaN != NaN in numpy, so swap missing cells for a sentinel that cannot
    # be mistaken for a real count before comparing
    nan_stand_in = 4000
    actual = actual.fillna(value=nan_stand_in)
    expected = expected.fillna(value=nan_stand_in)

    self.assertEqual(len(expected.columns), len(actual.columns))
    for label in expected.columns:
        wanted = expected.loc[:, label]
        found = actual.loc[:, label]
        self.assertSequenceEqual(list(wanted.index), list(found.index))
        self.assertSequenceEqual(list(wanted.values), list(found.values))
def test_run_1(self):
    """A single DataFrame as input, without any 'column' setting."""
    in_df = pandas.DataFrame({'a': self.in_a, 'b': self.in_b})

    results = FrequencyExperimenter(in_df).run()

    self.assertEqual(1, len(results))
    freqs = results[0]['frequency.FrequencyExperimenter']
    self.assertEqual(2, len(freqs.columns))
    # both input columns must come back with the expected index and counts
    for column, wanted in (('a', self.freq_a), ('b', self.freq_b)):
        found = freqs[column]
        self.assertSequenceEqual(list(wanted.index), list(found.index))
        self.assertSequenceEqual(list(wanted.values), list(found.values))