def test_plot_boxes(self):
    """Smoke-test ``sdutil.plot_boxes`` with ``together`` True and False.

    Two boxes restricting the numeric column ``a`` and the categorical
    column ``c`` are plotted. The test contains no assertions; it only
    checks that plotting and drawing complete without raising.
    """
    x = pd.DataFrame(
        [[0.1, 0, 'a'],
         [0.2, 1, 'b'],
         [0.3, 2, 'a'],
         [0.4, 3, 'b'],
         [0.5, 4, 'a'],
         [0.6, 5, 'a'],
         [0.7, 6, 'b'],
         [0.8, 7, 'a'],
         [0.9, 8, 'b'],
         [1.0, 9, 'a']],
        columns=['a', 'b', 'c'])

    # Labels: 1 where (a > 0.5 and c != 'a') or (a < 0.5 and c != 'b').
    # They are not passed to plot_boxes below.
    # Builtin int is used because the np.int alias was removed from NumPy.
    y = np.zeros((x.shape[0],), dtype=int)
    logical = (x.a > 0.5) & (x.c != 'a')
    y[logical] = 1
    logical = (x.a < 0.5) & (x.c != 'b')
    y[logical] = 1

    # the string comparisons above run before the column becomes categorical
    x['c'] = x['c'].astype('category')
    box_init = sdutil._make_box(x)

    boxlim1 = box_init.copy()
    boxlim1.a = [0.5, 1]
    boxlim1.c = [{'b'}] * 2  # same set object for both limit rows

    boxlim2 = box_init.copy()
    boxlim2.a = [0.1, 0.5]
    boxlim2.c = [{'a'}] * 2

    sdutil.plot_boxes(x, [boxlim1, boxlim2], together=True)
    sdutil.plot_boxes(x, [boxlim1, boxlim2], together=False)

    plt.draw()
    plt.close('all')
def test_plot_pairwise_scatter(self):
    """Smoke-test ``sdutil.plot_pair_wise_scatter`` on mixed-type data.

    The test contains no assertions; it only checks that the plotting
    call and the subsequent draw complete without raising.
    """
    x = pd.DataFrame(
        [[0.1, 0, 'a'],
         [0.2, 1, 'b'],
         [0.3, 2, 'a'],
         [0.4, 3, 'b'],
         [0.5, 4, 'a'],
         [0.6, 5, 'a'],
         [0.7, 6, 'b'],
         [0.8, 7, 'a'],
         [0.9, 8, 'b'],
         [1.0, 9, 'a']],
        columns=['a', 'b', 'c'])

    # Builtin int is used because the np.int alias was removed from NumPy.
    y = np.zeros((x.shape[0],), dtype=int)
    y[(x.a > 0.5) & (x.c != 'a')] = 1

    # the string comparison above runs before the column becomes categorical
    x['c'] = x['c'].astype('category')
    box_init = sdutil._make_box(x)

    boxlim = box_init.copy()
    boxlim.a = [0.5, 1.0]
    boxlim.c = [{'b'}] * 2  # same set object for both limit rows
    restricted_dims = ['a', 'c']

    sdutil.plot_pair_wise_scatter(x, y, boxlim, box_init, restricted_dims)

    plt.draw()
    plt.close('all')
def test_plot_boxes(self):
    """Smoke-test ``sdutil.plot_boxes`` in both ``together`` modes.

    No assertions are made: the test passes when both plotting calls and
    the final draw complete without raising.
    """
    x = pd.DataFrame(
        [[0.1, 0, 'a'],
         [0.2, 1, 'b'],
         [0.3, 2, 'a'],
         [0.4, 3, 'b'],
         [0.5, 4, 'a'],
         [0.6, 5, 'a'],
         [0.7, 6, 'b'],
         [0.8, 7, 'a'],
         [0.9, 8, 'b'],
         [1.0, 9, 'a']],
        columns=['a', 'b', 'c'])

    # Labels built from the raw string column; plot_boxes does not receive
    # them. dtype=int replaces the np.int alias, which NumPy has removed.
    y = np.zeros((x.shape[0],), dtype=int)
    logical = (x.a > 0.5) & (x.c != 'a')
    y[logical] = 1
    logical = (x.a < 0.5) & (x.c != 'b')
    y[logical] = 1

    x['c'] = x['c'].astype('category')
    box_init = sdutil._make_box(x)

    # first box: a limited to [0.5, 1], c limited to {'b'}
    boxlim1 = box_init.copy()
    boxlim1.a = [0.5, 1]
    boxlim1.c = [{'b'}] * 2

    # second box: a limited to [0.1, 0.5], c limited to {'a'}
    boxlim2 = box_init.copy()
    boxlim2.a = [0.1, 0.5]
    boxlim2.c = [{'a'}] * 2

    sdutil.plot_boxes(x, [boxlim1, boxlim2], together=True)
    sdutil.plot_boxes(x, [boxlim1, boxlim2], together=False)

    plt.draw()
    plt.close('all')
def test_get_sorted_box_lims(self):
    """Check the names returned by ``sdutil._get_sorted_box_lims``."""
    columns = ['a', 'b', 'c']
    data = pd.DataFrame([(0, 1, 2), (2, 5, 6), (3, 2, 1)], columns=columns)
    box_init = sdutil._make_box(data)

    box_lim = pd.DataFrame([(0, 1, 1), (2, 5, 2)], columns=columns)
    result = sdutil._get_sorted_box_lims([box_lim], box_init)

    # only the second element (the names) is checked;
    # the expected result is 'c' followed by 'a'
    _, uncs = result
    self.assertEqual(uncs, ['c', 'a'])
def test_get_sorted_box_lims(self):
    """Check ``sdutil._get_sorted_box_lims`` on structured-array input.

    Only the returned names are asserted; the expected result is ``'c'``
    followed by ``'a'``.
    """
    # Builtin float replaces the np.float alias, which NumPy has removed.
    dtype = [('a', float), ('b', float), ('c', float)]

    x = np.array([(0, 1, 2), (2, 5, 6), (3, 2, 1)], dtype=dtype)
    box_init = sdutil._make_box(x)

    box_lim = np.array([(0, 1, 1), (2, 5, 2)], dtype=dtype)

    # the sorted box limits themselves are not inspected by this test
    _, uncs = sdutil._get_sorted_box_lims([box_lim], box_init)

    self.assertEqual(uncs, ['c', 'a'])
def test_determine_nr_restricted_dims(self):
    """Check ``sdutil._determine_nr_restricted_dims`` for two boxes."""
    frame = pd.DataFrame(np.random.rand(5, 2), columns=['a', 'b'])
    box_init = sdutil._make_box(frame)

    # a box compared with itself is expected to report zero restrictions
    self.assertEqual(
        sdutil._determine_nr_restricted_dims(box_init, box_init), 0)

    # for this hand-written box the test expects two restricted dimensions
    b = pd.DataFrame([(1, 1), (0, 1)], columns=['a', 'b'])
    self.assertEqual(sdutil._determine_nr_restricted_dims(b, box_init), 2)
def test_make_box(self):
    """Check that ``sdutil._make_box`` spans the min and max of each field."""
    # Builtin float replaces the np.float alias, which NumPy has removed.
    x = np.array([(0, 1, 2), (2, 5, 6), (3, 2, 1)],
                 dtype=[('a', float), ('b', float), ('c', float)])

    box_lims = sdutil._make_box(x)

    # some test on the box
    self.assertEqual(np.min(box_lims['a']), 0, 'min a fails')
    self.assertEqual(np.max(box_lims['a']), 3, 'max a fails')
    self.assertEqual(np.min(box_lims['b']), 1, 'min b fails')
    # message fixed: this assertion is about b, not c
    self.assertEqual(np.max(box_lims['b']), 5, 'max b fails')
    self.assertEqual(np.min(box_lims['c']), 1, 'min c fails')
    self.assertEqual(np.max(box_lims['c']), 6, 'max c fails')
def test_determine_nr_restricted_dims(self):
    """Check ``sdutil._determine_nr_restricted_dims`` on structured arrays."""
    # Builtin float replaces the np.float alias, which NumPy has removed.
    dtype = [('a', float), ('b', float)]

    x = np.random.rand(10, )
    x = np.asarray(x, dtype=dtype)

    # a box compared with itself is expected to report zero restrictions
    box_init = sdutil._make_box(x)
    n = sdutil._determine_nr_restricted_dims(box_init, box_init)
    self.assertEqual(n, 0)

    # for this hand-written box the test expects two restricted dimensions
    b = np.array([(1, 1), (0, 1)], dtype=dtype)
    n = sdutil._determine_nr_restricted_dims(b, box_init)
    self.assertEqual(n, 2)
def test_make_box(self):
    """Check that ``sdutil._make_box`` spans the min and max of each column."""
    x = pd.DataFrame([(0, 1, 2), (2, 5, 6), (3, 2, 1)],
                     columns=['a', 'b', 'c'])

    box_lims = sdutil._make_box(x)

    # some test on the box
    self.assertEqual(np.min(box_lims['a']), 0, 'min a fails')
    self.assertEqual(np.max(box_lims['a']), 3, 'max a fails')
    self.assertEqual(np.min(box_lims['b']), 1, 'min b fails')
    # message fixed: this assertion is about b, not c
    self.assertEqual(np.max(box_lims['b']), 5, 'max b fails')
    self.assertEqual(np.min(box_lims['c']), 1, 'min c fails')
    self.assertEqual(np.max(box_lims['c']), 6, 'max c fails')
def test_determine_nr_restricted_dims(self):
    """Verify the restricted-dimension count for an identical and a custom box."""
    samples = np.random.rand(5, 2)
    samples = pd.DataFrame(samples, columns=['a', 'b'])
    box_init = sdutil._make_box(samples)

    # identical boxes: the expected count is 0
    same_count = sdutil._determine_nr_restricted_dims(box_init, box_init)
    self.assertEqual(same_count, 0)

    # custom box: the expected count is 2
    b = pd.DataFrame([(1, 1), (0, 1)], columns=['a', 'b'])
    custom_count = sdutil._determine_nr_restricted_dims(b, box_init)
    self.assertEqual(custom_count, 2)
def test_get_sorted_box_lims(self):
    """Verify which names ``sdutil._get_sorted_box_lims`` returns, and in what order."""
    names = ['a', 'b', 'c']
    box_init = sdutil._make_box(
        pd.DataFrame([(0, 1, 2), (2, 5, 6), (3, 2, 1)], columns=names))
    box_lim = pd.DataFrame([(0, 1, 1), (2, 5, 2)], columns=names)

    # the first return value is ignored; only the names are compared
    uncs = sdutil._get_sorted_box_lims([box_lim], box_init)[1]

    self.assertEqual(uncs, ['c', 'a'])
def test_make_box(self):
    """Verify the per-column minimum and maximum of ``sdutil._make_box`` output."""
    x = pd.DataFrame([(0, 1, 2), (2, 5, 6), (3, 2, 1)],
                     columns=['a', 'b', 'c'])
    box_lims = sdutil._make_box(x)

    # some test on the box
    self.assertEqual(np.min(box_lims['a']), 0, 'min a fails')
    self.assertEqual(np.max(box_lims['a']), 3, 'max a fails')
    self.assertEqual(np.min(box_lims['b']), 1, 'min b fails')
    # the original message named column c although column b is checked here
    self.assertEqual(np.max(box_lims['b']), 5, 'max b fails')
    self.assertEqual(np.min(box_lims['c']), 1, 'min c fails')
    self.assertEqual(np.max(box_lims['c']), 6, 'max c fails')
def test_normalize(self):
    """Check ``sdutil._normalize`` output against expected limits per column."""
    columns = ['a', 'b', 'c']
    data = pd.DataFrame([(0, 1, 2), (2, 5, 6), (3, 2, 1)], columns=columns)
    box_init = sdutil._make_box(data)
    box_lim = pd.DataFrame([(0, 1, 1), (2, 5, 2)], columns=columns)

    uncs = box_lim.columns.values.tolist()
    normalized = sdutil._normalize(box_lim, box_init, uncs)

    # expected (lower, upper) pairs, in the same order as uncs
    expected = [(0, 2 / 3), (0, 1), (0, 0.2)]
    for row, (lower, upper) in enumerate(expected):
        name = uncs[row]
        self.assertAlmostEqual(normalized[row, 0], lower,
                               msg='lower unequal for ' + name)
        self.assertAlmostEqual(normalized[row, 1], upper,
                               msg='upper unequal for ' + name)
def test_determine_nr_restricted_dims(self):
    """Verify the restricted-dimension count using structured-array boxes."""
    x = np.random.rand(10, )
    # Builtin float replaces the np.float alias, which NumPy has removed.
    x = np.asarray(x, dtype=[('a', float), ('b', float)])

    # identical boxes: the expected count is 0
    box_init = sdutil._make_box(x)
    n = sdutil._determine_nr_restricted_dims(box_init, box_init)
    self.assertEqual(n, 0)

    # custom box: the expected count is 2
    b = np.array([(1, 1), (0, 1)], dtype=[('a', float), ('b', float)])
    n = sdutil._determine_nr_restricted_dims(b, box_init)
    self.assertEqual(n, 2)
def test_make_box(self):
    """Verify the per-field minimum and maximum of ``sdutil._make_box`` output."""
    # Builtin float replaces the np.float alias, which NumPy has removed.
    dtype = [('a', float), ('b', float), ('c', float)]
    x = np.array([(0, 1, 2), (2, 5, 6), (3, 2, 1)], dtype=dtype)

    box_lims = sdutil._make_box(x)

    # some test on the box
    self.assertEqual(np.min(box_lims['a']), 0, 'min a fails')
    self.assertEqual(np.max(box_lims['a']), 3, 'max a fails')
    self.assertEqual(np.min(box_lims['b']), 1, 'min b fails')
    # the original message named field c although field b is checked here
    self.assertEqual(np.max(box_lims['b']), 5, 'max b fails')
    self.assertEqual(np.min(box_lims['c']), 1, 'min c fails')
    self.assertEqual(np.max(box_lims['c']), 6, 'max c fails')
def test_normalize(self):
    """Verify the lower and upper values produced by ``sdutil._normalize``."""
    box_init = sdutil._make_box(
        pd.DataFrame([(0, 1, 2), (2, 5, 6), (3, 2, 1)],
                     columns=['a', 'b', 'c']))
    box_lim = pd.DataFrame([(0, 1, 1), (2, 5, 2)], columns=['a', 'b', 'c'])
    uncs = box_lim.columns.values.tolist()

    normalized = sdutil._normalize(box_lim, box_init, uncs)

    # one (lower, upper) pair per entry in uncs
    expected_limits = [(0, 2 / 3), (0, 1), (0, 0.2)]
    for idx, pair in enumerate(expected_limits):
        low, high = pair
        self.assertAlmostEqual(
            normalized[idx, 0], low, msg='lower unequal for ' + uncs[idx])
        self.assertAlmostEqual(
            normalized[idx, 1], high, msg='upper unequal for ' + uncs[idx])
def test_get_sorted_box_lims(self):
    """Verify the names returned by ``sdutil._get_sorted_box_lims``.

    Input boxes are NumPy structured arrays; only the returned names are
    asserted.
    """
    # Builtin float replaces the np.float alias, which NumPy has removed.
    x = np.array([(0, 1, 2), (2, 5, 6), (3, 2, 1)],
                 dtype=[('a', float), ('b', float), ('c', float)])
    box_init = sdutil._make_box(x)

    box_lim = np.array([(0, 1, 1), (2, 5, 2)],
                       dtype=[('a', float), ('b', float), ('c', float)])

    # the first return value is not used by this test
    _, uncs = sdutil._get_sorted_box_lims([box_lim], box_init)

    self.assertEqual(uncs, ['c', 'a'])
def test_normalize(self):
    """Check ``sdutil._normalize`` on structured-array boxes.

    The normalized limits are compared per field against expected
    (lower, upper) pairs using ``assertAlmostEqual``.
    """
    # Builtin float replaces the np.float alias, which NumPy has removed.
    dtype = [('a', float), ('b', float), ('c', float)]

    x = np.array([(0, 1, 2), (2, 5, 6), (3, 2, 1)], dtype=dtype)
    box_init = sdutil._make_box(x)

    box_lim = np.array([(0, 1, 1), (2, 5, 2)], dtype=dtype)

    # NOTE(review): np.lib.recfunctions is only reachable when
    # numpy.lib.recfunctions has been imported somewhere — confirm.
    uncs = np.lib.recfunctions.get_names(
        box_init.dtype)  # @UndefinedVariable
    normalized = sdutil._normalize(box_lim, box_init, uncs)

    # expected (lower, upper) pairs, in the same order as uncs
    for i, lims in enumerate([(0, 2 / 3), (0, 1), (0, 0.2)]):
        lower, upper = lims
        self.assertAlmostEqual(normalized[i, 0], lower,
                               msg='lower unequal for ' + uncs[i])
        self.assertAlmostEqual(normalized[i, 1], upper,
                               msg='upper unequal for ' + uncs[i])
def test_plot_pairwise_scatter(self):
    """Smoke-test ``sdutil.plot_pair_wise_scatter``.

    No assertions are made: the test passes when the plotting call and
    the final draw complete without raising.
    """
    x = pd.DataFrame(
        [[0.1, 0, 'a'],
         [0.2, 1, 'b'],
         [0.3, 2, 'a'],
         [0.4, 3, 'b'],
         [0.5, 4, 'a'],
         [0.6, 5, 'a'],
         [0.7, 6, 'b'],
         [0.8, 7, 'a'],
         [0.9, 8, 'b'],
         [1.0, 9, 'a']],
        columns=['a', 'b', 'c'])

    # dtype=int replaces the np.int alias, which NumPy has removed
    y = np.zeros((x.shape[0],), dtype=int)
    # label rows with a > 0.5 whose category is not 'a'
    y[(x.a > 0.5) & (x.c != 'a')] = 1

    x['c'] = x['c'].astype('category')
    box_init = sdutil._make_box(x)

    # box with a limited to [0.5, 1.0] and c limited to {'b'}
    boxlim = box_init.copy()
    boxlim.a = [0.5, 1.0]
    boxlim.c = [{'b'}] * 2
    restricted_dims = ['a', 'c']

    sdutil.plot_pair_wise_scatter(x, y, boxlim, box_init, restricted_dims)

    plt.draw()
    plt.close('all')
def test_normalize(self):
    """Verify ``sdutil._normalize`` on NumPy structured-array boxes."""
    # Builtin float replaces the np.float alias, which NumPy has removed.
    x = np.array([(0, 1, 2), (2, 5, 6), (3, 2, 1)],
                 dtype=[('a', float), ('b', float), ('c', float)])
    box_init = sdutil._make_box(x)

    box_lim = np.array([(0, 1, 1), (2, 5, 2)],
                       dtype=[('a', float), ('b', float), ('c', float)])

    # NOTE(review): np.lib.recfunctions is only reachable when
    # numpy.lib.recfunctions has been imported somewhere — confirm.
    uncs = np.lib.recfunctions.get_names(box_init.dtype)  # @UndefinedVariable
    normalized = sdutil._normalize(box_lim, box_init, uncs)

    # one expected (lower, upper) pair per entry in uncs
    for i, lims in enumerate([(0, 2 / 3), (0, 1), (0, 0.2)]):
        lower, upper = lims
        self.assertAlmostEqual(normalized[i, 0], lower,
                               msg='lower unequal for ' + uncs[i])
        self.assertAlmostEqual(normalized[i, 1], upper,
                               msg='upper unequal for ' + uncs[i])
def test_OutputFormatterMixin(self):
    """Exercise ``sdutil.OutputFormatterMixin`` through two subclasses.

    First asserts that ``AttributeError`` is raised for a subclass that
    defines neither ``boxes`` nor ``stats``. Then, for a subclass that
    defines both, calls ``show_boxes`` and compares the results of
    ``boxes_to_dataframe`` and ``stats_to_dataframe`` with hand-built frames.
    """
    x = pd.DataFrame(
        [[0.1, 0, 'a'],
         [0.2, 1, 'b'],
         [0.3, 2, 'a'],
         [0.4, 3, 'b'],
         [0.5, 4, 'a'],
         [0.6, 5, 'a'],
         [0.7, 6, 'b'],
         [0.8, 7, 'a'],
         [0.9, 8, 'b'],
         [1.0, 9, 'a']],
        columns=['a', 'b', 'c'])

    # Labels are computed but not handed to the formatter below.
    # dtype=int replaces the np.int alias, which NumPy has removed.
    y = np.zeros((x.shape[0],), dtype=int)
    logical = (x.a > 0.5) & (x.c != 'a')
    y[logical] = 1
    logical = (x.a < 0.5) & (x.c != 'b')
    y[logical] = 1

    x['c'] = x['c'].astype('category')
    box_init = sdutil._make_box(x)

    boxlim1 = box_init.copy()
    boxlim1.a = [0.5, 1]
    boxlim1.c = [{'b'}] * 2  # same set object for both limit rows

    boxlim2 = box_init.copy()
    boxlim2.a = [0.1, 0.5]
    boxlim2.c = [{'a'}] * 2

    # NOTE(review): the extent of this with-block was reconstructed from
    # whitespace-collapsed source — confirm against the original file.
    with self.assertRaises(AttributeError):
        class WrongTestFormatter(sdutil.OutputFormatterMixin):
            pass

        formatter = WrongTestFormatter()
        formatter.boxes = [boxlim1, boxlim2]
        formatter.stats = [{'coverage': 0.5, 'density': 1},
                           {'coverage': 0.5, 'density': 1}]

    class TestFormatter(sdutil.OutputFormatterMixin):
        boxes = []
        stats = []

    formatter = TestFormatter()
    formatter.boxes = [boxlim1, boxlim2]
    formatter.stats = [{'coverage': 0.5, 'density': 1},
                       {'coverage': 0.5, 'density': 1}]
    formatter.x = x

    formatter.show_boxes()
    plt.draw()
    plt.close('all')

    # check boxes
    boxes = formatter.boxes_to_dataframe()
    expected_boxes = pd.DataFrame(
        [[{'b'}, {'b'}, {'a'}, {'a'}],
         [0.5, 1, 0.1, 0.5]],
        index=['c', 'a'],
        columns=pd.MultiIndex(levels=[['box 1', 'box 2'], ['max', 'min']],
                              codes=[[0, 0, 1, 1], [1, 0, 1, 0]]))
    self.assertTrue(expected_boxes.equals(boxes))

    # check stats
    stats = formatter.stats_to_dataframe()
    expected_stats = pd.DataFrame([[0.5, 1], [0.5, 1]],
                                  index=['box 1', 'box 2'],
                                  columns=['coverage', 'density'])
    self.assertTrue(expected_stats.equals(stats))
def test_OutputFormatterMixin(self):
    """Check ``sdutil.OutputFormatterMixin`` with an invalid and a valid subclass.

    A subclass without ``boxes``/``stats`` attributes is expected to
    trigger ``AttributeError``. A subclass that declares them is used to
    call ``show_boxes`` and to compare ``boxes_to_dataframe`` and
    ``stats_to_dataframe`` against expected frames.
    """
    x = pd.DataFrame(
        [[0.1, 0, 'a'],
         [0.2, 1, 'b'],
         [0.3, 2, 'a'],
         [0.4, 3, 'b'],
         [0.5, 4, 'a'],
         [0.6, 5, 'a'],
         [0.7, 6, 'b'],
         [0.8, 7, 'a'],
         [0.9, 8, 'b'],
         [1.0, 9, 'a']],
        columns=['a', 'b', 'c'])

    # Builtin int is used because the np.int alias was removed from NumPy.
    # The labels are not passed to the formatter.
    y = np.zeros((x.shape[0],), dtype=int)
    logical = (x.a > 0.5) & (x.c != 'a')
    y[logical] = 1
    logical = (x.a < 0.5) & (x.c != 'b')
    y[logical] = 1

    # the string comparisons above run before the column becomes categorical
    x['c'] = x['c'].astype('category')
    box_init = sdutil._make_box(x)

    # first box: a limited to [0.5, 1], c limited to {'b'}
    boxlim1 = box_init.copy()
    boxlim1.a = [0.5, 1]
    boxlim1.c = [{'b'}] * 2

    # second box: a limited to [0.1, 0.5], c limited to {'a'}
    boxlim2 = box_init.copy()
    boxlim2.a = [0.1, 0.5]
    boxlim2.c = [{'a'}] * 2

    # NOTE(review): which statements belong inside this with-block was
    # inferred from whitespace-collapsed source — confirm.
    with self.assertRaises(AttributeError):
        class WrongTestFormatter(sdutil.OutputFormatterMixin):
            pass

        formatter = WrongTestFormatter()
        formatter.boxes = [boxlim1, boxlim2]
        formatter.stats = [{'coverage': 0.5, 'density': 1},
                           {'coverage': 0.5, 'density': 1}]

    class TestFormatter(sdutil.OutputFormatterMixin):
        boxes = []
        stats = []

    formatter = TestFormatter()
    formatter.boxes = [boxlim1, boxlim2]
    formatter.stats = [{'coverage': 0.5, 'density': 1},
                       {'coverage': 0.5, 'density': 1}]
    formatter.x = x

    formatter.show_boxes()
    plt.draw()
    plt.close('all')

    # check boxes
    boxes = formatter.boxes_to_dataframe()
    expected_columns = pd.MultiIndex(
        levels=[['box 1', 'box 2'], ['max', 'min']],
        codes=[[0, 0, 1, 1], [1, 0, 1, 0]])
    expected_boxes = pd.DataFrame(
        [[{'b'}, {'b'}, {'a'}, {'a'}],
         [0.5, 1, 0.1, 0.5]],
        index=['c', 'a'],
        columns=expected_columns)
    self.assertTrue(expected_boxes.equals(boxes))

    # check stats
    stats = formatter.stats_to_dataframe()
    expected_stats = pd.DataFrame([[0.5, 1], [0.5, 1]],
                                  index=['box 1', 'box 2'],
                                  columns=['coverage', 'density'])
    self.assertTrue(expected_stats.equals(stats))