def test_partition_iterator(self):
    """Check the folds ByWindow yields in SLIDING and EXPANDING modes.

    For each mode the (train, test) index pairs produced by iterating a
    ByWindow are compared, as sets, with hand-written expectations, and
    ByWindow.est_n_folds is required to equal the number of folds yielded.
    """
    records = [
        (2001, 12.31),
        (1999, 14.32),
        (1999, 120.76),
        (2002, 32.12),
        (2004, 98.64),
        (2005, 32.21),
        (2002, 100.23),
        (2006, 123.40),
        (2000, 72.21),
    ]
    fines_issued = np.array(
        records, dtype=[('year', int), ('fine', float)])
    y = fines_issued['year']
    training_windows = by_window_ranges(1999, 2000, 2004, 2)
    testing_windows = by_window_ranges(2001, 2002, 2006, 2)

    # Expected folds per mode; the integers are positions in ``y``.
    expected_by_mode = {
        ByWindowMode.SLIDING: [
            ({1, 2, 8}, {0, 3, 6}),
            ({0, 3, 6}, {4}),
            ({4}, {5, 7}),
        ],
        ByWindowMode.EXPANDING: [
            ({1, 2, 8}, {0, 3, 6}),
            ({0, 1, 2, 3, 6, 8}, {4}),
            ({0, 1, 2, 3, 4, 6, 8}, {5, 7}),
        ],
    }

    for mode, expected in expected_by_mode.items():
        folds = []
        partitioner = ByWindow(y, training_windows, testing_windows, mode)
        for train_inds, test_inds in partitioner:
            # Compare as sets so index order within a fold is ignored.
            folds.append((set(train_inds), set(test_inds)))
        self.assertEqual(folds, expected)
        estimated = ByWindow.est_n_folds(
            y, training_windows, testing_windows, mode)
        self.assertEqual(estimated, len(folds))
def test_by_window_ranges(self):
    """Check by_window_ranges against two hand-computed window lists."""
    # Final argument 2: each expected window starts two past the previous.
    expected_step_two = [
        (1999, 2000),
        (2001, 2002),
        (2003, 2004),
        (2005, 2006),
    ]
    actual_step_two = by_window_ranges(1999, 2000, 2006, 2)
    self.assertEqual(actual_step_two, expected_step_two)

    # Final argument 1: consecutive expected windows overlap.
    expected_step_one = [(1, 3), (2, 4), (3, 5), (4, 6), (5, 7)]
    actual_step_one = by_window_ranges(1, 3, 7, 1)
    self.assertEqual(actual_step_one, expected_step_one)
def test_partition_iterator(self):
    """Verify ByWindow's train/test index folds for both window modes.

    Iterating a ByWindow must give the expected (train, test) index sets
    for ByWindowMode.SLIDING and ByWindowMode.EXPANDING, and
    ByWindow.est_n_folds must report the same number of folds.
    """
    years = [2001, 1999, 1999, 2002, 2004, 2005, 2002, 2006, 2000]
    fines = [12.31, 14.32, 120.76, 32.12, 98.64, 32.21, 100.23, 123.40,
             72.21]
    fines_issued = np.array(
        list(zip(years, fines)),
        dtype=[('year', int), ('fine', float)])
    y = fines_issued['year']

    training_windows = by_window_ranges(1999, 2000, 2004, 2)
    testing_windows = by_window_ranges(2001, 2002, 2006, 2)

    # The first fold is identical in both modes; later folds differ in
    # how many earlier rows the training set contains.
    first_fold = (set([8, 1, 2]), set([0, 3, 6]))
    sliding_folds = [
        first_fold,
        (set([0, 3, 6]), set([4])),
        (set([4]), set([5, 7])),
    ]
    expanding_folds = [
        first_fold,
        (set([0, 1, 2, 3, 6, 8]), set([4])),
        (set([0, 1, 2, 3, 4, 6, 8]), set([5, 7])),
    ]
    ctrls = {
        ByWindowMode.SLIDING: sliding_folds,
        ByWindowMode.EXPANDING: expanding_folds,
    }

    for mode in ctrls:
        window_iter = ByWindow(y, training_windows, testing_windows, mode)
        observed = [(set(train), set(test)) for train, test in window_iter]
        self.assertEqual(observed, ctrls[mode])
        self.assertEqual(
            ByWindow.est_n_folds(
                y, training_windows, testing_windows, mode),
            len(observed))
def test_by_window_ranges(self):
    """Compare by_window_ranges output with known (start, end) lists."""
    # Each case pairs the positional arguments with the expected result.
    cases = [
        ((1999, 2000, 2006, 2),
         [(1999, 2000), (2001, 2002), (2003, 2004), (2005, 2006)]),
        ((1, 3, 7, 1),
         [(1, 3), (2, 4), (3, 5), (4, 6), (5, 7)]),
    ]
    for args, expected in cases:
        self.assertEqual(by_window_ranges(*args), expected)
def test_wrap_cross_validation(self):
    """Run ByWindow as a wrapped pipeline stage and check every fold.

    Two arrays (X and y) are fed through the wrapped partition iterator
    in SLIDING mode. For each expected fold, the train and test outputs
    for both arrays must equal the rows of the inputs selected by the
    expected indices.
    """
    X = np.array(
        [
            (0, 2001, 12.31),
            (1, 1999, 14.32),
            (2, 1999, 120.76),
            (3, 2002, 32.12),
            (4, 2004, 98.64),
            (5, 2005, 32.21),
            (6, 2002, 100.23),
            (7, 2006, 123.40),
            (8, 2000, 72.21),
        ],
        dtype=[("id", int), ("year", int), ("fine", float)],
    )
    y = np.array(
        [(0,), (1,), (0,), (1,), (0,), (1,), (0,), (1,), (0,)],
        dtype=[("category", int)],
    )
    # Expected (train rows, test rows) for each fold, in fold order.
    ctrl_inds = [([1, 2, 8], [0, 3, 6]), ([0, 3, 6], [4]), ([4], [5, 7])]

    p = Pipeline()
    node_X_in = p.add(NumpyRead(X))
    node_y_in = p.add(NumpyRead(y))
    # The "year" column split out of X is what gets passed as ``y`` below.
    node_just_time = p.add(SplitColumns(["year"]))
    node_just_time(node_X_in)

    training_windows = by_window_ranges(1999, 2000, 2004, 2)
    testing_windows = by_window_ranges(2001, 2002, 2006, 2)
    mode = ByWindowMode.SLIDING
    node_cv = p.add(
        wrap_and_make_instance(
            "upsg.transform.partition_iterators.ByWindow",
            n_arrays=2,
            training_windows=training_windows,
            testing_windows=testing_windows,
            mode=mode,  # previously assigned but never used
        )
    )
    node_cv(input0=node_X_in, input1=node_y_in, y=node_just_time)

    # One train and one test output per input array per fold.
    self.assertEqual(len(node_cv.output_keys), 2 * 2 * len(ctrl_inds))

    # Output-key templates, ordered (train X, train y, test X, test y).
    key_templates = ("train0_{}", "train1_{}", "test0_{}", "test1_{}")
    out_nodes = []
    # range() instead of xrange() so the test also runs on Python 3.
    for i in range(len(ctrl_inds)):
        fold_nodes = []
        for template in key_templates:
            writer = p.add(NumpyWrite())
            writer(node_cv[template.format(i)])
            fold_nodes.append(writer)
        out_nodes.append(tuple(fold_nodes))

    p.run()

    for (train_inds, test_inds), fold_nodes in zip(ctrl_inds, out_nodes):
        train_node_X, train_node_y, test_node_X, test_node_y = fold_nodes
        self.assertTrue(
            np.array_equal(train_node_X.get_stage().result, X[train_inds]))
        self.assertTrue(
            np.array_equal(train_node_y.get_stage().result, y[train_inds]))
        self.assertTrue(
            np.array_equal(test_node_X.get_stage().result, X[test_inds]))
        self.assertTrue(
            np.array_equal(test_node_y.get_stage().result, y[test_inds]))
def test_wrap_cross_validation(self):
    """Check the wrapped ByWindow stage splits two arrays fold by fold.

    A pipeline feeds X and y into a stage built from
    'upsg.transform.partition_iterators.ByWindow' (SLIDING mode). After
    running it, each fold's train/test outputs are compared with the
    rows of X and y picked out by the expected index lists.
    """
    X = np.array([(0, 2001, 12.31),
                  (1, 1999, 14.32),
                  (2, 1999, 120.76),
                  (3, 2002, 32.12),
                  (4, 2004, 98.64),
                  (5, 2005, 32.21),
                  (6, 2002, 100.23),
                  (7, 2006, 123.40),
                  (8, 2000, 72.21)],
                 dtype=[('id', int), ('year', int), ('fine', float)])
    y = np.array([(0, ), (1, ), (0, ), (1, ), (0, ),
                  (1, ), (0, ), (1, ), (0, )],
                 dtype=[('category', int)])
    # Expected (train rows, test rows) per fold.
    ctrl_inds = [([1, 2, 8], [0, 3, 6]),
                 ([0, 3, 6], [4]),
                 ([4], [5, 7])]

    p = Pipeline()
    node_X_in = p.add(NumpyRead(X))
    node_y_in = p.add(NumpyRead(y))
    # Only the 'year' column of X is routed to the stage's ``y`` input.
    node_just_time = p.add(SplitColumns(['year']))
    node_just_time(node_X_in)

    training_windows = by_window_ranges(1999, 2000, 2004, 2)
    testing_windows = by_window_ranges(2001, 2002, 2006, 2)
    mode = ByWindowMode.SLIDING
    node_cv = p.add(
        wrap_and_make_instance(
            'upsg.transform.partition_iterators.ByWindow',
            n_arrays=2,
            training_windows=training_windows,
            testing_windows=testing_windows,
            mode=mode))  # use the local instead of repeating the constant
    node_cv(input0=node_X_in, input1=node_y_in, y=node_just_time)

    # Two arrays, each with a train and a test output, for every fold.
    self.assertEqual(len(node_cv.output_keys), 2 * 2 * len(ctrl_inds))

    out_nodes = []
    # range() works on both Python 2 and 3; xrange() is Python 2 only.
    for i in range(len(ctrl_inds)):
        train_node_X = p.add(NumpyWrite())
        train_node_X(node_cv['train0_{}'.format(i)])
        train_node_y = p.add(NumpyWrite())
        train_node_y(node_cv['train1_{}'.format(i)])
        test_node_X = p.add(NumpyWrite())
        test_node_X(node_cv['test0_{}'.format(i)])
        test_node_y = p.add(NumpyWrite())
        test_node_y(node_cv['test1_{}'.format(i)])
        out_nodes.append(
            (train_node_X, train_node_y, test_node_X, test_node_y))

    p.run()

    for i, nodes in enumerate(out_nodes):
        train_node_X, train_node_y, test_node_X, test_node_y = nodes
        train_inds, test_inds = ctrl_inds[i]
        # (writer node, expected rows) pairs for this fold.
        checks = [
            (train_node_X, X[train_inds]),
            (train_node_y, y[train_inds]),
            (test_node_X, X[test_inds]),
            (test_node_y, y[test_inds]),
        ]
        for node, expected in checks:
            self.assertTrue(
                np.array_equal(node.get_stage().result, expected))