def test_prep_data(self):
        """Compare prep_data output, with a target, against a hand-built SFrame.

        The expected frame has ten chunks spread over three session ids.
        Two of them (the second and the last) are padded: they have a
        chunk_len of 5, zeroed trailing features and zero weights on the
        last two predictions.
        """
        n_predictions = self.predictions_in_chunk
        rows_per_chunk = self.prediction_window * n_predictions

        # Per-chunk building blocks; "full" chunks carry data everywhere,
        # "padded" chunks end with zero-filled entries.
        features_full = [1.0, 2.0] * rows_per_chunk
        features_padded = [1.0, 2.0] * 5 + [0.0, 0.0] * 5
        weights_full = [1.0] * n_predictions
        weights_padded = [1.0] * 3 + [0.0] * 2
        targets_per_chunk = [0.0] * n_predictions

        # Column values, one entry per chunk (ten chunks in total).
        session_column = [0] * 2 + [1] * 3 + [2] * 5
        chunk_len_column = [10, 5, 10, 10, 10, 10, 10, 10, 10, 5]
        features_column = ([features_full, features_padded] +
                           [features_full] * 7 + [features_padded])
        weights_column = ([weights_full, weights_padded] +
                          [weights_full] * 7 + [weights_padded])
        target_column = [targets_per_chunk] * 10

        expected = tc.SFrame({
            'session_id': session_column,
            'chunk_len': chunk_len_column,
            'features': features_column,
            'target': target_column,
            'weights': weights_column,
        })

        chunked, session_count = sframe_sequence_iterator.prep_data(
            self.data,
            self.features,
            self.session_id,
            self.prediction_window,
            self.predictions_in_chunk,
            target=self.target)

        tc.util._assert_sframe_equal(expected, chunked, check_column_order=False)

        # The reported session count must match the distinct ids in the input.
        distinct_sessions = self.data[self.session_id].unique()
        self.assertEqual(session_count, len(distinct_sessions))
示例#2
0
    def test_prep_data_no_target(self):
        """Check prep_data output when no target column is passed.

        The expectation is the ``expected_chunked_3_2`` fixture with its
        'target' and 'weights' columns removed.
        """
        chunked, session_count = sframe_sequence_iterator.prep_data(
            self.data,
            self.features,
            self.session_id,
            self.prediction_window,
            self.predictions_in_chunk)

        without_labels = self.expected_chunked_3_2.remove_columns(['target', 'weights'])
        tc.util._assert_sframe_equal(without_labels, chunked, check_column_order=False)

        # The reported session count must match the distinct ids in the input.
        distinct_sessions = self.data[self.session_id].unique()
        self.assertEqual(session_count, len(distinct_sessions))
示例#3
0
    def test_prep_data_case_3(self):
        """Check prep_data with prediction_window = 4 and chunk_len = 8.

        Edge cases covered by this configuration:
          * one session with exactly one prediction window
          * last session % chunk_len == 0
        """
        prediction_window = 4
        predictions_in_chunk = 2

        chunked, session_count = sframe_sequence_iterator.prep_data(
            self.data,
            self.features,
            self.session_id,
            prediction_window,
            predictions_in_chunk,
            target=self.target)

        tc.util._assert_sframe_equal(
            self.expected_chunked_4_2, chunked, check_column_order=False)

        # The reported session count must match the distinct ids in the input.
        distinct_sessions = self.data[self.session_id].unique()
        self.assertEqual(session_count, len(distinct_sessions))
示例#4
0
    def test_prep_data_case_2(self):
        """Check prep_data with prediction_window = 2 and chunk_len = 6.

        Edge cases covered by this configuration:
          * one session whose length < chunk_len, and length % p_w != 0
          * one session whose length == chunk_len
            (the original note adds "tie within to p_w"; meaning unclear)
          * one session > chunk_len; second part << chunk_len and < p_w
          * last session % chunk_len != 0 (same as case 1)
        """
        prediction_window = 2
        predictions_in_chunk = 3

        chunked, session_count = sframe_sequence_iterator.prep_data(
            self.data,
            self.features,
            self.session_id,
            prediction_window,
            predictions_in_chunk,
            target=self.target)

        tc.util._assert_sframe_equal(
            self.expected_chunked_2_3, chunked, check_column_order=False)

        # The reported session count must match the distinct ids in the input.
        distinct_sessions = self.data[self.session_id].unique()
        self.assertEqual(session_count, len(distinct_sessions))
示例#5
0
    def test_prep_data_case_1(self):
        """Check prep_data with prediction_window = 3 and chunk_len = 6.

        Uses the fixture's own prediction_window / predictions_in_chunk.
        Edge cases covered by this configuration:
          * one session whose length < chunk_len, and length % p_w == 0
          * one session whose length == chunk_len
          * one session > chunk_len; second part << chunk_len and == p_w
          * last session % chunk_len != 0
        """
        chunked, session_count = sframe_sequence_iterator.prep_data(
            self.data,
            self.features,
            self.session_id,
            self.prediction_window,
            self.predictions_in_chunk,
            target=self.target)

        tc.util._assert_sframe_equal(
            self.expected_chunked_3_2, chunked, check_column_order=False)

        # The reported session count must match the distinct ids in the input.
        distinct_sessions = self.data[self.session_id].unique()
        self.assertEqual(session_count, len(distinct_sessions))
示例#6
0
    def _test_next(self, batch_size, expected_num_batches, expected_batches, use_taget=True, use_pad=False):
        """Iterate an SFrameSequenceIter over the chunked fixture and check every batch.

        The fixture data is chunked with prep_data (always with the target
        column), wrapped in an SFrameSequenceIter, and then each produced
        batch is compared with the corresponding entry of ``expected_batches``.
        Finally the iterator must raise StopIteration on one extra call.

        Args:
            batch_size: Batch size passed to the iterator and expected back
                from ``seq_iter.batch_size``.
            expected_num_batches: Value ``seq_iter.num_batches`` must equal.
            expected_batches: Iterable of dicts, one per batch, with a
                'features' entry, 'target' and 'weights' entries (read only
                when ``use_taget`` is true) and an optional 'padding' entry
                (read only when ``use_pad`` is true).
            use_taget: Forwarded to the iterator as ``use_target``. The
                misspelled name is kept so keyword callers keep working.
            use_pad: Forwarded to the iterator as ``use_pad``.
        """
        chunked, num_sessions = sframe_sequence_iterator.prep_data(
            self.data, self.features, self.session_id, self.prediction_window,
            self.predictions_in_chunk, target=self.target)

        seq_iter = sframe_sequence_iterator.SFrameSequenceIter(
            chunked, len(self.features), self.prediction_window,
            self.predictions_in_chunk, batch_size,
            use_target=use_taget, use_pad=use_pad)

        # assertEqual (rather than assertTrue(a == b)) reports both values on failure.
        self.assertEqual(seq_iter.batch_size, batch_size)
        self.assertEqual(seq_iter.num_batches, expected_num_batches)

        for batch_num, expected_batch in enumerate(expected_batches):
            observed_batch = seq_iter.next()

            np.testing.assert_array_equal(expected_batch['features'], observed_batch.data[0].asnumpy(),
                                          "Error - features in batch %d" % batch_num)

            if use_taget:
                np.testing.assert_array_equal(expected_batch['target'], observed_batch.label[0].asnumpy(),
                                              "Error - target in batch %d" % batch_num)

                np.testing.assert_array_equal(expected_batch['weights'], observed_batch.label[1].asnumpy(),
                                              "Error - weights in batch %d" % batch_num)
            else:
                self.assertIsNone(observed_batch.label)

            # Padding is only checked against the expectation when padding is
            # enabled and the expected batch declares it; otherwise it must be 0.
            if use_pad and ("padding" in expected_batch):
                expected_pad = expected_batch["padding"]
            else:
                expected_pad = 0
            self.assertEqual(observed_batch.pad, expected_pad)

        # The iterator must yield exactly as many batches as expected_batches
        # holds, so one more call to next() has to raise StopIteration.
        try:
            seq_iter.next()
        except StopIteration:
            pass
        else:
            self.fail("Error - the iterator is longer than expected")