Пример #1
0
    def test_generator_len(self):
        """The generator must report the same length as its source."""
        inputs = [0] * 10
        targets = [0] * 10
        source = PreLoadedSource(inputs, targets)

        batch_generator = MiniBatchGenerator(source, AdapterMock())

        actual_len = len(batch_generator)
        expected_len = len(source)
        self.assertEqual(actual_len, expected_len)
Пример #2
0
def fetch_strokes(source, num_strokes):
    """Collect the points of at most ``num_strokes`` strokes from ``source``.

    Walks the sequences yielded by ``source.get_sequences()``, converts each
    stroke with ``stroke.stroke_to_points()`` and skips every stroke for
    which the conversion raises ``BadStrokeException``.

    Args:
        source: object whose ``get_sequences()`` yields ``(strokes, text)``
            pairs; the text part is ignored.
        num_strokes: upper bound on the number of strokes to collect.

    Returns:
        A ``PreLoadedSource`` holding the collected point sequences, each
        paired with an empty string as a placeholder output.
    """
    cropped_strokes = []
    dummy_out = []
    for strokes, _ in source.get_sequences():
        for stroke in strokes:
            # ">=" rather than ">": stop as soon as the quota is filled so
            # that no more than ``num_strokes`` strokes are ever returned.
            if len(cropped_strokes) >= num_strokes:
                return PreLoadedSource(cropped_strokes, dummy_out)

            try:
                deltas = stroke.stroke_to_points()
            except BadStrokeException:
                # A malformed stroke is dropped; it does not count
                # towards the quota.
                continue

            cropped_strokes.append(deltas)
            dummy_out.append('')

    # The source ran out before the quota was reached.
    return PreLoadedSource(cropped_strokes, dummy_out)
Пример #3
0
def normalized_source(source, normalizer):
    """Return a new source whose inputs were run through ``normalizer``.

    All ``(input, output)`` pairs are read from ``source.get_sequences()``;
    the inputs are passed as one list to ``normalizer.preprocess`` while the
    outputs are carried over unchanged.

    Args:
        source: object whose ``get_sequences()`` yields 2-item pairs.
        normalizer: object exposing ``preprocess(list_of_inputs)``.

    Returns:
        A ``PreLoadedSource`` built from the preprocessed inputs and the
        original outputs.
    """
    pairs = list(source.get_sequences())
    inputs = [x for x, _ in pairs]
    targets = [y for _, y in pairs]

    return PreLoadedSource(normalizer.preprocess(inputs), targets)
    def test_data_processing_calls(self):
        """After compiling, the preprocessor mock holds the expected args.

        NOTE(review): the expected pairs differ from the provider's
        contents; presumably they come from the slices served by
        DataSplitterMock -- confirm against the mock.
        """
        preprocessor_mock = PreprocessorMock([])
        data_provider = PreLoadedSource([2, 4], ["two four"])
        repo_mock = DataRepoMock()
        splitter_mock = DataSplitterMock(data_provider)

        DataSetCompiler(preprocessor_mock, splitter_mock, repo_mock).compile()

        expected_args = [(1, '1'), (2, '2'), (3, '3')]
        self.assertEqual(preprocessor_mock.processed_args, expected_args)
    def test_preprocessor_fits_training_data(self):
        """After compiling, ``fit_arg`` yields exactly the pair ``(1, '1')``.

        NOTE(review): ``(1, '1')`` is presumably the training slice served
        by DataSplitterMock -- confirm against the mock.
        """
        preprocessor_mock = PreprocessorMock([])
        data_provider = PreLoadedSource([2, 4], ["two four"])
        repo_mock = DataRepoMock()
        splitter_mock = DataSplitterMock(data_provider)

        DataSetCompiler(preprocessor_mock, splitter_mock, repo_mock).compile()

        fitted_on = list(preprocessor_mock.fit_arg.get_sequences())
        self.assertEqual(fitted_on, [(1, '1')])
Пример #6
0
    def test_fit_and_process_example(self):
        """Fit a two-step pipeline, then process a single example.

        The processed input is expected to be 13 and the output to stay 2.
        NOTE(review): the value 13 depends on what DummyStep does, which is
        not visible here -- confirm against its definition.
        """
        pipeline = PreProcessor(steps=[DummyStep(), DummyStep()])

        training_source = PreLoadedSource([1, 2], [0, 0])
        pipeline.fit(training_source)

        processed_x, processed_y = pipeline.pre_process_example(1, 2)

        self.assertEqual(processed_x, 13)
        self.assertEqual(processed_y, 2)
    def test_compiler_performs_splits_data(self):
        """Compiling must leave the splitter mock's ``called`` flag truthy."""
        preprocessor_mock = PreprocessorMock([])
        data_provider = PreLoadedSource([1, 2, 3], ['one', 'two', 'three'])
        repo_mock = DataRepoMock()
        splitter_mock = DataSplitterMock(data_provider)

        DataSetCompiler(preprocessor_mock, splitter_mock, repo_mock).compile()

        self.assertTrue(splitter_mock.called)
Пример #8
0
def dummy_source():
    """Build a single-example source from a hard-coded character sequence.

    Every character of the stretched input text is encoded with a fresh
    ``CharacterTable``, the codes are passed through ``to_categorical``
    (presumably Keras' one-hot helper -- confirm the import) and the result
    is reshaped so that its first axis has size 1.

    Returns:
        A ``PreLoadedSource`` pairing that array with ``['Hello, world']``.
    """
    stretched_text = (
        'HHHH    eee  lll  lll  ooo  ,,,  www   oooo  rrr   lll  ddd'
    )
    target_text = 'Hello, world'

    table = CharacterTable()
    char_codes = list(map(table.encode, stretched_text))
    encoded = to_categorical(char_codes, num_classes=len(table))

    # One sequence, one row per input character; the last axis is inferred.
    single_example = encoded.reshape(1, len(stretched_text), -1)

    return PreLoadedSource(single_example, [target_text])
Пример #9
0
    def test_values_in_mini_batches(self):
        """The first mini-batch must carry ``[0]`` as inputs and as outputs.

        Uses a one-example source with a batch size of 2.
        """
        num_examples = 1
        batch_size = 2

        zeros_in = [0] * num_examples
        zeros_out = [0] * num_examples
        source = PreLoadedSource(zeros_in, zeros_out)

        batch_generator = MiniBatchGenerator(
            source, AdapterMock(), batch_size=batch_size
        )

        collected = []
        for mini_batch in batch_generator.get_examples():
            # Stop once enough batches were gathered; get_examples() may
            # never terminate on its own (TODO confirm).
            max_batches = len(batch_generator) // batch_size + 1
            if len(collected) >= max_batches:
                break
            collected.append(mini_batch)

        first_batch = collected[0]
        self.assertEqual(first_batch[0], [0])
        self.assertEqual(first_batch[1], [0])
    def test_repo(self):
        """Compiling must hand three one-example slices over to the repo.

        The slices are expected in the order train, validation, test.
        """
        data_provider = PreLoadedSource([1, 2, 3], ['1', '2', '3'])
        pipeline = PreProcessor([])
        repo_mock = DataRepoMock()
        splitter_mock = DataSplitterMock(data_provider)

        DataSetCompiler(pipeline, splitter_mock, repo_mock).compile()

        self.assertEqual(len(repo_mock.slices), 3)

        train_slice, val_slice, test_slice = repo_mock.slices

        # Check all sizes first, then the exact contents of every slice.
        for data_slice in (train_slice, val_slice, test_slice):
            self.assertEqual(len(data_slice), 1)

        self.assertEqual(train_slice, [(1, '1')])
        self.assertEqual(val_slice, [(2, '2')])
        self.assertEqual(test_slice, [(3, '3')])
 def test_data(self):
     """Return a fixed one-example source: input 3, output '3'."""
     inputs, outputs = [3], ['3']
     return PreLoadedSource(inputs, outputs)
 def validation_data(self):
     """Return a fixed one-example source: input 2, output '2'."""
     inputs, outputs = [2], ['2']
     return PreLoadedSource(inputs, outputs)
 def train_data(self):
     """Return a fixed one-example source: input 1, output '1'."""
     inputs, outputs = [1], ['1']
     return PreLoadedSource(inputs, outputs)