Example #1
 def setUp(self):
     super().setUp()
     self.instances = SequenceTaggingDatasetReader().read(
         self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"
     )
     self.instances_lazy = SequenceTaggingDatasetReader(lazy=True).read(
         self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"
     )
     vocab = Vocabulary.from_instances(self.instances)
     self.vocab = vocab
     self.model_params = Params(
         {
             "text_field_embedder": {
                 "token_embedders": {"tokens": {"type": "embedding", "embedding_dim": 5}}
             },
             "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2},
         }
     )
     self.model = SimpleTagger.from_params(vocab=self.vocab, params=self.model_params)
     self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01, momentum=0.9)
     self.data_loader = DataLoader(self.instances, batch_size=2, collate_fn=allennlp_collate)
     self.data_loader_lazy = DataLoader(
         self.instances_lazy, batch_size=2, collate_fn=allennlp_collate
     )
     self.validation_data_loader = DataLoader(
         self.instances, batch_size=2, collate_fn=allennlp_collate
     )
     self.instances.index_with(vocab)
     self.instances_lazy.index_with(vocab)
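
A setup like this is normally consumed by the trainer under test. As a minimal sketch of that usage (assuming allennlp 1.x, where GradientDescentTrainer accepts exactly the objects built above; num_epochs is an arbitrary choice here):

    from allennlp.training import GradientDescentTrainer

    def test_trainer_runs(self):
        # Wire the fixture's model, optimizer, and loaders into a trainer.
        trainer = GradientDescentTrainer(
            model=self.model,
            optimizer=self.optimizer,
            data_loader=self.data_loader,
            validation_data_loader=self.validation_data_loader,
            num_epochs=2,
        )
        metrics = trainer.train()  # returns a dict of training/validation metrics
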
Example #2
    def setUp(self):
        super().setUp()
        # TODO: make this a set of dataset readers.
        # Classification may be easier in this case: same dataset reader, but with different paths.
        self.instances_list = []
        self.instances_list.append(SequenceTaggingDatasetReader().read(self.FIXTURES_ROOT / 'data' / 'meta_seq' / 'sequence_tagging.tsv'))
        self.instances_list.append(SequenceTaggingDatasetReader().read(self.FIXTURES_ROOT / 'data' / 'meta_seq' / 'sequence_tagging1.tsv'))
        self.instances_list.append(SequenceTaggingDatasetReader().read(self.FIXTURES_ROOT / 'data' / 'meta_seq' / 'sequence_tagging2.tsv'))
        # Build a vocabulary from the first dataset, then extend it with the rest
        # (skipping index 0 so its instances are not counted twice).
        combined_vocab = Vocabulary.from_instances(self.instances_list[0])
        for instances in self.instances_list[1:]:
            combined_vocab.extend_from_instances(Params({}), instances=instances)
        self.vocab = combined_vocab
        # TODO: figure out the right params for this setup
        self.model_params = Params({
            "text_field_embedder": {
                "token_embedders": {
                    "tokens": {"type": "embedding", "embedding_dim": 5}
                }
            },
            "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2},
        })
        self.model = SimpleTagger.from_params(vocab=self.vocab, params=self.model_params)
        self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01, momentum=0.9)
        self.iterator = BasicIterator(batch_size=2)
        self.iterator.index_with(combined_vocab)
Example #3
    def setup_method(self) -> None:
        super().setup_method()

        # use SequenceTaggingDatasetReader as the base reader
        self.base_reader = SequenceTaggingDatasetReader(lazy=True)
        self.base_reader_multi_process = SequenceTaggingDatasetReader(
            lazy=True)
        base_file_path = AllenNlpTestCase.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"

        # Make 100 copies of the data
        raw_data = open(base_file_path).read()

        for i in range(100):
            file_path = self.TEST_DIR / f"identical_{i}.tsv"
            with open(file_path, "w") as f:
                f.write(raw_data)

        self.identical_files_glob = str(self.TEST_DIR / "identical_*.tsv")

        # Also create an archive with all of these files to ensure that we can
        # pass the archive directory.
        current_dir = os.getcwd()
        os.chdir(self.TEST_DIR)
        self.archive_filename = self.TEST_DIR / "all_data.tar.gz"
        with tarfile.open(self.archive_filename, "w:gz") as archive:
            for file_path in glob.glob("identical_*.tsv"):
                archive.add(file_path)
        os.chdir(current_dir)

        self.reader = ShardedDatasetReader(base_reader=self.base_reader)
        self.reader_multi_process = ShardedDatasetReader(
            base_reader=self.base_reader_multi_process, multi_process=True)
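
ShardedDatasetReader treats every file matched by a shard path as one shard and hands it to the base reader, so tests built on this fixture can read the glob directly. A minimal sketch of that call, using the attributes defined above:

    # Each identical_*.tsv file becomes a shard read by the base reader;
    # the result is the instances from all shards combined.
    instances = list(self.reader.read(self.identical_files_glob))
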
Example #4
    def setUp(self):
        super().setUp()

        # A lot of the tests want access to the metric tracker
        # so we add a property that gets it by grabbing it from
        # the relevant callback.
        def metric_tracker(self: CallbackTrainer):
            for callback in self.handler.callbacks():
                if isinstance(callback, TrackMetrics):
                    return callback.metric_tracker
            return None

        setattr(CallbackTrainer, 'metric_tracker', property(metric_tracker))

        self.instances = SequenceTaggingDatasetReader().read(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv')
        vocab = Vocabulary.from_instances(self.instances)
        self.vocab = vocab
        self.model_params = Params({
            "text_field_embedder": {
                "token_embedders": {
                    "tokens": {"type": "embedding", "embedding_dim": 5}
                }
            },
            "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2},
        })
        self.model = SimpleTagger.from_params(vocab=self.vocab, params=self.model_params)
        self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01, momentum=0.9)
Example #5
    def setUp(self) -> None:
        super().setUp()

        # use SequenceTaggingDatasetReader as the base reader
        self.base_reader = SequenceTaggingDatasetReader(lazy=True)
        base_file_path = AllenNlpTestCase.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv'

        # Make 100 copies of the data
        raw_data = open(base_file_path).read()
        for i in range(100):
            file_path = self.TEST_DIR / f'identical_{i}.tsv'
            with open(file_path, 'w') as f:
                f.write(raw_data)

        self.all_distinct_path = str(self.TEST_DIR / 'all_distinct.tsv')
        with open(self.all_distinct_path, 'w') as all_distinct:
            for i in range(100):
                file_path = self.TEST_DIR / f'distinct_{i}.tsv'
                line = f"This###DT\tis###VBZ\tsentence###NN\t{i}###CD\t.###.\n"
                with open(file_path, 'w') as f:
                    f.write(line)
                all_distinct.write(line)

        self.identical_files_glob = str(self.TEST_DIR / 'identical_*.tsv')
        self.distinct_files_glob = str(self.TEST_DIR / 'distinct_*.tsv')

        # For some of the tests we need a vocab, we'll just use the base_reader for that.
        self.vocab = Vocabulary.from_instances(self.base_reader.read(str(base_file_path)))
Example #6
 def setup_method(self):
     super().setup_method()
     self.data_path = str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv")
     self.reader = SequenceTaggingDatasetReader()
     self.data_loader = MultiProcessDataLoader(self.reader, self.data_path, batch_size=2)
     self.data_loader_lazy = MultiProcessDataLoader(
         self.reader, self.data_path, batch_size=2, max_instances_in_memory=10
     )
     self.instances = list(self.data_loader.iter_instances())
     self.vocab = Vocabulary.from_instances(self.instances)
     self.data_loader.index_with(self.vocab)
     self.data_loader_lazy.index_with(self.vocab)
     self.model_params = Params(
         {
             "text_field_embedder": {
                 "token_embedders": {"tokens": {"type": "embedding", "embedding_dim": 5}}
             },
             "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2},
         }
     )
     self.model = SimpleTagger.from_params(vocab=self.vocab, params=self.model_params)
     self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01, momentum=0.9)
     self.validation_data_loader = MultiProcessDataLoader(
         self.reader, self.data_path, batch_size=2
     )
     self.validation_data_loader.index_with(self.vocab)
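
The distinction between the two loaders above is memory behavior: with max_instances_in_memory set, MultiProcessDataLoader streams instances from the reader in chunks of that size instead of materializing the whole dataset up front, which is why the fixture labels that loader "lazy".
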
Example #7
    def setUp(self):
        super(SimpleTaggerTest, self).setUp()
        dataset = SequenceTaggingDatasetReader().read(
            'tests/fixtures/data/sequence_tagging.tsv')
        vocab = Vocabulary.from_dataset(dataset)
        self.vocab = vocab
        dataset.index_instances(vocab)
        self.dataset = dataset

        params = Params({
            "text_field_embedder": {
                "tokens": {
                    "type": "embedding",
                    "embedding_dim": 5
                }
            },
            "stacked_encoder": {
                "type": "lstm",
                "input_size": 5,
                "hidden_size": 7,
                "num_layers": 2
            }
        })

        self.model = SimpleTagger.from_params(self.vocab, params)
Example #8
 def setUp(self):
     super().setUp()
     self.instances = SequenceTaggingDatasetReader().read(
         self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv")
     vocab = Vocabulary.from_instances(self.instances)
     self.vocab = vocab
     self.model = ConstantModel(vocab)
Example #9
 def setUp(self):
     super(TestTrainer, self).setUp()
     dataset = SequenceTaggingDatasetReader().read(
         'tests/fixtures/data/sequence_tagging.tsv')
     vocab = Vocabulary.from_instances(dataset)
     self.vocab = vocab
     dataset.index_instances(vocab)
     self.dataset = dataset
     self.model_params = Params({
         "text_field_embedder": {
             "tokens": {
                 "type": "embedding",
                 "embedding_dim": 5
             }
         },
         "stacked_encoder": {
             "type": "lstm",
             "input_size": 5,
             "hidden_size": 7,
             "num_layers": 2
         }
     })
     self.model = SimpleTagger.from_params(self.vocab, self.model_params)
     self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01)
     self.iterator = BasicIterator(batch_size=2)
Example #10
 def setUp(self):
     super(TestTrainer, self).setUp()
     self.instances = SequenceTaggingDatasetReader().read(
         self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv')
     vocab = Vocabulary.from_instances(self.instances)
     self.vocab = vocab
     self.model_params = Params({
         "text_field_embedder": {
             "tokens": {
                 "type": "embedding",
                 "embedding_dim": 5
             }
         },
         "encoder": {
             "type": "lstm",
             "input_size": 5,
             "hidden_size": 7,
             "num_layers": 2
         }
     })
     self.model = SimpleTagger.from_params(vocab=self.vocab,
                                           params=self.model_params)
     self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01)
     self.iterator = BasicIterator(batch_size=2)
     self.iterator.index_with(vocab)
Example #11
    def setUp(self) -> None:
        super().setUp()

        # use SequenceTaggingDatasetReader as the base reader
        self.base_reader = SequenceTaggingDatasetReader(lazy=True)
        base_file_path = AllenNlpTestCase.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"

        # Make 100 copies of the data
        raw_data = open(base_file_path).read()
        for i in range(100):
            file_path = self.TEST_DIR / f"identical_{i}.tsv"
            with open(file_path, "w") as f:
                f.write(raw_data)

        self.identical_files_glob = str(self.TEST_DIR / "identical_*.tsv")
Example #12
 def setup_method(self):
     super().setup_method()
     self.instances = SequenceTaggingDatasetReader().read(
         self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"
     )
     vocab = Vocabulary.from_instances(self.instances)
     self.model_params = Params(
         {
             "text_field_embedder": {
                 "token_embedders": {"tokens": {"type": "embedding", "embedding_dim": 5}}
             },
             "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2},
         }
     )
     self.model = SimpleTagger.from_params(vocab=vocab, params=self.model_params)
Example #13
    def test_default_format(self):
        reader = SequenceTaggingDatasetReader()
        dataset = reader.read('tests/fixtures/data/sequence_tagging.tsv')

        assert len(dataset.instances) == 4
        fields = dataset.instances[0].fields
        assert fields["tokens"].tokens == ["cats", "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
        fields = dataset.instances[1].fields
        assert fields["tokens"].tokens == ["dogs", "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
        fields = dataset.instances[2].fields
        assert fields["tokens"].tokens == ["snakes", "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
        fields = dataset.instances[3].fields
        assert fields["tokens"].tokens == ["birds", "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
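
Judging by these assertions and by the lines Example #5 writes out by hand, each row of the sequence_tagging.tsv fixture is one sentence of word###tag pairs with tabs between the pairs, i.e. something like:

    cats###N\tare###V\tanimals###N\t.###N
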
Example #14
    def test_brown_corpus_format(self):
        reader = SequenceTaggingDatasetReader(word_tag_delimiter='/')
        dataset = reader.read('tests/fixtures/data/brown_corpus.txt')

        assert len(dataset.instances) == 4
        fields = dataset.instances[0].fields
        assert fields["tokens"].tokens == ["cats", "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
        fields = dataset.instances[1].fields
        assert fields["tokens"].tokens == ["dogs", "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
        fields = dataset.instances[2].fields
        assert fields["tokens"].tokens == ["snakes", "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
        fields = dataset.instances[3].fields
        assert fields["tokens"].tokens == ["birds", "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
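
Only the word/tag delimiter changes relative to the default format, so the brown_corpus.txt fixture presumably holds the same sentences with pairs such as cats/N in place of cats###N.
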
Example #15
    def setUp(self):
        super().setUp()

        self.base_reader = SequenceTaggingDatasetReader(lazy=True)
        base_file_path = AllenNlpTestCase.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"

        # Make 100 copies of the data
        raw_data = open(base_file_path).read()
        for i in range(100):
            file_path = self.TEST_DIR / f"sequence_tagging_{i}.tsv"
            with open(file_path, "w") as f:
                f.write(raw_data)

        self.glob = str(self.TEST_DIR / "sequence_tagging_*.tsv")

        # For some of the tests we need a vocab, we'll just use the base_reader for that.
        self.vocab = Vocabulary.from_instances(self.base_reader.read(str(base_file_path)))
Example #16
    def test_brown_corpus_format(self):
        reader = SequenceTaggingDatasetReader(word_tag_delimiter=u'/')
        instances = reader.read(AllenNlpTestCase.FIXTURES_ROOT / u'data' / u'brown_corpus.txt')
        instances = ensure_list(instances)

        assert len(instances) == 4
        fields = instances[0].fields
        assert [t.text for t in fields[u"tokens"].tokens] == [u"cats", u"are", u"animals", u"."]
        assert fields[u"tags"].labels == [u"N", u"V", u"N", u"N"]
        fields = instances[1].fields
        assert [t.text for t in fields[u"tokens"].tokens] == [u"dogs", u"are", u"animals", u"."]
        assert fields[u"tags"].labels == [u"N", u"V", u"N", u"N"]
        fields = instances[2].fields
        assert [t.text for t in fields[u"tokens"].tokens] == [u"snakes", u"are", u"animals", u"."]
        assert fields[u"tags"].labels == [u"N", u"V", u"N", u"N"]
        fields = instances[3].fields
        assert [t.text for t in fields[u"tokens"].tokens] == [u"birds", u"are", u"animals", u"."]
        assert fields[u"tags"].labels == [u"N", u"V", u"N", u"N"]
Example #17
    def test_default_format(self, lazy):
        reader = SequenceTaggingDatasetReader(lazy=lazy)
        instances = reader.read(AllenNlpTestCase.FIXTURES_ROOT / u'data' / u'sequence_tagging.tsv')
        instances = ensure_list(instances)

        assert len(instances) == 4
        fields = instances[0].fields
        assert [t.text for t in fields[u"tokens"].tokens] == [u"cats", u"are", u"animals", u"."]
        assert fields[u"tags"].labels == [u"N", u"V", u"N", u"N"]
        fields = instances[1].fields
        assert [t.text for t in fields[u"tokens"].tokens] == [u"dogs", u"are", u"animals", u"."]
        assert fields[u"tags"].labels == [u"N", u"V", u"N", u"N"]
        fields = instances[2].fields
        assert [t.text for t in fields[u"tokens"].tokens] == [u"snakes", u"are", u"animals", u"."]
        assert fields[u"tags"].labels == [u"N", u"V", u"N", u"N"]
        fields = instances[3].fields
        assert [t.text for t in fields[u"tokens"].tokens] == [u"birds", u"are", u"animals", u"."]
        assert fields[u"tags"].labels == [u"N", u"V", u"N", u"N"]
Example #18
    def test_read_from_file(self):

        reader = SequenceTaggingDatasetReader()
        dataset = reader.read(self.TRAIN_FILE)

        assert len(dataset.instances) == 4
        fields = dataset.instances[0].fields()
        assert fields["tokens"].tokens() == ["cats", "are", "animals", "."]
        assert fields["tags"].tags() == ["N", "V", "N", "N"]
        fields = dataset.instances[1].fields()
        assert fields["tokens"].tokens() == ["dogs", "are", "animals", "."]
        assert fields["tags"].tags() == ["N", "V", "N", "N"]
        fields = dataset.instances[2].fields()
        assert fields["tokens"].tokens() == ["snakes", "are", "animals", "."]
        assert fields["tags"].tags() == ["N", "V", "N", "N"]
        fields = dataset.instances[3].fields()
        assert fields["tokens"].tokens() == ["birds", "are", "animals", "."]
        assert fields["tags"].tags() == ["N", "V", "N", "N"]
Example #19
    def test_default_format(self, lazy):
        reader = SequenceTaggingDatasetReader(lazy=lazy)
        instances = reader.read('tests/fixtures/data/sequence_tagging.tsv')
        instances = ensure_list(instances)

        assert len(instances) == 4
        fields = instances[0].fields
        assert [t.text for t in fields["tokens"].tokens] == ["cats", "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
        fields = instances[1].fields
        assert [t.text for t in fields["tokens"].tokens] == ["dogs", "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
        fields = instances[2].fields
        assert [t.text for t in fields["tokens"].tokens] == ["snakes", "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
        fields = instances[3].fields
        assert [t.text for t in fields["tokens"].tokens] == ["birds", "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
Example #20
 def setUp(self):
     super(TestOptimizer, self).setUp()
     self.instances = SequenceTaggingDatasetReader().read(self.FIXTURES_ROOT / u'data' / u'sequence_tagging.tsv')
     vocab = Vocabulary.from_instances(self.instances)
     self.model_params = Params({
         u"text_field_embedder": {
             u"tokens": {u"type": u"embedding", u"embedding_dim": 5}
         },
         u"encoder": {u"type": u"lstm", u"input_size": 5, u"hidden_size": 7, u"num_layers": 2},
     })
     self.model = SimpleTagger.from_params(vocab=vocab, params=self.model_params)
Example #21
 def setUp(self):
     super(TestOptimizer, self).setUp()
     self.instances = SequenceTaggingDatasetReader().read(
         'tests/fixtures/data/sequence_tagging.tsv')
     vocab = Vocabulary.from_instances(self.instances)
     self.model_params = Params({
         "text_field_embedder": {
             "tokens": {
                 "type": "embedding",
                 "embedding_dim": 5
             }
         },
         "encoder": {
             "type": "lstm",
             "input_size": 5,
             "hidden_size": 7,
             "num_layers": 2
         }
     })
     self.model = SimpleTagger.from_params(vocab, self.model_params)
Example #22
    def setUp(self):
        super(SimpleTaggerTest, self).setUp()
        self.write_sequence_tagging_data()

        dataset = SequenceTaggingDatasetReader().read(self.TRAIN_FILE)
        vocab = Vocabulary.from_dataset(dataset)
        self.vocab = vocab
        dataset.index_instances(vocab)
        self.dataset = dataset

        params = Params({
            "text_field_embedder": {
                "tokens": {
                    "type": "embedding",
                    "embedding_dim": 5
                }
            },
            "hidden_size": 7,
            "num_layers": 2
        })

        self.model = SimpleTagger.from_params(self.vocab, params)
Example #23
 def setUp(self):
     super(TestDenseSparseAdam, self).setUp()
     self.instances = SequenceTaggingDatasetReader().read(
         self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv')
     self.vocab = Vocabulary.from_instances(self.instances)
     self.model_params = Params({
         "text_field_embedder": {
             "tokens": {
                 "type": "embedding",
                 "embedding_dim": 5,
                 "sparse": True
             }
         },
         "encoder": {
             "type": "lstm",
             "input_size": 5,
             "hidden_size": 7,
             "num_layers": 2
         }
     })
     self.model = SimpleTagger.from_params(vocab=self.vocab,
                                           params=self.model_params)
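
The "sparse": True flag is the point of this fixture: it makes the embedding weight produce sparse gradients, which is the mixed dense/sparse parameter situation that DenseSparseAdam is designed to optimize.
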
Example #24
    def test_default_format(self):
        reader = SequenceTaggingDatasetReader(max_instances=4)
        instances = list(reader.read(AllenNlpTestCase.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"))

        assert len(instances) == 4
        fields = instances[0].fields
        assert [t.text for t in fields["tokens"].tokens] == ["cats", "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
        fields = instances[1].fields
        assert [t.text for t in fields["tokens"].tokens] == ["dogs", "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
        fields = instances[2].fields
        assert [t.text for t in fields["tokens"].tokens] == ["snakes", "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
        fields = instances[3].fields
        assert [t.text for t in fields["tokens"].tokens] == ["birds", "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
Example #25
    def test_brown_corpus_format(self):
        reader = SequenceTaggingDatasetReader(word_tag_delimiter="/")
        instances = list(reader.read(AllenNlpTestCase.FIXTURES_ROOT / "data" / "brown_corpus.txt"))

        assert len(instances) == 4
        fields = instances[0].fields
        assert [t.text for t in fields["tokens"].tokens] == ["cats", "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
        fields = instances[1].fields
        assert [t.text for t in fields["tokens"].tokens] == ["dogs", "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
        fields = instances[2].fields
        assert [t.text for t in fields["tokens"].tokens] == ["snakes", "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
        fields = instances[3].fields
        assert [t.text for t in fields["tokens"].tokens] == ["birds", "are", "animals", "."]
        assert fields["tags"].labels == ["N", "V", "N", "N"]
Example #26
def test_run_steps_programmatically(step_cache_class):
    from allennlp.data.dataset_readers import SequenceTaggingDatasetReader
    from allennlp.tango.dataset import DatasetReaderAdapterStep
    from allennlp.tango import TrainingStep
    from allennlp.common import Lazy
    from allennlp.training.optimizers import AdamOptimizer
    from allennlp.tango.dataloader import BatchSizeDataLoader
    from allennlp.models import SimpleTagger
    from allennlp.tango import EvaluationStep

    dataset_step = DatasetReaderAdapterStep(
        reader=SequenceTaggingDatasetReader(),
        splits={
            "train": "test_fixtures/data/sequence_tagging.tsv",
            "validation": "test_fixtures/data/sequence_tagging.tsv",
        },
    )
    training_step = TrainingStep(
        model=Lazy(
            SimpleTagger,
            Params({
                "text_field_embedder": {
                    "token_embedders": {
                        "tokens": {
                            "type": "embedding",
                            "projection_dim": 2,
                            "pretrained_file":
                            "test_fixtures/embeddings/glove.6B.100d.sample.txt.gz",
                            "embedding_dim": 100,
                            "trainable": True,
                        }
                    }
                },
                "encoder": {
                    "type": "lstm",
                    "input_size": 2,
                    "hidden_size": 4,
                    "num_layers": 1
                },
            }),
        ),
        dataset=dataset_step,
        data_loader=Lazy(BatchSizeDataLoader, Params({"batch_size": 2})),
        optimizer=Lazy(AdamOptimizer),
    )
    evaluation_step = EvaluationStep(dataset=dataset_step,
                                     model=training_step,
                                     step_name="evaluation")

    with TemporaryDirectory(prefix="test_run_steps_programmatically-") as d:
        if step_cache_class == DirectoryStepCache:
            cache = DirectoryStepCache(d)
        else:
            cache = step_cache_class()

        assert "random object" not in cache
        assert dataset_step not in cache
        assert training_step not in cache
        assert evaluation_step not in cache
        assert len(cache) == 0
        with pytest.raises(KeyError):
            _ = cache[evaluation_step]

        assert tango_dry_run(evaluation_step, cache) == [
            (dataset_step, False),
            (training_step, False),
            (evaluation_step, False),
        ]
        training_step.ensure_result(cache)
        assert tango_dry_run(evaluation_step, cache) == [
            (dataset_step, True),
            (training_step, True),
            (evaluation_step, False),
        ]

        assert "random object" not in cache
        assert dataset_step in cache
        assert training_step in cache
        assert evaluation_step not in cache
        assert len(cache) == 2
        with pytest.raises(KeyError):
            _ = cache[evaluation_step]
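
The assertions above trace the caching contract: tango_dry_run reports each step along with a flag for whether its result is already cached, ensure_result executes a step (and its dependencies) and stores the results, and afterwards only the evaluation step remains to be run. DirectoryStepCache persists those results under the supplied directory, while the other parametrized cache classes appear to keep them in memory for the life of the process.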