Пример #1
0
    def prepare_data(self):
        """Build the train/validation/test datasets and print a summary.

        The base corpus is chosen from ``self.hp.data``: ``"timit"`` is
        checked first, then ``"buckeye"``. If ``"libri"`` is also present in
        ``self.hp.data``, a LibriSpeech dataset is concatenated onto the
        training split only; validation and test splits are left as-is.

        The resulting splits are stored on ``self.train_dataset``,
        ``self.valid_dataset`` and ``self.test_dataset``, and their ``path``
        and length are printed.

        Raises:
            ValueError: if ``self.hp.data`` contains neither ``"timit"`` nor
                ``"buckeye"``. Note that ``"libri"`` on its own is not
                enough, because it only augments a base training set.
        """
        data = self.hp.data

        # Select the base corpus; exactly one of these provides all 3 splits.
        if "timit" in data:
            train, val, test = TrainTestDataset.get_datasets(
                path=self.hp.timit_path)
        elif "buckeye" in data:
            train, val, test = TrainValTestDataset.get_datasets(
                path=self.hp.buckeye_path, percent=self.hp.buckeye_percent)
        else:
            # ValueError (still an Exception subclass) with the offending
            # value makes a misconfigured run much easier to diagnose.
            raise ValueError(
                f"no such training data! (got {data!r}; "
                f"expected 'timit' or 'buckeye')")

        # Optionally extend the training split with LibriSpeech examples.
        if "libri" in data:
            libri_train = LibriSpeechDataset(path=self.hp.libri_path,
                                             subset=self.hp.libri_subset,
                                             percent=self.hp.libri_percent)
            train = ConcatDataset([train, libri_train])
            # The summary below reads `.path` from every split, so give the
            # concatenated dataset one built from its members' paths.
            train.path = "\n\t+".join(
                [dataset.path for dataset in train.datasets])
            print(f"added libri ({len(libri_train)} examples)")

        self.train_dataset = train
        self.valid_dataset = val
        self.test_dataset = test

        line()
        print("DATA:")
        print(f"train: {self.train_dataset.path} ({len(self.train_dataset)})")
        print(f"valid: {self.valid_dataset.path} ({len(self.valid_dataset)})")
        print(f"test: {self.test_dataset.path} ({len(self.test_dataset)})")
        line()