Exemplo n.º 1
0
def download_dataset(data_version: str):
    """
    Fetch the speech commands tar.gz archive for the requested data version
    and save it at the location given by ``get_dataset_filepath``.
    :param data_version: specifies the version of the data to use (str {"0.01", "0.02"})
    """
    url_template = "http://download.tensorflow.org/data/speech_commands_v{}.tar.gz"
    source_url = url_template.format(data_version)
    destination = get_dataset_filepath(data_version=data_version)
    urllib.request.urlretrieve(source_url, destination)
Exemplo n.º 2
0
def decompress_dataset(data_version: str):
    """
    Retrieves the downloaded tar.gz archive and extracts it into the path
    returned by ``get_training_data_path``.
    :param data_version: specifies the version of the data to use (str {"0.01", "0.02"})
    :raises AssertionError: if the archive does not exist at the expected path
    """
    archive_path = get_dataset_filepath(data_version=data_version)
    assert os.path.exists(archive_path), \
        "Dataset archive not found: {}".format(archive_path)
    # The context manager guarantees the archive handle is released even if
    # extraction fails part-way (the explicit close() was skipped on errors).
    with tarfile.open(archive_path, "r:gz") as tar:
        # NOTE(review): extractall() trusts the member paths stored in the
        # archive; only use it on archives from a trusted source.
        tar.extractall(path=get_training_data_path(data_version=data_version))
Exemplo n.º 3
0
 def setUp(self):
     """
     Set up the fixtures shared by the tests: the example wav path, the data
     version, the dataset on disk (downloaded and decompressed when the
     archive is missing) and the list of known commands.
     """
     self.data_version = "0.02"
     self.wav_filepath = "./tests/examples/testaudio.wav"

     # Only fetch and unpack the dataset when the archive is not there yet
     archive_present = os.path.exists(
         get_dataset_filepath(data_version=self.data_version))
     if not archive_present:
         download_dataset(data_version=self.data_version)
         decompress_dataset(data_version=self.data_version)

     digit_words = [
         "zero", "one", "two", "three", "four", "five", "six", "seven",
         "eight", "nine"
     ]
     other_words = [
         "marvin", "sheila", "forward", "backward", "bed", "bird", "cat",
         "dog", "down", "follow", "go", "happy", "house", "learn", "no",
         "yes", "off", "on", "right", "left", "stop", "tree", "up", "visual",
         "wow"
     ]
     self.known_commands = digit_words + other_words
Exemplo n.º 4
0
 def test_download_and_decompress_data(self):
     """
     Download and decompress the "0.02" dataset, then check that the archive
     exists and that the training data folder holds more than 10 entries.
     The test is skipped when the archive is already present on disk.
     """
     data_version = "0.02"
     filepath = get_dataset_filepath(data_version=data_version)
     if os.path.exists(filepath):
         # Report an explicit skip instead of a vacuous passing assertion
         self.skipTest("dataset archive already present: {}".format(filepath))

     # Reached only when the archive is missing (this will run in Travis)
     download_dataset(data_version=data_version)
     self.assertTrue(os.path.exists(filepath))
     decompress_dataset(data_version=data_version)
     extracted_entries = os.listdir(
         get_training_data_path(data_version=data_version))
     self.assertGreater(len(extracted_entries), 10)
Exemplo n.º 5
0
 def test_get_dataset_filepath(self):
     """
     The dataset filepath must live in an existing directory and point to a
     ``.tar.gz`` file.
     """
     archive_path = get_dataset_filepath(data_version="unit_testing")
     parent_dir = os.path.dirname(archive_path)
     self.assertTrue(os.path.exists(parent_dir))
     self.assertTrue(archive_path.endswith(".tar.gz"))
Exemplo n.º 6
0
    # Identifier built from the task, model alias and data version
    alias = f"{task}_m-{model_alias}_d-{data_version}"
    assert task in available_tasks
    known_commands = commands[task][:]  # copy, so the shared list is not mutated
    include_unknown = unknown_class_addition[task]

    n_jobs = multiprocessing.cpu_count()

    # Read the experiment settings once and close the file afterwards,
    # instead of re-opening and re-parsing the JSON for every value.
    with open(experiment_settings_filepath) as settings_file:
        loaded_settings = json.load(settings_file)
    n_epochs = loaded_settings["n_epochs"]
    batch_size = loaded_settings["batch_size"]
    run_in_gpu = loaded_settings["run_in_gpu"]
    n_augmentations = loaded_settings["n_augmentations"]
    bn_momentum = loaded_settings["bn_momentum"]
    weight_decay = loaded_settings["weight_decay"]

    # Download and decompress the data if necessary
    if not os.path.exists(get_dataset_filepath(data_version)):
        download_dataset(data_version)
        decompress_dataset(data_version)

    # Seed every random number generator in use before generating augmentations
    random.seed(random_seed)
    np.random.seed(random_seed)
    torch.random.manual_seed(random_seed)

    # Generate data augmentations if required
    if n_augmentations > 0:
        train_files, _, _ = get_list_of_wav_paths(data_version=data_version)
        for i in range(n_augmentations):
            # Only generate an augmentation whose folder is still empty
            if not os.listdir(get_augmented_data_folder(data_version, str(i))):
                print("Generating augmentation no. {}".format(i))
                batch_augment_files(data_version=data_version, list_of_files=train_files, folder_name=str(i),
                                    n_jobs=n_jobs)