import numpy as np
import torch
import torch.utils.data as data
import torch.utils.data as data_utils  # alias used by loadDataForLocal below
import torch.utils.data as torch_data  # alias used by loader_function below
from torchvision import transforms
# NSynth dataset wrapper; import path assumes the pytorch-nsynth package layout
from pytorch_nsynth.nsynth import NSynth


def loadDataForServer():
    training_dataset = NSynth(
        "/local/sandbox/nsynth/nsynth-train",
        transform=toFloat,
        blacklist_pattern=["synth_lead"],  # blacklist synth_lead instrument
        categorical_field_list=["instrument_family", "instrument_source"])
    validation_dataset = NSynth(
        "/local/sandbox/nsynth/nsynth-valid",
        transform=toFloat,
        blacklist_pattern=["synth_lead"],  # blacklist synth_lead instrument
        categorical_field_list=["instrument_family", "instrument_source"])
    testing_dataset = NSynth(
        "/local/sandbox/nsynth/nsynth-test",
        transform=toFloat,
        blacklist_pattern=["synth_lead"],  # blacklist synth_lead instrument
        categorical_field_list=["instrument_family", "instrument_source"])
    # create dataLoaders
    train_loader = torch.utils.data.DataLoader(dataset=training_dataset,
                                               batch_size=BATCH_SIZE)
    validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset,
                                                    batch_size=1)
    test_loader = torch.utils.data.DataLoader(dataset=testing_dataset,
                                              batch_size=1)
    print('Finished preparing data loaders for server testing')
    return train_loader, validation_loader, test_loader
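# Usage sketch for loadDataForServer(): it relies on a module-level `toFloat`
# transform and `BATCH_SIZE` constant, so both are defined here as assumed
# examples rather than values taken from the original module.
toFloat = transforms.Lambda(lambda x: x / np.iinfo(np.int16).max)
BATCH_SIZE = 64


def demoServerLoaders():
    train_loader, validation_loader, test_loader = loadDataForServer()
    # assuming pytorch-nsynth batches unpack as (samples, family, source, metadata)
    samples, family, source, metadata = next(iter(train_loader))
    print(samples.shape)  # NSynth clips are 4 s at 16 kHz, i.e. 64000 samples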
def main():
    # Subsampling
    subsample_transform = transforms.Lambda(lambda x: x[::4])
    toFloat = transforms.Lambda(lambda x: x / np.iinfo(np.int16).max)
    trainData = NSynth(
        "/local/sandbox/nsynth/nsynth-train",
        transform=transforms.Compose([subsample_transform, toFloat]),
        blacklist_pattern=["synth_lead"],
        categorical_field_list=["instrument_family", "instrument_source"])
    validation_dataset = NSynth(
        "/local/sandbox/nsynth/nsynth-valid",
        transform=transforms.Compose([subsample_transform, toFloat]),
        blacklist_pattern=["synth_lead"],
        categorical_field_list=["instrument_family", "instrument_source"])
    test_dataset = NSynth(
        "/local/sandbox/nsynth/nsynth-test",
        transform=transforms.Compose([subsample_transform, toFloat]),
        blacklist_pattern=["synth_lead"],
        categorical_field_list=["instrument_family", "instrument_source"])
    print(len(trainData))
    train_loader = data.DataLoader(trainData, batch_size=64, shuffle=True)
    valid_loader = data.DataLoader(validation_dataset, batch_size=64,
                                   shuffle=True)
    test_loader = data.DataLoader(test_dataset, batch_size=64, shuffle=True)
    train(train_loader, valid_loader, len(trainData), len(validation_dataset))
    test(test_loader)
def read_dataset(datasetLocation, INPUT_SIZE):
    """
    Method to read the data sets from the given dataset location.
    :param datasetLocation: Base location where training, test and
        validation datasets are located
    :param INPUT_SIZE: Number of samples per bin when reshaping each waveform
    :return: training loader, test loader and validation loader data sets
    """
    # Pre-processing transform to get the dataset into the range [-1, 1]
    # and reshape the input into multiple bins of INPUT_SIZE samples
    maxIntValue = np.iinfo(np.int16).max
    # toFloat = transforms.Lambda(lambda x: ((x / maxIntValue) + 1) / 2)
    toFloat = transforms.Compose([
        transforms.Lambda(lambda x: x / maxIntValue),
        transforms.Lambda(lambda x: x.reshape(-1, INPUT_SIZE))
    ])
    # Read the train dataset
    trainFolderName = "nsynth-train"
    train_dataset = NSynth(
        datasetLocation + trainFolderName,
        transform=toFloat,
        blacklist_pattern=["synth_lead"],
        categorical_field_list=["instrument_family", "instrument_source"])
    train_loader = data.DataLoader(train_dataset, batch_size=32, shuffle=True)
    # Read the test dataset
    testFolderName = "nsynth-test"
    test_dataset = NSynth(
        datasetLocation + testFolderName,
        transform=toFloat,
        blacklist_pattern=["synth_lead"],
        categorical_field_list=["instrument_family", "instrument_source"])
    test_loader = data.DataLoader(test_dataset, batch_size=32, shuffle=True)
    # Read the validation dataset
    validationFolderName = "nsynth-valid"
    validation_dataset = NSynth(
        datasetLocation + validationFolderName,
        transform=toFloat,
        blacklist_pattern=["synth_lead"],
        categorical_field_list=["instrument_family", "instrument_source"])
    validation_loader = data.DataLoader(validation_dataset, batch_size=32,
                                        shuffle=True)
    # return the dataset loaders
    return train_loader, test_loader, validation_loader
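# Usage sketch for read_dataset(): the dataset root below is an assumed
# example path, and INPUT_SIZE=100 is an illustrative choice that splits each
# 64000-sample NSynth waveform into 640 bins of 100 samples.
def demoReadDataset():
    train_loader, test_loader, validation_loader = read_dataset(
        "/local/sandbox/nsynth/", INPUT_SIZE=100)
    batch = next(iter(train_loader))
    print(batch[0].shape)  # expected: (32, 640, 100) after the reshape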
def testProcess(toFloat):
    testSet = NSynth(
        "/local/sandbox/nsynth/nsynth-test",
        transform=toFloat,
        blacklist_pattern=["synth_lead"],  # blacklist synth_lead instrument
        categorical_field_list=["instrument_family_str"])
    return testSet
def validProcess(toFloat):
    validSet = NSynth(
        "/local/sandbox/nsynth/nsynth-valid",
        transform=toFloat,
        blacklist_pattern=["synth_lead"],  # blacklist synth_lead instrument
        categorical_field_list=["instrument_family_str"])
    input_dimension = 0
    return validSet, input_dimension
def dataLoaders(batch_size):
    '''
    This function loads the data for training and testing of the model
    :param batch_size: the size of mini batches
    :return: dataloader objects of train, validation and testing data
    '''
    # audio samples are loaded as an int16 numpy array;
    # rescale intensity to float, then shift by +1 so inputs are non-negative
    # (note the resulting range is [0, 2], not [-1, 1])
    print("--- Loading data ---")
    toFloat = transforms.Lambda(lambda x: (x / np.iinfo(np.int16).max) + 1)
    # normalizeValue = transforms.Normalize(torch.mean(x), torch.std(x))
    # use instrument_family and instrument_source as classification targets
    dataset_Train = NSynth(
        "/local/sandbox/nsynth/nsynth-train",
        transform=toFloat,
        blacklist_pattern=["synth_lead"],  # blacklist synth_lead instrument
        categorical_field_list=["instrument_family", "instrument_source"])
    loader_Train = data.DataLoader(dataset_Train, batch_size=batch_size,
                                   shuffle=True)
    dataset_Valid = NSynth(
        "/local/sandbox/nsynth/nsynth-valid",
        transform=toFloat,
        blacklist_pattern=["synth_lead"],  # blacklist synth_lead instrument
        categorical_field_list=["instrument_family", "instrument_source"])
    loader_Valid = data.DataLoader(dataset_Valid, batch_size=batch_size,
                                   shuffle=True)
    dataset_Test = NSynth(
        "/local/sandbox/nsynth/nsynth-test",
        transform=toFloat,
        blacklist_pattern=["synth_lead"],  # blacklist synth_lead instrument
        categorical_field_list=["instrument_family", "instrument_source"])
    loader_Test = data.DataLoader(dataset_Test, batch_size=batch_size,
                                  shuffle=False)
    return loader_Train, loader_Valid, loader_Test
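# Usage sketch for dataLoaders(): the +1 shift above moves waveforms from
# [-1, 1] into [0, 2], so downstream code should expect non-negative inputs.
# Batch unpacking assumes pytorch-nsynth's (samples, family, source, metadata)
# ordering.
def demoDataLoaders(batch_size=64):
    loader_Train, loader_Valid, loader_Test = dataLoaders(batch_size)
    samples, family, source, metadata = next(iter(loader_Train))
    print(samples.min().item(), samples.max().item())  # roughly within [0, 2]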
def loadDataForLocal(want_to_test):
    training_dataset = NSynth(
        "./nsynth-test",
        transform=toFloat,
        blacklist_pattern=["synth_lead"],  # blacklist synth_lead instrument
        categorical_field_list=["instrument_family", "instrument_source"])
    # Split the single local dataset into training, validation and testing
    num_train = len(training_dataset)
    indices = list(range(num_train))
    splitVal = int(np.floor(VALIDATION_SPLIT * num_train))
    splitTest = int(np.floor(TESTING_SPLIT * num_train)) + splitVal
    # Make sure you get the same numbers every time when RAND_SEED is fixed
    np.random.seed(seed=RAND_SEED)
    # Shuffle the indices
    np.random.shuffle(indices)
    # Get disjoint validation, test and training index sets
    validation_idx, test_idx, train_idx = indices[:splitVal], \
        indices[splitVal:splitTest], \
        indices[splitTest:]
    # create samplers
    train_sampler = data_utils.SubsetRandomSampler(train_idx)
    test_sampler = data_utils.SubsetRandomSampler(test_idx)
    validation_sampler = data_utils.SubsetRandomSampler(validation_idx)
    # create dataLoaders
    train_loader = torch.utils.data.DataLoader(dataset=training_dataset,
                                               batch_size=BATCH_SIZE,
                                               sampler=train_sampler)
    validation_loader = torch.utils.data.DataLoader(dataset=training_dataset,
                                                    batch_size=1,
                                                    sampler=validation_sampler)
    test_loader = torch.utils.data.DataLoader(dataset=training_dataset,
                                              batch_size=1,
                                              sampler=test_sampler)
    if want_to_test == '1':
        test_loader = torch.utils.data.DataLoader(dataset=training_dataset,
                                                  batch_size=1)
    print('Finished preparing data loaders for local testing')
    return train_loader, validation_loader, test_loader
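# Sanity-check sketch for the local split arithmetic above: with illustrative
# values (these constants are assumptions, not values from this module) the
# three index sets are disjoint and together cover the whole dataset.
def demoSplitSizes(num_train=4096, validation_split=0.1, testing_split=0.1):
    splitVal = int(np.floor(validation_split * num_train))
    splitTest = int(np.floor(testing_split * num_train)) + splitVal
    indices = list(range(num_train))
    validation_idx = indices[:splitVal]
    test_idx = indices[splitVal:splitTest]
    train_idx = indices[splitTest:]
    assert len(validation_idx) + len(test_idx) + len(train_idx) == num_train
    print(len(train_idx), len(validation_idx), len(test_idx))  # 3278 409 409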
def loader_function(path):
    # audio files are loaded as an int16 numpy array;
    # crop to the first 16000 samples, rescale to float and shift into [0, 2]
    selectColmns = transforms.Lambda(lambda x: x[0:16000])
    toFloat = transforms.Lambda(lambda x: x / np.iinfo(np.int16).max + 1)
    dataset = NSynth(
        path,
        transform=transforms.Compose([selectColmns, toFloat]),
        blacklist_pattern=["synth_lead"],  # blacklist synth_lead instrument
        categorical_field_list=["instrument_family", "instrument_source"])
    question2(dataset[0][0], "1-D_audio_waveform1.png", "1-D audio waveform1")
    return dataset, torch_data.DataLoader(dataset, batch_size=64,
                                          shuffle=True, num_workers=16)
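# Usage sketch for loader_function(): the path is an assumed example, and
# question2() (called inside loader_function to plot a waveform) is assumed
# to be defined elsewhere in this module.
def demoLoaderFunction():
    dataset, loader = loader_function("/local/sandbox/nsynth/nsynth-test")
    samples, family, source, metadata = next(iter(loader))
    print(samples.shape)  # expected: (64, 16000) after the x[0:16000] crop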
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import sklearn.metrics as sk
import wave
import sys
import pandas as pd

transform = transforms.Compose([
    transforms.Lambda(lambda x: x / np.iinfo(np.int16).max),
    transforms.Lambda(lambda x: torch.from_numpy(x).float()),
    transforms.Lambda(lambda x: x[0:16000])
])
train_dataset = NSynth(
    "/local/sandbox/nsynth/nsynth-train",
    transform=transform,
    blacklist_pattern=["synth_lead"],  # blacklist synth_lead instrument
    categorical_field_list=["instrument_family", "instrument_source"])
print(type(train_dataset))
train_loader = data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataset = NSynth(
    "/local/sandbox/nsynth/nsynth-test",
    transform=transform,
    blacklist_pattern=["synth_lead"],  # blacklist synth_lead instrument
    categorical_field_list=["instrument_family", "instrument_source"])
test_loader = data.DataLoader(test_dataset, batch_size=32, shuffle=True)
loss_validation = []
loss_train = []
valid_dataset = NSynth(
    "/local/sandbox/nsynth/nsynth-valid",  # assumed: completes the truncated call, mirroring the train/test setup above
    transform=transform,
    blacklist_pattern=["synth_lead"],
    categorical_field_list=["instrument_family", "instrument_source"])
valid_loader = data.DataLoader(valid_dataset, batch_size=32, shuffle=True)
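# Iteration sketch over the loaders defined above, assuming pytorch-nsynth
# batches unpack as (samples, instrument_family, instrument_source, metadata).
for samples, family_target, source_target, metadata in train_loader:
    print(samples.shape, family_target.shape)  # e.g. (32, 16000) and (32,)
    break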