def predict(model, device, test_loader):
    # evaluate the model
    model.eval()
    pred_results = np.asarray([])
    loghub.logMsg(msg="{}: Predicting...".format(__name__), otherlogs=["test_acc"])
    # Use no gradient backpropagation (as we are just predicting)
    with torch.no_grad():
        # for every testing batch
        for i_batch, sample_batched in enumerate(test_loader):
            # for every batch, extract data (16, 1, 40, 500) and label (16, 1);
            # the labels are unused here
            data, invalid_label = sample_batched
            # Map the variables to the current device (CPU or GPU)
            data = data.to(device, dtype=torch.float)
            # get the predictions
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            # collate the predicted results; flatten() (not squeeze) keeps
            # single-element batches 1-D so the concatenate below never fails
            pred = pred.cpu().numpy().flatten()
            pred_results = np.concatenate((pred_results, pred))
    return pred_results
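# Usage sketch (hedged): wiring predict() up for unlabeled data. The checkpoint
# path is a placeholder, and BaselineASC / the DataLoader are assumed to be
# prepared as elsewhere in this codebase.
def _example_predict_usage(test_loader, checkpoint_path="processed_data/model.pt"):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = BaselineASC(1).to(device)  # assuming a 1-channel feature model
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    # predict() returns a flat numpy array of predicted class indices
    return predict(model, device, test_loader)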
def load_all_data(self, include_test=False, with_labels=True):
    """
    Load all the train/test file lists and labels from the csv files.
    (Feature extraction and caching happen later, in load_feature().)
    """
    # Read the training & testing data from the csv files
    loghub.logMsg(msg="{}: Loading all data...".format(__name__), otherlogs=["test_acc"])
    self.train_data_list, self.train_label_list, self.train_label_indices = \
        self.__read_DCASE_csv_file(self.train_csv_filepath, "train")
    if with_labels:
        self.test_data_list, self.test_label_list, self.test_label_indices = \
            self.__read_DCASE_csv_file(self.test_csv_filepath, "test")
    else:
        self.test_data_list, self.test_label_list, self.test_label_indices = \
            self.__read_DCASE_csv_file(self.test_csv_filepath, "evaluate")
    self.audio_files = self.train_data_list + self.test_data_list
    self.audio_labels = self.train_label_list + self.test_label_list
    self.audio_label_indices = self.train_label_indices + self.test_label_indices
    self.data_type = [0] * len(self.train_data_list) + [1] * len(self.test_data_list)
    self.base = len(self.train_data_list)
    if include_test:
        self.train_data_list = self.train_data_list + self.test_data_list
        self.train_label_list = self.train_label_list + self.test_label_list
        self.train_label_indices = self.train_label_indices + self.test_label_indices
    self.data_type = np.asarray(self.data_type)
    loghub.logMsg(msg="{}: All data loaded.".format(__name__), otherlogs=["test_acc"])
def test(args, model, device, test_loader, data_type):
    # evaluate the model
    model.eval()
    # init the test loss and correct-prediction counters
    test_loss = 0
    correct = 0
    pred_results = np.asarray([])
    loghub.logMsg(msg="{}: Testing...".format(__name__), otherlogs=["test_acc"])
    # Use no gradient backpropagation (as we are just testing)
    with torch.no_grad():
        # for every testing batch
        for i_batch, sample_batched in enumerate(test_loader):
            # for every batch, extract data (16, 1, 40, 500) and label (16, 1)
            data, label = sample_batched
            # Map the variables to the current device (CPU or GPU)
            data = data.to(device, dtype=torch.float)
            label = label.to(device, dtype=torch.long)
            # get the predictions
            output = model(data)
            # accumulate the batchwise loss
            test_loss += F.nll_loss(output, label, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            # accumulate the correct predictions
            correct += pred.eq(label.view_as(pred)).sum().item()
            # collate the predicted results; flatten() (not squeeze) keeps
            # single-element batches 1-D
            pred = pred.cpu().numpy().flatten()
            pred_results = np.concatenate((pred_results, pred))
    # normalize the test loss with the number of test samples
    test_loss /= len(test_loader.dataset)
    # log the results
    loghub.logMsg(msg="{}: Model prediction on {}: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
        __name__, data_type, test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)), otherlogs=["test_acc"])
    return pred_results
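# Note: F.nll_loss above expects log-probabilities, so test() (and train()
# below) assume the model's forward() ends in log_softmax. A minimal sanity
# check of that assumption, given a model and one data batch:
def _example_check_log_softmax(model, data):
    with torch.no_grad():
        sums = model(data).exp().sum(dim=1)
    # each row of exp(output) should sum to ~1 for log-probabilities
    return torch.allclose(sums, torch.ones_like(sums), atol=1e-4)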
def apply_k_fold(self, K=5):
    """
    K (int): number of folds

    Split the train data into K folds and return a list of
    (train_indices, test_indices) tuples, one per fold:
        - Fold #1 (train_indices, test_indices)
        - ...
        - Fold #K (train_indices, test_indices)
    """
    # check that the data have been loaded
    if not self.train_data_list:
        loghub.logMsg(msg="{}: Data have not been loaded. Running data_manager.load_all_data()...".format(__name__),
                      otherlogs=["test_acc"], level="warning")
        self.load_all_data()
    # assign train indices to folds in round-robin order
    kfolds_arr = [[] for _ in range(K)]  # axis 0 = folds
    fold_counter = 0
    for i in range(len(self.train_data_list)):
        kfolds_arr[fold_counter].append(i)
        fold_counter = (fold_counter + 1) % K
    # Generate the cross-validation array
    kfolds = []  # axis 0 = folds
    # For each fold
    for i in range(K):
        # let the current fold be the test indices
        test_indices = kfolds_arr[i]
        # combine the remaining folds into the train indices
        train_indices = []
        for j in range(K):
            if i == j:
                continue
            train_indices += kfolds_arr[j]
        kfolds.append((train_indices, test_indices))
    return kfolds
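# Self-contained illustration of the round-robin split above: with n=7 samples
# and K=3, sample i lands in fold i % K, giving folds [0, 3, 6], [1, 4] and
# [2, 5]; each fold in turn becomes the test split and the rest form the
# train split.
def _example_round_robin_folds(n=7, K=3):
    folds = [[] for _ in range(K)]
    for i in range(n):
        folds[i % K].append(i)
    return [(sum((folds[j] for j in range(K) if j != i), []), folds[i])
            for i in range(K)]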
def get_data_index_from_map(self, idx, data_type):
    """
    All data are loaded into a single main file in prepare_data(). This
    returns the index of the data in the main file, based on
    self.train_idx_map or self.test_idx_map.

    data_type (string): one of ["train", "test"]
    """
    if (not self.train_idx_map) or (not self.test_idx_map):
        # Mapping is empty
        return idx
    # Mapping is not empty
    if data_type == "train":
        return self.train_idx_map[idx]
    elif data_type == "test":
        return self.test_idx_map[idx]
    else:
        loghub.logMsg(msg="{}: Error! Invalid data type".format(__name__),
                      otherlogs=["test_acc"], level="error")
        return None
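# Worked example of the mapping (hypothetical indices): after calling
# prepare_data(train_indices=[5, 2], ...), train_idx_map == [5, 2], so row 0
# of the generated train.csv corresponds to entry 5 of the main data file and
# get_data_index_from_map(0, "train") returns 5.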
def NormalizeData(train_labels_dir, root_dir, dcase_dataset):
    """
    Compute the mean/std that will be used to normalize the dataset.
    """
    # concatenate the mel spectrograms in the time dimension; this variable
    # accumulates the spectrograms
    melConcat = np.asarray([])
    # flag for the first element
    flag = 0
    # generate a random permutation, because it's fun (there's no specific
    # reason for it; the statistics do not depend on the order)
    rand = np.random.permutation(len(dcase_dataset))
    # for all the training samples
    for i in range(len(dcase_dataset)):
        # extract the sample
        sample = dcase_dataset[rand[i]]
        data, label = sample
        loghub.logMsg(
            msg="{}: NORMALIZATION (FEATURE SCALING) : {} - data shape: {}, label: {}, current accumulation size: {}"
            .format(__name__, str(i), str(data.shape), str(label), str(melConcat.shape)),
            level="info")
        if flag == 0:
            # initialize melConcat with the first sample
            melConcat = data
            flag = 1
        else:
            # concatenate spectrograms from the second iteration onwards
            melConcat = np.concatenate((melConcat, data), axis=2)
    # extract the std and mean along the time axis
    std = np.std(melConcat, axis=2)
    mean = np.mean(melConcat, axis=2)
    return mean, std
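# The per-(channel, mel-bin) mean/std returned above are later reshaped to
# (C, 40, 1) and broadcast over time by the Normalize transform. A minimal
# numpy sketch of that broadcasting, assuming a (C, 40, T) spectrogram:
def _example_apply_normalization(spec, mean, std):
    # mean/std have shape (C, 40); add a time axis so they broadcast over T
    return (spec - mean[:, :, None]) / std[:, :, None]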
def prepare_test_data(self, test_csv="test_dataset.csv"):
    """
    Used when testing the model. Instead of preparing both train/test csv
    files as prepare_data() does, this function only prepares the test csv.
    """
    # Prepare the csv file path
    test_filepath = os.path.join(self.root_dir, test_csv)
    # Extract the data for the test csv (one filename per row)
    test_csv_data = []
    for i in range(self.get_test_data_size()):
        test_csv_data.append([self.test_data_list[i]])
    # Write into the test csv file
    util.write_to_csv_file(test_csv_data, test_filepath)
    loghub.logMsg(msg="{}: Test Data Labels generated in {} (test)".format(__name__, test_filepath),
                  otherlogs=["test_acc"])
    return test_filepath
def train(args, model, device, train_loader, optimizer, epoch):
    # set the model to training mode
    model.train()
    for batch_idx, sample_batched in enumerate(train_loader):
        # for every batch, extract data (16, 1, 40, 500) and label (16, 1)
        data, label = sample_batched
        # Map the variables to the current device (CPU or GPU)
        data = data.to(device, dtype=torch.float)
        label = label.to(device, dtype=torch.long)
        # set initial gradients to zero:
        # https://discuss.pytorch.org/t/why-do-we-need-to-set-the-gradients-manually-to-zero-in-pytorch/4903/9
        optimizer.zero_grad()
        # pass the data into the model
        output = model(data)
        # get the loss using the predictions and the label
        loss = F.nll_loss(output, label)
        # backpropagate the loss
        loss.backward()
        # update the model parameters:
        # https://discuss.pytorch.org/t/how-are-optimizer-step-and-loss-backward-related/7350
        optimizer.step()
        # log the results
        if batch_idx % args.log_interval == 0:
            loghub.logMsg(msg="{}: Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                __name__, epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()),
                otherlogs=["test_acc"])
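# Minimal driver sketch for train()/test() above, mirroring the epoch loop in
# buildCNNModel(); `args` is assumed to carry lr, epochs and log_interval, and
# the model/loaders are assumed to be prepared as elsewhere in this codebase.
def _example_training_loop(args, model, device, train_loader, test_loader):
    from torch import optim  # local import so the sketch is self-contained
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(args, model, device, test_loader, "Testing Data")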
def build_stack_model():
    """
    Stacking (Meta Ensembling) - an ensemble technique that combines multiple
    models to generate a new model.

    Referenced from http://blog.kaggle.com/2016/12/27/a-kagglers-guide-to-model-stacking-in-practice/
    Referenced from https://towardsdatascience.com/how-to-train-an-image-classifier-in-pytorch-and-use-it-to-perform-basic-inference-on-single-images-99465a1e9bf5
    """
    # 0. Load the training & test data (must be the same data used to train
    # the base models); the directory paths are module-level globals.
    data_manager = DatasetManager(train_labels_dir, test_labels_dir, root_dir)
    data_manager.load_all_data(include_test=True)

    # 1. Partition the training data into K folds
    kfolds = data_manager.apply_k_fold(K_FOLD)

    # 2. Create 2 datasets (train_meta & test_meta) with one empty column per
    # base model (M1, M2, ..., Mm)
    # train_meta is filled fold-by-fold from the k-fold split of the train data
    train_meta = np.empty((data_manager.get_train_data_size(), len(save_models)))  # (n x m): n = audio data, m = models
    # test_meta is filled by models trained on all of the train data
    test_meta = np.empty((data_manager.get_test_data_size(), len(save_models)))  # (n x m): n = audio data, m = models

    # 3. Apply K-fold cross validation to fill up the empty columns
    # (M1, M2, ..., Mm) of train_meta with each fold's prediction results
    loghub.logMsg(msg="{}: Getting Prediction Results to fill in train_meta".format(__name__),
                  otherlogs=["test_acc"])
    fold = 0  # fold counter
    for train, validate in kfolds:  # train and validate are lists of indices
        loghub.logMsg(msg="{}: Cross Validation Fold #{}...".format(__name__, (fold + 1)),
                      otherlogs=["test_acc"])
        # For each model
        for i in range(len(save_models)):
            loghub.logMsg(msg="{}: Fold #{} for model ({})...".format(__name__, (fold + 1), save_models[i]),
                          otherlogs=["test_acc"])
            # Get the feature index
            fid = feat_indices[i]
            # Load/Preprocess the feature for this model
            preprocessed_features_filepath = os.path.join(processed_root_dir, preprocessed_features[i])
            data_manager.load_feature(fid, preprocessed_features_filepath)
            # Prepare the data
            train_csv, test_csv = data_manager.prepare_data(
                train_indices=train, test_indices=validate,
                train_csv=temp_train_csv_file, test_csv=temp_test_csv_file,
                train_only=True)
            # Load the normalization data
            norm_std = os.path.join(processed_root_dir, fold_norm_stds[i][fold])
            norm_mean = os.path.join(processed_root_dir, fold_norm_means[i][fold])
            # Build the model & get the prediction results
            model, predictions = bm.buildCNNModel(
                train_csv=train_csv, test_csv=test_csv, norm_std=norm_std,
                norm_mean=norm_mean, data_manager=data_manager,
                num_of_channel=num_of_channels[i], save_model=False)
            # Fill up train_meta with the prediction results on test_csv
            for j in range(len(validate)):
                v_idx = validate[j]
                train_meta[v_idx][i] = predictions[j]  # data x model
        loghub.logMsg(msg="{}: End of Fold #{}".format(__name__, (fold + 1)),
                      otherlogs=["test_acc"])
        fold += 1
    loghub.logMsg(msg="{}: Train_meta generated successfully.".format(__name__),
                  otherlogs=["test_acc"])

    # 4. Fit each model to the full training dataset & make predictions on the
    # test dataset; store them in test_meta
    loghub.logMsg(msg="{}: Getting Prediction Results to fill in test_meta...".format(__name__),
                  otherlogs=["test_acc"])
    # For each model
    for i in range(len(save_models)):
        # Get the feature index
        fid = feat_indices[i]
        # Load/Preprocess the feature for this model
        preprocessed_features_filepath = os.path.join(processed_root_dir, preprocessed_features[i])
        data_manager.load_feature(fid, preprocessed_features_filepath)
        # Prepare the data
        train_csv, test_csv = data_manager.prepare_data(
            train_csv=temp_train_csv_file, test_csv=temp_test_csv_file)
        # Get the normalization data files
        norm_std = os.path.join(processed_root_dir, norm_stds[i])
        norm_mean = os.path.join(processed_root_dir, norm_means[i])
        # Get the saved model path
        model_name = os.path.join(processed_root_dir, save_models[i])
        # Build the model & get the prediction results
        model, predictions = bm.buildCNNModel(
            train_csv=train_csv, test_csv=test_csv, norm_std=norm_std,
            norm_mean=norm_mean, data_manager=data_manager,
            num_of_channel=num_of_channels[i], saved_model_name=model_name,
            save_model=True)
        # Fill up test_meta with the prediction results on test_csv
        for j in range(data_manager.get_test_data_size()):
            test_meta[j][i] = predictions[j]  # data x model
    loghub.logMsg(msg="{}: Test_meta generated successfully.".format(__name__),
                  otherlogs=["test_acc"])

    # 5. Fit the stacking model S to train_meta, using (M1, M2, ..., Mm) as features
    # 6. Use the stacked model S to make final predictions on test_meta
    # get the training/testing labels
    train_meta_labels = np.asarray(data_manager.train_label_indices)
    test_meta_labels = np.asarray(data_manager.test_label_indices)
    # Fit and train the classifier model (steps 5 & 6)
    classifier = ClassifierModel(train_meta, train_meta_labels, test_meta, test_meta_labels)
    predicts = classifier.run_decision_tree_classification()
    # Evaluate
    precision, recall, f1_measure = classifier.evaluate_prediction(predicts)
    correct, total = classifier.get_accuracy(predicts)
    percentage = 100 * correct / total
    loghub.logMsg(
        msg="{}: Stacked Model Prediction:\nAccuracy: {}/{} ({:.0f}%)\n\tPrecision: {}\n\tRecall: {}\n\tF1 Measure:{}"
        .format(__name__, correct, total, percentage, precision, recall, f1_measure),
        otherlogs=["test_acc"])

    # 7. Save the ensemble model
    stacked_model_filepath = os.path.join(processed_root_dir, stacked_model_name)
    classifier.save_model(stacked_model_filepath)
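# Shape sketch for the stacking step above: train_meta is (N_train x M) and
# test_meta is (N_test x M), with one column of out-of-fold predictions per
# base model; the level-2 classifier uses those columns as its only features.
# A hedged stand-in for ClassifierModel's decision tree, assuming scikit-learn
# is available:
def _example_stack_fit(train_meta, train_meta_labels, test_meta):
    from sklearn.tree import DecisionTreeClassifier
    clf = DecisionTreeClassifier().fit(train_meta, train_meta_labels)
    return clf.predict(test_meta)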
def predict_with_stack_model(with_labels=True):
    """
    Load the previously saved model to predict labels on the test data.

    with_labels (bool): Indicates whether the test data has labels.
        - evaluation data has no labels
        - test data has labels
    """
    # 1. Load the testing data (directory paths are module-level globals)
    if with_labels:
        # Test dataset (with labels)
        data_manager = DatasetManager(train_labels_dir, test_labels_dir, root_dir)
        data_manager.load_all_data(with_labels=True)
    else:
        # Evaluation dataset (no labels)
        data_manager = DatasetManager("", eval_labels_dir, root_dir)
        data_manager.load_all_data(with_labels=False)

    # Initialize the input vector for the stacked model
    input_vect = np.empty((data_manager.get_test_data_size(), len(save_models)))  # (n x m): n = audio data, m = models

    # 2. Get the prediction results from each model
    for i in range(len(save_models)):
        # Get the feature index
        fid = feat_indices[i]
        # Preprocess the feature for this model
        if with_labels:
            # Test dataset (with labels)
            preprocessed_features_filepath = os.path.join(processed_root_dir, preprocessed_features[i])
        else:
            # Evaluation dataset (no labels)
            preprocessed_features_filepath = os.path.join(processed_root_dir, preprocessed_features_test[i])
        # TODO: this has to be removed. When predicting, we won't have
        # preprocessed the audio file (as we don't know what it is), so leave
        # it blank.
        data_manager.load_feature(fid, preprocessed_features_filepath)
        # Prepare the data
        if with_labels:
            # Test dataset (with labels)
            train_csv, test_csv = data_manager.prepare_data(
                train_csv=temp_train_csv_file, test_csv=temp_test_csv_file)
        else:
            # Evaluation dataset (no labels)
            test_csv = data_manager.prepare_test_data(test_csv=temp_test_csv_file)
        # Get the normalization data files
        norm_std = os.path.join(processed_root_dir, norm_stds[i])
        norm_mean = os.path.join(processed_root_dir, norm_means[i])
        # Get the saved model path
        saved_model_path = os.path.join(processed_root_dir, save_models[i])
        # Test the saved model & get the prediction results (testCNNModel
        # handles both the labeled and unlabeled cases via with_labels)
        predictions = bm.testCNNModel(saved_model_path=saved_model_path,
                                      test_csv=test_csv, norm_std=norm_std,
                                      norm_mean=norm_mean,
                                      data_manager=data_manager,
                                      num_of_channel=num_of_channels[i],
                                      with_labels=with_labels)
        # Fill up the input vector with the prediction results from this model
        for j in range(data_manager.get_test_data_size()):
            input_vect[j][i] = predictions[j]

    # 3. Get the prediction results from the stacked model based on input_vect
    # Load the stacked model
    stacked_model_filepath = os.path.join(processed_root_dir, stacked_model_name)
    stacked_em = pickle.load(open(stacked_model_filepath, 'rb'))
    # Get the prediction results
    predicts = stacked_em.predict(input_vect)
    # Report the prediction accuracy
    if with_labels:
        # Test dataset (with labels)
        correct, total = util.compare_list_elements(predicts, data_manager.test_label_indices)
        percentage = 100 * correct / total
        loghub.logMsg(msg="{}: Stacked Model Prediction Accuracy: {}/{} ({:.0f}%)".format(
            __name__, correct, total, percentage), otherlogs=["test_acc"])
        # Optionally plot the non-normalized confusion matrix:
        # np.set_printoptions(precision=2)
        # mk.plot_confusion_matrix(data_manager.test_label_indices, predicts,
        #     classes=['airport', 'bus', 'metro', 'metro_station', 'park',
        #              'public_square', 'shopping_mall', 'street_pedestrian',
        #              'street_traffic', 'tram'],
        #     title='Confusion matrix')
        # plt.show()
    else:
        # Evaluation dataset (no labels): store the prediction results
        dcase_eval_data = DCASEDataset(eval_labels_dir, root_dir, data_manager)
        results = []
        headers = ["filename", "label", "label_index"]
        for i in range(len(dcase_eval_data) - 1):
            result = []
            # Get the prediction result for each audio file
            # (the first line of datalist is the header, so add 1 to skip it)
            result.append(dcase_eval_data.datalist[i + 1])
            pred_idx = int(predicts[i])
            result.append(dcase_eval_data.default_labels[pred_idx])
            result.append(pred_idx)
            # Add to the list
            results.append(result)
        # Write to the csv file
        util.write_to_csv_file(results, predict_results_csv, headers)
parser.add_argument("--em", help="Ensemble Mode", choices=['build', "test", 'predict']) parser.add_argument("--ename", help="Stacked Model name (eg. stackedModel.sav)") process_arguments(parser) # 2. Set up logging loghub.init_main_logger(os.path.join("log_files", main_log)) loghub.setup_logger("test_acc", os.path.join("log_files", test_accu_log)) # 3. Run Ensemble Learning if ensemble_mode == 0: #print("Building Stacked Ensemble Model (Meta Ensembling)...") loghub.logMsg( msg="{}: Building Stacked Ensemble Model (Meta Ensembling)...". format(__name__), otherlogs=["test_acc"]) build_stack_model() elif ensemble_mode == 1: #print("Testing Stacked Ensemble Model...") loghub.logMsg( msg="{}: Testing Stacked Ensemble Model...".format(__name__), otherlogs=["test_acc"]) predict_with_stack_model(with_labels=True) elif ensemble_mode == 2: #print("Predicting with Stacked Ensemble Model...") loghub.logMsg( msg="{}: Predicting with Stacked Ensemble Model...".format( __name__), otherlogs=["test_acc"]) predict_with_stack_model(with_labels=False)
def buildCNNModel(train_csv, test_csv, norm_std, norm_mean, data_manager,
                  num_of_channel, split_valid=False, saved_model_name="",
                  test_batch_size=16, batch_size=16, epochs=200, lr=0.01,
                  no_cuda=False, seed=1, log_interval=10, save_model=True):
    """
    Build and train the CNN model.

    Required Parameters:
        train_csv (string): file that contains all train data labels
        test_csv (string): file that contains all test data labels
        norm_std (string): file that contains the normalization std
        norm_mean (string): file that contains the normalization mean
        data_manager (DatasetManager): contains all the loaded train/test data
        num_of_channel (int): number of channels of the input features
        split_valid (bool): True = split the train data into train/validate,
            False = use the test data as validation data
        saved_model_name (string): name to use when saving the model

    Optional Parameters:
        batch_size (int): input batch size for training
        test_batch_size (int): input batch size for testing
        epochs (int): number of epochs to train
        lr (float): learning rate
        no_cuda (bool): disables CUDA training
        seed (int): random seed
        log_interval (int): how many batches to wait before logging training status
        save_model (bool): for saving the current model
    """
    # Step 0: Setting up Training Settings ##################################
    # Training settings
    use_cuda = not no_cuda and torch.cuda.is_available()
    torch.manual_seed(seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    args = {
        "batch_size": batch_size,
        "test_batch_size": test_batch_size,
        "epochs": epochs,
        "lr": lr,
        "no_cuda": no_cuda,
        "seed": seed,
        "log_interval": log_interval,
        "save_model": save_model
    }
    args = Namespace(**args)

    # Step 1a: Preparing Data - Extract data ################################
    # init the train/test directories
    train_labels_dir = train_csv
    test_labels_dir = test_csv
    root_dir = data_manager.root_dir

    # Step 1b: Preparing Data - Transform Data ##############################
    # Compute the normalization statistics
    if os.path.isfile(norm_std) and os.path.isfile(norm_mean):
        loghub.logMsg(msg="{}: Loading Normalization Data...".format(__name__),
                      otherlogs=["test_acc"])
        # load the npy files
        mean = np.load(norm_mean)
        std = np.load(norm_std)
    else:
        # Run the normalization and save the mean/std if not already computed
        loghub.logMsg(msg="{}: DATA NORMALIZATION : ACCUMULATING THE DATA".format(__name__),
                      otherlogs=["test_acc"])
        # Load the dataset
        dcase_dataset = DCASEDataset(train_labels_dir, root_dir, data_manager, True)
        mean, std = NormalizeData(train_labels_dir, root_dir, dcase_dataset)
        # Save the statistics
        np.save(norm_mean, mean)
        np.save(norm_std, std)
        loghub.logMsg(msg="{}: DATA NORMALIZATION COMPLETED".format(__name__),
                      otherlogs=["test_acc"])

    # Convert to torch tensors
    mean = torch.from_numpy(mean)
    std = torch.from_numpy(std)
    # reshape to (CxHxW); the last dimension is 1 and will be broadcast
    # automatically to 500 (time)
    mean = torch.reshape(mean, [num_of_channel, 40, 1])
    std = torch.reshape(std, [num_of_channel, 40, 1])

    # init the data_transform
    data_transform = transforms.Compose([cnn.ToTensor(), cnn.Normalize(mean, std)])

    loghub.logMsg(msg="{}: Preparing Data...".format(__name__), otherlogs=["test_acc"])

    # init the datasets
    dcase_dataset = DCASEDataset(csv_file=train_labels_dir, root_dir=root_dir,
                                 data_manager=data_manager, is_train_data=True,
                                 transform=data_transform)
    dcase_dataset_test = DCASEDataset(csv_file=test_labels_dir, root_dir=root_dir,
                                      data_manager=data_manager, is_train_data=False,
                                      transform=data_transform)

    # Step 1c: Preparing Data - Load Data ###################################
    # set the number of cpu workers to load data in parallel
    kwargs = {'num_workers': 16, 'pin_memory': True} if use_cuda else {}
    # get the training, validation and testing data loaders
    train_loader = torch.utils.data.DataLoader(dcase_dataset, batch_size=args.batch_size,
                                               shuffle=True, **kwargs)
    valid_loader = torch.utils.data.DataLoader(dcase_dataset_test, batch_size=args.test_batch_size,
                                               shuffle=False, **kwargs)
    test_loader = torch.utils.data.DataLoader(dcase_dataset_test, batch_size=args.test_batch_size,
                                              shuffle=False, **kwargs)
    # Update the data loaders
    if split_valid:
        # Split the train data into train/validation data
        valid_ratio = 0.2
        num_train_data = len(dcase_dataset)
        indices = list(range(num_train_data))
        split = int(np.floor(valid_ratio * num_train_data))
        np.random.shuffle(indices)
        train_idx, valid_idx = indices[split:], indices[:split]
        # Initialize the random samplers
        train_sampler = SubsetRandomSampler(train_idx)
        valid_sampler = SubsetRandomSampler(valid_idx)
        # get the training and validation data loaders
        train_loader = torch.utils.data.DataLoader(dcase_dataset, batch_size=args.batch_size,
                                                   sampler=train_sampler, **kwargs)
        valid_loader = torch.utils.data.DataLoader(dcase_dataset, batch_size=args.test_batch_size,
                                                   sampler=valid_sampler, **kwargs)

    # Step 2: Build Model ###################################################
    # init the model
    model = BaselineASC(num_of_channel).to(device)
    # init the optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Step 3: Train Model ###################################################
    loghub.logMsg(msg="{}: MODEL TRAINING START".format(__name__), otherlogs=["test_acc"])
    # train the model
    for epoch in range(1, args.epochs + 1):
        cnn.train(args, model, device, train_loader, optimizer, epoch)
        loghub.logMsg(msg="{}: EPOCH {} - MODEL: {}".format(__name__, epoch, saved_model_name),
                      otherlogs=["test_acc"])
        cnn.test(args, model, device, valid_loader, "Validation Data")
        #cnn.test(args, model, device, train_loader, 'Training Data')
        #cnn.test(args, model, device, test_loader, 'Testing Data')
    loghub.logMsg(msg="{}: MODEL TRAINING END".format(__name__), otherlogs=["test_acc"])

    # Step 4: Test Model ####################################################
    loghub.logMsg(msg="{}: MODEL TESTING START".format(__name__), otherlogs=["test_acc"])
    # test the model
    if split_valid:
        predictions = cnn.test(args, model, device, valid_loader, "Validation Data")
    else:
        predictions = cnn.test(args, model, device, test_loader, "Testing Data")
    loghub.logMsg(msg="{}: MODEL TESTING END".format(__name__), otherlogs=["test_acc"])

    # Step 5: Save Model ####################################################
    # save the model
    if args.save_model:
        torch.save(model.state_dict(), saved_model_name)

    return model, predictions
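# Usage sketch for buildCNNModel() (all file names below are placeholders):
# building one base model from prepared csv files, as build_stack_model() does.
def _example_build_cnn(data_manager):
    model, predictions = buildCNNModel(
        train_csv="../Dataset/train_dataset.csv",
        test_csv="../Dataset/test_dataset.csv",
        norm_std="processed_data/norm_std.npy",
        norm_mean="processed_data/norm_mean.npy",
        data_manager=data_manager,
        num_of_channel=1,
        epochs=1,          # keep the sketch cheap; the real runs use 200
        save_model=False)
    return model, predictions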
def testCNNModel(saved_model_path, test_csv, norm_std, norm_mean, data_manager,
                 num_of_channel, with_labels, test_batch_size=16, no_cuda=False,
                 seed=1):
    """
    Test the trained CNN model.

    Required Parameters:
        saved_model_path (string): path to the saved CNN model (BaselineASC)
        test_csv (string): file that contains all test data labels
        norm_std (string): file that contains the normalization std
        norm_mean (string): file that contains the normalization mean
        data_manager (DatasetManager): contains all the loaded train/test data
        num_of_channel (int): number of channels of the input features
        with_labels (bool): indicates whether the test data has labels

    Optional Parameters:
        test_batch_size (int): input batch size for testing
        no_cuda (bool): disables CUDA training
        seed (int): random seed
    """
    # Step 0: Setting up Testing Settings ###################################
    use_cuda = not no_cuda and torch.cuda.is_available()
    torch.manual_seed(seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    args = {
        "test_batch_size": test_batch_size,
        "no_cuda": no_cuda,
        "seed": seed,
    }
    args = Namespace(**args)

    # Step 1a: Preparing Data - Extract data ################################
    # init the test directories
    test_labels_dir = test_csv
    root_dir = data_manager.root_dir

    # Step 1b: Preparing Data - Transform Data ##############################
    # Load the normalization statistics
    loghub.logMsg(msg="{}: Loading Normalization Data...".format(__name__), otherlogs=["test_acc"])
    mean = np.load(norm_mean)
    std = np.load(norm_std)
    loghub.logMsg(msg="{}: Normalization Data Loaded.".format(__name__), otherlogs=["test_acc"])

    # Convert to torch tensors
    mean = torch.from_numpy(mean)
    std = torch.from_numpy(std)
    # reshape to (CxHxW); the last dimension is 1 and will be broadcast
    # automatically to 500 (time)
    mean = torch.reshape(mean, [num_of_channel, 40, 1])
    std = torch.reshape(std, [num_of_channel, 40, 1])

    # init the data_transform
    data_transform = transforms.Compose([cnn.ToTensor(), cnn.Normalize(mean, std)])

    loghub.logMsg(msg="{}: Preparing Data...".format(__name__), otherlogs=["test_acc"])

    # init the dataset
    dcase_dataset_test = DCASEDataset(csv_file=test_labels_dir, root_dir=root_dir,
                                      data_manager=data_manager, is_train_data=False,
                                      transform=data_transform)

    # Step 1c: Preparing Data - Load Data ###################################
    # set the number of cpu workers to load data in parallel
    kwargs = {'num_workers': 16, 'pin_memory': True} if use_cuda else {}
    # get the testing data loader
    test_loader = torch.utils.data.DataLoader(dcase_dataset_test, batch_size=args.test_batch_size,
                                              shuffle=False, **kwargs)

    # Step 2: Test Model ####################################################
    loghub.logMsg(msg="{}: Model TESTING START...".format(__name__), otherlogs=["test_acc"])
    # load the model
    model = BaselineASC(num_of_channel).to(device)
    model.load_state_dict(torch.load(saved_model_path))
    # test the model
    if with_labels:
        predictions = cnn.test(args, model, device, test_loader, "Testing Data")
    else:
        # Evaluation dataset (no labels)
        predictions = cnn.predict(model, device, test_loader)
    loghub.logMsg(msg="{}: Model TESTING END.".format(__name__), otherlogs=["test_acc"])

    return predictions
def load_feature(self, feature_index, filename):
    """
    feature_index (int): index indicating which feature to extract
    filename (string): name of the file used to cache the extracted features,
        e.g. feature.npy

    Load or extract the chosen feature for all audio files.
    """
    # check that the data have been loaded
    if not self.audio_files:
        loghub.logMsg(msg="{}: Data have not been loaded. Running data_manager.load_all_data()...".format(__name__),
                      otherlogs=["test_acc"], level="warning")
        self.load_all_data()
    # Load/Extract the features
    loghub.logMsg(msg="{}: Loading/Extracting feature {} from audio files...".format(__name__, feature_index),
                  otherlogs=["test_acc"])
    if os.path.isfile(filename):
        # the cache file already exists
        self.audio_data = np.load(filename)
    else:
        # the cache file does not exist: extract the feature spectrograms and save them
        mel_specs = []
        specA = specB = None
        # Load previously preprocessed spectrograms if they exist (these feed
        # the multi-channel features below)
        if feature_index == 3:
            if os.path.isfile("processed_data/left_spec.npy") and os.path.isfile("processed_data/right_spec.npy"):
                specA = np.load("processed_data/left_spec.npy")
                specB = np.load("processed_data/right_spec.npy")
        elif feature_index == 6:
            if os.path.isfile("processed_data/LR_spec.npy") and os.path.isfile("processed_data/diff_spec.npy"):
                specA = np.load("processed_data/LR_spec.npy")
                specB = np.load("processed_data/diff_spec.npy")
        elif feature_index == 8:
            if os.path.isfile("processed_data/hpss_spec.npy") and os.path.isfile("processed_data/mono_spec.npy"):
                specA = np.load("processed_data/hpss_spec.npy")
                specB = np.load("processed_data/mono_spec.npy")
        elif feature_index == 15:
            if os.path.isfile("processed_data/mfcc_left_spec.npy") and os.path.isfile("processed_data/mfcc_right_spec.npy"):
                specA = np.load("processed_data/mfcc_left_spec.npy")
                specB = np.load("processed_data/mfcc_right_spec.npy")
        elif feature_index == 16:
            if os.path.isfile("processed_data/mfcc_LR_spec.npy") and os.path.isfile("processed_data/mfcc_diff_spec.npy"):
                specA = np.load("processed_data/mfcc_LR_spec.npy")
                specB = np.load("processed_data/mfcc_diff_spec.npy")
        elif feature_index == 17:
            if os.path.isfile("processed_data/hpssmono_spec.npy") and os.path.isfile("processed_data/LR_spec.npy"):
                specA = np.load("processed_data/hpssmono_spec.npy")
                specB = np.load("processed_data/LR_spec.npy")
        elif feature_index == 18:
            if os.path.isfile("processed_data/mono_spec.npy") and os.path.isfile("processed_data/LRD_spec.npy"):
                specA = np.load("processed_data/mono_spec.npy")
                specB = np.load("processed_data/LRD_spec.npy")
        elif feature_index == 19:
            if os.path.isfile("processed_data/mfcc_mono_spec.npy") and os.path.isfile("processed_data/mfcc_LRD_spec.npy"):
                specA = np.load("processed_data/mfcc_mono_spec.npy")
                specB = np.load("processed_data/mfcc_LRD_spec.npy")
        # Extract the feature from each audio file. Note: `is not None` is
        # used below; comparing numpy arrays with `!= None` is elementwise and
        # raises an ambiguous-truth-value error.
        have_specs = specA is not None and specB is not None
        for i in range(len(self.audio_files)):
            wav_name = os.path.join(self.root_dir, self.audio_files[i])
            if feature_index == 0:
                # Mel spectrogram of the mono channel (1 channel)
                mel_specs.append(ap.extract_mel_spectrogram_for_mono_channel(wav_name))
            elif feature_index == 1:
                # Mel spectrogram of the left channel (1 channel)
                mel_specs.append(ap.extract_mel_spectrogram_for_left_channel(wav_name))
            elif feature_index == 2:
                # Mel spectrogram of the right channel (1 channel)
                mel_specs.append(ap.extract_mel_spectrogram_for_right_channel(wav_name))
            elif feature_index == 3:
                # Mel spectrograms of the left & right channels (2 channels)
                if have_specs:
                    mel_specs.append(ap.combine_left_and_right_mel_spectrogram(wav_name, specA[i], specB[i]))
                else:
                    mel_specs.append(ap.combine_left_and_right_mel_spectrogram(wav_name))
            elif feature_index == 4:
                # Mel spectrogram of the difference of the left & right channels (1 channel)
                mel_specs.append(ap.extract_mel_spectrogram_for_difference_of_left_right_channel(wav_name))
            elif feature_index == 5:
                # Mel spectrogram of the sum of the left & right channels (1 channel)
                mel_specs.append(ap.extract_mel_spectrogram_for_sum_of_left_right_channel(wav_name))
            elif feature_index == 6:
                # Mel spectrograms of left & right & left-right difference (3 channels)
                if have_specs:
                    mel_specs.append(ap.combine_left_right_with_LRdifference(wav_name, specA[i], specB[i]))
                else:
                    mel_specs.append(ap.combine_left_right_with_LRdifference(wav_name))
            elif feature_index == 7:
                # Mel spectrogram of the mono channel with HPSS applied (2 channels)
                mel_specs.append(ap.extract_mel_spectrogram_for_hpss(wav_name))
            elif feature_index == 8:
                # Mel spectrograms of the mono channel & HPSS (3 channels)
                if have_specs:
                    mel_specs.append(ap.combine_hpss_and_mono_mel_spectrogram(wav_name, specA[i], specB[i]))
                else:
                    mel_specs.append(ap.combine_hpss_and_mono_mel_spectrogram(wav_name))
            elif feature_index == 9:
                # Chroma feature (1 channel)
                mel_specs.append(ap.extract_chroma_for_mono_channel(wav_name))
            elif feature_index == 10:
                # Zero-crossing feature (1 channel)
                mel_specs.append(ap.extract_zero_crossing_for_mono_channel(wav_name))
            elif feature_index == 11:
                # MFCC feature from the mono channel (1 channel)
                mel_specs.append(ap.extract_mfcc_for_mono_channel(wav_name))
            elif feature_index == 12:
                # MFCC feature from the left channel (1 channel)
                mel_specs.append(ap.extract_mfcc_spectrogram_for_left_channel(wav_name))
            elif feature_index == 13:
                # MFCC feature from the right channel (1 channel)
                mel_specs.append(ap.extract_mfcc_spectrogram_for_right_channel(wav_name))
            elif feature_index == 14:
                # MFCC feature from the difference of the left & right channels (1 channel)
                mel_specs.append(ap.extract_mfcc_spectrogram_for_difference_of_left_right_channel(wav_name))
            elif feature_index == 15:
                # MFCC features from the left & right channels (2 channels)
                if have_specs:
                    mel_specs.append(ap.combine_mfcc_left_and_right(wav_name, specA[i], specB[i]))
                else:
                    mel_specs.append(ap.combine_mfcc_left_and_right(wav_name))
            elif feature_index == 16:
                # MFCC features from left & right & left-right difference (3 channels)
                if have_specs:
                    mel_specs.append(ap.combine_mfcc_left_right_with_LRdifference(wav_name, specA[i], specB[i]))
                else:
                    mel_specs.append(ap.combine_mfcc_left_right_with_LRdifference(wav_name))
            elif feature_index == 17:
                # Combine left mel + right mel + HPSS + mono mel
                if have_specs:
                    mel_specs.append(ap.extract_early_fusion_left_right_3f(wav_name, specA[i], specB[i]))
                else:
                    mel_specs.append(ap.extract_early_fusion_left_right_3f(wav_name))
            elif feature_index == 18:
                # Combine left mel + right mel + diff mel + mono mel
                if have_specs:
                    mel_specs.append(ap.extract_early_fusion_left_right_diff_mono(wav_name, specA[i], specB[i]))
                else:
                    mel_specs.append(ap.extract_early_fusion_left_right_diff_mono(wav_name))
            elif feature_index == 19:
                # Combine left MFCC + right MFCC + diff MFCC + mono MFCC
                if have_specs:
                    mel_specs.append(ap.extract_early_fusion_MFCC_left_right_diff_mono(wav_name, specA[i], specB[i]))
                else:
                    mel_specs.append(ap.extract_early_fusion_MFCC_left_right_diff_mono(wav_name))
        if filename:
            np.save(filename, mel_specs)
        self.audio_data = np.asarray(mel_specs)
    loghub.logMsg(msg="{}: Feature {} extracted.".format(__name__, feature_index),
                  otherlogs=["test_acc"])
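# Usage sketch (hypothetical cache path): extracting feature 0 (mono mel
# spectrogram) for all loaded audio, with the result cached to an .npy file so
# later calls just load it:
#
#   data_manager.load_feature(0, "processed_data/mono_spec.npy")
#   assert len(data_manager.audio_data) == len(data_manager.audio_files)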
def prepare_data(self, train_indices=None, test_indices=None, train_only=False,
                 train_csv="train_dataset.csv", test_csv="test_dataset.csv"):
    """
    train_indices (array of indices): indices of all training audio files
    test_indices (array of indices): indices of all testing audio files
    train_only (bool): indicates whether train_indices and test_indices both
        come from the training data
    train_csv (string): filename of the newly generated train dataset
    test_csv (string): filename of the newly generated test dataset

    Prepare the data for training/testing the model. Since all the features
    are loaded and stored in a single main data file, this function generates
    a train.csv and test.csv which will be used to build the model. The index
    of each audio file in train.csv/test.csv is mapped to its index in the
    main data file. The purpose is to improve efficiency by not recomputing or
    re-extracting the features whenever the train/test split changes.
    """
    loghub.logMsg(msg="{}: Generating train.csv and test.csv for building model...".format(__name__),
                  otherlogs=["test_acc"])
    self.train_idx_map = []
    self.test_idx_map = []
    if train_indices is None and test_indices is None:
        # use the original index order
        train_indices = np.arange(self.get_train_data_size())  # all of the train data
        test_indices = np.arange(self.get_test_data_size())    # all of the test data
    # Extract the data for train.csv
    train_csv_data = []
    for i in range(len(train_indices)):
        # get the index
        index = train_indices[i]
        # Get the dataset row
        dataset = []
        dataset.append(self.train_data_list[index])
        dataset.append(self.train_label_list[index])
        dataset.append(self.train_label_indices[index])
        train_csv_data.append(dataset)
        # Map the index to the main data list
        self.train_idx_map.append(index)
    # Extract the data for test.csv
    test_csv_data = []
    base = self.base  # main data = train + test (the test indices start after the train data)
    for i in range(len(test_indices)):
        # get the index
        index = test_indices[i]
        # check whether test_indices comes from the train or test data
        if train_only:
            # test_indices is a validation set (from the training data)
            dataset = []
            dataset.append(self.train_data_list[index])
            dataset.append(self.train_label_list[index])
            dataset.append(self.train_label_indices[index])
            test_csv_data.append(dataset)
            # Map the index to the main data list
            self.test_idx_map.append(index)  # index = index into self.audio_data
        else:
            # test_indices is a test set (from the testing data)
            dataset = []
            dataset.append(self.test_data_list[index])
            dataset.append(self.test_label_list[index])
            dataset.append(self.test_label_indices[index])
            test_csv_data.append(dataset)
            # Map the index to the main data list
            self.test_idx_map.append(base + index)  # base + index = index into self.audio_data
    # Prepare the csv file paths
    train_filepath = os.path.join(self.root_dir, train_csv)
    test_filepath = os.path.join(self.root_dir, test_csv)
    # Write into the train csv file
    util.write_to_csv_file(train_csv_data, train_filepath)
    # Write into the test csv file
    util.write_to_csv_file(test_csv_data, test_filepath)
    loghub.logMsg(msg="{}: Data labels generated in {} (train) and {} (test)".format(__name__, train_filepath, test_filepath),
                  otherlogs=["test_acc"])
    return train_filepath, test_filepath
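# Usage sketch (hypothetical fold indices): regenerating the csv files for one
# cross-validation fold, as build_stack_model() does; train_only=True marks
# both index lists as coming from the training data.
#
#   train_csv, test_csv = data_manager.prepare_data(
#       train_indices=train_idx, test_indices=valid_idx,
#       train_csv="temp_train.csv", test_csv="temp_test.csv", train_only=True)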
def main():
    # Initialize the timer
    timer = StopWatch()
    timer.startTimer()

    # Step 0: Setting up Training Settings ##################################
    # Training settings
    parser = argparse.ArgumentParser(
        description='PyTorch Baseline code for ASC Group Project (CS4347)')
    parser.add_argument('--batch-size', type=int, default=16, metavar='N',
                        help='input batch size for training (default: 16)')
    parser.add_argument('--test-batch-size', type=int, default=16, metavar='N',
                        help='input batch size for testing (default: 16)')
    parser.add_argument('--epochs', type=int, default=200, metavar='N',
                        help='number of epochs to train (default: 200)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    # Step 1a: Preparing Data - Extract data ################################
    # init the train and test directories
    train_labels_dir = '../Dataset/train/train_labels.csv'
    test_labels_dir = '../Dataset/test/test_labels.csv'
    root_dir = '../Dataset'
    # Load all the dataset
    data_manager = DatasetManager(train_labels_dir, test_labels_dir, root_dir)
    data_manager.load_all_data(include_test=False)
    # Load/Preprocess the feature for the model
    data_manager.load_feature(feature_index, preprocessed_features)
    # Prepare the data
    train_labels_dir, test_labels_dir = data_manager.prepare_data(
        train_csv=temp_train_csv_file, test_csv=temp_test_csv_file)

    # Step 1b: Preparing Data - Transform Data ##############################
    # Compute the normalization statistics
    if os.path.isfile(preprocessed_norm_mean_file) and os.path.isfile(preprocessed_norm_std_file):
        # if already normalized, just load the saved npy files
        mean = np.load(preprocessed_norm_mean_file)
        std = np.load(preprocessed_norm_std_file)
    else:
        # if not, run the normalization and save the mean/std
        loghub.logMsg(msg="{}: DATA NORMALIZATION : ACCUMULATING THE DATA".format(__name__),
                      otherlogs=["test_acc"])
        # load the dataset
        dcase_dataset = DCASEDataset(train_labels_dir, root_dir, data_manager, True)
        mean, std = NormalizeData(train_labels_dir, root_dir, dcase_dataset)
        np.save(preprocessed_norm_mean_file, mean)
        np.save(preprocessed_norm_std_file, std)
        loghub.logMsg(msg="{}: DATA NORMALIZATION COMPLETED".format(__name__),
                      otherlogs=["test_acc"])

    # Convert to torch tensors
    mean = torch.from_numpy(mean)
    std = torch.from_numpy(std)
    # reshape to (CxHxW); the last dimension is 1 and will be broadcast
    # automatically to 500 (time)
    mean = torch.reshape(mean, [num_of_channel, 40, 1])
    std = torch.reshape(std, [num_of_channel, 40, 1])

    # init the data_transform
    data_transform = transforms.Compose([cnn.ToTensor(), cnn.Normalize(mean, std)])

    # init the datasets
    dcase_dataset = DCASEDataset(csv_file=train_labels_dir, root_dir=root_dir,
                                 data_manager=data_manager, is_train_data=True,
                                 transform=data_transform)
    dcase_dataset_test = DCASEDataset(csv_file=test_labels_dir, root_dir=root_dir,
                                      data_manager=data_manager, is_train_data=False,
                                      transform=data_transform)

    # Step 1c: Preparing Data - Load Data ###################################
    # set the number of cpu workers to load data in parallel
    kwargs = {'num_workers': 16, 'pin_memory': True} if use_cuda else {}
    # get the training and testing data loaders
    train_loader = torch.utils.data.DataLoader(dcase_dataset, batch_size=args.batch_size,
                                               shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(dcase_dataset_test, batch_size=args.test_batch_size,
                                              shuffle=False, **kwargs)

    # Step 2: Build Model ###################################################
    # init the model
    model = BaselineASC(num_of_channel).to(device)
    # init the optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Step 3: Train Model ###################################################
    loghub.logMsg(msg="{}: MODEL TRAINING START.".format(__name__), otherlogs=["test_acc"])
    # train the model
    for epoch in range(1, args.epochs + 1):
        cnn.train(args, model, device, train_loader, optimizer, epoch)
        cnn.test(args, model, device, train_loader, 'Training Data')
        cnn.test(args, model, device, test_loader, 'Test Data')
    loghub.logMsg(msg="{}: MODEL TRAINING END.".format(__name__), otherlogs=["test_acc"])

    # Step 4: Save Model ####################################################
    # save the model
    if args.save_model:
        torch.save(model.state_dict(), saved_model)

    # stop the timer
    timer.stopTimer()
    timer.printElapsedTime()