import pickle

from sklearn import mixture


def trainGMM(gen_train_data, spoof_train_data, mixtures, gPath, sPath, init):
    # init is either 'kmeans' or 'random'
    makeDirectory(gPath)
    makeDirectory(sPath)

    for component in mixtures:
        # Train GMM for genuine data
        print('Training GMM for genuine using %d components with diagonal cov and %s initialization'
              % (component, init))
        gmmGen = mixture.GaussianMixture(n_components=component,
                                         covariance_type='diag',
                                         max_iter=100,   # limiting EM to 10 iterations did not help
                                         init_params=init,
                                         verbose=2)
        gmmGen.fit(gen_train_data)

        # Train GMM for spoof data
        print('Training GMM for spoof using %d components with diagonal cov and %s initialization'
              % (component, init))
        gmmSpoof = mixture.GaussianMixture(n_components=component,
                                           covariance_type='diag',
                                           max_iter=100,
                                           init_params=init,
                                           verbose=2)
        gmmSpoof.fit(spoof_train_data)

        gModelName = 'genuine_model_' + str(component) + '.p'
        sModelName = 'spoof_model_' + str(component) + '.p'

        # Save the models using pickle
        with open(gPath + gModelName, 'wb') as f:
            pickle.dump(gmmGen, f)
        with open(sPath + sModelName, 'wb') as f:
            pickle.dump(gmmSpoof, f)
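# makeDirectory() is a helper used throughout these scripts but not shown
# here. A minimal sketch, assuming it simply creates the folder (and any
# missing parents) when it does not already exist:
import os


def makeDirectory(path):
    # Create the directory tree if it is missing; do nothing otherwise
    if not os.path.exists(path):
        os.makedirs(path)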
def test_GMM_Models(mixtures, gmmModelPath, scoreSavePath, test_feature_file, pca, dim):
    genModelPath = gmmModelPath + '/genuine/'
    spoofModelPath = gmmModelPath + '/spoof/'
    makeDirectory(scoreSavePath)

    test_data = loadFeatures(test_feature_file)

    # Stack into a single feature matrix unless the data already has one of
    # the expected utterance counts (train / dev / eval set sizes)
    if len(test_data) not in (3014, 1710, 13306):
        test_data = get_data_in_matrix(test_data)

    # Apply PCA-based dimensionality reduction when a fitted PCA object is given
    if pca is not None:
        test_data = pca.transform(test_data)[:, 0:dim]

    scoreTestFile(mixtures, test_data, genModelPath, spoofModelPath, scoreSavePath)
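# scoreTestFile() is defined elsewhere. Below is a minimal sketch of the GMM
# log-likelihood-ratio scoring it presumably performs, assuming test_data is a
# list of per-utterance feature matrices and the model files follow the naming
# used in trainGMM() above:
import pickle

import numpy as np


def scoreTestFile_sketch(mixtures, test_data, genModelPath, spoofModelPath, scoreSavePath):
    for component in mixtures:
        # Load the genuine and spoof GMMs trained with this number of components
        with open(genModelPath + 'genuine_model_' + str(component) + '.p', 'rb') as f:
            gmmGen = pickle.load(f)
        with open(spoofModelPath + 'spoof_model_' + str(component) + '.p', 'rb') as f:
            gmmSpoof = pickle.load(f)

        scoreFile = scoreSavePath + '/scores_' + str(component) + '.txt'
        with open(scoreFile, 'w') as f:
            for utterance in test_data:
                # Average frame log-likelihood under each model; their
                # difference is the log-likelihood-ratio score of the utterance
                llr = np.mean(gmmGen.score_samples(utterance)) - \
                      np.mean(gmmSpoof.score_samples(utterance))
                f.write('%f\n' % llr)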
def train_all_SVMs(penalty_list, featPath, savePath, folderName, train_on):
    for penalty in penalty_list:
        # 1. Train an SVM on the training features for this penalty value
        print('Training SVM ...')
        modelSavePath = savePath + folderName + str(penalty)
        makeDirectory(modelSavePath)
        svm, scaler = train_svm(featPath, modelSavePath, penalty, train_on)

        # 2. Score the train, dev and eval feature sets with the trained SVM
        print('Testing SVM ...')
        test_svm(svm, scaler, featPath + '/train/features.npz',
                 modelSavePath + '/train_prediction.txt')
        test_svm(svm, scaler, featPath + '/dev/features.npz',
                 modelSavePath + '/dev_prediction.txt')
        test_svm(svm, scaler, featPath + '/eval/features.npz',
                 modelSavePath + '/eval_prediction.txt')
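# Hypothetical driver showing how train_all_SVMs() might be invoked. The
# penalty grid, feature path and output path below are placeholders, not
# values taken from the original experiments.
if __name__ == '__main__':
    penalty_list = [0.01, 0.1, 1.0, 10.0]      # SVM C values to sweep (placeholder)
    featPath = '../features/CQCC.60'           # assumed feature directory
    savePath = '../svm_models/'                # assumed output directory
    train_all_SVMs(penalty_list, featPath, savePath, 'svm_C_', train_on='train')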
import configparser
from datetime import datetime
from os.path import abspath, dirname, join


def main(argv):
    try:
        configFile = join(dirname(abspath(__file__)), 'config.ini')
        config = configparser.ConfigParser()
        config.read(configFile)

        pathEntity = config['PATHS']['pathEntity']
        pathSource = config['PATHS']['pathSource']
        primaryKeys = config['OTHERS']['primaryKeys'].split(",")
        outputDirectory = config['OTHERS']['outputDirectory']
        outputFileName = config['OTHERS']['outputFileName']

        start = datetime.now()
        print("STARTED on %s" % start)

        tableList = list(getTablesWithPrimaryKey(pathEntity, primaryKeys).keys())
        print("No. of tables identified: ", len(tableList))

        if not outputDirectory:
            outputDirectory = join(dirname(abspath(__file__)), "output")
        makeDirectory(outputDirectory)
        print("Output folder: %s" % outputDirectory)

        outputFileLog = join(outputDirectory, '%s.txt' % outputFileName)
        outputFileXls = join(outputDirectory, '%s.xlsx' % outputFileName)
        doValidate(tableList, pathSource, primaryKeys, outputFileLog, outputFileXls)

        finish = datetime.now()
        print("ENDED on %s" % finish)
        print("Scan Duration: %s" % (finish - start))
    except Exception as ex:
        print('\nERROR FOUND!\n{}'.format(ex))
    finally:
        input("\nPress enter to exit!")
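# The script reads its settings from a config.ini placed next to it. An
# illustrative layout inferred from the keys accessed above; all values are
# placeholders:
#
#   [PATHS]
#   pathEntity = C:\project\entities
#   pathSource = C:\project\source
#
#   [OTHERS]
#   primaryKeys = ID,CODE
#   outputDirectory =
#   outputFileName = validation_report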
import numpy as np
import scipy.io


def convert_matlab_to_numpy(matFile, saveFile):
    # Load the MATLAB feature file and collect the per-utterance matrices
    mat = scipy.io.loadmat(matFile)
    data = mat['features']

    features = list()
    for matrix in data:
        features.append(matrix[0])

    # Save the features as a compressed numpy archive
    np.savez_compressed(saveFile, features=features)
    #return np.asarray(new_data)


#featTypes = ['IMFCC', 'LPCC', 'LFCC', 'RFCC', 'CQCC.60', 'MFCC']  #'SCMC'
featTypes = ['CQCC.60', 'MFCC']  #'SCMC'
base = '/homes/bc305/myphd/stage2/stage1_scripts/afterInterspeech/repeat/individual_systems/'
saveBase = '/homes/bc305/myphd/stage2/deeplearning.experiment1/features/'

for feat in featTypes:
    savePath = saveBase + feat
    makeDirectory(savePath)

    print('Converting training features..')
    train = base + feat + '/features/20ms/train.mat'
    convert_matlab_to_numpy(train, savePath + '/train')

    print('Converting dev features..')
    dev = base + feat + '/features/20ms/dev.mat'
    convert_matlab_to_numpy(dev, savePath + '/dev')

    #test = base + feat + '/features/20ms/eval.mat'
    #convert_matlab_to_numpy(test, savePath + '/eval')
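# The per-utterance matrices have different numbers of frames, so numpy stores
# the 'features' entry as an object array. A sketch of reading such an archive
# back (an assumed counterpart to the loadFeatures() helper used elsewhere;
# allow_pickle=True is required to load object arrays):
def loadFeatures_sketch(featureFile):
    with np.load(featureFile + '.npz', allow_pickle=True) as archive:
        return list(archive['features'])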
def run_new(self):
    val_accs, test_accs = [], []
    makeDirectory('torch_saved/')
    save_path = 'torch_saved/{}'.format(self.p.name)

    if self.p.restore:
        self.load_model(save_path)
        print('Successfully loaded previous model')

    if torch.cuda.is_available():
        torch.cuda.synchronize()

    # Initialise model and optimizer
    self.model = self.addModel()
    self.optimizer = self.addOptimizer()

    dataset = self.data
    if self.p.dataset != "wechat":
        # 50% train / 25% validation / 25% test split
        num_training = int(len(dataset) * 0.5)
        num_val = int(len(dataset) * 0.75) - num_training
        num_test = len(dataset) - (num_training + num_val)
    else:
        num_training = dataset.get_samples_num("train")
        num_val = dataset.get_samples_num("valid")
        num_test = dataset.get_samples_num("test")
    logger.info("num train %d, num valid %d, num test %d",
                num_training, num_val, num_test)

    # training_set, validation_set, test_set = random_split(dataset, [num_training, num_val, num_test])
    train_dataset = dataset[:num_training]
    val_dataset = dataset[num_training:(num_training + num_val)]
    test_dataset = dataset[(num_training + num_val):]

    # Use the dense loader when the samples carry dense adjacency matrices
    if 'adj' in train_dataset[0]:
        train_loader = DenseLoader(train_dataset, self.p.batch_size, shuffle=True)
        val_loader = DenseLoader(val_dataset, self.p.batch_size, shuffle=False)
        test_loader = DenseLoader(test_dataset, self.p.batch_size, shuffle=False)
    else:
        train_loader = DataLoader(train_dataset, self.p.batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, self.p.batch_size, shuffle=False)
        test_loader = DataLoader(test_dataset, self.p.batch_size, shuffle=False)

    if torch.cuda.is_available():
        torch.cuda.synchronize()

    best_val_acc, best_test_acc = 0.0, 0.0
    best_thr = None

    # Baseline evaluation before training starts
    val_metrics, val_loss, thr = self.evaluate(val_loader, -1, return_best_thr=True)
    test_metrics, test_loss, _ = self.evaluate(test_loader, -1, thr=0.5)

    for epoch in range(1, self.p.max_epochs + 1):
        train_loss = self.run_epoch(train_loader, epoch)
        val_metrics, val_loss, thr = self.evaluate(val_loader, epoch, return_best_thr=True)
        test_metrics, test_loss, _ = self.evaluate(test_loader, epoch, thr=thr)
        val_auc = val_metrics[-1]

        # Learning-rate decay
        if epoch % self.p.lr_decay_step == 0:
            for param_group in self.optimizer.param_groups:
                param_group['lr'] = self.p.lr_decay_factor * param_group['lr']

        # Save the model whenever the validation AUC improves
        if val_auc > best_val_acc:
            best_val_acc = val_auc
            best_thr = thr
            self.save_model(save_path)
            logger.info("************BEST UNTIL NOW**************")

        print('---[INFO]---{:03d}: Loss: {:.4f}\tVal Acc: {:.4f}'.format(
            epoch, train_loss, best_val_acc))
        print('---[INFO]---{:03d}: Test metrics'.format(epoch), test_metrics)

    # Load the best model for the final test pass
    self.load_model(save_path)
    test_metrics, test_loss, _ = self.evaluate(test_loader, self.p.max_epochs + 1, thr=thr)
    print('---[INFO]---Finally: Test metrics', test_metrics)
def run(self):
    val_accs, test_accs = [], []
    makeDirectory('torch_saved/')
    save_path = 'torch_saved/{}'.format(self.p.name)

    if self.p.restore:
        self.load_model(save_path)
        print('Successfully Loaded previous model')

    if torch.cuda.is_available():
        torch.cuda.synchronize()

    # Iterate over 10 folds
    for fold, (train_idx, test_idx, val_idx) in enumerate(zip(*self.k_fold())):

        # Reinitialise model and optimizer for each fold
        self.model = self.addModel()
        self.optimizer = self.addOptimizer()

        train_dataset = self.data[train_idx]
        test_dataset = self.data[test_idx]
        val_dataset = self.data[val_idx]

        if 'adj' in train_dataset[0]:
            train_loader = DenseLoader(train_dataset, self.p.batch_size, shuffle=True)
            val_loader = DenseLoader(val_dataset, self.p.batch_size, shuffle=False)
            test_loader = DenseLoader(test_dataset, self.p.batch_size, shuffle=False)
        else:
            train_loader = DataLoader(train_dataset, self.p.batch_size, shuffle=True)
            val_loader = DataLoader(val_dataset, self.p.batch_size, shuffle=False)
            test_loader = DataLoader(test_dataset, self.p.batch_size, shuffle=False)

        if torch.cuda.is_available():
            torch.cuda.synchronize()

        best_val_acc, best_test_acc = 0.0, 0.0
        for epoch in range(1, self.p.max_epochs + 1):
            train_loss = self.run_epoch(train_loader)
            val_acc = self.predict(val_loader)

            # Learning-rate decay
            if epoch % self.p.lr_decay_step == 0:
                for param_group in self.optimizer.param_groups:
                    param_group['lr'] = self.p.lr_decay_factor * param_group['lr']

            # Save model for best validation score
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                self.save_model(save_path)

            print('---[INFO]---{:02d}/{:03d}: Loss: {:.4f}\tVal Acc: {:.4f}'.format(
                fold + 1, epoch, train_loss, best_val_acc))

        # Load the best model of this fold for testing
        self.load_model(save_path)
        best_test_acc = self.predict(test_loader)

        if torch.cuda.is_available():
            torch.cuda.synchronize()

        val_accs.append(best_val_acc)
        test_accs.append(best_test_acc)

    val_acc_mean = np.round(np.mean(val_accs), 4)
    test_acc_mean = np.round(np.mean(test_accs), 4)
    print('---[INFO]---Val Acc: {:.4f}, Test Accuracy: {:.3f}'.format(
        val_acc_mean, test_acc_mean))

    return val_acc_mean, test_acc_mean
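# k_fold() is defined elsewhere in this class. A minimal sketch of a common
# implementation of the pattern used above, assuming a stratified 10-fold
# split in which each fold's test indices come from one fold, the validation
# indices from the preceding fold, and the rest form the training set
# (self.data.data.y holding the graph labels is an assumption):
def k_fold_sketch(self, folds=10):
    from sklearn.model_selection import StratifiedKFold

    skf = StratifiedKFold(folds, shuffle=True, random_state=0)  # placeholder seed

    test_indices, train_indices = [], []
    labels = self.data.data.y
    for _, test_idx in skf.split(torch.zeros(len(self.data)), labels):
        test_indices.append(torch.from_numpy(test_idx))

    # Validation fold i reuses the test fold of split i - 1
    val_indices = [test_indices[i - 1] for i in range(folds)]

    for i in range(folds):
        train_mask = torch.ones(len(self.data), dtype=torch.bool)
        train_mask[test_indices[i]] = False
        train_mask[val_indices[i]] = False
        train_indices.append(train_mask.nonzero(as_tuple=False).view(-1))

    # run() consumes these three lists via zip(*self.k_fold())
    return train_indices, test_indices, val_indices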
def get_weights_biases():
    model_path = '../models/model1/using_1sec_cnnModel1_global_Normalization_dropout_0.1_0.4/'
    save_path = '../model_parameters/pindrop_model1_keep0.1_0.2_0.4/'
    makeDirectory(save_path)
    access_learned_parameters(model_path, save_path)