Example #1
import pickle

from sklearn import mixture


def trainGMM(gen_train_data, spoof_train_data, mixtures, gPath, sPath, init):
    # init is either 'kmeans' or 'random'
    makeDirectory(gPath)
    makeDirectory(sPath)

    for component in mixtures:
        print('Training GMM for genuine data: %d components, diagonal cov, %s init'
              % (component, init))
        gmmGen = mixture.GaussianMixture(
            n_components=component,
            covariance_type='diag',
            max_iter=100,
            init_params=init,
            verbose=2)  # capping EM at 10 iterations didn't help, hence max_iter=100
        gmmGen.fit(gen_train_data)

        # Train GMM for spoof data (same settings as the genuine model,
        # including the init argument instead of a hardcoded 'kmeans')
        print('Training GMM for spoof data: %d components, diagonal cov, %s init'
              % (component, init))
        gmmSpoof = mixture.GaussianMixture(n_components=component,
                                           covariance_type='diag',
                                           max_iter=100,
                                           init_params=init,
                                           verbose=2)
        gmmSpoof.fit(spoof_train_data)

        gModelName = 'genuine_model_' + str(component) + '.p'
        sModelName = 'spoof_model_' + str(component) + '.p'

        # Save the models using pickle
        with open(gPath + gModelName, 'wb') as f:
            pickle.dump(gmmGen, f)
        with open(sPath + sModelName, 'wb') as f:
            pickle.dump(gmmSpoof, f)
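
Every example in this listing calls a makeDirectory helper that is defined elsewhere; a minimal sketch of what it presumably does:

import os


def makeDirectory(path):
    # create the directory (and any missing parents); do nothing if it exists
    os.makedirs(path, exist_ok=True)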
Example #2
def test_GMM_Models(mixtures, gmmModelPath, scoreSavePath, test_feature_file, pca, dim):
    genModelPath = gmmModelPath+'/genuine/'
    spoofModelPath = gmmModelPath+'/spoof/'    
    makeDirectory(scoreSavePath)
    
    test_data = loadFeatures(test_feature_file)
    # 3014/1710/13306 are the utterance counts of the train/dev/eval sets;
    # with 'or' between '!=' checks the original condition was always true
    if len(test_data) not in (3014, 1710, 13306):
        test_data = get_data_in_matrix(test_data)
    
    # apply PCA, keeping the first dim components
    if pca is not None:
        test_data = pca.transform(test_data)[:,0:dim]     
        
    scoreTestFile(mixtures, test_data, genModelPath, spoofModelPath, scoreSavePath)
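
scoreTestFile is defined elsewhere. A minimal sketch of the log-likelihood-ratio scoring it presumably performs, reusing the model file names from Example #1 (the np.savetxt output format is an assumption):

import pickle

import numpy as np


def scoreTestFile(mixtures, test_data, genModelPath, spoofModelPath, savePath):
    for component in mixtures:
        # load the genuine and spoof GMMs saved by trainGMM
        with open(genModelPath + 'genuine_model_%d.p' % component, 'rb') as f:
            gmmGen = pickle.load(f)
        with open(spoofModelPath + 'spoof_model_%d.p' % component, 'rb') as f:
            gmmSpoof = pickle.load(f)

        # per-frame log-likelihood ratio: log p(x|genuine) - log p(x|spoof)
        scores = gmmGen.score_samples(test_data) - gmmSpoof.score_samples(test_data)
        np.savetxt(savePath + '/scores_%d.txt' % component, scores)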
Example #3
def train_all_SVMs(penalty_list, featPath, savePath, folderName, train_on):
    for penalty in penalty_list:

        # 1. Training
        print('Training SVM ...')
        # build a per-penalty output directory; a fresh variable avoids
        # savePath accumulating suffixes across loop iterations
        modelPath = savePath + folderName + str(penalty)

        makeDirectory(modelPath)
        svm, scaler = train_svm(featPath, modelPath, penalty, train_on)

        # 2. Testing on the train/dev/eval features
        print('Testing SVM ...')
        test_svm(svm, scaler, featPath + '/train/features.npz',
                 modelPath + '/train_prediction.txt')
        test_svm(svm, scaler, featPath + '/dev/features.npz',
                 modelPath + '/dev_prediction.txt')
        test_svm(svm, scaler, featPath + '/eval/features.npz',
                 modelPath + '/eval_prediction.txt')
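
train_svm and test_svm are helpers defined elsewhere; a minimal sketch of train_svm under the feature layout used above (the 'labels' key inside features.npz is an assumption):

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC


def train_svm(featPath, savePath, penalty, train_on):
    # train_on names the subset to fit on, e.g. 'train'
    data = np.load(featPath + '/' + train_on + '/features.npz')
    X, y = data['features'], data['labels']

    # standardise the features, then fit a linear SVM with C = penalty
    scaler = StandardScaler().fit(X)
    svm = LinearSVC(C=penalty)
    svm.fit(scaler.transform(X), y)
    return svm, scaler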
Example #4
import configparser
from datetime import datetime
from os.path import abspath, dirname, join


def main(argv):
    try:
        configFile = join(dirname(abspath(__file__)), 'config.ini')
        config = configparser.ConfigParser()
        config.read(configFile)

        pathEntity = config['PATHS']['pathEntity']
        pathSource = config['PATHS']['pathSource']
        primaryKeys = config['OTHERS']['primaryKeys'].split(",")
        outputDirectory = config['OTHERS']['outputDirectory']
        outputFileName = config['OTHERS']['outputFileName']

        start = datetime.now()
        print("STARTED on %s" % start)

        tableList = list(getTablesWithPrimaryKey(pathEntity, primaryKeys).keys())
        print("No. of tables identified: ", len(tableList))

        if not outputDirectory:
            outputDirectory = join(dirname(abspath(__file__)), "output")

        makeDirectory(outputDirectory)
        print("Output folder: %s" % outputDirectory)
        outputFileLog = join(outputDirectory, '%s.txt' % outputFileName)
        outputFileXls = join(outputDirectory, '%s.xlsx' % outputFileName)
        doValidate(tableList, pathSource, primaryKeys, outputFileLog,
                   outputFileXls)

        finish = datetime.now()
        print("ENDED on %s" % finish)
        print("Scan Duration: %s" % (finish - start))
    except Exception as ex:
        print('\nERROR FOUND!\n{}'.format(ex))
    finally:
        input("\nPress enter to exit!")
Example #5
import numpy as np
import scipy.io


def convert_matlab_to_numpy(matFile, saveFile):
    # load the MATLAB feature file and pull out the per-utterance matrices
    mat = scipy.io.loadmat(matFile)
    data = mat['features']

    features = list()
    for matrix in data:
        features.append(matrix[0])

    # save as a compressed NumPy archive under the 'features' key
    np.savez_compressed(saveFile, features=features)


#featTypes=['IMFCC', 'LPCC', 'LFCC', 'RFCC', 'CQCC.60','MFCC'] #'SCMC'
featTypes = ['CQCC.60', 'MFCC']  #'SCMC'
base = '/homes/bc305/myphd/stage2/stage1_scripts/afterInterspeech/repeat/individual_systems/'
saveBase = '/homes/bc305/myphd/stage2/deeplearning.experiment1/features/'

for feat in featTypes:
    savePath = saveBase + feat
    makeDirectory(savePath)

    print('Converting training features..')
    train = base + feat + '/features/20ms/train.mat'
    convert_matlab_to_numpy(train, savePath + '/train')

    print('Converting dev features..')
    dev = base + feat + '/features/20ms/dev.mat'
    convert_matlab_to_numpy(dev, savePath + '/dev')

    #test=base+feat+ '/features/20ms/eval.mat'
    #convert_matlab_to_numpy(test, savePath+'/eval')
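
As a quick sanity check, the converted archives can be read back. Note that np.savez_compressed appends .npz to the name, and variable-length utterance matrices end up stored as an object array, so allow_pickle=True is needed:

import numpy as np

data = np.load(savePath + '/train.npz', allow_pickle=True)
features = data['features']
print(len(features), features[0].shape)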
Example #6
    def run_new(self):
        val_accs, test_accs = [], []

        makeDirectory('torch_saved/')
        save_path = 'torch_saved/{}'.format(self.p.name)

        if self.p.restore:
            self.load_model(save_path)
            print('Successfully Loaded previous model')

        if torch.cuda.is_available():
            torch.cuda.synchronize()

        # Reinitialise model and optimizer for each fold
        self.model = self.addModel()
        self.optimizer = self.addOptimizer()

        dataset = self.data

        if self.p.dataset != "wechat":
            num_training = int(len(dataset) * 0.5)
            num_val = int(len(dataset) * 0.75) - num_training
            num_test = len(dataset) - (num_training + num_val)
        else:
            num_training = dataset.get_samples_num("train")
            num_val = dataset.get_samples_num("valid")
            num_test = dataset.get_samples_num("test")
        logger.info("num train %d, num valid %d, num test %d", num_training,
                    num_val, num_test)
        # training_set, validation_set, test_set = random_split(dataset, [num_training, num_val, num_test])
        train_dataset = dataset[:num_training]
        val_dataset = dataset[num_training:(num_training + num_val)]
        test_dataset = dataset[(num_training + num_val):]

        if 'adj' in train_dataset[0]:
            train_loader = DenseLoader(train_dataset,
                                       self.p.batch_size,
                                       shuffle=True)
            val_loader = DenseLoader(val_dataset,
                                     self.p.batch_size,
                                     shuffle=False)
            test_loader = DenseLoader(test_dataset,
                                      self.p.batch_size,
                                      shuffle=False)
        else:
            train_loader = DataLoader(train_dataset,
                                      self.p.batch_size,
                                      shuffle=True)
            val_loader = DataLoader(val_dataset,
                                    self.p.batch_size,
                                    shuffle=False)
            test_loader = DataLoader(test_dataset,
                                     self.p.batch_size,
                                     shuffle=False)

        if torch.cuda.is_available():
            torch.cuda.synchronize()

        best_val_acc, best_test_acc = 0.0, 0.0
        best_thr = None

        val_metrics, val_loss, thr = self.evaluate(val_loader,
                                                   -1,
                                                   return_best_thr=True)
        test_metrics, test_loss, _ = self.evaluate(test_loader, -1, thr=0.5)

        for epoch in range(1, self.p.max_epochs + 1):
            train_loss = self.run_epoch(train_loader, epoch)
            val_metrics, val_loss, thr = self.evaluate(val_loader,
                                                       epoch,
                                                       return_best_thr=True)
            test_metrics, test_loss, _ = self.evaluate(test_loader,
                                                       epoch,
                                                       thr=thr)
            val_auc = val_metrics[-1]

            # lr decay
            if epoch % self.p.lr_decay_step == 0:
                for param_group in self.optimizer.param_groups:
                    param_group['lr'] *= self.p.lr_decay_factor
            # save model for best val score
            if val_auc > best_val_acc:
                best_val_acc = val_auc
                best_thr = thr
                self.save_model(save_path)
                logger.info("************BEST UNTIL NOW**************")

            print('---[INFO]---{:03d}: Loss: {:.4f}\tVal Acc: {:.4f}'.format(
                epoch, train_loss, best_val_acc))
            print('---[INFO]---{:03d}: Test metrics'.format(epoch),
                  test_metrics)

        # load best model for testing, using the threshold tracked alongside
        # the best validation score (best_thr) rather than the last epoch's thr
        self.load_model(save_path)
        test_metrics, test_loss, _ = self.evaluate(test_loader,
                                                   self.p.max_epochs + 1,
                                                   thr=best_thr)
        print('---[INFO]---Finally: Test metrics', test_metrics)
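
save_model and load_model are not shown; a minimal sketch of the usual state_dict pattern they presumably follow (methods of the same class, with torch imported at module level):

    def save_model(self, save_path):
        # persist model and optimizer state so a run can be restored later
        torch.save({'model': self.model.state_dict(),
                    'optimizer': self.optimizer.state_dict()}, save_path)

    def load_model(self, save_path):
        state = torch.load(save_path)
        self.model.load_state_dict(state['model'])
        self.optimizer.load_state_dict(state['optimizer'])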
Example #7
    def run(self):
        val_accs, test_accs = [], []

        makeDirectory('torch_saved/')
        save_path = 'torch_saved/{}'.format(self.p.name)

        if self.p.restore:
            self.load_model(save_path)
            print('Successfully Loaded previous model')

        if torch.cuda.is_available():
            torch.cuda.synchronize()

        # iterate over 10 folds
        for fold, (train_idx, test_idx,
                   val_idx) in enumerate(zip(*self.k_fold())):

            # Reinitialise model and optimizer for each fold
            self.model = self.addModel()
            self.optimizer = self.addOptimizer()

            train_dataset = self.data[train_idx]
            test_dataset = self.data[test_idx]
            val_dataset = self.data[val_idx]

            if 'adj' in train_dataset[0]:
                train_loader = DenseLoader(train_dataset,
                                           self.p.batch_size,
                                           shuffle=True)
                val_loader = DenseLoader(val_dataset,
                                         self.p.batch_size,
                                         shuffle=False)
                test_loader = DenseLoader(test_dataset,
                                          self.p.batch_size,
                                          shuffle=False)
            else:
                train_loader = DataLoader(train_dataset,
                                          self.p.batch_size,
                                          shuffle=True)
                val_loader = DataLoader(val_dataset,
                                        self.p.batch_size,
                                        shuffle=False)
                test_loader = DataLoader(test_dataset,
                                         self.p.batch_size,
                                         shuffle=False)

            if torch.cuda.is_available():
                torch.cuda.synchronize()

            best_val_acc, best_test_acc = 0.0, 0.0

            for epoch in range(1, self.p.max_epochs + 1):
                train_loss = self.run_epoch(train_loader)
                val_acc = self.predict(val_loader)

                # lr decay
                if epoch % self.p.lr_decay_step == 0:
                    for param_group in self.optimizer.param_groups:
                        param_group['lr'] *= self.p.lr_decay_factor
                # save model for best val score
                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    self.save_model(save_path)

                print('---[INFO]---{:02d}/{:03d}: Loss: {:.4f}\tVal Acc: {:.4f}'
                      .format(fold + 1, epoch, train_loss, best_val_acc))

            # load best model for testing
            self.load_model(save_path)
            best_test_acc = self.predict(test_loader)

            if torch.cuda.is_available():
                torch.cuda.synchronize()

            val_accs.append(best_val_acc)
            test_accs.append(best_test_acc)

        val_acc_mean = np.round(np.mean(val_accs), 4)
        test_acc_mean = np.round(np.mean(test_accs), 4)

        print('---[INFO]---Val Acc: {:.4f}, Test Acc: {:.4f}'.format(
            val_acc_mean, test_acc_mean))

        return val_acc_mean, test_acc_mean
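
run() assumes self.k_fold() returns three parallel lists of index tensors in (train, test, val) order. A sketch of the usual stratified 10-fold split (as a method of the same class, with torch and StratifiedKFold imported at module level; self.data.data.y as the per-graph labels is an assumption):

    def k_fold(self, folds=10):
        skf = StratifiedKFold(folds, shuffle=True, random_state=12345)

        # stratify on the graph labels; torch.zeros is just a placeholder X
        test_indices = [torch.from_numpy(idx) for _, idx in
                        skf.split(torch.zeros(len(self.data)), self.data.data.y)]

        # reuse the previous fold's test split as this fold's validation split
        val_indices = [test_indices[i - 1] for i in range(folds)]

        train_indices = []
        for i in range(folds):
            # everything not in the test or val split goes to training
            mask = torch.ones(len(self.data), dtype=torch.bool)
            mask[test_indices[i]] = False
            mask[val_indices[i]] = False
            train_indices.append(mask.nonzero(as_tuple=False).view(-1))

        return train_indices, test_indices, val_indices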
Example #8
def get_weights_biases():
    model_path = '../models/model1/using_1sec_cnnModel1_global_Normalization_dropout_0.1_0.4/'
    save_path = '../model_parameters/pindrop_model1_keep0.1_0.2_0.4/'
    makeDirectory(save_path)
    access_learned_parameters(model_path, save_path)
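
access_learned_parameters is defined elsewhere. Assuming model_path holds a TensorFlow checkpoint (the directory naming above suggests a TF CNN, but that is an assumption), a minimal sketch that dumps every learned variable to .npy files:

import numpy as np
import tensorflow as tf


def access_learned_parameters(model_path, save_path):
    # locate the latest checkpoint and walk over its variables
    ckpt = tf.train.latest_checkpoint(model_path)
    for name, _ in tf.train.list_variables(ckpt):
        # save each weight/bias tensor under a filesystem-safe name
        value = tf.train.load_variable(ckpt, name)
        np.save(save_path + name.replace('/', '_') + '.npy', value)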