def find_best_parameter(self, ratio, hdn_nstates_list, niter, nrepetitions, data, labels,
                        results_path="../../Results/crossvalidated_accuracy.txt"):
    """
    parameter search over number of hidden states

    For every candidate in hdn_nstates_list a model is trained and tested
    nrepetitions times, each time on a new DataHandler.split of the data.
    Every accuracy is printed as it is obtained; at the end the mean and std
    per candidate are printed and the raw accuracies are written to
    results_path, one line per candidate: "<nstates>, <acc1>, <acc2>, ...".
    No "best" value is selected here; the caller compares the results.

    @param ratio: ratio of the dataset split for train, e.g. 0.7
    @param hdn_nstates_list: list of number of hidden states to try, e.g. range(2,10)
    @param niter: number of iterations for hmm model to perform, e.g. 10
    @param nrepetitions: number of repeated runs for the same hidden state, but for the different split, e.g. 5
    @param data: dataset, forwarded unchanged to DataHandler.split
    @param labels: labels for data, forwarded unchanged to DataHandler.split
    @param results_path: file the accuracies are written to (overwritten);
                         defaults to the location that used to be hard-coded
    @return: dict mapping each number of hidden states to the list of
             accuracies obtained for it (one entry per repetition)
    """
    # Materialize once: the candidates are iterated twice (training loop and
    # reporting loop), which would silently skip the report for a generator.
    nstates_candidates = list(hdn_nstates_list)

    accuracy_results = {}
    for nstates in nstates_candidates:
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print('state' + str(nstates))
        accuracy_results[nstates] = []
        for run in range(nrepetitions):
            print('repetition' + ' ' + str(run))

            # make new random split
            train_data, train_labels, val_data, val_labels = DataHandler.split(ratio, data, labels)

            # train a model on this split
            model_pos, model_neg = self.train(nstates, niter, train_data, train_labels)

            # test the model and store the results
            acc = self.test(model_pos, model_neg, val_data, val_labels)
            print("%s %s" % (nstates, acc))
            accuracy_results[nstates].append(acc)

    with open(results_path, "w") as f:
        for nstates in nstates_candidates:
            accuracies = accuracy_results[nstates]
            print("%s %s %s" % (nstates, np.mean(accuracies), np.std(accuracies)))
            f.write("%d, %s\n" % (nstates, ", ".join([str(x) for x in accuracies])))

    return accuracy_results
Example #2
0
    def find_best_parameter(self, ratio, hdn_nstates_list, niter, nrepetitions,
                            data, labels,
                            results_path="../../Results/crossvalidated_accuracy.txt"):
        """
        parameter search over number of hidden states

        For every candidate in hdn_nstates_list a model is trained and tested
        nrepetitions times, each time on a new DataHandler.split of the data.
        Every accuracy is printed as it is obtained; at the end the mean and
        std per candidate are printed and the raw accuracies are written to
        results_path, one line per candidate: "<nstates>, <acc1>, <acc2>, ...".
        No "best" value is selected here; the caller compares the results.

        @param ratio: ratio of the dataset split for train, e.g. 0.7
        @param hdn_nstates_list: list of number of hidden states to try, e.g. range(2,10)
        @param niter: number of iterations for hmm model to perform, e.g. 10
        @param nrepetitions: number of repeated runs for the same hidden state, but for the different split, e.g. 5
        @param data: dataset, forwarded unchanged to DataHandler.split
        @param labels: labels for data, forwarded unchanged to DataHandler.split
        @param results_path: file the accuracies are written to (overwritten);
                             defaults to the location that used to be hard-coded
        @return: dict mapping each number of hidden states to the list of
                 accuracies obtained for it (one entry per repetition)
        """
        # Materialize once: the candidates are iterated twice (training loop
        # and reporting loop), which would silently skip the report for a
        # generator.
        nstates_candidates = list(hdn_nstates_list)

        accuracy_results = {}
        for nstates in nstates_candidates:
            # Single-argument print(...) behaves identically on Python 2 and 3.
            print('state' + str(nstates))
            accuracy_results[nstates] = []
            for run in range(nrepetitions):
                print('repetition' + ' ' + str(run))

                # make new random split
                train_data, train_labels, val_data, val_labels = DataHandler.split(
                    ratio, data, labels)

                # train a model on this split
                model_pos, model_neg = self.train(nstates, niter, train_data,
                                                  train_labels)

                # test the model and store the results
                acc = self.test(model_pos, model_neg, val_data, val_labels)
                print("%s %s" % (nstates, acc))
                accuracy_results[nstates].append(acc)

        with open(results_path, "w") as f:
            for nstates in nstates_candidates:
                accuracies = accuracy_results[nstates]
                print("%s %s %s" % (nstates, np.mean(accuracies),
                                    np.std(accuracies)))
                f.write("%d, %s\n" % (nstates, ", ".join(
                    [str(x) for x in accuracies])))

        return accuracy_results
Example #3
0
def ecoglstm(lstmsize, fcsize, dropout, optim):
    """
    Train an LSTMDiscriminative model on a 0.7 DataHandler.split of the
    ECoGmixed training set and return the negated test result.

    Only element 0 of each argument is used, so every argument must be
    indexable (presumably single-element containers supplied by a
    hyperparameter-search driver -- confirm against the caller).

    @param lstmsize: element 0 is the 1st LSTMDiscriminative constructor argument
    @param fcsize: element 0 is the 2nd LSTMDiscriminative constructor argument
    @param dropout: element 0 is the 3rd LSTMDiscriminative constructor argument
    @param optim: element 0 is the 4th LSTMDiscriminative constructor argument
    @return: the value of lstmcl.test on the validation part, negated
    """
    print("Reading data...")
    samples = np.load(
        "/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_data.npy")
    targets = np.load(
        "/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_labels.npy")

    # hold out part of the training set for validation
    parts = DataHandler.split(0.7, samples, targets)
    train_data, train_labels, val_data, val_labels = parts

    # NOTE(review): the meaning of the trailing constants 10 and 128 is not
    # visible from here -- check the LSTMDiscriminative constructor.
    classifier = LSTMDiscriminative(
        lstmsize[0], fcsize[0], dropout[0], optim[0], 10, 128)
    trained = classifier.train(train_data, train_labels)

    # the test result is negated before being reported and returned
    result = -classifier.test(trained, val_data, val_labels)

    print('Result = %f' % result)
    return result
# parameters
#   NSTATES and NITERS are the first two arguments of hmmcl.train below
NSTATES = 2
NITERS = 2

# NOTE: every message is printed with a single-argument print(...) call, which
# produces the same output on Python 2 and is valid syntax on Python 3.

# load the data
print("Reading data...")
train_data = np.load("/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/train_data.npy")
train_labels = np.load("/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/train_labels.npy")
test_data = np.load("/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/test_data.npy")
test_labels = np.load("/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/test_labels.npy")

# split the training data into two halves
#   fh stands for first half
#   sh stands for second half
print("Splitting data in two halves...")
fh_data, fh_labels, sh_data, sh_labels = DataHandler.split(0.5, train_data, train_labels)

# train HMM on first 50% of the training set
print("Training HMM classifier...")
hmmcl = HMMClassifier()
model_pos, model_neg = hmmcl.train(NSTATES, NITERS, fh_data, fh_labels)

# feed second 50% of the training set into the HMM to obtain
# pos/neg ratio for every sequence in the second half of the training set;
# the same models are also applied to the test set
print("Extracting ratios based on the HMM model...")
sh_ratios = hmmcl.pos_neg_ratios(model_pos, model_neg, sh_data)
test_ratios = hmmcl.pos_neg_ratios(model_pos, model_neg, test_data)

# apply fourier transform on the second 50% of the training set
print("Fouriering the second half of the dataset...")
fourier_sh_data = Fourier.data_to_fourier(sh_data)
# NOTE: every message is printed with a single-argument print(...) call, which
# produces the same output on Python 2 and is valid syntax on Python 3.

# load the data
print("Reading data...")
train_data = np.load(
    "/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/train_data.npy")
train_labels = np.load(
    "/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/train_labels.npy")
test_data = np.load(
    "/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/test_data.npy")
test_labels = np.load(
    "/storage/hpc_anna/GMiC/Data/ECoG/preprocessed/test_labels.npy")

# split the training data into two halves
#   fh stands for first half
#   sh stands for second half
print("Splitting data in two halves...")
fh_data, fh_labels, sh_data, sh_labels = DataHandler.split(
    0.5, train_data, train_labels)

# train HMM on first 50% of the training set
# (NSTATES and NITERS are expected to be defined earlier in the script)
print("Training HMM classifier...")
hmmcl = HMMClassifier()
model_pos, model_neg = hmmcl.train(NSTATES, NITERS, fh_data, fh_labels)

# feed second 50% of the training set into the HMM to obtain
# pos/neg ratio for every sequence in the second half of the training set;
# the same models are also applied to the test set
print("Extracting ratios based on the HMM model...")
sh_ratios = hmmcl.pos_neg_ratios(model_pos, model_neg, sh_data)
test_ratios = hmmcl.pos_neg_ratios(model_pos, model_neg, test_data)

# apply fourier transform on the second 50% of the training set
print("Fouriering the second half of the dataset...")
fourier_sh_data = Fourier.data_to_fourier(sh_data)