Example #1
import numpy as np

from util import partition_data


def part(x, y):
    # Split (x, y) into an 80/20 train/test split and return numpy arrays.
    # partition_data returns [(train_x, train_y), (test_x, test_y)].
    partition = partition_data(x, y, [0.8, 0.2])
    mli = lambda a: np.array(a)
    train_x = mli(partition[0][0])
    train_y = mli(partition[0][1])
    test_x = mli(partition[1][0])
    test_y = mli(partition[1][1])
    return train_x, train_y, test_x, test_y
Example #2
#GET RUN PARAMETERS
util.get_run_parameters(SB)

#READ INPUT FILES
reader.read_input(SB)		#READ INPUT FILE AND ADD INFO TO SB
reader.read_pot_file(SB)	#READ NN FILE AND ADD INFO TO SB 
reader.read_database(SB)	#READ DATABASES AND ADD INFO TO SB

#WRITE POSCAR IF DESIRED 
if(SB['dump_poscars']):	util.dump_poscars(SB)	#MOVE TO read_data

#COMPUTE NEIGHBORLIST (NBL) AND LSP FOR ALL STRUCTURES
util.compute_all_nbls(SB)	
util.compute_all_lsps(SB)	
util.partition_data(SB)

if(SB['normalize_gi']):	
	raise Exception("ERROR: NORMALIZATION OF Gi IS CURRENTLY DISABLED")
	util.collect_all_lsp(SB) 	#MAKE A SINGLE MATRIX WITH ALL GI
	util.normalize_lsp(SB)


#-------------------------------------------------------------------------
#PART-2: TRAIN
#-------------------------------------------------------------------------

t		=	0
max_iter	=	SB['max_iter']
training_set	=	SB['training_set']
Example #3
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import accuracy_score

from util import DataReader, partition_data

fp = '../data/E-GEOD-48350/E-GEOD-48350-combined.csv'

x, y = DataReader(fp).get_data()
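# Reduce each two-element label to a single class value: keep x[0] if it equals 1, otherwise x[1]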
argmax = lambda x: x[0] if x[0] == 1 else x[1]
y = list(map(argmax, y))
partition = partition_data(x, y, [0.8, 0.2])

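# Cast the partitioned lists to float numpy arrays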
mli = lambda x: np.array(x).astype(float)
train_x = mli(partition[0][0])
train_y = mli(partition[0][1])
test_x = mli(partition[1][0])
test_y = mli(partition[1][1])

lda = LDA(n_components=1, shrinkage='auto', solver='lsqr')  # n_components must be <= n_classes - 1
lda.fit(train_x, train_y)
test_y_pred = lda.predict(test_x)
print(accuracy_score(test_y, test_y_pred))
Example #4
def mtl3(params):
    # Parameters
    n_hidden_1 = params['n_hidden_1']
    # All three hidden layers use the same width, taken from params['n_hidden_1']
    n_hidden_2 = params['n_hidden_1']
    n_hidden_3 = params['n_hidden_1']
    n_classes = 2
    fold = 10  # Cross-validation folds (the CrossValidation calls below pass the literal 10)
    learning_rate = 1  # Placeholder; replaced below by the exponential-decay schedule
    dropout_prob = 0.5  # keep_prob = 1 - dropout_prob

    ad_data = '../data/48350-AD_6-HD.csv'
    hd_data = '../data/6-HD_48350-AD.csv'

    # Read and parse input data
    adx, ady = DataReader(ad_data).get_data()
    hdx, hdy = DataReader(hd_data).get_data()

    def inverse_argmax(l):
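        """Convert each integer label in l into a length-m one-hot vector, in place."""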
        m = 2
        for i in range(len(l)):
            hold = l[i]
            l[i] = [0] * m
            l[i][hold] = 1

    inverse_argmax(ady)
    inverse_argmax(hdy)

    ad_partition = partition_data(adx, ady, [0.8, 0.2])
    ad_batches = CrossValidation(ad_partition[0][0], ad_partition[0][1], 10)
    adx_test, ady_test = np.array(ad_partition[1][0]), np.array(
        ad_partition[1][1])

    hd_partition = partition_data(hdx, hdy, [0.8, 0.2])
    hd_batches = CrossValidation(hd_partition[0][0], hd_partition[0][1], 10)
    hdx_test, hdy_test = np.array(hd_partition[1][0]), np.array(
        hd_partition[1][1])

    n_input = len(adx_test[0])
    batch_size = 50

    # Tensorflow variables

    # Shared layer for MTL hard parameter sharing
    shared_layer_weights = tf.Variable(
        tf.random_normal([n_hidden_1, n_hidden_2]))
    shared_layer_biases = tf.Variable(tf.random_normal([n_hidden_2]))

    ad_weights = {
        'hidden_1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
        'hidden_2': shared_layer_weights,
        'hidden_3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])),
        'out': tf.Variable(tf.random_normal([n_hidden_3, n_classes]))
    }

    ad_biases = {
        'hidden_1': tf.Variable(tf.random_normal([n_hidden_1])),
        'hidden_2': shared_layer_biases,
        'hidden_3': tf.Variable(tf.random_normal([n_hidden_3])),
        'out': tf.Variable(tf.random_normal([n_classes]))
    }

    hd_weights = {
        'hidden_1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
        'hidden_2': shared_layer_weights,
        'hidden_3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])),
        'out': tf.Variable(tf.random_normal([n_hidden_3, n_classes]))
    }

    hd_biases = {
        'hidden_1': tf.Variable(tf.random_normal([n_hidden_1])),
        'hidden_2': shared_layer_biases,
        'hidden_3': tf.Variable(tf.random_normal([n_hidden_3])),
        'out': tf.Variable(tf.random_normal([n_classes]))
    }

    # Placeholders
    x = tf.placeholder(tf.float32, [None, n_input])
    y = tf.placeholder(tf.float32, [None, n_classes])
    dim = tf.placeholder(tf.int32)
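    # dim is fed during evaluation below but is not consumed by any op in the graph shown here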

    model = FFNN  #either MAX or FFNN
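    # NOTE: FFNN and MAX are model-building helpers assumed to be defined elsewhere
    # in this project; each takes (x, weights, biases) and returns class logits.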

    ad_pred = model(x, ad_weights, ad_biases)
    ad_cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=ad_pred, labels=y))

    hd_pred = model(x, hd_weights, hd_biases)
    hd_cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=hd_pred, labels=y))

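    # global_step is never passed to minimize(), so the optimizers do not increment it;
    # its value is instead fed manually (as i) in the training feed_dicts below.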
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.1
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step,
                                               50,
                                               0.5,
                                               staircase=False)

    ad_optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(ad_cost)
    hd_optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(hd_cost)

    ad_correct_pred = tf.equal(tf.argmax(ad_pred, 1), tf.argmax(y, 1))
    hd_correct_pred = tf.equal(tf.argmax(hd_pred, 1), tf.argmax(y, 1))
    ad_accuracy = tf.reduce_mean(tf.cast(ad_correct_pred, tf.float32))
    hd_accuracy = tf.reduce_mean(tf.cast(hd_correct_pred, tf.float32))

    init = tf.global_variables_initializer()
    v_acc_hist = []
    # Train
    with tf.Session() as sess:
        sess.run(init)
        steps = 2001
        #print('Iteration | Valid loss | Valid acc')
        for i in range(steps):
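            # Alternate tasks: even steps train/evaluate the AD model, odd steps the HD model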
            if i % 2 == 0:
                train_x, train_y = ad_batches.next_batch(batch_size)
                sess.run(ad_optimizer,
                         feed_dict={
                             x: train_x,
                             y: train_y,
                             global_step: i
                         })
                test_x, test_y = adx_test, ady_test
                v_acc = sess.run(ad_accuracy,
                                 feed_dict={
                                     x: test_x,
                                     y: test_y,
                                     dim: len(test_x)
                                 })
                v_lss = sess.run(ad_cost,
                                 feed_dict={
                                     x: test_x,
                                     y: test_y,
                                     dim: len(test_x)
                                 })
                #print('{0:9} |   {1:2.5f} |   {2:2.5f}'.format(i, v_lss, v_acc))
                v_acc_hist.append(v_acc)
            else:
                train_x, train_y = hd_batches.next_batch(batch_size)
                sess.run(hd_optimizer,
                         feed_dict={
                             x: train_x,
                             y: train_y,
                             global_step: i
                         })
                test_x, test_y = hdx_test, hdy_test
                v_acc = sess.run(hd_accuracy,
                                 feed_dict={
                                     x: test_x,
                                     y: test_y,
                                     dim: len(test_x)
                                 })
                v_lss = sess.run(hd_cost,
                                 feed_dict={
                                     x: test_x,
                                     y: test_y,
                                     dim: len(test_x)
                                 })
                #print('{0:9} |   {1:2.5f} |   {2:2.5f}'.format(i, v_lss, v_acc))

    # Report the hyperparameters and the mean of the last 10 AD validation accuracies
    print(params, sum(v_acc_hist[-10:]) / 10)
Example #5
hdx, hdy = DataReader(hd_data).get_data()

def inverse_argmax(l):
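    """Convert each integer label in l into a length-m one-hot vector, in place."""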
    m = 2
    for i in range(len(l)):
        hold = l[i]
        l[i] = [0] * m
        l[i][hold] = 1


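# Label counts (number of positive labels vs. total samples) before one-hot encoding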
print(sum(ady), len(ady))
print(sum(hdy), len(hdy))
inverse_argmax(ady)
inverse_argmax(hdy)

ad_partition = partition_data(adx, ady, [0.8, 0.2])
ad_batches = CrossValidation(ad_partition[0][0], ad_partition[0][1], 10)
adx_test, ady_test = np.array(ad_partition[1][0]), np.array(ad_partition[1][1])


hd_partition = partition_data(hdx, hdy, [0.8, 0.2])
hd_batches = CrossValidation(hd_partition[0][0], hd_partition[0][1], 10)
hdx_test, hdy_test = np.array(hd_partition[1][0]), np.array(hd_partition[1][1])

n_input = len(adx_test[0])
print(n_input, n_hidden_1, n_hidden_2, n_classes)
batch_size = 50

# Tensorflow variables

# Shared layer for MTL hard parameter sharing