Example #1
def test_run():
    dates = pd.date_range('2014-01-01', '2015-02-10')
    symbols = ['SPY', 'GOOGL', 'GLD']
    df = get_data(symbols, dates)
    #plot_data(df)

    dailyReturns = compute_daily_returns(df)
    #plot_data(dailyReturns,title='daily returns',ylabel='daily returns',xlabel='date')

    #fig, axes = plt.subplots(nrows=2, ncols=1)
    #print(axes)
    dailyReturns.plot(kind='scatter', x='SPY', y='GOOGL')
    betaGoogle, alphaGoogle = np.polyfit(dailyReturns['SPY'],
                                         dailyReturns['GOOGL'], 1)
    # using the line equation, overlay the fitted line on the scatter plot: beta is the slope and alpha is the y-intercept
    plt.plot(dailyReturns["SPY"],
             betaGoogle * dailyReturns['SPY'] + alphaGoogle,
             '-',
             color='red')
    #plt.subplot(2,1,1)
    dailyReturns.plot(kind='scatter', x='SPY', y='GLD')
    betaGold, alphaGold = np.polyfit(dailyReturns['SPY'], dailyReturns['GLD'],
                                     1)
    plt.plot(dailyReturns['SPY'],
             betaGold * dailyReturns['SPY'] + alphaGold,
             '-',
             color='red')
    print('Alpha Gold:', alphaGold)
    print('Beta Gold:', betaGold)
    print('Alpha Google:', alphaGoogle)
    print('Beta Google:', betaGoogle)
    plt.show()

    print(dailyReturns.corr(method='pearson'))
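The comment above notes that for a degree-1 np.polyfit the first returned coefficient is the slope (beta) and the second the intercept (alpha). A minimal sanity-check sketch with synthetic data (not part of the original example):

import numpy as np

# y = 2.0 * x + 0.5 plus a little noise; polyfit should recover ~(2.0, 0.5)
rng = np.random.default_rng(0)
x = np.linspace(-1.0, 1.0, 100)
y = 2.0 * x + 0.5 + rng.normal(0.0, 0.01, x.shape)

beta, alpha = np.polyfit(x, y, 1)
print('slope:', beta, 'intercept:', alpha)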
def get_trainable_data():
    tr, te, mean, std = get_data()
    print(tr.head())
    print(te.head())

    x_train, y_train = df_to_keras_format(tr)
    x_test, y_test = df_to_keras_format(te)

    return (x_train, y_train), (x_test, y_test), mean, std
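df_to_keras_format is not defined in this excerpt; a hypothetical sketch of such a helper, assuming the last DataFrame column is the target and the remaining columns are features:

import numpy as np

def df_to_keras_format(df):
    # Hypothetical helper: split a DataFrame into (features, target) float32 arrays.
    values = df.to_numpy(dtype=np.float32)
    return values[:, :-1], values[:, -1]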
Example #3
def main(_):
    data = get_data(FLAGS.data)
    train, val, test = split_data(data)

    out_count = 1
    input_count = train.shape[1] - out_count

    batch_xs, batch_ys = get_batch(train, 2**4, input_count)
    call_model(FLAGS, 'model', batch_xs)
    print(batch_ys)
Example #4
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('--file_paths', default="data/files.txt")
    parser.add_argument('--landmark_paths', default="data/landmarks.txt")
    parser.add_argument('--landmark', type=int, default=0)
    parser.add_argument('--save_path')
    parser.add_argument('--num_epochs', type=int, default=int(1e9))
    parser.add_argument('--log_freq', type=int, default=100)
    parser.add_argument('--separator', default=",")
    parser.add_argument('--batch_size', type=int, default=8)
    args = parser.parse_args()

    file_paths = args.file_paths
    landmark_paths = args.landmark_paths
    landmark_wanted = args.landmark
    num_epochs = args.num_epochs
    log_freq = args.log_freq
    save_path = args.save_path

    x, y = get_data(file_paths,
                    landmark_paths,
                    landmark_wanted,
                    separator=args.separator)
    print(f"Got {len(x)} images with {len(y)} landmarks")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("device", device)

    dataset = TensorDataset(torch.Tensor(x), torch.Tensor(y))
    dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True)

    unet = UNet(in_dim=1, out_dim=6, num_filters=4)
    criterion = torch.nn.CrossEntropyLoss(weight=get_weigths(y))
    optimizer = optim.SGD(unet.parameters(), lr=0.001, momentum=0.9)

    unet.to(device)

    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, data in enumerate(dataloader):
            inputs, labels = data
            # keep the batch on the same device as the model
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            outputs = unet(inputs)
            loss = criterion(outputs, labels.long())
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        print(f"[{epoch+1}/{num_epochs}] loss: {running_loss}")
        if epoch % log_freq == log_freq - 1:
            if save_path is not None:
                torch.save(unet.state_dict(),
                           os.path.join(save_path, f"unet-{epoch}.pt"))
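get_weigths is not shown in this excerpt; a common choice, given here only as a hypothetical sketch, is inverse-frequency class weights so that under-represented classes contribute more to the CrossEntropyLoss:

import numpy as np
import torch

def inverse_frequency_weights(y, num_classes=6):
    # Hypothetical stand-in for get_weigths: weight each class by the inverse of
    # its frequency in the label maps, normalised to sum to 1.
    counts = np.bincount(np.asarray(y, dtype=np.int64).ravel(), minlength=num_classes)
    weights = counts.sum() / np.maximum(counts, 1)
    return torch.tensor(weights / weights.sum(), dtype=torch.float32)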
Example #5
def plot_2_histograms():
    df = get_data(['SPY', 'GOOGL'], dates)
    #plot_data(df)

    daily_returns = compute_daily_returns(df)
    #plot_data(daily_returns, title="Daily returns")

    #daily_returns.hist(bins=20)
    daily_returns['SPY'].hist(bins=20, label='SPY')
    daily_returns['GOOGL'].hist(bins=20, label='GOOGL')
    plt.legend(loc='upper right')
    plt.show()
Example #6
def _test(train_args, pretrain_args, args):
    """Test saved model on specified speakers."""
    print('Testing', ', '.join(args.speakers), '...')

    # update args with new test args
    test_args = utils.set_new_args(train_args, args)
    # get test data and id_to_word lookup
    _, _, test_data, id_to_word = data_reader.get_data(test_args)
    # set configurations/hyperparameters for model
    _, test_config = utils.set_config(test_args, id_to_word)

    # model requires init embed but this will be overridden by restored model
    init_embed = utils.init_embedding(id_to_word,
                                      dim=test_args.embed_size,
                                      init_scale=test_args.init_scale,
                                      embed_path=test_args.embed_path)

    with tf.Graph().as_default():
        with tf.name_scope('Test'):
            with tf.variable_scope('Model', reuse=None):
                m_test = model.Model(test_args,
                                     is_training=False,
                                     config=test_config,
                                     init_embed=init_embed,
                                     name='Test')
                m_test.build_graph()

        init = tf.global_variables_initializer()

        # if pretrained, must create dict to initialize TF Saver
        if bool(pretrain_args):
            # get trainable variables and convert to dict for Saver
            reuse_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            reuse_vars_dict = dict([(var.op.name, var) for var in reuse_vars])
            # create saver for TF session (see function for addl details)
            saver = utils.create_tf_saver(args, pretrain_args, reuse_vars_dict)
        else:
            saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(init)
            print('Restoring model...')
            saver.restore(sess, test_args.load_path)

            # test model on specified speakers
            for test_ind, test_speaker in enumerate(test_args.speakers):
                for train_ind, train_speaker in enumerate(train_args.speakers):
                    print('Testing {0} with {1} model'.format(
                        test_speaker, train_speaker))
                    test_perplexity = _run_epoch(sess, m_test, test_args,
                                                 test_data, train_ind,
                                                 test_ind)
                    print('Test Perplexity: {0:.3f}'.format(test_perplexity))
Example #7
def main():
    filepath = "spam.dat"
    x, X_test, y, Y_test = data_reader.get_data(filepath)

    confusion = MLP.runalt(x, y, X_test, Y_test, display=True, optimize=False)
    confusion = DTC.run(x, y, X_test, Y_test, display=True, optimize=False)
    # confusion = DTC.runRFE(x, y, X_test, Y_test, display=True)

    # confusion = SVM.run(x, y, X_test, Y_test, True)
    # confusion = Bayes.run(x, y, X_test, Y_test, True)
    confusion = KNN.runalt(x, y, X_test, Y_test, display=True, optimization=False)
    plt.show()
def run_program(model_path, data_path):
    place = fluid.CPUPlace()
    inputs = []
    labels = []
    config = None
    if test_args.use_ptq:
        warmup_data, inputs, labels = get_data_with_ptq_warmup(
            data_path, place)
        config = set_config_ptq(model_path, warmup_data)
    else:
        inputs, labels = get_data(data_path, place)
        config = set_config(model_path)

    predictor = create_paddle_predictor(config)
    all_hz_num = 0
    ok_hz_num = 0
    all_ctc_num = 0
    ok_ctc_num = 0
    dataset_size = len(inputs)
    start = time.time()
    for i in range(dataset_size):
        if i == test_args.warmup_iter:
            start = time.time()
        hz_out, ctc_out = predictor.run([inputs[i]])
        np_hz_out = np.array(hz_out.data.float_data()).reshape(-1)
        np_ctc_out = np.array(ctc_out.data.int64_data()).reshape(-1)
        out_hz_label = np.argmax(np_hz_out)
        this_label = labels[i]
        this_label_data = np.array(this_label.data.int32_data()).reshape(-1)
        if this_label.shape[0] == 1:
            all_hz_num += 1
            best = this_label_data[0]
            if out_hz_label == best:
                ok_hz_num += 1
            if this_label_data[0] <= 6350:
                all_ctc_num += 1
                if np_ctc_out.shape[0] == 1 and np_ctc_out.all(
                ) == this_label_data.all():
                    ok_ctc_num += 1
        else:
            all_ctc_num += 1
            if np_ctc_out.shape[0] == this_label.shape[0] and np_ctc_out.all(
            ) == this_label_data.all():
                ok_ctc_num += 1
        if all_ctc_num > 1000 or all_hz_num > 1000:
            break
    end = time.time()
    fps = (dataset_size - test_args.warmup_iter) / (end - start)
    hx_acc = ok_hz_num / all_hz_num
    ctc_acc = ok_ctc_num / all_ctc_num
    return hx_acc, ctc_acc, fps
Example #9
def _generate(train_args, pretrain_args, args):
    """Restore trained model and use to generate sample text."""
    # update args with new generate args
    gen_args = utils.set_new_args(train_args, args)
    # get id_to_word lookup
    _, _, _, id_to_word = data_reader.get_data(gen_args)
    # get hyperparameters corresponding to text generation
    gen_config, _ = utils.set_config(gen_args, id_to_word)

    # model requires init embed but this will be overridden by restored model
    init_embed = utils.init_embedding(id_to_word,
                                      dim=gen_args.embed_size,
                                      init_scale=gen_args.init_scale,
                                      embed_path=gen_args.embed_path)

    with tf.Graph().as_default():
        # use Train name scope as this contains trained model parameters
        with tf.name_scope('Train'):
            with tf.variable_scope('Model', reuse=None):
                m_gen = model.Model(gen_args,
                                    is_training=False,
                                    config=gen_config,
                                    init_embed=init_embed,
                                    name='Generate')
                m_gen.build_graph()

        init = tf.global_variables_initializer()

        # if pretrained, must create dict to initialize TF Saver
        if bool(pretrain_args):
            # get trainable variables and convert to dict for Saver
            reuse_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            reuse_vars_dict = dict([(var.op.name, var) for var in reuse_vars])
            # create saver for TF session (see function for addl details)
            saver = utils.create_tf_saver(args, pretrain_args, reuse_vars_dict)
        else:
            saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(init)
            print('Restoring model...')
            saver.restore(sess, gen_args.load_path)

            # generate text for all specified speakers
            for gen_ind, gen_speaker in enumerate(gen_args.speakers):
                print('Generating text for {0}'.format(gen_speaker))
                for train_ind, train_speaker in enumerate(train_args.speakers):
                    if gen_speaker == train_speaker:
                        generate_text(sess, m_gen, id_to_word, train_ind,
                                      args.temp)
Example #10
def run_algorithm(algorithm):

    if algorithm == 'apriori':
        data = get_data("shopping.json")
        apriori_result = apriori(data, 0.3, 0.5)
        apriori_result = apriori_result.sort_values(by=['ir'],
                                                    ascending=[True])

        plt.show(
            apriori_result.query('ir > 0.0').plot(kind='bar',
                                                  x='rule',
                                                  y=['ir', 'kulczynski'],
                                                  rot=45,
                                                  fontsize=6))

        print(apriori_result)
Example #11
def main(_):
    input_count = 12
    kde_models = get_kde_models(FLAGS.kde_model)

    data = get_data(FLAGS.data)
    plus = data.loc[(data['change'] == 1)]
    minus = data.loc[(data['change'] == 0)]

    names = [  # "current_slice",
        "blue", "blue_1", "green", "green_1", "red", "red_1", "nir", "nir_1",
        "swir1", "swir1_1", "swir2", "swir2_1"
    ]

    plus = np.array(plus[names])
    minus = np.array(minus[names])

    # plus_sample = plus[:10, :]
    # minus_sample = minus[:10, :]

    # expected = eval_kde(kde_models, plus_sample)
    # received = call_model(FLAGS, 'model', plus_sample)

    # print(np.hstack([expected, received]))

    # expected = eval_kde(kde_models, minus_sample)
    # received = call_model(FLAGS, 'model', minus_sample)
    # print(np.hstack([expected, received]))

    density_plus = call_model(FLAGS, 'model', plus)
    print(density_plus)
    density_minus = call_model(FLAGS, 'model', minus)
    print(density_minus)

    true_plus = density_plus[:, 0] > density_plus[:, 1]
    true_plus_prop = 1.0 * sum(true_plus.astype(int)) / len(true_plus)

    true_minus = density_minus[:, 1] > density_minus[:, 0]
    true_minus_prop = 1.0 * sum(true_minus.astype(int)) / len(true_minus)

    print('Plus prob', true_plus_prop)
    print('Minus prob', true_minus_prop)
Example #12
def test_run():
    df = get_data([], dates)
    #plot_data(df)

    daily_returns = compute_daily_returns(df)
    #plot_data(daily_returns, title='Daily Returns', ylabel='Daily returns')

    #plot
    daily_returns.hist(bins=20)
    #plt.show()

    mean = daily_returns['SPY'].mean()
    print("mean=", mean)
    std = daily_returns['SPY'].std()
    print("std=", std)

    plt.axvline(mean, color='w', linestyle='dashed', linewidth=2)
    plt.axvline(std, color='r', linestyle='dashed', linewidth=2)
    plt.axvline(-std, color='r', linestyle='dashed', linewidth=2)
    plt.show()

    print(daily_returns.kurtosis())
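To help interpret the kurtosis printed above, a small optional sketch (not in the original; assumes scipy is installed) that overlays a normal fit on the SPY return histogram; excess kurtosis shows up as fatter tails than the fitted curve:

import numpy as np
import scipy.stats as stats

returns = daily_returns['SPY'].dropna()
returns.hist(bins=20, density=True, label='SPY daily returns')
xs = np.linspace(returns.min(), returns.max(), 200)
plt.plot(xs, stats.norm.pdf(xs, returns.mean(), returns.std()), label='normal fit')
plt.legend()
plt.show()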
Example #13
num_input = 6 * 2  # Prosody
timesteps = 1200  # 60 sec * 20 frames/sec = 1200
num_hidden = 30  # num units in LSTM cell
keep_prob_train = 0.75
experiments = [5,10,20,40,60]


for experiment in experiments:

    tf.reset_default_graph()
    num_output_units = experiment  # 20 frames/sec

    # Reading data
    print("Reading data...")
    x_train, y_train, x_test, y_test = data_reader.get_data()
    y_train = y_train[:,:,0:num_output_units]
    y_test = y_test[:,:,0:num_output_units]
    print(x_train.shape)

    # tf Graph input
    X = tf.placeholder("float", [None, timesteps, num_input])
    Y = tf.placeholder("float", [None, timesteps, num_output_units])
    keep_prob = tf.placeholder(tf.float32)  # dropout (keep probability)

    # Define weights/biases
    weights = {
        'hidden1': tf.get_variable("w_hid1", shape=(num_input, num_input),
                                 # initializer=tf.random_normal_initializer()),
                                 initializer=tf.contrib.layers.xavier_initializer()),
import sys

import numpy as np
from settings import LIBLINEAR_DIR
from data_reader import load, get_data
from settings import DATA_DIR

sys.path.append(LIBLINEAR_DIR)

from liblinearutil import *

train_set = 'cifar_train_triplet_100_x.npz'
test_set = 'cifar_test_triplet_100_x.npz'

# Perform only model selection (finding best C for linear SVM using CV)
only_model_selection = False

# Save final model (flag renamed so it does not shadow liblinearutil's save_model)
save_final_model = False
model_name = 'model_best_triplet'

trainx = get_data(train_set)
_, trainy = load(DATA_DIR, subset='train')

testx = get_data(test_set)
_, testy = load(DATA_DIR, subset='test')

result = train(trainy, trainx, '-C')
if not only_model_selection:
    m = train(trainy, trainx, '-c ' + str(result[0]))
    p_label, p_acc, p_val = predict(testy, testx, m)
    if save_final_model:
        save_model(model_name, m)
Example #15
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from data_reader import load, get_data
from settings import DATA_DIR, DATA_SEED

samples_per_class = 500

data_type = 'train'  # 'test' or 'train'
data_name = 'cifar_train_triplet_100_x.npz'

datax = get_data(data_name)
_, datay = load(DATA_DIR, subset=data_type)
pca = PCA(n_components=2)
X_new = pca.fit_transform(datax)
print(datax.shape)
rng_data = np.random.RandomState(DATA_SEED)
inds = rng_data.permutation(X_new.shape[0])

X_new = X_new[inds]
datay = datay[inds]

plt.rcParams["figure.figsize"] = (15, 12)
for j in range(10):
    txs = X_new[datay == j][:samples_per_class]
    plt.scatter(txs[:, 0], txs[:, 1])

plt.title('PCA 2D transform on ' + data_name, fontsize=20)
plt.xlabel('PC1', fontsize=18)
plt.ylabel('PC2', fontsize=18)
plt.savefig('pca_' + data_name + '.png')
Example #16
def main(_):
    kde_models = get_kde_models(FLAGS.kde_model)

    data = get_data(FLAGS.data)
    names = [  # "current_slice",
        "blue", "blue_1", "green", "green_1", "red", "red_1", "nir", "nir_1",
        "swir1", "swir1_1", "swir2", "swir2_1"
    ]
    data = np.array(data[names])

    out_count = 2
    input_count = 12

    x = tf.placeholder(tf.float32, [None, input_count], name='input')

    with tf.name_scope('weights'):
        W1 = tf.Variable(tf.truncated_normal([input_count, FLAGS.layer1],
                                             stddev=0.5),
                         name='w1')
        W2 = tf.Variable(tf.truncated_normal([FLAGS.layer1, FLAGS.layer2],
                                             stddev=0.5),
                         name='w2')
        W3 = tf.Variable(tf.truncated_normal([FLAGS.layer2, out_count],
                                             stddev=0.5),
                         name='w3')

    with tf.name_scope('biases'):
        b1 = tf.Variable(tf.zeros([FLAGS.layer1]), name='b1')
        b2 = tf.Variable(tf.zeros([FLAGS.layer2]), name='b2')
        b3 = tf.Variable(tf.zeros([out_count]), name='b3')

    y = model(x, W1, W2, W3, b1, b2, b3)

    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, out_count], name='target')

    loss1 = tf.reduce_mean(tf.losses.absolute_difference(labels=y_,
                                                         predictions=y),
                           name='loss1')
    tf.summary.scalar('abs diff', loss1)

    # reg_w = 0.00000001
    loss = tf.reduce_mean(
        tf.losses.mean_squared_error(labels=y_, predictions=y)
        # + reg_w*(tf.nn.l2_loss(W1)+tf.nn.l2_loss(W2)+tf.nn.l2_loss(W3)),
        ,
        name='loss')

    tf.summary.scalar('Regularized loss', loss)

    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    optimizer = optimizer.minimize(loss)

    saver = tf.train.Saver(max_to_keep=1)

    sess = tf.InteractiveSession()

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                         sess.graph)

    tf.global_variables_initializer().run()
    # Train
    for epoch in range(FLAGS.max_epoch):
        # drawing a sample batch is a very expensive operation, so reuse the same data many times
        # and, to reduce overfitting, draw a lot of it at once
        batch_xs, batch_ys = get_batch(kde_models, data, 384 * 50, input_count)
        for i in range(5000):
            _ = sess.run([optimizer], feed_dict={x: batch_xs, y_: batch_ys})

        # Test trained model
        # if epoch % 100 == 99:
        summary, train_loss, train_loss1 = sess.run([merged, loss1, loss],
                                                    feed_dict={
                                                        x: batch_xs,
                                                        y_: batch_ys
                                                    })
        print('EPOCH', epoch + 1, '\tloss', train_loss, '\tloss1', train_loss1)
        train_writer.add_summary(summary, epoch)

        saver.save(sess, os.path.join(FLAGS.model_dir, "model"))

    print_model(FLAGS, 'model')
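The model helper used above is not part of this excerpt; a plausible sketch, assuming two ReLU hidden layers and a linear output matching the weight shapes defined above:

def model(x, W1, W2, W3, b1, b2, b3):
    # Hypothetical 3-layer MLP: two ReLU hidden layers, linear output.
    h1 = tf.nn.relu(tf.matmul(x, W1) + b1)
    h2 = tf.nn.relu(tf.matmul(h1, W2) + b2)
    return tf.matmul(h2, W3) + b3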
Example #17
def main(_):
    data = get_data(FLAGS.data)
    train, val, test = split_data(data)

    out_count = 1
    input_count = train.shape[1] - out_count

    x = tf.placeholder(tf.float32, [None, input_count], name='input')

    with tf.name_scope('weights'):
        W1 = tf.Variable(tf.truncated_normal([input_count, FLAGS.layer1], stddev=0.5), name='w1')
        W2 = tf.Variable(tf.truncated_normal([FLAGS.layer1, FLAGS.layer2], stddev=0.5), name='w2')
        W3 = tf.Variable(tf.truncated_normal([FLAGS.layer2, input_count], stddev=0.5), name='w3')

    with tf.name_scope('biases'):
        b1 = tf.Variable(tf.zeros([FLAGS.layer1]), name='b1')
        b2 = tf.Variable(tf.zeros([FLAGS.layer2]), name='b2')
        b3 = tf.Variable(tf.zeros([input_count]), name='b3')

    logits = model(x, W1, W2, W3, b1, b2, b3)
    y = tf.nn.relu(logits, name='result')


    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, input_count], name='target')

    loss1 = tf.reduce_mean(
        tf.losses.absolute_difference(labels=y_, predictions=y), name='loss1')
    tf.summary.scalar('abs diff', loss1)

    reg_w = 0.00000000001
    loss = tf.reduce_mean(
        tf.losses.absolute_difference(labels=y_, predictions=y)
            + reg_w*(tf.nn.l2_loss(W1)+tf.nn.l2_loss(W2)+tf.nn.l2_loss(W3)),
        name='loss'
    )

    tf.summary.scalar('Regularized loss', loss)

    saver = tf.train.Saver(max_to_keep=1)

    optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
    optimizer = optimizer.minimize(loss)

    sess = tf.InteractiveSession()

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', sess.graph)
    test_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/test')

    tf.global_variables_initializer().run()
    # Train
    best_loss = 9999999999999
    for epoch in range(FLAGS.max_epoch):
        batch_xs, batch_ys = get_batch(train, 2**12, input_count)
        _, summary, train_loss, train_loss1 = sess.run(
            [optimizer, merged, loss, loss1], feed_dict={x: batch_xs, y_: batch_xs}
        )
        train_writer.add_summary(summary, epoch)

        # Test trained model
        if epoch % 100 == 99:
            val_loss = sess.run(loss, feed_dict={x: val[:, :input_count], y_: val[:, :input_count]})
            print('EPOCH', epoch+1, 'Loss: \tval', sess.run(loss1, feed_dict={x: val[:, :input_count], y_: val[:, :input_count]}), '\ttrain', train_loss1)
            test_writer.add_summary(summary, epoch)

            # if val_loss < best_loss:
            #     best_loss = val_loss

            saver.save(sess, os.path.join(FLAGS.model_dir, "model"))

    print_model(FLAGS, 'model', test[:, :input_count], test[:, :input_count])
Example #18
import sys
import numpy as np
from scipy.stats import mode

from sklearn.metrics import accuracy_score

from data_reader import load, get_data
from settings import DATA_DIR, LIBLINEAR_DIR

sys.path.append(LIBLINEAR_DIR)

from liblinearutil import *

data_train = get_data('cifar_train_triplet_100_x.npz')
data_test = get_data('cifar_test_triplet_100_x.npz')

# For more generality
K = 10
cs = 0.250000

_, trainy = load(DATA_DIR, subset='train')
_, testy = load(DATA_DIR, subset='test')

joined = []
for k in range(K):
    ind1 = np.random.choice(data_train.shape[0], data_train.shape[0])
    trainx_temp = data_train[ind1]
    trainx_temp = trainx_temp + np.random.normal(0, 0.3, trainx_temp.shape)
    trainy_temp = trainy[ind1]
    m = train(trainy_temp, trainx_temp, '-c ' + str(cs))
    p_label, p_acc, p_val = predict(testy, data_test, m)
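The excerpt stops before the K bootstrap predictions are combined; a hedged sketch of the missing aggregation, mirroring the majority-vote pattern of Example #20 below, assuming each p_label is collected with joined.append(np.expand_dims(p_label, axis=0)) inside the loop:

# Majority vote across the K bagged linear models (sketch, not from the original).
joined = np.transpose(np.concatenate(joined, axis=0), (1, 0))
m_voting = [mode(joined[k])[0][0] for k in range(joined.shape[0])]
print(accuracy_score(m_voting, testy))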
Example #19
from data_reader import get_data
import cv2
import keras
import numpy as np
from keras.layers import Dense, Flatten
from keras.layers import Conv2D, MaxPooling2D, Dropout
from keras.models import Sequential
from keras import Input, Model
import matplotlib.pylab as plt


d = get_data()
index = 0
images = []
for i in d["smiles"]:
    img = cv2.imread("images/" + str(index) + ".png")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    images.append(gray)
    index += 1

target1_for_training = []

for t in d["target1"]:
    if t != "":
        target1_for_training.append(int(t))
    else:
        target1_for_training.append(0)

target1_for_training = target1_for_training[0:1000]
img_x = 300
img_y = 300
Example #20
data_trains = [
    'cifar_train_triplet_1024_x.npz', 'cifar_train_triplet_100_x.npz',
    'cifar_train_triplet_2048_x.npz', 'cifar_train_triplet_2048_L2_x.npz',
    'cifar_train_x.npz'
]
data_tests = [
    'cifar_test_triplet_1024_x.npz', 'cifar_test_triplet_100_x.npz',
    'cifar_test_triplet_2048_x.npz', 'cifar_test_triplet_2048_L2_x.npz',
    'cifar_test_x.npz'
]

cs = [0.015625, 0.031250, 0.031250, 0.250000, 0.031250]

_, trainy = load(DATA_DIR, subset='train')
_, testy = load(DATA_DIR, subset='test')

joined = []
for k in range(len(cs)):
    trainx = get_data(data_trains[k])
    testx = get_data(data_tests[k])
    trainx = trainx + np.random.normal(0, 0.3, trainx.shape)
    m = train(trainy, trainx, '-c ' + str(cs[k]))
    p_label, p_acc, p_val = predict(testy, testx, m)
    joined.append(np.expand_dims(p_label, axis=0))
joined = np.transpose(np.concatenate(joined, axis=0), (1, 0))
m_voting = []
for k in range(joined.shape[0]):
    m_voting.append(mode(joined[k])[0][0])

acc = accuracy_score(m_voting, testy)
print(acc)
Example #21
#setting = [4048, 4048, 1024]
#setting = [2048, 1048, 100]
setting = [4048, 4048, 2048]
'''l_type options:
'' if we use the loss from https://arxiv.org/abs/1704.02227;
'L2' if we use the loss max(d_+ - d_- + \lambda, 0), where \lambda = 10.0'''
l_type = 'L2'

layers = [LL.InputLayer(shape=(None, 2048))]
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.3))
layers.append(nn.DenseLayer(layers[-1], num_units=setting[0]))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=setting[1]))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=setting[2]))

trainx = get_data('cifar_train_x.npz')
_, trainy = load(DATA_DIR, subset='train')

print(trainx.shape)

x_lab = T.matrix()
output_lab = LL.get_output(layers[-1], x_lab, deterministic=False)


def get_triplets(prediction, size):
    a = prediction[0:size]  # query case (positive)
    b = prediction[size:2 * size]  # positive case
    c = prediction[2 * size:3 * size]  # negative

    return a, b, c
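A standalone numpy sketch (not from the original, which builds the loss with Theano/Lasagne) of the 'L2' variant described above, max(d_+ - d_- + lambda, 0) with lambda = 10.0, assuming squared Euclidean distances over the (anchor, positive, negative) triplets returned by get_triplets:

import numpy as np

def l2_triplet_loss(a, b, c, lam=10.0):
    # Squared Euclidean distances anchor-positive and anchor-negative.
    d_pos = np.sum((a - b) ** 2, axis=1)
    d_neg = np.sum((a - c) ** 2, axis=1)
    # Hinge: penalise triplets where the positive is not closer by at least lam.
    return np.mean(np.maximum(d_pos - d_neg + lam, 0.0))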
Example #22
def main(flags):
    sample_size = flags.sample_size
    model_filename = flags.model_name
    data = get_data(flags.data)

    plus = data.loc[(data['change'] == 1)]
    minus = data.loc[(data['change'] == 0)]

    names = [  # "current_slice",
        "blue", "blue_1",
        "green", "green_1",
        "red", "red_1",
        "nir", "nir_1",
        "swir1", "swir1_1",
        "swir2", "swir2_1"
    ]

    plus = np.array(plus[names])
    minus = np.array(minus[names])

    if minus.shape[0] > sample_size:
        idx = np.random.randint(minus.shape[0], size=sample_size)
        minus_sample = minus[idx, :]
    else:
        minus_sample = minus

    if plus.shape[0] > sample_size:
        idx = np.random.randint(plus.shape[0], size=sample_size)
        plus_sample = plus[idx, :]
    else:
        plus_sample = plus


    grid = GridSearchCV(KernelDensity(),
                        {'bandwidth': np.linspace(0.005, 0.025, 11)},
                        n_jobs=-1,
                        cv=10) # 10-fold cross-validation
    grid.fit(plus_sample)
    print('Best bandwidth (plus):', grid.best_params_)
    kde_plus = grid.best_estimator_

    # import ipdb; ipdb.set_trace()

    grid.fit(minus_sample)
    print('Best bandwidth (minus):', grid.best_params_)
    kde_minus = grid.best_estimator_

    density_plus_p = kde_plus.score_samples(plus)
    density_plus_m = kde_plus.score_samples(minus)

    density_minus_p = kde_minus.score_samples(plus)
    density_minus_m = kde_minus.score_samples(minus)

    true_plus = density_plus_p > density_minus_p
    true_plus_prop = 1.0 * sum(true_plus.astype(int)) / len(true_plus)

    true_minus = density_minus_m > density_plus_m
    true_minus_prop = 1.0 * sum(true_minus.astype(int)) / len(true_minus)

    print('True plus:', true_plus_prop)
    print('True minus:', true_minus_prop)

    models = {'plus_model': kde_plus, 'minus_model': kde_minus}

    pickle.dump(models, open(model_filename, 'wb'))
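A usage sketch (not part of the original) showing how the pickled dictionary could be loaded back and used to classify new rows by comparing log-densities, mirroring the comparison above:

import pickle
import numpy as np

with open(model_filename, 'rb') as f:
    kde = pickle.load(f)

def classify(samples):
    # Higher log-density under the 'plus' KDE -> predict change (1), else 0.
    log_p_plus = kde['plus_model'].score_samples(samples)
    log_p_minus = kde['minus_model'].score_samples(samples)
    return (log_p_plus > log_p_minus).astype(int)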
Example #23
        area_object = {
            'id': int(elf_id[1:]),
            'x': int(top),
            'y': int(left[:-1]),
            'width': int(width),
            'height': int(height)
        }
        refactored.append(area_object)

    return refactored


def create_matrix(obj):
    fab = np.zeros((1000, 1000), dtype=int)
    for o in obj:
        area = fab[o['y']:o['y'] + o['height'], o['x']:o['x'] + o['width']]
        area[:] = area + 1

    for o in obj:
        area = fab[o['y']:o['y'] + o['height'], o['x']:o['x'] + o['width']]
        if np.sum(np.where(area == 1, 0, 2)) < 1:
            print(o['id'])

    return fab


if __name__ == '__main__':
    initial_data = get_data(False)
    data = refactor_data(initial_data)
    fabric = create_matrix(data)
Example #24
def _train(args, pretrain_args):
    """Train the language model.

    Creates train/valid/test models, runs training epochs, saves model and
    writes results to database if specified.
    """
    start_time = time.time()
    print('Training', ', '.join(args.speakers), '...')

    # randomly sample validation set monte_carlo_cv_num times
    for num in range(args.monte_carlo_cv_num):
        # get seed used to sub-sample validation dataset (use 42 for 1st run)
        seed = utils.get_seed(num)

        # get train/valid/test data and convert to sequences
        train_data, valid_data, test_data, id_to_word = data_reader.get_data(
            args, seed=seed)
        # set configurations/hyperparameters for model
        config, test_config = utils.set_config(args, id_to_word)

        # initialize word embeddings
        init_embed = utils.init_embedding(id_to_word,
                                          dim=args.embed_size,
                                          init_scale=args.init_scale,
                                          embed_path=args.embed_path)

        with tf.Graph().as_default():
            # initializer used to initialize TensorFlow variables
            initializer = tf.random_uniform_initializer(
                -config['init_scale'], config['init_scale'])
            # create Train model
            with tf.name_scope('Train'):
                with tf.variable_scope('Model',
                                       reuse=None,
                                       initializer=initializer):
                    m_train = model.Model(args,
                                          is_training=True,
                                          config=config,
                                          init_embed=init_embed,
                                          name='Train')
                    m_train.build_graph()

            # create Valid model
            with tf.name_scope('Valid'):
                with tf.variable_scope('Model',
                                       reuse=True,
                                       initializer=initializer):
                    m_valid = model.Model(args,
                                          is_training=False,
                                          config=config,
                                          init_embed=init_embed,
                                          name='Valid')
                    m_valid.build_graph()

            # create Test model
            with tf.name_scope('Test'):
                with tf.variable_scope('Model',
                                       reuse=True,
                                       initializer=initializer):
                    m_test = model.Model(args,
                                         is_training=False,
                                         config=test_config,
                                         init_embed=init_embed,
                                         name='Test')
                    m_test.build_graph()

            # create summaries to be viewed in TensorBoard
            tb_summaries = utils.TensorBoardSummaries()
            tb_summaries.create_ops()

            init = tf.global_variables_initializer()

            # if pretrained, must create dict to initialize TF Saver
            if bool(pretrain_args):
                # get trainable variables and convert to dict for Saver
                reuse_vars = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES)
                reuse_vars_dict = dict([(var.op.name, var)
                                        for var in reuse_vars])
                # create saver for TF session (see function for addl details)
                saver = utils.create_tf_saver(args, pretrain_args,
                                              reuse_vars_dict)
            else:
                saver = tf.train.Saver()

            # ppls dict has perplexities that are stored in results database
            ppls = {}
            ppls, _ = _update_ppls(ppls, initialize=True)

            with tf.Session() as sess:
                sess.run(init)

                if args.load_path != '':
                    print('Restoring model...')
                    saver.restore(sess, args.load_path)

                for epoch in range(config['max_epoch']):
                    print('Epoch: {0} Learning rate: {1:.3f}\n'.format(
                        epoch + 1, sess.run(m_train.lr)))
                    for i, speaker in enumerate(args.speakers):
                        print('Training {0} ...'.format(speaker))

                        # run epoch on training data
                        train_perplexity = _run_epoch(
                            sess,
                            m_train,
                            args,
                            train_data,
                            i,
                            tb_summaries,
                            id_to_word,
                            train_op=m_train.train_op,
                            verbose=True)
                        print('Epoch: {0} Train Perplexity: {1:.3f}'.format(
                            epoch + 1, train_perplexity))
                        ppls, _ = _update_ppls(ppls,
                                               epoch=epoch + 1,
                                               speaker=speaker,
                                               ppl=train_perplexity,
                                               dataset='train')

                        print('Validating...')
                        # run epoch on validation data
                        valid_perplexity = _run_epoch(sess,
                                                      m_valid,
                                                      args,
                                                      valid_data,
                                                      i,
                                                      tb_summaries,
                                                      id_to_word,
                                                      verbose=True)
                        print('Epoch: {0} Valid Perplexity: {1:.3f}'.format(
                            epoch + 1, valid_perplexity))
                        ppls, improved = _update_ppls(ppls,
                                                      epoch=epoch + 1,
                                                      speaker=speaker,
                                                      ppl=valid_perplexity,
                                                      dataset='valid')

                        if improved:
                            # save model if valid ppl is lower than current
                            # best valid ppl
                            if args.save_path != '':
                                print('Saving model to {0}.'.format(
                                    args.save_path))
                                saver.save(sess, args.save_path)

                for i, speaker in enumerate(args.speakers):
                    print('Testing {0} ...'.format(speaker))
                    print('Restoring best model for testing...')
                    saver.restore(sess, args.save_path)
                    # run model on test data
                    test_perplexity = _run_epoch(sess, m_test, args, test_data,
                                                 i)
                    ppls['test_ppl_' + speaker] = test_perplexity
                    print('Test Perplexity: {0:.3f}'.format(test_perplexity))

            if args.insert_db == 'True':
                # write params/config/results to sql database
                results_db.insert_results(args, config, start_time, ppls)