Пример #1
0
def create_data_processor(is_undersampled, is_oversampled, seed=0):
    """Rebuild the module-level ``DataProcessor`` from ``TrainData``.

    Scaling is always applied. On top of that, undersampling is chained when
    ``is_undersampled`` is truthy; otherwise oversampling is chained when
    ``is_oversampled`` is truthy. Undersampling wins when both flags are set.
    The result is stored in the global ``DataProcessor``; nothing is returned.

    Args:
        is_undersampled: whether to chain ``with_undersampling(seed)``.
        is_oversampled: whether to chain ``with_oversampling(seed)``; only
            consulted when ``is_undersampled`` is falsy.
        seed: value forwarded to the selected resampling step.
    """
    global DataProcessor
    # Every variant starts from the same scaled processor.
    scaled = data_processor.DataProcessor(TrainData).with_scaling()
    if is_undersampled:
        DataProcessor = scaled.with_undersampling(seed)
    elif is_oversampled:
        DataProcessor = scaled.with_oversampling(seed)
    else:
        DataProcessor = scaled
Пример #2
0
# Echo the training settings; these names are defined before this excerpt.
print("Batch size: " + str(batch_size))
print("Epochs: " + str(num_epochs))
print("Evaluate every steps: " + str(evaluate_steps))
print("Early Stopping: " + str(early_stopping))
print("Patience: " + str(patience))

# Data set and embedding parameters, read from the experiment_params mapping
vocab_size = experiment_params['vocab_size']
max_seq_length = experiment_params['max_seq_length']
to_tokens = experiment_params['to_tokens']
embedding_dim = experiment_params['embedding_dim']
embedding_type = experiment_params['embedding_type']
embedding_source = experiment_params['embedding_source']

# Initialize the dataset processor and look up the embedding handler for embedding_type
data_set = data_processor.DataProcessor(task_name, dataset_dir, max_seq_length, vocab_size=vocab_size, to_tokens=to_tokens)
embedding = embedding_processor.get_embedding(embedding_type)

# If the dataset folder is empty, get the metadata and datasets first.
# NOTE: os.listdir raises if dataset_dir does not exist, so the folder must already be present.
if not os.listdir(dataset_dir):
    data_set.get_dataset()

# Load the metadata (unpacked as the vocabulary and the labels)
vocabulary, labels = data_set.load_metadata()

# Generate the embedding matrix for the vocabulary from the configured source and dimension
embedding_matrix = embedding.get_embedding_matrix(embeddings_dir, embedding_source, embedding_dim, vocabulary)

# Loop over each experiment in the optimiser
for experiment in model_optimiser.get_experiments(project_name=experiment_params['project_name'], workspace="nathanduran", auto_output_logging='simple'):
Пример #3
0
import torch.optim as optim
import torch.utils.data as torch_data
from torch.autograd import Variable
import matplotlib.pyplot as plt
import time
import numpy as np
import global_define
import network as nw
import data_processor
import torch
import torch.nn as nn

# Set IfInitial to True to call dp.init_data() before the arrays are loaded.
# NOTE(review): presumably init_data() (re)generates the .npy files read below — confirm.
IfInitial = False
dp = data_processor.DataProcessor()
if IfInitial:
    dp.init_data()
    print('initial done!')

# get training data and test data (plus their labels) from .npy files under data/
train_set_np = np.load("data/train_set.npy")
train_set_label_np = np.load("data/train_set_label.npy")

test_set_np = np.load("data/test_set.npy")
test_set_label_np = np.load("data/test_set_label.npy")

# network: build the generator and discriminator and call .cuda() on each
# NOTE(review): assumes these are torch modules and a CUDA device is available — confirm.
G = nw.Generator().cuda()
D = nw.Discriminator().cuda()

# Initialise both networks through the project's weight_init with mean 0 and std 0.01
G.weight_init(mean=0, std=0.01)
D.weight_init(mean=0, std=0.01)
Пример #4
0
import data_processor as dp
import recommendation as rec
import numpy as np

# Build the data processor and show the data it holds.
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare `print x` statement is a syntax error on Python 3.
processor = dp.DataProcessor()
print(processor.data)

# Show what simi() returns for a few argument pairs
# (presumably a similarity score between two entries — confirm).
print(processor.simi(0, 1))

print(processor.simi(3, 1))

print(processor.simi(4, 1))

recomend = rec.Recommendation()

# Show the result of get_non_watched_movies(1).
print(recomend.get_non_watched_movies(1))
Пример #5
0
    def train(self, epochs, batch_size, g_iter, d_iter, model_dir, model_name):
        """Train the discriminator and generator, then optionally save.

        Variables are initialised and, when ``model_dir`` holds a checkpoint,
        restored from it. Each epoch fetches batches from
        ``DataProcessor.get_batch(batch_size, d_iter)``; for every batch it
        runs ``d_iter`` discriminator updates followed by ``g_iter`` generator
        updates and prints one progress line. One noise batch is drawn for the
        discriminator updates of a batch and another for its generator
        updates. Every ``self.display_step`` epochs a figure of generated
        samples is written under ``self.pic_path``.

        Args:
            epochs: number of epochs to run.
            batch_size: number of noise vectors requested per update; also
                forwarded to ``get_batch``.
            g_iter: generator updates per batch.
            d_iter: discriminator updates per batch; also forwarded to
                ``get_batch``.
            model_dir: directory searched for a checkpoint to restore.
            model_name: when non-empty, the session is saved to
                ``./model/<model_name>/<model_name>`` after training. This
                location is independent of ``model_dir``.
        """
        import os  # local import: only needed to prepare the save directory

        print('Training . . .')
        dp = data_processor.DataProcessor()
        # Fixed noise so the periodic sample figures are comparable.
        sample_noise = self.get_noise(5 * 5)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            ckpt = tf.train.get_checkpoint_state(model_dir)
            print(ckpt)
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
                print('Reloading model parameters...')
                self.saver.restore(sess, ckpt.model_checkpoint_path)

            for epoch in range(epochs):
                # Pre-bind everything the progress line prints so it cannot
                # hit an unbound local when d_iter or g_iter is 0.
                d_loss, g_loss, d_real, d_fake = 0, 0, 0, 0
                batched_img, batched_label, batched_wrong_label = dp.get_batch(
                    batch_size, d_iter)
                batches = zip(batched_img, batched_label, batched_wrong_label)

                for b, (img, label, wrong_label) in enumerate(batches):
                    # Discriminator phase: one noise batch reused d_iter times.
                    z = self.get_noise(batch_size)
                    d_fetches = [
                        self.d_optim, self.d_loss, self.d_real_loss,
                        self.d_fake_loss
                    ]
                    d_feed = {
                        self.g_inputs: z,
                        self.d_inputs: img,
                        self.label: label,
                        self.wrong_label: wrong_label
                    }
                    for _step in range(d_iter):
                        _, d_loss, d_real, d_fake = sess.run(
                            d_fetches, feed_dict=d_feed)

                    # Generator phase: fresh noise batch reused g_iter times.
                    z = self.get_noise(batch_size)
                    g_feed = {self.g_inputs: z, self.label: label}
                    for _step in range(g_iter):
                        _, g_loss = sess.run([self.g_optim, self.g_loss],
                                             feed_dict=g_feed)

                    print(
                        'Epoch [{:>2}/{:>2}] | Batch [{:>4}/{:>4}] | D_loss: {:.6f} | G_loss: {:.6f} | d_real: {:.6f} | d_fake: {:.6f}'
                        .format(epoch + 1, epochs, b + 1, len(batched_img),
                                d_loss, g_loss, d_real, d_fake))

                if (epoch + 1) % self.display_step == 0:
                    samples = sess.run(self.g_infer,
                                       feed_dict={
                                           self.g_inputs: sample_noise,
                                           self.label: self.test_labels
                                       })
                    # Presumably rescales generator output from [-1, 1] to
                    # [0, 1] for plotting — confirm against the generator.
                    samples = samples / 2 + 0.5
                    fig = self.visualize_result(samples)
                    plt.savefig(self.pic_path +
                                '{}.png'.format(str(epoch + 1).zfill(4)),
                                bbox_inches='tight')
                    plt.close(fig)

            if model_name != '':
                save_dir = './model/' + model_name
                # Make sure the target directory exists before saving; some
                # TensorFlow 1.x releases refuse to save into a missing one.
                if not os.path.isdir(save_dir):
                    os.makedirs(save_dir)
                self.saver.save(sess, '{}/{}'.format(save_dir, model_name))
                print('Model saved at \'{}\''.format(save_dir))
Пример #6
0
    # Data set and embedding parameters, read from the experiment_params mapping
    vocab_size = experiment_params['vocab_size']
    max_seq_length = experiment_params['max_seq_length']
    to_tokens = experiment_params['to_tokens']
    to_lower = experiment_params['to_lower']
    use_punct = experiment_params['use_punct']
    train_embeddings = experiment_params['train_embeddings']
    embedding_dim = experiment_params['embedding_dim']
    embedding_type = experiment_params['embedding_type']
    embedding_source = experiment_params['embedding_source']
    # 'use_crf' is optional in model_params; it defaults to False when absent
    use_crf = model_params['use_crf'] if 'use_crf' in model_params else False

    # Initialize the dataset processor
    data_set = data_processor.DataProcessor(task_name,
                                            dataset_dir,
                                            max_seq_length,
                                            vocab_size=vocab_size,
                                            to_tokens=to_tokens,
                                            to_lower=to_lower,
                                            use_punct=use_punct)

    # If dataset folder is empty get the metadata and datasets to .npz files
    # NOTE: os.listdir raises if dataset_dir does not exist, so the folder must already be present.
    if not os.listdir(dataset_dir):
        data_set.get_dataset()

    # Load the metadata (unpacked as the vocabulary and the labels)
    vocabulary, labels = data_set.load_metadata()

    # Generate the embedding matrix
    embeddings = embedding_processor.get_embedding(embeddings_dir,
                                                   embedding_type,
                                                   embedding_source,
Пример #7
0
    def train(self, start_epoch, epochs, batch_size, g_iter, d_iter,
              model_dir):
        """Train the critic and generator for ``epochs`` epochs and save.

        Variables are initialised and, when ``model_dir`` holds a checkpoint,
        restored from it. Each epoch fetches image batches from
        ``DataProcessor.get_batch(batch_size, d_iter)``; for every batch it
        runs ``d_iter`` critic updates (each also running ``self.clip_D``)
        followed by ``g_iter`` generator updates, drawing fresh normal noise
        for every update, and prints one progress line. Every
        ``self.display_step`` epochs a figure of generated samples is written
        under ``self.pic_path``. The session is always saved at the end.

        Args:
            start_epoch: index of the first epoch; only affects epoch
                numbering, figure names and the save path.
            epochs: number of epochs to run.
            batch_size: number of noise vectors drawn per update; also
                forwarded to ``get_batch``.
            g_iter: generator updates per batch.
            d_iter: critic updates per batch; also forwarded to
                ``get_batch``.
            model_dir: directory searched for a checkpoint to restore. The
                final save goes to ``./model/model_<end_epoch>/`` regardless.
        """
        import os  # local import: only needed to prepare the save directory

        dp = data_processor.DataProcessor()
        # Fixed noise so the periodic sample figures are comparable.
        sample_noise = np.random.normal(0, 1, (5 * 5, self.noise_dim))

        # Build the monitoring ops once. Calling tf.reduce_mean inside the
        # training loop would add new nodes to the graph on every step, so the
        # graph (and memory use) would grow for the whole run.
        mean_d_real = tf.reduce_mean(self.d_real)
        mean_d_fake = tf.reduce_mean(self.d_fake)
        d_fetches = [
            self.d_optim, self.clip_D, self.w_distance, mean_d_real,
            mean_d_fake
        ]

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            ckpt = tf.train.get_checkpoint_state(model_dir)
            print(ckpt)
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
                print('Reloading model parameters...')
                self.saver.restore(sess, ckpt.model_checkpoint_path)

            end_epoch = start_epoch + epochs
            for epoch in range(start_epoch, end_epoch):
                # Pre-bind everything the progress line prints so it cannot
                # hit an unbound local when d_iter or g_iter is 0.
                w_distance, g_loss, d_real, d_fake = 0, 0, 0, 0
                batched_img = dp.get_batch(batch_size, d_iter)

                for img in batched_img:
                    for _step in range(d_iter):
                        z = np.random.normal(0, 1,
                                             (batch_size, self.noise_dim))
                        _, _, w_distance, d_real, d_fake = sess.run(
                            d_fetches,
                            feed_dict={
                                self.g_inputs: z,
                                self.d_inputs: img
                            })

                    for _step in range(g_iter):
                        z = np.random.normal(0, 1,
                                             (batch_size, self.noise_dim))
                        _, g_loss = sess.run([self.g_optim, self.g_loss],
                                             feed_dict={self.g_inputs: z})

                    print(
                        'Epoch [{:>2}/{:>2}] | W_dist: {:.6f} | G_loss: {:.6f} | d_real: {:.6f} | d_fake: {:.6f}'
                        .format(epoch + 1, end_epoch, w_distance, g_loss,
                                d_real, d_fake))

                if (epoch + 1) % self.display_step == 0:
                    samples = sess.run(self.g_sample,
                                       feed_dict={self.g_inputs: sample_noise})
                    # Presumably rescales generator output from [-1, 1] to
                    # [0, 1] for plotting — confirm against the generator.
                    samples = samples / 2 + 0.5
                    fig = self.visualize_result(samples)
                    plt.savefig(self.pic_path +
                                '{}.png'.format(str(epoch + 1).zfill(4)),
                                bbox_inches='tight')
                    plt.close(fig)

            save_dir = './model/model_' + str(end_epoch)
            # Make sure the target directory exists before saving; some
            # TensorFlow 1.x releases refuse to save into a missing one.
            if not os.path.isdir(save_dir):
                os.makedirs(save_dir)
            self.saver.save(sess, save_dir + '/model_' + str(end_epoch))