def create_data_processor(is_undersampled, is_oversampled, seed=0):
    """Build the module-level DataProcessor, optionally resampling the data."""
    global DataProcessor
    if is_undersampled:
        DataProcessor = data_processor.DataProcessor(TrainData).with_scaling().with_undersampling(seed)
    elif is_oversampled:
        DataProcessor = data_processor.DataProcessor(TrainData).with_scaling().with_oversampling(seed)
    else:
        DataProcessor = data_processor.DataProcessor(TrainData).with_scaling()
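# Usage sketch (hedged): assumes `data_processor`, `TrainData`, and the
# `with_*` builder methods exist exactly as referenced above; the flag values
# and seed below are illustrative only.
create_data_processor(is_undersampled=True, is_oversampled=False, seed=42)
processor = DataProcessor  # the factory publishes its result via this global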
print("Batch size: " + str(batch_size)) print("Epochs: " + str(num_epochs)) print("Evaluate every steps: " + str(evaluate_steps)) print("Early Stopping: " + str(early_stopping)) print("Patience: " + str(patience)) # Data set parameters vocab_size = experiment_params['vocab_size'] max_seq_length = experiment_params['max_seq_length'] to_tokens = experiment_params['to_tokens'] embedding_dim = experiment_params['embedding_dim'] embedding_type = experiment_params['embedding_type'] embedding_source = experiment_params['embedding_source'] # Initialize the dataset and embedding processor data_set = data_processor.DataProcessor(task_name, dataset_dir, max_seq_length, vocab_size=vocab_size, to_tokens=to_tokens) embedding = embedding_processor.get_embedding(embedding_type) # If dataset folder is empty get the metadata and datasets if not os.listdir(dataset_dir): data_set.get_dataset() # Load the metadata vocabulary, labels = data_set.load_metadata() # Generate the embedding matrix embedding_matrix = embedding.get_embedding_matrix(embeddings_dir, embedding_source, embedding_dim, vocabulary) # Loop over each experiment in the optimiser for experiment in model_optimiser.get_experiments(project_name=experiment_params['project_name'], workspace="nathanduran", auto_output_logging='simple'):
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as torch_data
from torch.autograd import Variable

import data_processor
import global_define
import network as nw

# Only rebuild the processed data files on the first run
IfInitial = False
dp = data_processor.DataProcessor()
if IfInitial:
    dp.init_data()
    print('initial done!')

# Get training data and test data
train_set_np = np.load("data/train_set.npy")
train_set_label_np = np.load("data/train_set_label.npy")
test_set_np = np.load("data/test_set.npy")
test_set_label_np = np.load("data/test_set_label.npy")

# Network
G = nw.Generator().cuda()
D = nw.Discriminator().cuda()
G.weight_init(mean=0, std=0.01)
D.weight_init(mean=0, std=0.01)
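# A minimal follow-up sketch, assuming the arrays loaded above are
# feature/label pairs: wrap them in a TensorDataset/DataLoader for mini-batch
# training. The batch size of 64 is an assumption, not taken from the source.
train_loader = torch_data.DataLoader(
    torch_data.TensorDataset(torch.from_numpy(train_set_np).float(),
                             torch.from_numpy(train_set_label_np).long()),
    batch_size=64, shuffle=True)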
import data_processor as dp
import recommendation as rec
import numpy as np

# Inspect the loaded data and a few pairwise similarities
processor = dp.DataProcessor()
print(processor.data)
print(processor.simi(0, 1))
print(processor.simi(3, 1))
print(processor.simi(4, 1))

# List the movies user 1 has not watched yet
recommend = rec.Recommendation()
print(recommend.get_non_watched_movies(1))
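# A hedged extension of the demo above: rank the other users by similarity to
# user 1, assuming processor.simi(a, b) returns a numeric score and that user
# ids 0, 3 and 4 exist as in the calls above.
scores = {user: processor.simi(user, 1) for user in (0, 3, 4)}
for user in sorted(scores, key=scores.get, reverse=True):
    print(user, scores[user])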
def train(self, epochs, batch_size, g_iter, d_iter, model_dir, model_name):
    print('Training . . .')
    dp = data_processor.DataProcessor()
    sample_noise = self.get_noise(5 * 5)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Restore a checkpoint from model_dir if one exists
        ckpt = tf.train.get_checkpoint_state(model_dir)
        print(ckpt)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print('Reloading model parameters...')
            self.saver.restore(sess, ckpt.model_checkpoint_path)
        for epoch in range(epochs):
            d_loss, g_loss = 0, 0
            batched_img, batched_label, batched_wrong_label = dp.get_batch(batch_size, d_iter)
            dataset = list(zip(batched_img, batched_label, batched_wrong_label))
            for b, (img, label, wrong_label) in enumerate(dataset):
                z = self.get_noise(batch_size)
                for it in range(d_iter):
                    # Discriminator update on real, generated and mismatched pairs
                    _, d_loss, d_real, d_fake = sess.run(
                        [self.d_optim, self.d_loss, self.d_real_loss, self.d_fake_loss],
                        feed_dict={self.g_inputs: z,
                                   self.d_inputs: img,
                                   self.label: label,
                                   self.wrong_label: wrong_label})
                z = self.get_noise(batch_size)
                for it in range(g_iter):
                    # Generator update
                    _, g_loss = sess.run([self.g_optim, self.g_loss],
                                         feed_dict={self.g_inputs: z, self.label: label})
                print('Epoch [{:>2}/{:>2}] | Batch [{:>4}/{:>4}] | D_loss: {:.6f} | G_loss: {:.6f} | d_real: {:.6f} | d_fake: {:.6f}'
                      .format(epoch + 1, epochs, b + 1, len(batched_img),
                              d_loss, g_loss, d_real, d_fake))
            if (epoch + 1) % self.display_step == 0:
                # Periodically sample from the generator and save an image grid
                samples = sess.run(self.g_infer,
                                   feed_dict={self.g_inputs: sample_noise,
                                              self.label: self.test_labels})
                samples = samples / 2 + 0.5  # rescale from [-1, 1] to [0, 1]
                fig = self.visualize_result(samples)
                plt.savefig(self.pic_path + '{}.png'.format(str(epoch + 1).zfill(4)),
                            bbox_inches='tight')
                plt.close(fig)
        if model_name != '':
            self.saver.save(sess, './model/{}/{}'.format(model_name, model_name))
            print("Model saved at '{}'".format('./model/' + model_name))
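# Hypothetical driver for the method above; the class name `ConditionalGAN`
# and every hyperparameter value are assumptions for illustration only:
#
#   model = ConditionalGAN(...)
#   model.train(epochs=300, batch_size=64, g_iter=1, d_iter=5,
#               model_dir='./model/cgan', model_name='cgan')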
vocab_size = experiment_params['vocab_size']
max_seq_length = experiment_params['max_seq_length']
to_tokens = experiment_params['to_tokens']
to_lower = experiment_params['to_lower']
use_punct = experiment_params['use_punct']
train_embeddings = experiment_params['train_embeddings']
embedding_dim = experiment_params['embedding_dim']
embedding_type = experiment_params['embedding_type']
embedding_source = experiment_params['embedding_source']
use_crf = model_params.get('use_crf', False)

# Initialize the dataset processor
data_set = data_processor.DataProcessor(task_name, dataset_dir, max_seq_length,
                                        vocab_size=vocab_size, to_tokens=to_tokens,
                                        to_lower=to_lower, use_punct=use_punct)

# If the dataset folder is empty, get the metadata and datasets as .npz files
if not os.listdir(dataset_dir):
    data_set.get_dataset()

# Load the metadata
vocabulary, labels = data_set.load_metadata()

# Generate the embedding matrix
embeddings = embedding_processor.get_embedding(embeddings_dir, embedding_type, embedding_source,
def train(self, start_epoch, epochs, batch_size, g_iter, d_iter, model_dir):
    dp = data_processor.DataProcessor()
    sample_noise = np.random.normal(0, 1, (5 * 5, self.noise_dim))
    # Build the mean ops once, up front; calling tf.reduce_mean inside the
    # training loop (as the original did) adds new nodes to the graph on
    # every step and slowly leaks memory.
    d_real_mean = tf.reduce_mean(self.d_real)
    d_fake_mean = tf.reduce_mean(self.d_fake)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Restore a checkpoint from model_dir if one exists
        ckpt = tf.train.get_checkpoint_state(model_dir)
        print(ckpt)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print('Reloading model parameters...')
            self.saver.restore(sess, ckpt.model_checkpoint_path)
        end_epoch = start_epoch + epochs
        for epoch in range(start_epoch, end_epoch):
            w_distance, g_loss = 0, 0
            batched_img = dp.get_batch(batch_size, d_iter)
            for b, img in enumerate(batched_img):
                for it in range(d_iter):
                    # Critic update with weight clipping (WGAN)
                    z = np.random.normal(0, 1, (batch_size, self.noise_dim))
                    _, _, w_distance, d_real, d_fake = sess.run(
                        [self.d_optim, self.clip_D, self.w_distance,
                         d_real_mean, d_fake_mean],
                        feed_dict={self.g_inputs: z, self.d_inputs: img})
                for it in range(g_iter):
                    # Generator update
                    z = np.random.normal(0, 1, (batch_size, self.noise_dim))
                    _, g_loss = sess.run([self.g_optim, self.g_loss],
                                         feed_dict={self.g_inputs: z})
            print('Epoch [{:>2}/{:>2}] | W_dist: {:.6f} | G_loss: {:.6f} | d_real: {:.6f} | d_fake: {:.6f}'
                  .format(epoch + 1, end_epoch, w_distance, g_loss, d_real, d_fake))
            if (epoch + 1) % self.display_step == 0:
                # Periodically sample from the generator and save an image grid
                samples = sess.run(self.g_sample, feed_dict={self.g_inputs: sample_noise})
                samples = samples / 2 + 0.5  # rescale from [-1, 1] to [0, 1]
                fig = self.visualize_result(samples)
                plt.savefig(self.pic_path + '{}.png'.format(str(epoch + 1).zfill(4)),
                            bbox_inches='tight')
                plt.close(fig)
        self.saver.save(sess, './model/model_' + str(end_epoch)
                        + '/model_' + str(end_epoch))
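# Hypothetical resume example for the trainer above; the class name `WGAN` and
# all values are assumptions. Because checkpoints are restored from
# `model_dir`, passing the last completed epoch as `start_epoch` continues
# training (and epoch numbering) where a previous run left off:
#
#   model = WGAN(...)
#   model.train(start_epoch=100, epochs=50, batch_size=64,
#               g_iter=1, d_iter=5, model_dir='./model/model_100')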