def __init__(self, dim_time, dim_input):
    # Model parameters for the longitudinal data simulation.
    model = {}
    model['noise_std'] = 1e-2
    p_array = [0.4, 0.3, 0.5, 0.5, 0.4, 0.3, 0.8, 0.6]
    model["p"] = [p_array[i] for i in range(dim_input)]
    model["t"] = 78
    v_array = [0.03, 0.08, 0.15, 0.2, 0.2, 0.1, 0.1, 0.25]
    model["v"] = [v_array[i] for i in range(dim_input)]
    model['ksi_mean'] = 0
    model['ksi_std'] = 1
    model['tau_mean'] = 0
    model['tau_std'] = 3
    model['geodesic'] = sigmoid

    # A matrix (1 source): keep the component of a_v orthogonal to v.
    a_v_array = [1, -1, 1, -1, 1, -1, 1, -1]
    a_v = [a_v_array[i] for i in range(dim_input)]
    model["a_matrix"] = (
        np.array(a_v)
        - np.array(a_v).dot(model["v"])
        / (np.array(model["v"]).dot(model["v"]))
        * np.array(model["v"]))
    model['sources_std'] = 2e-2

    # Cohort parameters.
    cohort = {}
    cohort['n_visits'] = dim_time
    cohort['duration'] = 6
    cohort['patient_sd_begin_age'] = 2
    cohort['patient_mean_begin_age'] = 80 - cohort['duration'] / 2.
    cohort['name'] = "PPMI-Clone-2modalities"
    cohort['n_patients'] = int(1e6)

    self.data_generator = Data_Generator(cohort, model)
    self.patient = 0
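# The "a_matrix" expression above is an orthogonal projection: it subtracts
# from a_v its projection onto the speed vector v, leaving the component
# perpendicular to v. A minimal self-contained sketch of that identity
# (the vectors below are illustrative, not the real model parameters):
import numpy as np

v = np.array([0.03, 0.08, 0.15, 0.2])   # hypothetical speed vector
a_v = np.array([1., -1., 1., -1.])      # hypothetical source direction

# a_matrix = a_v - proj_v(a_v) = a_v - (a_v . v / v . v) * v
a_matrix = a_v - (a_v.dot(v) / v.dot(v)) * v

# By construction the result is orthogonal to v (up to float error).
assert abs(a_matrix.dot(v)) < 1e-12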
def main(unused_argv):
    data_config = configuration.DataConfig().config
    data_gen = Data_Generator(
        processed_video_dir=data_config["processed_video_dir"],
        caption_file=data_config["caption_file"],
        unique_freq_cutoff=data_config["unique_frequency_cutoff"],
        max_caption_len=data_config["max_caption_length"])
    data_gen.load_vocabulary(data_config["caption_data_dir"])
    data_gen.load_dataset(data_config["caption_data_dir"])
    # data_gen.build_dataset()

    assert FLAGS.dataset in ["val", "test", "train"]

    if FLAGS.max_len_captions:
        max_len = FLAGS.max_len_captions
    else:
        max_len = data_config['max_caption_length']

    model_config = configuration.ModelConfig(data_gen).config
    model = Model_S2VT(
        num_frames=model_config["num_frames"],
        image_width=model_config["image_width"],
        image_height=model_config["image_height"],
        image_channels=model_config["image_channels"],
        num_caption_unroll=model_config["num_caption_unroll"],
        num_last_layer_units=model_config["num_last_layer_units"],
        image_embedding_size=model_config["image_embedding_size"],
        word_embedding_size=model_config["word_embedding_size"],
        hidden_size_lstm1=model_config["hidden_size_lstm1"],
        hidden_size_lstm2=model_config["hidden_size_lstm2"],
        vocab_size=model_config["vocab_size"],
        initializer_scale=model_config["initializer_scale"],
        learning_rate=model_config["learning_rate"])
    model.build()
    summary_op = tf.summary.merge(model.summaries)

    gen_caption = []
    infer_util = Inference(model, data_gen.word_to_idx, data_gen.idx_to_word)
    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter(data_config["train_log_dir"],
                                             sess.graph)
        saver = tf.train.Saver(max_to_keep=20,
                               keep_checkpoint_every_n_hours=0.5)
        if FLAGS.checkpoint_model:
            model_path = FLAGS.checkpoint_model
        else:
            model_path = tf.train.latest_checkpoint(
                data_config["checkpoint_dir"])
        if model_path is not None:
            print("Restoring weights from %s" % model_path)
            saver.restore(sess, model_path)
        else:
            print("No checkpoint found. Exiting")
            return

        if FLAGS.max_captions:
            max_iter = FLAGS.max_captions
        else:
            # +10 is just to be safe ;)
            max_iter = len(data_gen.dataset[FLAGS.dataset]) + 10

        iter = 0
        video_paths = {
            i["file_name"]: i["path"]
            for i in data_gen.dataset[FLAGS.dataset]
        }
        video_files = list(video_paths.keys())
        for btch in range(0, len(video_files), FLAGS.batch_size):
            print("Processing batch %d" % (int(btch / FLAGS.batch_size) + 1))
            start = btch
            end = min(len(video_files), btch + FLAGS.batch_size)
            dataset = {}
            dataset["video"] = np.asarray([
                np.load(video_paths[video_files[i]])
                for i in range(start, end)
            ])
            dataset["path"] = [
                video_paths[video_files[i]] for i in range(start, end)
            ]
            dataset["file"] = [video_files[i] for i in range(start, end)]
            dataset["gen_caption"] = infer_util.generate_caption_batch(
                sess, dataset["video"], max_len=max_len)
            for i in range(len(dataset['gen_caption'])):
                dictionary = {}
                dictionary["gen_caption"] = dataset['gen_caption'][i]
                dictionary["file_name"] = dataset['file'][i]
                dictionary["path"] = dataset['path'][i]
                gen_caption.append(dictionary)
                iter += 1
                if iter >= max_iter:
                    break
            if iter >= max_iter:
                break

    with open(os.path.join(data_config["result_dir"],
                           "generated_caption.json"), "w") as fl:
        fl.write(json.dumps(gen_caption, indent=4, sort_keys=True))
def run():
    """Runs evaluation in a loop, and logs summaries to TensorBoard."""
    data_config = configuration.DataConfig().config
    data_gen = Data_Generator(data_config["processed_video_dir"],
                              data_config["caption_file"],
                              data_config["unique_frequency_cutoff"],
                              data_config["max_caption_length"])
    data_gen.load_vocabulary(data_config["caption_data_dir"])
    data_gen.load_dataset(data_config["caption_data_dir"])

    FLAGS.checkpoint_dir = data_config["checkpoint_dir"]

    # Create the evaluation directory if it doesn't exist.
    eval_dir = data_config["val_log_dir"]
    if not tf.gfile.IsDirectory(eval_dir):
        tf.logging.info("Creating eval directory: %s", eval_dir)
        tf.gfile.MakeDirs(eval_dir)

    g = tf.Graph()
    with g.as_default():
        # Build the model for evaluation.
        model_config = configuration.ModelConfig(data_gen).config
        model = Model_S2VT(**model_config)
        model.build()

        # Create the Saver to restore model Variables.
        saver = tf.train.Saver()

        # Create the summary writer.
        val_writer = tf.summary.FileWriter(data_config["val_log_dir"])

        g.finalize()

        if FLAGS.eval_all_models:
            # Evaluate every checkpoint in the directory, oldest first.
            model_names = list(
                set([
                    n.split(".")[0]
                    for n in os.listdir(data_config["checkpoint_dir"])
                    if "model" in n
                ]))
            model_names.sort(key=lambda x: int(x[6:]))
            for name in model_names:
                FLAGS.checkpoint_file = os.path.join(
                    data_config["checkpoint_dir"], name)
                tf.logging.info(
                    "Starting evaluation of %s at " % name +
                    time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime()))
                run_once(model, saver, val_writer, data_gen)
        else:
            # Run a new evaluation every eval_interval_secs.
            while True:
                start = time.time()
                tf.logging.info(
                    "Starting evaluation at " +
                    time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime()))
                run_once(model, saver, val_writer, data_gen)
                time_to_next_eval = (start + FLAGS.eval_interval_secs -
                                     time.time())
                if time_to_next_eval > 0:
                    time.sleep(time_to_next_eval)
def main():
    if len(sys.argv) < 2:
        print("Please type in which data to use: sinusoid || linear")
        sys.exit()

    # MAML hyperparameters.
    torch.autograd.set_detect_anomaly(True)
    meta_lr = 0.001
    inner_lr = 0.001
    update_num = 1
    meta_epochs = 500
    plot_size = meta_epochs // 100
    start_time = time.time()

    # Data.
    task_num, data_num = 25, 5
    data = Data_Generator(num_task=task_num,
                          num_samples_per_task=data_num,
                          task_type=sys.argv[1])

    # Model.
    model = Predictor()
    optimizer = optim.Adam(model.parameters(), lr=meta_lr)
    iterations = 3
    criterion = nn.MSELoss()
    mode = "maml"

    # Proximal regularization.
    inner_iterations = 5

    # Dual-averaging related.
    proposed = True
    u = [0.00001 / math.sqrt(k + 1) for k in range(meta_epochs)]
    grad_list = []
    linear_func = 0  # used for the quadratic term
    init_parameters = []
    for ele in model.state_dict().keys():
        init_parameters.append(model.state_dict()[ele])

    # Beginning of the training.
    for epoch in range(meta_epochs):
        if sys.argv[1] == "linear":
            pass
        elif sys.argv[1] == "sinusoid":
            inputs, outputs, amp, phase = data.generate()
        losses = [0] * task_num
        if epoch % plot_size == 0:
            sys.stdout.write("\r%d" % epoch)
            sys.stdout.flush()
        for i in range(task_num):
            if mode == "maml":
                # Regular MAML: inner gradient step on the support set.
                loss = F.mse_loss(
                    torch.tensor(outputs[i][:data_num], dtype=torch.float32),
                    model(torch.tensor(inputs[i][:data_num],
                                       dtype=torch.float32)))
                # With create_graph=False this would reduce to first-order MAML.
                grad = torch.autograd.grad(loss,
                                           model.parameters(),
                                           create_graph=True)
                fast_weights = list(
                    map(lambda p: p[1] - inner_lr * p[0],
                        zip(grad, model.parameters())))
                # Meta-loss on the query set through the adapted weights.
                losses[i] = F.mse_loss(
                    torch.tensor(outputs[i][data_num:], dtype=torch.float32),
                    model(torch.tensor(inputs[i][data_num:],
                                       dtype=torch.float32),
                          vars=fast_weights))
            """
            elif mode == "proximal":
                temp_model = Predictor()
                loss = F.mse_loss(
                    torch.tensor(outputs[i][:data_num], dtype=torch.float32),
                    temp_model(torch.tensor(inputs[i][:data_num],
                                            dtype=torch.float32)))
                regularization = torch.tensor(
                    list(map(lambda p: ((p[1] - p[0])**2).sum(),
                             zip(temp_model.parameters(),
                                 model.parameters())))).sum()
                in_optimizer = optim.Adam(temp_model.parameters(), lr=inner_lr)
                total = loss + regularization
                for _ in range(inner_iterations):
                    in_optimizer.zero_grad()
                    total.backward(retain_graph=True)
                    in_optimizer.step()
                losses[i] = total
            else:  # dual averaging
                loss = F.mse_loss(
                    torch.tensor(outputs[i][:data_num], dtype=torch.float32),
                    model(torch.tensor(inputs[i][:data_num],
                                       dtype=torch.float32)))
                # With create_graph=False this would reduce to first-order MAML.
                grad = torch.autograd.grad(loss, model.parameters(),
                                           create_graph=True)
                fast_weights = list(map(lambda p: p[1] - inner_lr * p[0],
                                        zip(grad, model.parameters())))
                losses[i] = F.mse_loss(
                    torch.tensor(outputs[i][data_num:], dtype=torch.float32),
                    model(torch.tensor(inputs[i][data_num:],
                                       dtype=torch.float32),
                          vars=fast_weights))
            """
        sum_losses = sum(losses)
        for _ in range(iterations):
            optimizer.zero_grad()
            sum_losses.backward(retain_graph=True)
            optimizer.step()
        """
        else:
            grad = torch.autograd.grad(sum_losses, model.parameters())
            # Initializing.
            if epoch == 0:
                grad_list = grad
            else:
                grad_list = list(map(lambda p: p[1] + p[0],
                                     zip(grad, grad_list)))
            quadratic = 0
            for param, init in zip(model.parameters(), init_parameters):
                quadratic += criterion(param, init)
            linear = 0
            for param, g in zip(model.parameters(), grad_list):
                linear += torch.sum(g * param)
            real_function = 1.0 / (epoch + 1) * linear + u[epoch] * quadratic
            real_function.backward()
            optimizer.step()
        """

    # Fine-tuning and evaluation on a fresh task.
    test_task_data_num, test_task_num = 20, 1
    test_updates = 1
    plot_size = 100
    test_inputs, test_outputs, test_amp, test_phase = data.generate()
    x = np.linspace(-5.0, 5.0, num=100).reshape(-1, 1)
    y_before = model(
        torch.tensor(x, dtype=torch.float32).reshape(-1, 1)).detach().numpy()
    plt.plot(x, y_before, label="before fine-tuning")

    optimizer = optim.Adam(model.parameters(), lr=meta_lr)
    for i in range(test_updates):
        optimizer.zero_grad()
        for _ in range(iterations):
            loss = criterion(
                torch.tensor(test_outputs[0][:test_task_data_num],
                             dtype=torch.float32),
                model(torch.tensor(test_inputs[0][:test_task_data_num],
                                   dtype=torch.float32)))
            loss.backward()
            optimizer.step()
    loss = criterion(
        torch.tensor(test_outputs[0][test_task_data_num:],
                     dtype=torch.float32),
        model.forward(
            torch.tensor(test_inputs[0][test_task_data_num:],
                         dtype=torch.float32)))
    y_predict = model(
        torch.tensor(x, dtype=torch.float32).reshape(-1, 1)).detach().numpy()
    total_se = np.square(
        y_predict -
        np.linspace(-5.0, 5.0, num=plot_size).reshape(-1, 1)).mean()
    print("\nThe execution time:", time.time() - start_time)
    print("\nThe total squared error:", total_se)
    plt.plot(x, test_phase[0] * np.sin(x - test_amp[0]), label="oracle")
    plt.plot(x, y_predict, label="after fine-tuning")
    plt.legend()
    plt.show()
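# For reference, the "maml" branch above is the standard second-order MAML
# update: one gradient step on the support set with create_graph=True, then
# a meta-loss on the query set through the adapted weights. A minimal
# self-contained sketch on a toy linear model (made-up data; Predictor and
# Data_Generator are not used here):
import torch
import torch.nn.functional as F

x_s, y_s = torch.randn(5, 1), torch.randn(5, 1)  # support set
x_q, y_q = torch.randn(5, 1), torch.randn(5, 1)  # query set

w = torch.zeros(1, 1, requires_grad=True)        # meta-parameters
inner_lr, meta_lr = 0.001, 0.001

# Inner step: adapt on the support set, keeping the graph so the
# meta-gradient can flow through the adaptation.
inner_loss = F.mse_loss(x_s @ w, y_s)
(g,) = torch.autograd.grad(inner_loss, [w], create_graph=True)
w_fast = w - inner_lr * g

# Outer step: evaluate the adapted weights on the query set and update
# the meta-parameters with the resulting meta-gradient.
meta_loss = F.mse_loss(x_q @ w_fast, y_q)
meta_loss.backward()
with torch.no_grad():
    w -= meta_lr * w.grad
    w.grad.zero_()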
def main(unused_argv):
    data_config = configuration.DataConfig().config
    data_gen = Data_Generator(
        processed_video_dir=data_config["processed_video_dir"],
        caption_file=data_config["caption_file"],
        unique_freq_cutoff=data_config["unique_frequency_cutoff"],
        max_caption_len=data_config["max_caption_length"])
    data_gen.load_vocabulary(data_config["caption_data_dir"])
    data_gen.load_dataset(data_config["caption_data_dir"])
    # data_gen.build_dataset()

    model_config = configuration.ModelConfig(data_gen).config
    model = Model_S2VT(
        num_frames=model_config["num_frames"],
        image_width=model_config["image_width"],
        image_height=model_config["image_height"],
        image_channels=model_config["image_channels"],
        num_caption_unroll=model_config["num_caption_unroll"],
        num_last_layer_units=model_config["num_last_layer_units"],
        image_embedding_size=model_config["image_embedding_size"],
        word_embedding_size=model_config["word_embedding_size"],
        hidden_size_lstm1=model_config["hidden_size_lstm1"],
        hidden_size_lstm2=model_config["hidden_size_lstm2"],
        vocab_size=model_config["vocab_size"],
        initializer_scale=model_config["initializer_scale"],
        learning_rate=model_config["learning_rate"])
    model.build()
    summary_op = tf.summary.merge(model.summaries)

    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter(data_config["train_log_dir"],
                                             sess.graph)
        saver = tf.train.Saver(max_to_keep=20,
                               keep_checkpoint_every_n_hours=0.5)
        if FLAGS.checkpoint_model:
            model_path = FLAGS.checkpoint_model
        else:
            model_path = tf.train.latest_checkpoint(
                data_config["checkpoint_dir"])
        if model_path is not None:
            print("Restoring weights from %s" % model_path)
            saver.restore(sess, model_path)
        else:
            print("No checkpoint found. Initializing variables from scratch")
            sess.run(tf.global_variables_initializer())

        data_gen.init_batch(int(FLAGS.batch_size), "train")
        if FLAGS.save_freq:
            iter_to_save = np.int32(FLAGS.save_freq)
        else:
            iter_to_save = int(data_gen.iter_per_epoch["train"] / 2)

        for epoch in range(int(FLAGS.num_epochs)):
            for i in range(data_gen.iter_per_epoch["train"]):
                start_time = time.time()
                dataset = data_gen.get_next_batch("train")
                data_gen_time = time.time() - start_time

                feed_dict = {}
                feed_dict[model.caption_input] = dataset["indexed_caption"]
                feed_dict[model.caption_mask] = dataset["caption_mask"]
                feed_dict[model.rnn_input] = dataset["video"]

                if np.mod(i + 1, int(FLAGS.summary_freq)) == 0:
                    print("Writing Summary")
                    summary, loss, global_step, _ = sess.run(
                        [
                            summary_op, model.batch_loss, model.global_step,
                            model.train_step
                        ],
                        feed_dict=feed_dict)
                    train_writer.add_summary(summary, global_step)
                    # Log wall-clock time per global step as a manual summary.
                    time_global_step = tf.Summary()
                    value = time_global_step.value.add()
                    value.simple_value = (time.time() - start_time)
                    value.tag = "global_step/time_global_step"
                    train_writer.add_summary(time_global_step, global_step)
                else:
                    loss, global_step, _ = sess.run(
                        [model.batch_loss, model.global_step,
                         model.train_step],
                        feed_dict=feed_dict)
                print("global_step = ", global_step, ", loss = ", loss,
                      ', Elapsed time: %.2f' % (time.time() - start_time))
                if np.mod(i + 1, iter_to_save) == 0:
                    print("Saving the model ...")
                    saver.save(sess,
                               os.path.join(data_config["checkpoint_dir"],
                                            'model'),
                               global_step=int(global_step))
        # Final save after training completes.
        print("Saving the model ...")
        saver.save(sess,
                   os.path.join(data_config["checkpoint_dir"], 'model'),
                   global_step=int(global_step))
from __future__ import print_function
from __future__ import absolute_import

import json
import os

import configuration
from data_generator import Data_Generator


def is_ascii(s):
    return all(ord(c) < 128 for c in s)


data_config = configuration.DataConfig().config
data_gen = Data_Generator(data_config["processed_video_dir"],
                          data_config["caption_file"],
                          data_config["unique_frequency_cutoff"],
                          data_config["max_caption_length"])
data_gen.load_vocabulary(data_config["caption_data_dir"])
data_gen.load_dataset(data_config["caption_data_dir"])

# COCO-style reference structure for caption evaluation.
ref = {
    'info': {},
    'images': [],
    'licenses': [],
    'type': 'captions',
    'annotations': []
}
ref['info'] = {
    'contributor': 'Suyash Agrawal',
import numpy as np
import random

import matplotlib.pyplot as plt

from maml_linear_regression import maml_linear_regression
from data_generator import Data_Generator

data_generator = Data_Generator()
dim = 10
real_theta = np.random.rand(dim)
theta, datas, outputs = data_generator.generate_linear(real_theta)

# Arguments: number of tasks, number of data points
# (if False, between 10 and 1000 data points are chosen at random).
maml = maml_linear_regression(10, False)

# X, y, theta = maml.datas[0]
# print(maml.meta_gradient(X, y, theta))
print(maml.fit())
print(maml.real_theta)

x = np.arange(len(maml.process))
plt.plot(x, maml.test_process, label='test set')
plt.plot(x, maml.process, label='val set')
plt.plot(x, maml.real_process)
plt.legend(loc='upper right')
plt.ylim(0, 1000)
plt.show()
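# The commented-out maml.meta_gradient(X, y, theta) call above suggests the
# class computes the MAML meta-gradient for squared-error linear regression.
# The class internals are not shown here, so the following is only a hedged
# sketch of that computation in closed form: with L(theta) = ||X theta - y||^2 / n,
# one inner step gives theta' = theta - alpha * grad_s, and differentiating
# the query loss through that step yields (I - alpha * H_s) grad_q(theta'),
# where H_s = 2 X_s^T X_s / n is the support-set Hessian.
import numpy as np

rng = np.random.default_rng(0)
X_s, X_q = rng.normal(size=(20, 10)), rng.normal(size=(20, 10))  # made-up data
true_theta = rng.normal(size=10)
y_s, y_q = X_s @ true_theta, X_q @ true_theta

alpha = 0.01          # illustrative inner-loop learning rate
theta = np.zeros(10)

# Inner step on the support set.
grad_s = 2 * X_s.T @ (X_s @ theta - y_s) / len(y_s)
theta_fast = theta - alpha * grad_s

# Meta-gradient on the query set, through the inner step.
H_s = 2 * X_s.T @ X_s / len(y_s)
grad_q = 2 * X_q.T @ (X_q @ theta_fast - y_q) / len(y_q)
meta_grad = (np.eye(10) - alpha * H_s) @ grad_q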
from data_loader import Data_Loader
from data_generator import Data_Generator

if __name__ == "__main__":
    # Ask how many rows to generate.
    limit = int(input("Enter number of rows to generate: "))

    # Initialize the Data_Loader and Data_Generator classes.
    loader = Data_Loader()
    generator = Data_Generator(limit)

    # Load single names from a CSV file into a dataframe.
    filename = 'names1.csv'
    df = loader.read_csvfile(filename)

    # Generate first, middle, and last names for every religion and gender.
    for religion in loader.religion:
        for gender in loader.gender:
            firstnames, middlenames, lastnames = loader.get_names_by_religion_gender(
                religion, gender)
            for i in range(2):
                if i % 2 == 0:
                    # Generate two-word-length names.
                    if gender == 'unisex':
                        gender1 = 'male'
                        firstnames1, middlenames1, lastnames1 = loader.get_names_by_religion_gender(
                            religion, gender1)
                        first_names1 = firstnames + firstnames1
                        last_names1 = lastnames + lastnames1