def split(X, Y, x_to_classify, name, call_plot, key):
    """Either run the repeated-iteration path or do one stratified split and fit.

    When the ``iteration`` flag is truthy, ``X`` and ``Y`` are handed to
    ``perform_iteration`` together with the literal ``100``,
    ``x_to_classify`` and ``name``, and the function returns immediately.
    ``iteration`` is not a parameter; it is read from the enclosing scope
    (presumably a module-level switch -- confirm against the rest of the file).

    Otherwise 30% of the samples are held out via ``train_test_split``
    (stratified on ``Y``, ``random_state=42``), the training portion is
    optionally plotted, and both portions are forwarded to ``models``.

    Args:
        X: Feature data passed to ``train_test_split``.
        Y: Labels; also used as the stratification target.
        x_to_classify: Forwarded untouched to ``perform_iteration`` / ``models``.
        name: Forwarded to ``perform_iteration``, ``plot`` and ``models``.
        call_plot: When truthy, ``plot`` is called on the training portion.
        key: Forwarded to ``plot`` and ``models``.

    Returns:
        None in every path.
    """
    if iteration:
        perform_iteration(X, Y, 100, x_to_classify, name)
        return

    # 70/30 train/test partition with a fixed random_state.
    train_x, test_x, train_y, test_y = train_test_split(
        X, Y, test_size=0.3, stratify=Y, random_state=42
    )

    # Visualise the training data on all features, not on a subset of them.
    if call_plot:
        plot(train_x, train_y, name, key)

    models(train_x, test_x, train_y, test_y, x_to_classify, name, key)
def generate_slow(self, x, models, dilation_depth, n_repeat, ctx, n=100):
    """Extend a seed sequence by ``n`` values, one model call per new value.

    Each step feeds the most recent ``sum(dilations) + 1`` elements of the
    running sequence to ``models`` and appends the last entry of the
    result's ``argmax(1)``.

    Args:
        x: Seed sequence; must provide ``asnumpy()``.
        models: Callable applied to the current window. Its result must
            support ``argmax(1).asnumpy()``.
        dilation_depth: Number of dilation levels; level ``i`` contributes
            ``2**i``.
        n_repeat: Number of times the list of dilations is repeated.
        ctx: Passed through as ``ctx=`` to ``nd.array``.
        n: Number of values to generate (default 100).

    Returns:
        list: the elements of ``x.asnumpy()`` followed by the ``n``
        generated values.
    """
    dilations = [2 ** level for level in range(dilation_depth)] * n_repeat
    # The window length never changes, so compute it once rather than
    # re-summing the dilation list on every generated sample.
    window = sum(dilations) + 1

    res = list(x.asnumpy())
    for _ in trange(n):
        # Only the trailing `window` elements are fed to the model; a shorter
        # history simply yields a shorter slice.
        context = nd.array(res[-window:], ctx=ctx)
        prediction = models(context)
        res.append(prediction.argmax(1).asnumpy()[-1])
    return res
loader_val = dataset_mini(n_examples, n_episodes, 'val', args) elif dataset == 'tiered': loader_train = dataset_tiered(n_examples, n_episodes, 'train', args) loader_val = dataset_tiered(n_examples, n_episodes, 'val', args) if pkl == 0: print('Load image data rather than PKL') loader_train.load_data() loader_val.load_data() else: print('Load PKL data') loader_train.load_data_pkl() loader_val.load_data_pkl() # construct model m = models(args) ce_loss, acc, sigma_value = m.construct() # train and stepsize global_step = tf.Variable(0, name="global_step", trainable=False) learning_rate = tf.train.exponential_decay(lr, global_step, step_size, gamma, staircase=True) # update ops for batch norm update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): train_op = tf.train.AdamOptimizer(learning_rate).minimize( ce_loss, global_step=global_step)
## Getting DB Details for Connection and Initializing DB object ## logger.info('Initiating Connection with DB with following details: ') #remove_all() sectn = config['DB_DETAILS'] host = sectn["db_host"] db_user = sectn["db_userName"] db_pass = sectn["db_pass"] db_name = sectn["db_name"] logger.info('host: ' + str(host) + '\n' + 'DataBase User: '******'\n' + 'Database Name: ' + str(db_name) + '\n' + 'Collection Name: ' + 'mail_resumes') ########################################### db_inst = models(host, db_name, db_user, db_pass) logger.info('Established Connection, DB Instance: ' + str(db_inst)) ######################################## logger.info("\nInitiating reply object: \n") reply_mail_obj = Reply_Module(inst, db_inst) ########################################## """ def to_Process(pending_att_mail_dict):
def train(opt): # Load data loader = DataLoader(opt) opt.vocab_size = loader.vocab_size opt.seq_length = loader.seq_length # Tensorboard summaries (they're great!) tb_summary_writer = tb and tb.SummaryWriter(opt.checkpoint_path) # Load pretrained model, info file, histories file infos = {} histories = {} if opt.start_from is not None: with open(os.path.join(opt.start_from, 'infos_' + opt.id + '.pkl')) as f: infos = cPickle.load(f) saved_model_opt = infos['opt'] need_be_same = ["rnn_type", "rnn_size", "num_layers"] for checkme in need_be_same: assert vars(saved_model_opt)[checkme] == vars( opt )[checkme], "Command line argument and saved model disagree on '%s' " % checkme if os.path.isfile( os.path.join(opt.start_from, 'histories_' + opt.id + '.pkl')): with open( os.path.join(opt.start_from, 'histories_' + opt.id + '.pkl')) as f: histories = cPickle.load(f) iteration = infos.get('iter', 0) epoch = infos.get('epoch', 0) val_result_history = histories.get('val_result_history', {}) loss_history = histories.get('loss_history', {}) lr_history = histories.get('lr_history', {}) #ss_prob_history = histories.get('ss_prob_history', {}) loader.iterators = infos.get('iterators', loader.iterators) loader.split_ix = infos.get('split_ix', loader.split_ix) if opt.load_best_score == 1: best_val_score = infos.get('best_val_score', None) models = Seq2Seq().cuda() # Create model optimizer = utils.build_optimizer_adam(models.parameters(), opt) update_lr_flag = True sc_flag = False while True: # Update learning rate once per epoch if update_lr_flag: # Assign the learning rate if epoch > opt.learning_rate_decay_start and opt.learning_rate_decay_start >= 0: frac = (epoch - opt.learning_rate_decay_start ) // opt.learning_rate_decay_every decay_factor = opt.learning_rate_decay_rate**frac opt.current_lr = opt.learning_rate * decay_factor else: opt.current_lr = opt.learning_rate utils.set_lr(optimizer, opt.current_lr) # Assign the scheduled sampling prob if epoch > opt.scheduled_sampling_start 
and opt.scheduled_sampling_start >= 0: frac = (epoch - opt.scheduled_sampling_start ) // opt.scheduled_sampling_increase_every #opt.ss_prob = min(opt.scheduled_sampling_increase_prob * frac, opt.scheduled_sampling_max_prob) #model.ss_prob = opt.ss_pro # Load data from train split (0) start = time.time() data = loader.get_batch('train') data_time = time.time() - start start = time.time() # Unpack data torch.cuda.synchronize() tmp = [ data['fc_feats'], data['att_feats'], data['labels'], data['dist'], data['masks'], data['att_masks'] ] tmp = [_ if _ is None else torch.from_numpy(_).cuda() for _ in tmp] fc_feats, att_feats, labels, dist_label, masks, attmasks = tmp labels = labels.long() nd_labels = labels batchsize = fc_feats.size(0) # Forward pass and loss d_steps = 1 g_steps = 1 #print (torch.sum(labels!=0), torch.sum(masks!=0)) if 1: if 1: models.train() optimizer.zero_grad() wordact = models( labels.view(batchsize, -1).transpose(1, 0), labels.view(batchsize, -1).transpose(1, 0), fc_feats) wordact_t = wordact.transpose(1, 0)[:, 1:, :] wordact_t = wordact_t.contiguous().view( wordact_t.size(0) * wordact_t.size(1), -1) labels_flat = labels.view(batchsize, -1) wordclass_v = labels_flat[:, 1:] wordclass_t = wordclass_v.contiguous().view(\ wordclass_v.size(0)*wordclass_v.size(1), -1) loss_xe = F.cross_entropy(wordact_t[...], wordclass_t[...].view(-1)) train_loss = loss_xe train_loss.backward() optimizer.step() if 1: if iteration % opt.print_freq == 1: print('Read data:', time.time() - start) if not sc_flag: print("iter {} (epoch {}), train_loss = {:.3f}, data_time = {:.3f}" \ .format(iteration, epoch, loss_xe, data_time)) else: print("iter {} (epoch {}), avg_reward = {:.3f}, data_time = {:.3f}, time/batch = {:.3f}" \ .format(iteration, epoch, np.mean(reward[:,0]), data_time, total_time)) # Update the iteration and epoch iteration += 1 if data['bounds']['wrapped']: epoch += 1 update_lr_flag = True # Write the training loss summary if (iteration % opt.losses_log_every == 
0): add_summary_value(tb_summary_writer, 'train_loss', train_loss, iteration) add_summary_value(tb_summary_writer, 'learning_rate', opt.current_lr, iteration) #add_summary_value(tb_summary_writer, 'scheduled_sampling_prob', model.ss_prob, iteration) if sc_flag: add_summary_value(tb_summary_writer, 'avg_reward', np.mean(reward[:, 0]), iteration) loss_history[ iteration] = train_loss if not sc_flag else np.mean( reward[:, 0]) lr_history[iteration] = opt.current_lr #ss_prob_history[iteration] = model.ss_prob # Validate and save model if (iteration % opt.save_checkpoint_every == 0): checkpoint_path = os.path.join( opt.checkpoint_path, 'langmodel{:05d}.pth'.format(iteration)) torch.save(models.state_dict(), checkpoint_path) optimizer_path = os.path.join(opt.checkpoint_path, 'optimizer.pth') torch.save(optimizer.state_dict(), optimizer_path)