def learn_embeddings(split=10):
    '''
    Learn embeddings by optimizing the Skipgram objective using SGD,
    loading the skip-gram pairs from `split` sharded pickle files.
    '''
    # Pre-trained embeddings, rescaled before being used for initialization.
    _data = args.rescale * utils.load_emb(
        args.temp_dir, args.data_name, args.pre_train_path,
        int(args.dimensions / 2), config['nodes'])

    # Stack the sharded (input, output) pair files back into full arrays.
    _network = tdata.TensorDataset(
        t.LongTensor(np.vstack([
            pickle.load(open(args.temp_dir + args.data_name + '_input.p.' + str(i), 'rb'))
            for i in range(split)
        ])),
        t.LongTensor(np.vstack([
            pickle.load(open(args.temp_dir + args.data_name + '_output.p.' + str(i), 'rb'))
            for i in range(split)
        ])))

    model = SkipGram({
        'emb_size': int(args.dimensions / 2),
        'window_size': 1,
        'batch_size': args.batch_size,
        'iter': args.iter,
        'neg_ratio': 5,
        'lr_ratio': args.lrr,
        'lr': args.lr,
        'network': _network,
        'pre_train': _data,
        'node_types': config['nodes'],
        'edge_types': config['edges'],
        'graph_name': args.data_name,
        'dump_timer': args.dump_timer,
        'data_dir': args.temp_dir,
        'mode': args.op,
        'map_mode': args.map_func,
        'fine_tune': args.fine_tune,
        'model_dir': args.model_dir,
        'log_dir': args.log_dir
    })
    model.train()
    return model.output()
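# --- Hedged sketch (not part of the original project) -----------------------
# learn_embeddings(split=10) above expects the (input, output) skip-gram pairs
# to be sharded into pickle files named '<data_name>_input.p.<i>' and
# '<data_name>_output.p.<i>' for i in range(split). Below is a minimal sketch
# of how such shards could be written; `dump_pair_shards`, `pairs_in` and
# `pairs_out` are hypothetical names (numpy arrays of node indices), not
# identifiers from the source.
import pickle
import numpy as np

def dump_pair_shards(temp_dir, data_name, pairs_in, pairs_out, split=10):
    # Split the full pair arrays into `split` roughly equal chunks and pickle
    # each chunk separately, so they can be re-stacked later with np.vstack.
    for i, (chunk_in, chunk_out) in enumerate(
            zip(np.array_split(pairs_in, split), np.array_split(pairs_out, split))):
        with open(temp_dir + data_name + '_input.p.' + str(i), 'wb') as f:
            pickle.dump(chunk_in, f)
        with open(temp_dir + data_name + '_output.p.' + str(i), 'wb') as f:
            pickle.dump(chunk_out, f)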
def learn_embeddings():
    '''
    Learn embeddings by optimizing the Skipgram objective using SGD.
    '''
    print('Network Spec:', config)

    # Flexible parameter interface for tuning: key/value pairs separated by
    # underscores, e.g. rescale_0.1_lr_0.02.
    more_param = args.more_param
    more_param_dict = {}  # {param_key: param_value_str}
    if more_param != 'None':
        more_param_list = more_param.split("_")
        assert len(more_param_list) % 2 == 0
        for i in xrange(0, len(more_param_list), 2):
            more_param_dict[more_param_list[i]] = more_param_list[i + 1]

    # Please keep these default values consistent with the records on our
    # Google spreadsheet.
    rescale_factor = 1. if 'rescale' not in more_param_dict else float(more_param_dict['rescale'])
    learning_rate = 1. if 'lr' not in more_param_dict else float(more_param_dict['lr'])
    learning_rate_ratio = 16. if 'lrr' not in more_param_dict else float(more_param_dict['lrr'])

    _data = ''
    if len(args.pre_train_path) > 0:
        _data = rescale_factor * utils.load_emb(
            args.data_dir, args.pre_train_path, args.dimensions,
            args.graph_name, config['nodes'])

    if args.weighted:
        _network = tdata.TensorDataset(
            t.LongTensor(cPickle.load(open(args.data_dir + args.graph_name + '_input.p', 'rb'))),
            t.LongTensor(cPickle.load(open(args.data_dir + args.graph_name + '_output.p', 'rb'))),
            t.LongTensor(cPickle.load(open(args.data_dir + args.graph_name + '_weight.p', 'rb'))))
    else:
        _network = tdata.TensorDataset(
            t.LongTensor(cPickle.load(open(args.data_dir + args.graph_name + '_input.p', 'rb'))),
            t.LongTensor(cPickle.load(open(args.data_dir + args.graph_name + '_output.p', 'rb'))))

    model = SkipGram({
        'emb_size': args.dimensions,
        'weighted': args.weighted,
        'window_size': 1,
        'batch_size': args.batch_size,
        'iter': args.iter,
        'neg_ratio': 5,
        'graph_name': args.graph_name,
        'dump_timer': args.dump_timer,
        'model_dir': args.model_dir,
        'log_dir': args.log_dir,
        'data_dir': args.data_dir,
        'mode': args.op,
        'map_mode': args.map_func,
        'fine_tune': args.fine_tune,
        'lr_ratio': learning_rate_ratio,
        'lr': learning_rate,
        'network': _network,
        'more_param': args.more_param,
        'pre_train': _data,
        'node_types': config['nodes'],
        'edge_types': config['edges']
    })

    # Optionally warm-start from a previously saved state dict.
    if args.pre_load_model:
        pre_load_model = t.load(args.pre_load_model, map_location=lambda storage, loc: storage)
        model.neg_loss.load_state_dict(pre_load_model)

    model.cuda()
    model.train()
    return
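# --- Hedged illustration of the `more_param` convention used above ----------
# Key/value pairs are packed with underscores (the example string comes from
# the comment in learn_embeddings), so neither keys nor values may contain
# underscores. `parse_more_param` is a hypothetical helper for illustration
# only, not a function from the source.
def parse_more_param(more_param):
    if more_param == 'None':
        return {}
    parts = more_param.split('_')
    assert len(parts) % 2 == 0
    # pair even-indexed keys with odd-indexed values
    return dict(zip(parts[0::2], parts[1::2]))

# parse_more_param('rescale_0.1_lr_0.02') == {'rescale': '0.1', 'lr': '0.02'}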
df_pair = utils.load_pair(datadirect, train_txt)
df_pair = pd.merge(df_pair, df_lbl, on='label_code', how='left')
imgnum = df_pair.shape[0]

df_attrname, df_lblattr = utils.load_attr(datadirect, attr_txt, lblattr_txt)
df_attr = pd.merge(df_pair[['label_code']], df_lblattr, on='label_code', how='left')
attrnum = df_attrname.shape[0]

df_lblattr = pd.merge(df_lblattr, df_lbl, on='label_code', how='left')
adj_attrsim = utils.create_adjattr(df_lblattr, num_classes)

df_lblemb = utils.load_emb(datadirect, lblemb_txt)
df_lblemb = pd.merge(df_lblemb, df_lbl, on='label_name', how='left')
adj_embsim = utils.create_adjemb(df_lblemb, num_classes)

imagepath_list = df_pair['image_path'].tolist()
label_list = df_pair['label_index'].tolist()
attr_list = df_attr.iloc[:, 1:].values.tolist()
imagepath_list, label_list, attr_list = shuffle(
    imagepath_list, label_list, attr_list, random_state=731)

files, labels, attrs = tf.constant(imagepath_list), tf.constant(label_list), tf.constant(attr_list)
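# --- Hedged sketch of an assumed continuation (not from the source) ---------
# A common next step after building the `files` / `labels` / `attrs` constants
# above is to wrap them in a tf.data pipeline that decodes and batches the
# images. `_parse_example`, `image_size`, and the batch size of 32 are
# hypothetical choices for illustration.
def _parse_example(path, label, attr, image_size=224):
    # Read and decode one image file, then resize it to a fixed spatial size.
    image = tf.image.decode_jpeg(tf.read_file(path), channels=3)
    image = tf.image.resize_images(image, [image_size, image_size])
    return image, label, attr

dataset = tf.data.Dataset.from_tensor_slices((files, labels, attrs))
dataset = dataset.map(_parse_example).batch(32).prefetch(1)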
if __name__ == '__main__':
    args = parse_args()
    arg = {}
    _data = ''
    config_name = os.path.join(
        os.path.dirname(args.data_dir).replace('intermediate', 'input'),
        args.graph_name.split('_ko_')[0] + '.config')
    config = utils.read_config(config_name)
    #config['nodes'] = ['PR', 'AD', 'WO', 'AS', 'GE', 'PE', 'EV', 'PO']
    #config['edges'] = [(5, 2), (5, 5), (5, 2), (5, 2), (6, 1), (5, 5), (5, 3), (5, 1), (5, 3), (5, 7), (5, 2), (5, 4), (5, 1), (3, 1), (5, 3), (5, 1), (1, 1), (5, 0), (1, 1), (5, 1), (5, 1), (5, 5), (5, 5), (5, 2), (5, 5)]

    # baseline score
    if args.op == -1:
        _data = utils.load_emb(args.data_dir, args.pre_train_path,
                               args.dimensions, args.graph_name, config['nodes'])
        #args.op = 1
        #print(_data)

    t.cuda.set_device(int(args.gpu))
    type_offset = cPickle.load(open(args.data_dir + args.graph_name + '_offset.p', 'rb'))
    in_mapping = cPickle.load(open(args.data_dir + args.graph_name + '_in_mapping.p', 'rb'))
    out_mapping = cPickle.load(open(args.data_dir + args.graph_name + '_out_mapping.p', 'rb'))
    model = neg.NEG_loss(type_offset=type_offset,
                         node_types=config['nodes'],
                         edge_types=config['edges'],
                         embed_size=args.dimensions,
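# --- Hedged sketch of the data structures loaded above ----------------------
# These are assumptions, not the project's actual pickle format: `type_offset`
# is read here as a per-node-type starting row in one shared embedding matrix,
# and `in_mapping` / `out_mapping` as per-type dictionaries from raw node ids
# to local indices. Type names follow the commented-out config ('PR', 'AD',
# ...); the counts and ids below are made up for illustration.
type_offset_example = {'PR': 0, 'AD': 1200, 'WO': 3400}
in_mapping_example = {'PR': {'p123': 0, 'p456': 1}}

def global_row(node_type, node_id):
    # global embedding row = offset of the node's type + its local index
    return type_offset_example[node_type] + in_mapping_example[node_type][node_id]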
def train():
    query_list, keywords_list = utils.read_data(FLAGS.dataset_path)

    # shuffle data
    data = zip(query_list, keywords_list)
    random.seed(FLAGS.seed)
    random.shuffle(data)
    query_list, keywords_list = zip(*data)

    item2id, id2item, target_vocab_size = utils.load_mappings(
        query_list, keywords_list, mappings_path=FLAGS.mappings_path,
        source_vocab_size=FLAGS.source_vocab_size,
        target_vocab_size=FLAGS.target_vocab_size)
    vocab_size = len(item2id)
    print vocab_size, target_vocab_size

    query, keywords = utils.prepare_dataset(query_list, keywords_list, item2id)
    data = zip(query, keywords)
    train_data = data[:int(len(data) * 0.8)]
    dev_data = data[int(len(data) * 0.8):int(len(data) * 0.9)]
    test_data = data[int(len(data) * 0.9):]
    train_manager = utils.BatchManager(train_data, FLAGS.batch_size, shuffle=True)
    dev_manager = utils.BatchManager(dev_data, FLAGS.batch_size, shuffle=False)
    test_manager = utils.BatchManager(test_data, FLAGS.batch_size, shuffle=False)
    dev_goldens = keywords_list[int(len(data) * 0.8):int(len(data) * 0.9)]
    test_goldens = keywords_list[int(len(data) * 0.9):]

    model_dir = os.path.join(FLAGS.out_dir, 'models')
    summary_dir = os.path.join(FLAGS.out_dir, 'summries')
    if not os.path.exists(FLAGS.out_dir):
        os.mkdir(FLAGS.out_dir)
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)
    if not os.path.exists(summary_dir):
        os.mkdir(summary_dir)

    tfConfig = tf.ConfigProto()
    tfConfig.gpu_options.per_process_gpu_memory_fraction = FLAGS.memory_usage
    with tf.Session(config=tfConfig) as sess:
        with tf.variable_scope("root"):
            train_model = getattr(models, FLAGS.model_name)(
                'train', vocab_size, target_vocab_size, FLAGS.emb_dim,
                FLAGS.encoder_num_units, FLAGS.encoder_num_layers,
                FLAGS.decoder_num_units, FLAGS.decoder_num_layers,
                FLAGS.dropout_emb, FLAGS.dropout_hidden,
                item2id[utils.START], item2id[utils.END],
                FLAGS.learning_rate, FLAGS.clip_norm, FLAGS.attention_option,
                FLAGS.beam_size, FLAGS.optimizer, FLAGS.maximum_iterations)
        with tf.variable_scope("root", reuse=True):
            dev_model = getattr(models, FLAGS.model_name)(
                'infer', vocab_size, target_vocab_size, FLAGS.emb_dim,
                FLAGS.encoder_num_units, FLAGS.encoder_num_layers,
                FLAGS.decoder_num_units, FLAGS.decoder_num_layers,
                FLAGS.dropout_emb, FLAGS.dropout_hidden,
                item2id[utils.START], item2id[utils.END],
                FLAGS.learning_rate, FLAGS.clip_norm, FLAGS.attention_option,
                FLAGS.beam_size, FLAGS.optimizer, FLAGS.maximum_iterations)
        dev_f1_value = tf.placeholder(dtype=tf.float32, name='dev_f1')
        dev_f1_summary = tf.summary.scalar(name='dev_f1', tensor=dev_f1_value)
        with tf.variable_scope('root', reuse=True):
            test_model = getattr(models, FLAGS.model_name)(
                'infer', vocab_size, target_vocab_size, FLAGS.emb_dim,
                FLAGS.encoder_num_units, FLAGS.encoder_num_layers,
                FLAGS.decoder_num_units, FLAGS.decoder_num_layers,
                FLAGS.dropout_emb, FLAGS.dropout_hidden,
                item2id[utils.START], item2id[utils.END],
                FLAGS.learning_rate, FLAGS.clip_norm, FLAGS.attention_option,
                FLAGS.beam_size, FLAGS.optimizer, FLAGS.maximum_iterations)
        test_f1_value = tf.placeholder(dtype=tf.float32, name='test_f1')
        test_f1_summary = tf.summary.scalar(name='test_F1', tensor=test_f1_value)
        with tf.variable_scope('training_procedure'):
            best_epoch = tf.get_variable('best_epoch', shape=[], initializer=tf.zeros_initializer(), trainable=False, dtype=tf.int32)
            best_dev_score = tf.get_variable('best_dev_score', shape=[], initializer=tf.zeros_initializer(), trainable=False, dtype=tf.float32)
            best_test_score = tf.get_variable('best_test_score', shape=[], initializer=tf.zeros_initializer(), trainable=False, dtype=tf.float32)
        saver = tf.train.Saver(tf.global_variables())
        summary_writer = tf.summary.FileWriter(summary_dir)
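# --- Hedged sketch of the BatchManager interface assumed by the training loop
# below (not the project's utils.BatchManager): the loop only needs `num_batch`
# and an `iter_batch()` generator over fixed-size batches, optionally
# reshuffled on each pass. Padding of variable-length sequences is omitted.
import random

class MinimalBatchManager(object):
    def __init__(self, data, batch_size, shuffle=False):
        self.data = list(data)
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.num_batch = (len(self.data) + batch_size - 1) // batch_size

    def iter_batch(self):
        order = list(self.data)
        if self.shuffle:
            random.shuffle(order)
        for i in range(0, len(order), self.batch_size):
            yield order[i:i + self.batch_size]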
try:
    if FLAGS.restore is not False:
        checkpoint = tf.train.latest_checkpoint(model_dir)
        saver.restore(sess, checkpoint)
        print 'Restore model from %s.' % checkpoint
    else:
        sess.run(tf.global_variables_initializer())
        if os.path.exists(FLAGS.pre_trained_emb_path):
            print "Loading pre_trained word embeddings from %s" % FLAGS.pre_trained_emb_path
            pre_embeddings = utils.load_emb(item2id, FLAGS.pre_trained_emb_path, FLAGS.emb_dim)
            sess.run(train_model.embeddings.assign(pre_embeddings))
            del pre_embeddings

    start_epoch, best_dev_f1, best_test_f1 = sess.run([best_epoch, best_dev_score, best_test_score])
    no_improve = 0
    print 'Train start!'
    sess.run(train_model.learning_rate.assign(FLAGS.learning_rate))
    for epoch in range(start_epoch + 1, FLAGS.max_epoch):
        # train
        train_loss = []
        bar = ProgressBar(max_value=train_manager.num_batch)
        for batch_data in bar(train_manager.iter_batch()):
            batch_loss, summaries, global_step = train_model.train_step(sess, batch_data)
            # add summaries to tensorboard
            summary_writer.add_summary(summaries, global_step)
            train_loss.append(batch_loss)
        print "Epoch %d finished. Loss: %.4f" % (epoch, np.mean(train_loss))

        # dev
        querys = []
        predicts = []
        goldens = []
        for batch_data in dev_manager.iter_batch():
            encoder_inputs, decoder_inputs, encoder_lengths, decoder_lengths = batch_data
            query_id, golden_id, predict_id = dev_model.eval_step(sess, batch_data)
            querys.extend(query_id)
            goldens.extend(golden_id)
            predicts.extend(predict_id)
        # dev_p,dev_r,dev_f1 = utils.evaluate_scores(querys,predicts,goldens,id2item,FLAGS.out_dir)
        dev_p, dev_r, dev_f1 = utils.evaluate_scores2(querys, predicts, dev_goldens, goldens, id2item, FLAGS.out_dir)
        print "Dev precision / recall / f1 score: %.2f / %.2f / %.2f" % (dev_p, dev_r, dev_f1)

        # test
        querys = []
        predicts = []
        goldens = []
        for batch_data in test_manager.iter_batch():
            encoder_inputs, decoder_inputs, encoder_lengths, decoder_lengths = batch_data
            query_id, golden_id, predict_id = test_model.eval_step(sess, batch_data)
            querys.extend(query_id)
            goldens.extend(golden_id)
            predicts.extend(predict_id)
        test_p, test_r, test_f1 = utils.evaluate_scores(querys, predicts, goldens, id2item, FLAGS.out_dir)
        #test_p, test_r, test_f1 = utils.evaluate_scores2(querys, predicts, test_goldens,goldens, id2item, FLAGS.out_dir)
        print "Test precision / recall / f1 score: %.2f / %.2f / %.2f" % (test_p, test_r, test_f1)
        print "Best dev f1: %.2f test f1: %.2f epoch:%d\n" % (best_dev_f1, best_test_f1, int(sess.run(best_epoch)))

        summary_writer.add_summary(sess.run(dev_f1_summary, feed_dict={dev_f1_value: dev_f1}), global_step=epoch + 1)
        # feed the test F1 (not the dev F1) into the test summary
        summary_writer.add_summary(sess.run(test_f1_summary, feed_dict={test_f1_value: test_f1}), global_step=epoch + 1)

        if dev_f1 > best_dev_f1:
            best_dev_f1 = dev_f1
            best_test_f1 = test_f1
            sess.run(best_epoch.assign(epoch))
            sess.run(best_dev_score.assign(best_dev_f1))
            sess.run(best_test_score.assign(best_test_f1))
            saver.save(sess, os.path.join(model_dir, FLAGS.model_name))
            no_improve = 0
        else:
            no_improve = no_improve + 1
            if no_improve >= FLAGS.max_no_improve:
                break
    print "Best dev f1: %.2f test f1: %.2f epoch:%d" % (best_dev_f1, best_test_f1, int(best_epoch.eval()))
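# --- Hedged sketch (assumption, not the project's utils.evaluate_scores) ----
# One plausible reading of the precision / recall / F1 reported above is set
# overlap between predicted and gold keyword id sequences, micro-averaged over
# all queries; `micro_prf` is a hypothetical illustration of that metric.
def micro_prf(predicts, goldens):
    tp = fp = fn = 0
    for pred, gold in zip(predicts, goldens):
        pred_set, gold_set = set(pred), set(gold)
        tp += len(pred_set & gold_set)
        fp += len(pred_set - gold_set)
        fn += len(gold_set - pred_set)
    p = tp / float(tp + fp) if tp + fp else 0.0
    r = tp / float(tp + fn) if tp + fn else 0.0
    f1 = 2 * p * r / (p + r) if p + r else 0.0
    return p, r, f1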