import os
import time

import tensorflow as tf

# DataUtil, CaptionModel, SamModel and the exe_train / exe_test /
# beamsearch_exe_test / evaluate_mode_by_shell helpers are defined elsewhere
# in this repository.


def main(hf,
         f_type,
         capl=16,
         d_w2v=512,
         output_dim=512,
         feature_shape=None,
         lr=0.01,
         batch_size=64,
         total_epoch=100,
         file=None,
         pretrained_model=None):
    '''
    capl: length of each (padded) caption, in tokens
    '''

    # Create vocabulary
    v2i, train_data, val_data, test_data = DataUtil.create_vocabulary_word2vec(
        file, capl=capl, v2i={
            '': 0,
            'UNK': 1,
            'BOS': 2,
            'EOS': 3
        })
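
    # Ids 0-3 are reserved for the pad (''), unknown ('UNK'), begin-of-sentence
    # ('BOS') and end-of-sentence ('EOS') tokens.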

    i2v = {i: v for v, i in v2i.items()}

    print('building model ...')
    voc_size = len(v2i)
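
    # Graph inputs: batched video features, decoder input token ids, and the
    # per-timestep target distribution.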

    input_video = tf.placeholder(tf.float32,
                                 shape=(None, ) + feature_shape,
                                 name='input_video')
    input_captions = tf.placeholder(tf.int32,
                                    shape=(None, capl),
                                    name='input_captions')
    # One-hot targets must be float to match the logits' dtype in
    # softmax_cross_entropy_with_logits.
    y = tf.placeholder(tf.float32, shape=(None, capl, voc_size), name='y')

    captionModel = CaptionModel.CaptionModel(input_video, input_captions,
                                             voc_size, d_w2v, output_dim)
    predict_score, predict_words = captionModel.build_model()
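
    # The objective below: mean per-token cross-entropy plus any L2 /
    # weight-decay terms collected under REGULARIZATION_LOSSES.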
    loss = tf.nn.softmax_cross_entropy_with_logits(labels=y,
                                                   logits=predict_score)
    loss = tf.reduce_mean(loss) + sum(
        tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
    optimizer = tf.train.RMSPropOptimizer(lr,
                                          decay=0.9,
                                          momentum=0.0,
                                          epsilon=1e-8)
    train = optimizer.minimize(loss)
    # Configure the runtime environment: cap per-process GPU memory and
    # disable device-placement logging.
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.3
    # sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    config.log_device_placement = False

    sess = tf.Session(config=config)

    init = tf.global_variables_initializer()
    sess.run(init)

    with sess.as_default():
        saver = tf.train.Saver(sharded=True, max_to_keep=total_epoch)
        if pretrained_model is not None:
            saver.restore(sess, pretrained_model)
            print('restored pretrained model: ' + pretrained_model)

        for epoch in range(total_epoch):
            # shuffle
            print('Epoch: %d/%d, Batch_size: %d' %
                  (epoch + 1, total_epoch, batch_size))
            # # train phase
            tic = time.time()
            total_loss = exe_train(sess,
                                   train_data,
                                   batch_size,
                                   v2i,
                                   hf,
                                   feature_shape,
                                   train,
                                   loss,
                                   input_video,
                                   input_captions,
                                   y,
                                   capl=capl)

            print('    --Train--, Loss: %.5f, .......Time:%.3f' %
                  (total_loss, time.time() - tic))

            tic = time.time()
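            # Generate captions on test_data after each epoch (logged below
            # as '--Val--').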
            js = exe_test(sess,
                          test_data,
                          batch_size,
                          v2i,
                          i2v,
                          hf,
                          feature_shape,
                          predict_words,
                          input_video,
                          input_captions,
                          y,
                          capl=capl)
            print('    --Val--, .......Time:%.3f' % (time.time() - tic))

            # save model
            export_path = os.path.join('/home/xyj/usr/local/saved_model/caption',
                                       's2s_' + f_type,
                                       'lr' + str(lr) + '_f' + str(feature_shape[0]))
            if not os.path.exists(export_path + '/model'):
                os.makedirs(export_path + '/model')
                print('mkdir %s' % (export_path + '/model'))
            if not os.path.exists(export_path + '/res'):
                os.makedirs(export_path + '/res')
                print('mkdir %s' % (export_path + '/res'))

            # eval
            res_path = export_path + '/res/' + f_type + '_E' + str(epoch + 1) + '.json'
            evaluate_mode_by_shell(res_path, js)

            save_path = saver.save(
                sess, export_path + '/model/' + 'E' + str(epoch + 1) + '_L' +
                str(total_loss) + '.ckpt')
            print("Model saved in file: %s" % save_path)
# Example 2

def main(hf, f_type,
		reduction_dim=512,
		centers_num=32, capl=16, d_w2v=512, output_dim=512,
		feature_shape=None, lr=0.01,  # assumed defaults; both are referenced below
		batch_size=64, total_epoch=100,
		file=None, pretrained_model=None):

	# Create vocabulary
	v2i, train_data, val_data, test_data = DataUtil.create_vocabulary_word2vec(file=file, capl=capl,  v2i={'': 0, 'UNK':1,'BOS':2, 'EOS':3})

	i2v = {i:v for v,i in v2i.items()}

	print('building model ...')
	voc_size = len(v2i)

	input_video = tf.placeholder(tf.float32, shape=(None,) + feature_shape, name='input_video')
	input_captions = tf.placeholder(tf.int32, shape=(None, capl), name='input_captions')
	y = tf.placeholder(tf.int32, shape=(None, capl))
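	# y holds integer class ids per timestep (sparse labels), matching
	# sparse_softmax_cross_entropy_with_logits below.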

	if args.soft:  # 'args' is expected to come from a module-level argument parser
		captionModel = SamModel.SoftModel(input_video, input_captions, voc_size, d_w2v, output_dim,
								reduction_dim=reduction_dim,
								centers_num=centers_num, 
								done_token=v2i['EOS'], max_len = capl, beamsearch_batchsize = 1, beam_size=5)

	else:
		captionModel = SamModel.HardModel(input_video, input_captions, voc_size, d_w2v, output_dim,
									reduction_dim=reduction_dim,
									centers_num=centers_num, 
									done_token=v2i['EOS'], max_len = capl, beamsearch_batchsize = 1, beam_size=5)

	predict_score, predict_words, loss_mask, finished_beam, logprobs_finished_beams, past_symbols = captionModel.build_model()
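	# Besides the training logits, build_model returns the loss mask and the
	# beam-search state tensors consumed by beamsearch_exe_test below.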
	loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=predict_score)

	# Mask out padded timesteps and average each sequence's loss over its
	# number of real tokens.
	loss = tf.reduce_sum(loss * loss_mask, axis=-1) / tf.reduce_sum(loss_mask, axis=-1)

	loss = tf.reduce_mean(loss) + sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

	optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.9, beta2=0.999, epsilon=1e-08, use_locking=False, name='Adam')

	# Clip all gradients jointly by their global norm (threshold 10), rather
	# than clipping each gradient tensor by its own norm.
	gvs = optimizer.compute_gradients(loss)
	grads, tvars = zip(*gvs)
	clipped_grads, _ = tf.clip_by_global_norm(grads, 10.0)
	train = optimizer.apply_gradients(list(zip(clipped_grads, tvars)))

	tf.summary.scalar('cross_entropy',loss)

	# Configure the runtime environment: cap per-process GPU memory and
	# disable device-placement logging.
	config = tf.ConfigProto()
	config.gpu_options.per_process_gpu_memory_fraction = 0.6
	config.log_device_placement=False

	sess = tf.Session(config=config)

	init = tf.global_variables_initializer()
	sess.run(init)
	
	# TensorBoard configuration.
	merged = tf.summary.merge_all()
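	# 'merged' bundles every registered summary (here the 'cross_entropy'
	# scalar) into a single op that exe_train evaluates and logs via train_writer.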
	export_path = 'saved_model/mvad/' + f_type + '/lr' + str(lr) + '_f' + str(feature_shape[0]) + '_B' + str(batch_size)

	if not os.path.exists(export_path + '/log'):
		os.makedirs(export_path + '/log')
		print('mkdir %s' % (export_path + '/log'))

	print('building writer')
	train_writer = tf.summary.FileWriter(export_path + '/log', sess.graph)
	with sess.as_default():
		saver = tf.train.Saver(sharded=True,max_to_keep=total_epoch)
		if pretrained_model is not None:
			saver.restore(sess, pretrained_model)
			print('restored pretrained model: ' + pretrained_model)

		for epoch in range(total_epoch):
			# shuffle
			print('Epoch: %d/%d, Batch_size: %d' %(epoch+1,total_epoch,batch_size))
			# # train phase
			tic = time.time()
			# The trailing keyword arguments (bidirectional, step, f_feature, ...)
			# are expected to be defined elsewhere in this script (module-level
			# configuration).
			total_loss = exe_train(sess, train_data, epoch, batch_size, v2i, hf, feature_shape, train, loss,
				input_video, input_captions, y, merged, train_writer,
				bidirectional=bidirectional, step=step, capl=capl, f_feature=f_feature, b_feature=b_feature,
				avg_feature=avg_feature, input_feature=input_feature, cond1=cond1, threshold=threshold,
				temp_t=temp_t, alpha_t=alpha_t)

			print('    --Train--, Loss: %.5f, .......Time:%.3f' %(total_loss,time.time()-tic))

			# tic = time.time()
			# js = exe_test(sess, test_data, batch_size, v2i, i2v, hf, feature_shape, 
			# 							predict_words, input_video, input_captions, y, step=step, capl=capl)
			# print('    --Val--, .......Time:%.3f' %(time.time()-tic))


			# beamsearch
			tic = time.time()
			print('beam searching ...')
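			# Decode with batch size 1, matching beamsearch_batchsize=1 used
			# when the model graph was built above.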
			js = beamsearch_exe_test(sess, test_data, 1, v2i, i2v, hf, feature_shape, 
										predict_words, input_video, input_captions, y, finished_beam, logprobs_finished_beams, past_symbols, step=step, capl=capl)
			print('    --Val--, .......Time:%.3f' %(time.time()-tic))

			# save model
			if not os.path.exists(export_path + '/model'):
				os.makedirs(export_path + '/model')
				print('mkdir %s' % (export_path + '/model'))
			if not os.path.exists(export_path + '/res'):
				os.makedirs(export_path + '/res')
				print('mkdir %s' % (export_path + '/res'))

			# eval
			res_path = export_path+'/res/E'+str(epoch+1)+'.json'
			evaluate_mode_by_shell(res_path,js)


			save_path = saver.save(sess, export_path+'/model/'+'E'+str(epoch+1)+'_L'+str(total_loss)+'.ckpt')
			print("Model saved in file: %s" % save_path)