Example #1
import argparse

# common_io, make_data, prepare and get_base_param are project-level helpers
# defined elsewhere in this repository.

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--make_data', action='store_true')
    args = parser.parse_args()

    base_param = get_base_param()
    if args.make_data:
        make_data(base_param)
    prepare(base_param)

    # Save the parameter dict so later stages can reload the same config.
    common_io.dump_to_file(base_param['param_save_path'], base_param)
    print('have a good day')
# Map output file names to their label lists (label_list1 and label_list2
# are loaded earlier in the original script).
data_set = {}
data_set['image_text.train'] = label_list1
data_set['image_text.val'] = label_list2


# Each label line is "<image>\t<caption>"; collect the caption text,
# or an empty string when the caption column is missing.
vocab_list = []
for line in label_list1 + label_list2:
    tmp = line.split('\t')
    if len(tmp) > 1:
        vocab_list.append(tmp[1])
    else:
        vocab_list.append("")

misc = statistic_vocabulary.preProBuildWordVocab(vocab_list)
common_io.dump_to_file(vocab_out, misc)

# misc = common_io.load_from_file(vocab_out)
wordtoix = misc['wordtoix']
print('vocab size: %d' % len(wordtoix))

for file_name, label_list in data_set.items():
    print('Starting %s' % file_name)

    ix_label_out_file = nlpcaffe_data_out_dir + file_name

    ix_label_out = []

    for i in range(len(label_list)):
        labels = label_list[i]
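        # --- Hypothetical continuation: the original example is truncated
        # here. A minimal sketch, assuming the same "<image>\t<caption>" line
        # format and the wordtoix mapping built above; dropping
        # out-of-vocabulary words and the plain open() call are assumptions,
        # not from the source. ---
        parts = labels.split('\t')
        caption = parts[1] if len(parts) > 1 else ""
        ix_label_out.append(' '.join(str(wordtoix[w])
                                     for w in caption.split()
                                     if w in wordtoix))

    # Write one space-separated index sequence per label line.
    with open(ix_label_out_file, 'w') as f:
        f.write('\n'.join(ix_label_out))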
Example #3
def main():
    # Read the raw label lines, build the word vocabulary, and persist it.
    # text_file_in, word_count_threshold and vocab_out are module-level
    # settings defined elsewhere in the original script.
    label_list = common_io.read_txt_lines(text_file_in, remove_space=True)
    misc = preProBuildWordVocab(label_list, word_count_threshold)
    common_io.dump_to_file(vocab_out, misc)
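
Neither example shows preProBuildWordVocab itself. For reference, here is a minimal sketch of what such a vocabulary builder typically does, assuming it thresholds words by occurrence count and returns a dict containing wordtoix and ixtoword maps; the function name below, the <unk> token, and every key other than wordtoix (the one read back in Example #1) are assumptions, not taken from the original.

def pre_pro_build_word_vocab_sketch(sentences, word_count_threshold):
    # Count word occurrences across all sentences.
    counts = {}
    for sent in sentences:
        for w in sent.split():
            counts[w] = counts.get(w, 0) + 1
    # Keep words at or above the threshold; reserve index 0 for <unk>.
    vocab = [w for w, c in sorted(counts.items()) if c >= word_count_threshold]
    wordtoix = {'<unk>': 0}
    ixtoword = {0: '<unk>'}
    for ix, w in enumerate(vocab, start=1):
        wordtoix[w] = ix
        ixtoword[ix] = w
    return {'wordtoix': wordtoix, 'ixtoword': ixtoword}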