Example #1
import sys

import tensorflow as tf

num = 0
count = 0
for line in sys.stdin:
    # Log progress every 1000 input lines.
    if num % 1000 == 0:
        print(num, file=sys.stderr)
    num += 1
    fields = line.rstrip().split('\t')
    img = fields[0]
    # Columns 1..IMAGE_FEATURE_LEN hold the image feature vector; the
    # remaining columns are captions, each ending in a '\x01'-separated payload.
    img_end = IMAGE_FEATURE_LEN + 1
    img_feature = [float(x) for x in fields[1:img_end]]
    texts = [x.split('\x01')[0] for x in fields[img_end:]]
    for text in texts:
        words = segmentor.Segment(text, FLAGS.seg_method)
        # Keep only in-vocabulary words unless unknown words are encoded.
        word_ids = [
            vocabulary.id(word) for word in words
            if vocabulary.has(word) or ENCODE_UNK
        ]
        word_ids_length = len(word_ids)
        if num % 1000 == 0:
            #print(libgezi.gbk2utf8('\t'.join(words)), file=sys.stderr)
            print('\t'.join(words), file=sys.stderr)
            print(word_ids, file=sys.stderr)
        if not word_ids:
            continue
        # Truncate, then optionally zero-pad to a fixed length.
        word_ids = word_ids[:TEXT_MAX_WORDS]
        if FLAGS.pad:
            word_ids = gezi.pad(word_ids, TEXT_MAX_WORDS, 0)

        if writer is not None:
            # The original snippet breaks off after `feature={`; the keys
            # 'image_feature' and 'text' below are assumed for illustration,
            # as is `writer` being a TFRecordWriter.
            example = tf.train.Example(features=tf.train.Features(
                feature={
                    'image_feature': tf.train.Feature(
                        float_list=tf.train.FloatList(value=img_feature)),
                    'text': tf.train.Feature(
                        int64_list=tf.train.Int64List(value=word_ids)),
                }))
            writer.write(example.SerializeToString())
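
A record written this way can be read back with the standard TFRecord parsing API. A minimal sketch, assuming the 'image_feature' and 'text' keys from the completion above and that FLAGS.pad fixed every text to TEXT_MAX_WORDS ids:

import tensorflow as tf

def parse_record(serialized):
    # Fixed-length parsing only works because every example was padded to
    # TEXT_MAX_WORDS ids above; the feature keys are assumptions.
    parsed = tf.io.parse_single_example(
        serialized,
        features={
            'image_feature': tf.io.FixedLenFeature([IMAGE_FEATURE_LEN],
                                                   tf.float32),
            'text': tf.io.FixedLenFeature([TEXT_MAX_WORDS], tf.int64),
        })
    return parsed['image_feature'], parsed['text']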
Example #2
import numpy as np

# Load the trained checkpoint for inference.
predictor = melt.Predictor('./model.ckpt-12000')

#vocabulary.init()
#vocab = vocabulary.vocab
vocab = Vocabulary(FLAGS.vocab, NUM_RESERVED_IDS)

ids_list = []
text_list = []
with open('./test.txt') as f:
    for line in f:
        # The caption is the last tab-separated column.
        text = line.strip().split('\t')[-1]
        text_list.append(text)
        # Map words to vocabulary ids, keeping out-of-vocabulary words
        # only when unknown words are encoded.
        ids = [
            vocab.id(word) for word in text.split(WORDS_SEP)
            if vocab.has(word) or ENCODE_UNK
        ]
        ids = gezi.pad(ids, TEXT_MAX_WORDS)
        ids_list.append(ids)
ids_list = np.array(ids_list)


def bulk_predict(predictor, images, texts):
    # Feed the image features and padded text ids into the graph's input
    # placeholders and fetch the 'score' tensor for every (image, text) pair.
    scores = predictor.inference(
        'score', {
            '%s/%s' % (FLAGS.algo, FLAGS.image_feature_place): images,
            '%s/%s' % (FLAGS.algo, FLAGS.text_place): texts
        })
    return scores
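
A usage sketch: assuming image_features_list is a NumPy array of precomputed image features aligned with test.txt (hypothetical, not shown in the snippet) and that the returned scores form an image-by-text matrix, the best-scoring caption per image can be picked like this:

# Hypothetical usage: `image_features_list` and the score-matrix shape
# are assumptions, not part of the original snippet.
scores = bulk_predict(predictor, image_features_list, ids_list)
for i, row in enumerate(scores):
    best = int(np.argmax(row))
    print(i, text_list[best], row[best])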