Example #1
0
def sample_targets(input_phrase, model, n_samples, reverse_score, normalize):
    """Sample candidate translations for ``input_phrase`` and return the best.

    :param input_phrase: source sentence (already mapped to model indices)
    :param model: tuple of
        (lm_model, enc_dec, indx_word_src, indx_word_trgt, state,
         lm_model_fr_2_en, enc_dec_fr_2_en, state_fr2en)
    :param n_samples: number of candidate translations to draw
    :param reverse_score: if True, average each forward score with the
        score of the reverse (fr->en) model
    :param normalize: if True, divide the best score by
        log(len(input_phrase) + 1) as a crude length normalization
    :return: (best_translation, final_score)
    """
    [lm_model, enc_dec, indx_word_src, indx_word_trgt, state, \
            lm_model_fr_2_en, enc_dec_fr_2_en, state_fr2en] = model

    beam_search = BeamSearch(enc_dec)
    beam_search.compile()
    sampler = enc_dec.create_sampler(many_samples=True)

    # sample_func can take argument: normalize (bool)
    trans, scores, trans_bin = cached_sample_func(lm_model,
                                                  input_phrase,
                                                  n_samples,
                                                  sampler=sampler,
                                                  beam_search=beam_search)

    # Keep the 10 lowest-scoring (best) candidates.
    # Warning: the selection size (10) is hard-coded.
    # A single argsort order is used for all three lists so that
    # trans/trans_bin/scores stay aligned even when scores tie
    # (the old independent sorted(zip(...)) calls could desynchronize
    # trans and trans_bin on tied scores).
    order = numpy.argsort(scores)[:10]
    trans = [trans[i] for i in order]
    trans_bin = [trans_bin[i] for i in order]
    scores = [scores[i] for i in order]

    # Reverse scoring of the selected phrases: score the (target -> source)
    # direction with the fr->en model and average with the forward score.
    if reverse_score:
        reverse_scorer = enc_dec_fr_2_en.create_scorer(batch=True)

        source_phrases_to_reverse_score = [input_phrase] * len(trans_bin)
        target_phrases_to_reverse_score = list(trans_bin)

        # Make sure no candidate is cut off by the batcher.
        state_fr2en['seqlen'] = 1000
        x, x_mask, y, y_mask = create_padded_batch(
            state_fr2en, [numpy.asarray(target_phrases_to_reverse_score)],
            [numpy.asarray(source_phrases_to_reverse_score)])

        # Scorer returns log-probabilities; negate to get costs
        # comparable with the forward scores.
        reverse_scores = -reverse_scorer(
            numpy.atleast_2d(x), numpy.atleast_2d(y), numpy.atleast_2d(x_mask),
            numpy.atleast_2d(y_mask))[0]

        scores = [(fwd + rev) / 2.
                  for fwd, rev in zip(scores, reverse_scores)]
    # (no else needed: without reverse scoring the forward scores are final)

    best = numpy.argmin(scores)
    trans = trans[best]
    score = scores[best]

    if normalize:
        final_score = score / numpy.log(len(input_phrase) + 1)
    else:
        final_score = score

    return trans, final_score
Example #2
0
def main():
    """Load a trained RNN encoder-decoder MT model and serve it over HTTP.

    Reads a pickled training state and model weights (paths from the
    command line), builds the encoder-decoder with beam search or a
    plain sampler, wraps it in a Sampler request handler, and runs a
    threaded HTTP server forever.
    """
    args = parse_args()

    # Base prototype state, overridden first by the pickled state file,
    # then by any "key=value, ..." overrides from the command line.
    state = prototype_phrase_state()
    with open(args.state) as src:
        state.update(cPickle.load(src))
    # NOTE(review): eval of a command-line string — acceptable only for a
    # trusted operator; never expose args.changes to untrusted input.
    state.update(eval("dict({})".format(args.changes)))

    logging.basicConfig(
        level=getattr(logging, state['level']),
        format="%(asctime)s: %(name)s: %(levelname)s: %(message)s")

    # Bind on all interfaces at the requested port.
    server_address = ('', args.port)
    httpd = ThreadedHTTPServer(server_address, MTReqHandler)
    #httpd = BaseHTTPServer.HTTPServer(server_address, MTReqHandler)

    # Build the model graph (skip_init: weights come from the saved file).
    rng = numpy.random.RandomState(state['seed'])
    enc_dec = RNNEncoderDecoder(state, rng, skip_init=True)
    enc_dec.build()
    lm_model = enc_dec.create_lm_model()
    lm_model.load(args.model_path)
    indx_word = cPickle.load(open(state['word_indx'], 'rb'))

    # Exactly one of beam_search / sampler is used, chosen by the flag.
    sampler = None
    beam_search = None
    if args.beam_search:
        beam_search = BeamSearch(enc_dec)
        beam_search.compile()
    else:
        sampler = enc_dec.create_sampler(many_samples=True)

    idict_src = cPickle.load(open(state['indx_word'], 'r'))

    # External Moses-style (de)tokenizer scripts, en -> fr direction.
    tokenizer_cmd = [os.getcwd() + '/tokenizer.perl', '-l', 'en', '-q', '-']
    detokenizer_cmd = [
        os.getcwd() + '/detokenizer.perl', '-l', 'fr', '-q', '-'
    ]
    # NOTE(review): this rebinds `sampler` (the raw sampler above is only
    # passed indirectly, via beam_search being None) — confirm intended.
    sampler = Sampler(state,
                      lm_model,
                      indx_word,
                      idict_src,
                      beam_search=beam_search,
                      tokenizer_cmd=tokenizer_cmd,
                      detokenizer_cmd=detokenizer_cmd)
    httpd.sampler = sampler

    print 'Server starting..'
    httpd.serve_forever()
    # NOTE(review): dangling triple-quote below — the rest of this string
    # (or commented-out block) is truncated in this view.
    '''