def sample_targets(input_phrase, model, n_samples, reverse_score, normalize,
                   n_best=10):
    """Sample candidate translations for ``input_phrase`` and return the best.

    :param input_phrase: binarized source phrase (sequence of word indices).
    :param model: list of
        [lm_model, enc_dec, indx_word_src, indx_word_trgt, state,
         lm_model_fr_2_en, enc_dec_fr_2_en, state_fr2en].
    :param n_samples: number of candidate translations to draw.
    :param reverse_score: if True, average each candidate's score with the
        score given by the reverse-direction (fr->en) model.
    :param normalize: if True, divide the winning score by
        log(len(input_phrase) + 1) to reduce the bias toward short outputs.
    :param n_best: number of lowest-scoring (best) candidates kept for
        rescoring/selection (previously hard-coded to 10).
    :return: tuple (best translation, its final score).
    """
    [lm_model, enc_dec, indx_word_src, indx_word_trgt, state,
     lm_model_fr_2_en, enc_dec_fr_2_en, state_fr2en] = model
    beam_search = BeamSearch(enc_dec)
    beam_search.compile()
    sampler = enc_dec.create_sampler(many_samples=True)

    # sample_func can take argument: normalize (bool)
    trans, scores, trans_bin = cached_sample_func(
        lm_model, input_phrase, n_samples,
        sampler=sampler, beam_search=beam_search)

    # Keep the n_best lowest-scoring candidates.  A single stable argsort
    # keeps the three parallel lists aligned; the previous code sorted
    # zip(scores, trans) and zip(scores, trans_bin) independently, which on
    # score ties could order trans and trans_bin differently and misalign
    # the candidates from their binarized forms.
    order = numpy.argsort(scores, kind='stable')[:n_best]
    trans = [trans[i] for i in order]
    trans_bin = [trans_bin[i] for i in order]
    scores = [scores[i] for i in order]

    # Optional reverse scoring of the selected phrases: score each candidate
    # (as source) against the original input phrase (as target) with the
    # reverse model, then average both directions.
    if reverse_score:
        reverse_scorer = enc_dec_fr_2_en.create_scorer(batch=True)
        source_phrases_to_reverse_score = [input_phrase] * len(trans_bin)
        target_phrases_to_reverse_score = list(trans_bin)
        state_fr2en['seqlen'] = 1000  # allow long sequences when padding
        x, x_mask, y, y_mask = create_padded_batch(
            state_fr2en,
            [numpy.asarray(target_phrases_to_reverse_score)],
            [numpy.asarray(source_phrases_to_reverse_score)])
        reverse_scores = -reverse_scorer(
            numpy.atleast_2d(x),
            numpy.atleast_2d(y),
            numpy.atleast_2d(x_mask),
            numpy.atleast_2d(y_mask))[0]
        for index in xrange(len(scores)):
            scores[index] = (scores[index] + reverse_scores[index]) / 2.
    # (A no-op else-branch that re-assigned scores[index] to itself was
    # removed; without reverse scoring the scores are used unchanged.)

    best = numpy.argmin(scores)
    trans = trans[best]
    score = scores[best]  # equals numpy.min(scores)
    if normalize:
        # Length penalty: shorter sources get a smaller normalizer.
        final_score = score / numpy.log(len(input_phrase) + 1)
    else:
        final_score = score
    return trans, final_score
def main(): args = parse_args() state = prototype_phrase_state() with open(args.state) as src: state.update(cPickle.load(src)) state.update(eval("dict({})".format(args.changes))) logging.basicConfig( level=getattr(logging, state['level']), format="%(asctime)s: %(name)s: %(levelname)s: %(message)s") server_address = ('', args.port) httpd = ThreadedHTTPServer(server_address, MTReqHandler) #httpd = BaseHTTPServer.HTTPServer(server_address, MTReqHandler) rng = numpy.random.RandomState(state['seed']) enc_dec = RNNEncoderDecoder(state, rng, skip_init=True) enc_dec.build() lm_model = enc_dec.create_lm_model() lm_model.load(args.model_path) indx_word = cPickle.load(open(state['word_indx'], 'rb')) sampler = None beam_search = None if args.beam_search: beam_search = BeamSearch(enc_dec) beam_search.compile() else: sampler = enc_dec.create_sampler(many_samples=True) idict_src = cPickle.load(open(state['indx_word'], 'r')) tokenizer_cmd = [os.getcwd() + '/tokenizer.perl', '-l', 'en', '-q', '-'] detokenizer_cmd = [ os.getcwd() + '/detokenizer.perl', '-l', 'fr', '-q', '-' ] sampler = Sampler(state, lm_model, indx_word, idict_src, beam_search=beam_search, tokenizer_cmd=tokenizer_cmd, detokenizer_cmd=detokenizer_cmd) httpd.sampler = sampler print 'Server starting..' httpd.serve_forever() '''