# Example #1
    def predict(self, keywords):
        """Generate one sentence per keyword, feeding previously generated
        sentences back in as decoder context.

        Args:
            keywords: iterable of keyword strings, one per target sentence.

        Returns:
            list of generated sentence strings, in keyword order.
        """
        sentences = []
        for keyword in keywords:
            content_inputs, content_inputs_length, keyword_inputs, keyword_length = \
                prepare_batch_predict_data(
                    keyword,
                    previous=sentences,
                    prev=FLAGS.prev_data,
                    rev=FLAGS.rev_data,
                    align=FLAGS.align_data)

            predicted_batch = self.model.predict(self.sess, content_inputs,
                                                 content_inputs_length,
                                                 keyword_inputs,
                                                 keyword_length)

            # predicted_batch is a batch of exactly one line.
            predicted_line = predicted_batch[0]
            predicted_line_clean = predicted_line[:-1]  # remove the end token
            # Flatten from [time_step, 1] to [time_step]. A list comprehension
            # (not map) yields a real list under Python 3 and matches the
            # sibling predict implementations in this file.
            predicted_ints = [x[0] for x in predicted_line_clean]
            predicted_sentence = ints_to_sentence(predicted_ints)

            # Training data may have been reversed; undo that here.
            if FLAGS.rev_data:
                predicted_sentence = predicted_sentence[::-1]

            sentences.append(predicted_sentence)
        return sentences
# Example #2
    def predict(self, keywords):
        """Generate one sentence per (normalized) keyword, feeding previously
        generated sentences back in as context.

        Args:
            keywords: iterable of keyword strings; normalized via
                ``self.Normalize`` before generation.

        Returns:
            list of generated sentence strings, in keyword order.
        """
        sentences = []
        keywords = self.Normalize(keywords)  # add by zjg
        for keyword in keywords:
            source, source_len = prepare_batch_predict_data(
                keyword,
                previous=sentences,
                prev=FLAGS.prev_data,
                rev=FLAGS.rev_data,
                align=FLAGS.align_data)
            # Run inference under this instance's own session/graph so multiple
            # models can coexist in one process.
            with self.sess.as_default():
                with self.graphpre.as_default():
                    predicted_batch = self.model.predict(
                        self.sess,
                        encoder_inputs=source,
                        encoder_inputs_length=source_len)

            # predicted_batch is a batch of exactly one line.
            predicted_line = predicted_batch[0]
            predicted_line_clean = predicted_line[:-1]  # remove the end token
            # Flatten from [time_step, 1] to [time_step]. A list comprehension
            # (not map) yields a real list under Python 3 and matches the
            # sibling predict implementations in this file.
            predicted_ints = [x[0] for x in predicted_line_clean]
            predicted_sentence = ints_to_sentence(predicted_ints)

            # Training data may have been reversed; undo that here.
            if FLAGS.rev_data:
                predicted_sentence = predicted_sentence[::-1]

            sentences.append(predicted_sentence)
        return sentences
# Example #3
    def predict_with_couplet(self, keywords):
        """Generate sentences for every other keyword and, for each one, fetch
        a matching couplet line from a local couplet service over TCP.

        For each generated sentence, the first sentence plus the new sentence
        is sent (UTF-8) to a service on 127.0.0.1:12345, and the decoded reply
        is appended as the paired line, so the result alternates
        generated/paired sentences.

        Args:
            keywords: iterable of keyword strings; only every second keyword
                (``keywords[::2]``) is used.

        Returns:
            list of sentence strings: generated and paired lines interleaved.
        """
        sentences = []
        for keyword in keywords[::2]:
            source, source_len = prepare_batch_predict_data(
                keyword,
                previous=sentences,
                prev=FLAGS.prev_data,
                rev=FLAGS.rev_data,
                align=FLAGS.align_data)

            predicted_batch = self.model.predict(
                self.sess,
                encoder_inputs=source,
                encoder_inputs_length=source_len)

            # predicted_batch is a batch of exactly one line.
            predicted_line = predicted_batch[0]
            predicted_line_clean = predicted_line[:-1]  # remove the end token
            # Flatten from [time_step, 1] to [time_step]; list comprehension
            # gives a real list under Python 3 (map would return an iterator).
            predicted_ints = [x[0] for x in predicted_line_clean]
            predicted_sentence = ints_to_sentence(predicted_ints)

            if FLAGS.rev_data:
                predicted_sentence = predicted_sentence[::-1]

            sentences.append(predicted_sentence)

            # Ask the couplet service for the paired line. try/finally
            # guarantees the socket is closed even if send/recv raises
            # (the original leaked the socket on error).
            self.soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                self.soc.connect(("127.0.0.1", 12345))
                # The string must be encoded to bytes before sending.
                self.soc.send((sentences[0] + predicted_sentence).encode("utf8"))
                # The argument is the maximum response size in bytes.
                result_bytes = self.soc.recv(4096)
            finally:
                self.soc.close()

            # The reply arrives as bytes, so decode it.
            pair_sentence = result_bytes.decode("utf8")
            sentences.append(pair_sentence)

        return sentences
# Example #4
    def predict(self, keywords):
        """Generate one sentence per keyword, feeding previously generated
        sentences back in as context.

        Args:
            keywords: iterable of keyword strings, one per target sentence.

        Returns:
            list of generated sentence strings, in keyword order.
        """
        sentences = []
        # Plain iteration: the index was only needed by a removed
        # rhyme-matching experiment that lived here as commented-out code.
        for keyword in keywords:
            source, source_len = prepare_batch_predict_data(
                keyword,
                previous=sentences,
                prev=FLAGS.prev_data,
                rev=FLAGS.rev_data,
                align=FLAGS.align_data)

            predicted_batch = self.model.predict(
                self.sess,
                encoder_inputs=source,
                encoder_inputs_length=source_len)

            # predicted_batch is a batch of exactly one line.
            predicted_line = predicted_batch[0]
            predicted_line_clean = predicted_line[:-1]  # remove the end token
            # Flatten from [time_step, 1] to [time_step].
            predicted_ints = [x[0] for x in predicted_line_clean]
            predicted_sentence = ints_to_sentence(predicted_ints)

            # Training data may have been reversed; undo that here.
            if FLAGS.rev_data:
                predicted_sentence = predicted_sentence[::-1]

            sentences.append(predicted_sentence)
        return sentences
    def predict(self, keywords):
        """Generate one sentence per keyword, with optional beam-search
        decoding and LM-based re-ranking of candidates.

        When ``FLAGS.beam_width > 1``, each beam candidate is decoded (stopping
        at the end token, id ``get_vocab_size() - 2``), optionally re-ranked by
        ``self.evaluator``, and one candidate is chosen at random. Otherwise
        greedy decoding is used.

        Args:
            keywords: iterable of keyword strings, one per target sentence.

        Returns:
            list of generated sentence strings with all spaces removed.
        """
        sentences = []
        sentences_no_space = []
        for keyword in keywords:
            source, source_len = prepare_batch_predict_data(
                keyword,
                previous=sentences,
                prev=FLAGS.prev_data,
                rev=FLAGS.rev_data,
                align=FLAGS.align_data,
                keep_prev_num=FLAGS.keep_prev_num,
                sen_pad_len=FLAGS.sen_pad_len,
                keyword_pad_len=FLAGS.keyword_pad_len)

            predicted_batch = self.model.predict(
                self.sess,
                encoder_inputs=source,
                encoder_inputs_length=source_len)

            # Parse decoding results.
            if FLAGS.beam_width > 1:
                predicted_line = predicted_batch[0]
                sent_set = []

                for i in range(FLAGS.beam_width):
                    # Collect token ids for beam i up to the end token.
                    predicted_ints = []
                    for j in range(len(predicted_line)):
                        if predicted_line[j][i] == get_vocab_size() - 2:
                            break
                        predicted_ints.append(predicted_line[j][i])

                    # Reverse first, then map ints to words.
                    if FLAGS.rev_data:
                        predicted_ints = predicted_ints[::-1]

                    sent_set.append(ints_to_sentence(predicted_ints))

                # Re-rank all candidates with the LM, then pick one at random
                # (excluding the last, lowest-ranked candidate when possible).
                if self.re_ranking:
                    sent_set = self.evaluator(sent_set, self.n)
                    idx = 0
                    if len(sent_set) > 1:
                        idx = randint(0, len(sent_set) - 2)
                else:
                    idx = randint(0, len(sent_set) - 1)

                sentences.append(sent_set[idx])

            else:
                # predicted_batch is a batch of exactly one line.
                predicted_line = predicted_batch[0]
                predicted_line_clean = predicted_line[:-1]  # remove end token
                # Flatten from [time_step, 1] to [time_step]. Must be a list:
                # the original used map(), whose Python 3 iterator cannot be
                # sliced by the [::-1] reversal below (TypeError).
                predicted_ints = [x[0] for x in predicted_line_clean]

                if FLAGS.rev_data:
                    predicted_ints = predicted_ints[::-1]

                sentences.append(ints_to_sentence(predicted_ints))

        # Remove spaces from every sentence; str.replace suffices here
        # (no regex features needed).
        for s in sentences:
            sentences_no_space.append(s.replace(' ', ''))

        return sentences_no_space