Example #1
File: pegasus.py  Project: lantip/Malaya
    def _summarize(
        self,
        strings,
        top_p=0.7,
        temperature=1.0,
        postprocess=True,
        **kwargs,
    ):
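        # Clean each input string, encode it to token ids with an appended EOS id (1),
        # right-pad the batch, run the exported graph, then decode (and optionally
        # postprocess) every generated summary.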

        strings_ = [summarization_textcleaning(string) for string in strings]
        batch_x = [self._tokenizer.encode(string) + [1] for string in strings_]
        batch_x = pad_sequences(batch_x, padding='post')

        r = self._execute(
            inputs=[batch_x, top_p, temperature],
            input_labels=['Placeholder', 'top_p', 'temperature'],
            output_labels=['logits'],
        )
        p = r['logits'].tolist()

        results = []
        for no, r in enumerate(p):
            summary = self._tokenizer.decode(r)
            if postprocess:
                summary = postprocess_summary(strings[no], summary, **kwargs)

            results.append(summary)

        return results
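
The batching step in Example #1 encodes each cleaned string, appends the end-of-sequence id 1, and right-pads every sequence to a common length before the graph is executed. A minimal sketch of that padding behaviour, assuming integer id lists; the pad_post helper and the token ids below are illustrative, not part of Malaya:

import sys

def pad_post(sequences, pad_id=0):
    # Right-pad every sequence to the length of the longest one, as padding='post' does.
    max_len = max(len(seq) for seq in sequences)
    return [seq + [pad_id] * (max_len - len(seq)) for seq in sequences]

encoded = [[12, 7, 3], [5, 9]]            # hypothetical token ids for two inputs
batch_x = [seq + [1] for seq in encoded]  # append the EOS id 1, as in _summarize
print(pad_post(batch_x))                  # [[12, 7, 3, 1], [5, 9, 1, 0]]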
Example #2
    def greedy_decoder(self, strings: List[str]):
        """
        Generate a long text given isi penting (key points).
        Decoder is a greedy decoder with beam width 1 and alpha 0.5.

        Parameters
        ----------
        strings: List[str]

        Returns
        -------
        result: str
        """

        points = [
            f'{no + 1}. {remove_repeat_fullstop(string)}.'
            for no, string in enumerate(strings)
        ]
        points = ' '.join(points)
        points = f'karangan: {points}'
        return upperfirst(self._predict(summarization_textcleaning(points)))
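
greedy_decoder numbers each point, normalises trailing full stops, and prefixes the joined text with the 'karangan:' control prefix before calling the model. A small sketch of that prompt construction, with a simplified stand-in for remove_repeat_fullstop; the helper body and the example strings are assumptions, not Malaya's implementation:

import re

def remove_repeat_fullstop_sketch(s):
    # Simplified stand-in: strip trailing full stops so exactly one can be appended.
    return re.sub(r'\.+$', '', s.strip())

strings = ['bumi semakin panas', 'hutan semakin berkurang']  # illustrative isi penting
points = ' '.join(
    f'{no + 1}. {remove_repeat_fullstop_sketch(s)}.' for no, s in enumerate(strings)
)
print(f'karangan: {points}')
# karangan: 1. bumi semakin panas. 2. hutan semakin berkurang.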