Example #1
    def load(self, device: str):
        """
        Load user-selected task-specific model

        Args:
            device (str): device information

        Returns:
            object: User-selected task-specific model

        """
        if "bert" in self.config.n_model:
            from transformers import RobertaTokenizer
            model_path = download_or_load(
                f"bert/{self.config.n_model}",
                self.config.lang,
            )

            config = torch.load(f"{model_path}/config.pt")
            tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

            model = RobertaEncoder.from_pretrained(
                device=device,
                model_path=model_path,
                tokenizer=tokenizer,
                config=config,
            ).eval().to(device)

            return PororoBertMovie(model, self.config)
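
A factory load like this is normally reached through Pororo's top-level API rather than invoked directly. A minimal usage sketch, with the task and model identifiers treated as assumptions rather than values taken from this example:

from pororo import Pororo

# the task name below is illustrative, not taken from the code above
sentiment = Pororo(task="sentiment", lang="en")
print(sentiment("What a fantastic movie!"))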
Example #2
    def load(self, device: str):
        """
        Load user-selected task-specific model

        Args:
            device (str): device information

        Returns:
            object: User-selected task-specific model

        """
        if self.config.n_model == "koparadigm":
            try:
                from koparadigm import Paradigm
            except ModuleNotFoundError as error:
                raise error.__class__(
                    "Please install koparadigm with: `pip install koparadigm`")
            model = Paradigm()
            return PororoKoParadigm(model, self.config)

        if self.config.n_model in ["enparadigm", "japaradigm"]:
            model_path = download_or_load(
                f"misc/inflection.{self.config.lang}.pickle",
                self.config.lang,
            )
            with open(model_path, "rb") as handle:
                model = dict(pickle.load(handle))
            return PororoParadigm(model, self.config)
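
The second branch simply unpickles a plain mapping and hands it to PororoParadigm. A rough sketch of the underlying data access, assuming (this is an assumption) the pickle maps a lemma to its inflection entries:

import pickle

# the file name mirrors the pattern above; the local path is illustrative
with open("inflection.en.pickle", "rb") as handle:
    model = dict(pickle.load(handle))

# assumed layout: lemma -> inflection entries
print(model.get("go"))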
Example #3
    def load(self, device: str):
        """
        Load user-selected task-specific model

        Args:
            device (str): device information

        Returns:
            object: User-selected task-specific model

        """
        if self.config.n_model == "p2g.zh":
            from pororo.models.p2g import P2gM

            pinyin = download_or_load(
                f"misc/pinyin2idx.{self.config.lang}.pkl",
                self.config.lang,
            )
            char = download_or_load(
                f"misc/char2idx.{self.config.lang}.pkl",
                self.config.lang,
            )
            ckpt = download_or_load(
                f"misc/{self.config.n_model}.pt",
                self.config.lang,
            )
            model = P2gM(pinyin, char, ckpt, device)
            return PororoP2GZh(model, self.config)

        if self.config.n_model == "p2g.ja":
            from fairseq.models.transformer import TransformerModel

            load_dict = download_or_load(
                "transformer/transformer.base.ja.p2g",
                self.config.lang,
            )

            model = (TransformerModel.from_pretrained(
                model_name_or_path=load_dict.path,
                checkpoint_file="transformer.base.ja.p2g.pt",
                data_name_or_path=load_dict.dict_path,
                source_lang=load_dict.src_dict,
                target_lang=load_dict.tgt_dict,
            ).eval().to(device))

            return PororoP2GJa(model, self.config)
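
Both branches build a phoneme-to-grapheme converter, matching the "p2g" prefix in the model names. A hedged usage sketch; the input format shown is an assumption:

from pororo import Pororo

p2g = Pororo(task="p2g", lang="zh")
# passing a list of pinyin syllables is an assumption
print(p2g(["ni3", "hao3"]))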
Example #4
    def load(self, device: str):
        """
        Load user-selected task-specific model

        Args:
            device (str): device information

        Returns:
            object: User-selected task-specific model

        """
        if "brainbert" in self.config.n_model:
            from pororo.models.brainbert import BrainRobertaModel

            model = (BrainRobertaModel.load_model(
                f"bert/{self.config.n_model}",
                self.config.lang,
            ).eval().to(device))
            return PororoBertSts(model, self.config)

        if "jaberta" in self.config.n_model:
            from pororo.models.brainbert import JabertaModel

            model = (JabertaModel.load_model(
                f"bert/{self.config.n_model}",
                self.config.lang,
            ).eval().to(device))
            return PororoBertSts(model, self.config)

        if "zhberta" in self.config.n_model:
            from pororo.models.brainbert import ZhbertaModel

            model = (ZhbertaModel.load_model(
                f"bert/{self.config.n_model}",
                self.config.lang,
            ).eval().to(device))
            return PororoBertSts(model, self.config)

        if "sbert" in self.config.n_model:
            from sentence_transformers import SentenceTransformer

            path = download_or_load(
                f"sbert/{self.config.n_model}",
                self.config.lang,
            )
            model = SentenceTransformer(path).eval().to(device)
            return PororoSBertSts(model, self.config)

        if "roberta" in self.config.n_model:
            from pororo.models.brainbert import CustomRobertaModel

            model = (CustomRobertaModel.load_model(
                f"bert/{self.config.n_model}",
                self.config.lang,
            ).eval().to(device))
            return PororoBertSts(model, self.config)
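
All five branches return a sentence-pair similarity scorer wrapped in PororoBertSts or PororoSBertSts. A hedged usage sketch; the task name is an assumption:

from pororo import Pororo

sts = Pororo(task="similarity", lang="ko")  # task name is an assumption
print(sts("날씨가 좋다", "오늘은 화창하다"))  # a similarity score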
Example #5
    def load(self, device: str):
        """
        Load user-selected task-specific model

        Args:
            device (str): device information

        Returns:
            object: User-selected task-specific model

        """
        from pororo.tasks import PororoTokenizationFactory

        sent_tokenizer = (lambda text, lang: PororoTokenizationFactory(
            task="tokenization",
            lang=lang,
            model=f"sent_{lang}",
        ).load(device).predict(text))

        if "multi" in self.config.n_model:
            from fairseq.models.transformer import TransformerModel

            from pororo.tasks.utils.tokenizer import CustomTokenizer

            load_dict = download_or_load(
                f"transformer/{self.config.n_model}",
                self.config.lang,
            )

            model = (TransformerModel.from_pretrained(
                model_name_or_path=load_dict.path,
                checkpoint_file=f"{self.config.n_model}.pt",
                data_name_or_path=load_dict.dict_path,
                source_lang=load_dict.src_dict,
                target_lang=load_dict.tgt_dict,
            ).eval().to(device))

            tokenizer = CustomTokenizer.from_file(
                vocab_filename=f"{load_dict.src_tok}/vocab.json",
                merges_filename=f"{load_dict.src_tok}/merges.txt",
            )

            if "mtpg" in self.config.n_model:
                langtok_style = "mbart"
            elif "m2m" in self.config.n_model:
                langtok_style = "multilingual"
            else:
                langtok_style = "basic"

            return PororoTransformerTransMulti(
                model,
                self.config,
                tokenizer,
                sent_tokenizer,
                langtok_style,
            )
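
The langtok_style flag only controls how language tokens are prefixed for the multilingual checkpoint (mbart-style for mtpg models, m2m-style, or a basic scheme). A hedged usage sketch of the resulting translator; the task name "mt" is taken from Example #18 below, while the call keywords are assumptions:

from pororo import Pororo

mt = Pororo(task="mt", lang="multi", model="transformer.large.multi.mtpg")
print(mt("안녕하세요", src="ko", tgt="en"))  # src/tgt keyword names are assumptions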
Example #6
    def load(self, device: str):
        """
        Load user-selected task-specific model

        Args:
            device (str): device information

        Returns:
            object: User-selected task-specific model

        """

        if "charbert" in self.config.n_model:
            from pororo.models.brainbert import CharBrainRobertaModel

            model = (CharBrainRobertaModel.load_model(
                f"bert/{self.config.n_model}",
                self.config.lang,
            ).eval().to(device))
            print(
                "As of now, this beta model tries to correct spacing errors in Korean text."
            )
            return PororoBertSpacing(model, self.config)

        if "transformer" in self.config.n_model:
            from fairseq.models.transformer import TransformerModel

            from pororo.tasks.utils.tokenizer import CustomTokenizer

            load_dict = download_or_load(
                f"transformer/{self.config.n_model}",
                self.config.lang,
            )

            tokenizer = None
            model = (TransformerModel.from_pretrained(
                model_name_or_path=load_dict.path,
                checkpoint_file=f"{self.config.n_model}.pt",
                data_name_or_path=load_dict.dict_path,
                source_lang=load_dict.src_dict,
                target_lang=load_dict.tgt_dict,
            ).eval().to(device))

            if "char" in self.config.n_model:
                return PororoTransformerGecChar(model, self.config)

            if load_dict.src_tok:
                tokenizer = CustomTokenizer.from_file(
                    vocab_filename=f"{load_dict.src_tok}/vocab.json",
                    merges_filename=f"{load_dict.src_tok}/merges.txt",
                )

            return PororoTransformerGec(model, tokenizer, device, self.config)
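
Depending on the model string, this loader yields either a Korean spacing corrector (charbert) or a transformer-based grammatical error corrector. A hedged usage sketch; the task name is an assumption:

from pororo import Pororo

gec = Pororo(task="gec", lang="en")  # task name is an assumption
print(gec("This apples is sweet."))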
Example #7
    def load(self, device: str):
        from sentence_transformers import SentenceTransformer

        model_path = self.config.n_model

        if self.config.lang != "en":
            model_path = download_or_load(
                f"sbert/{self.config.n_model}",
                self.config.lang,
            )
        model = SentenceTransformer(model_path).eval().to(device)
        return PororoSBertSentence(model, self.config)
Example #8
    def load(self, device: str):
        """
        Load user-selected task-specific model

        Args:
            device (str): device information

        Returns:
            object: User-selected task-specific model

        """
        if "transformer" in self.config.n_model:
            from fairseq.models.transformer import TransformerModel

            from pororo.tasks import PororoPosFactory

            load_dict = download_or_load(
                f"transformer/{self.config.n_model}",
                self.config.lang,
            )

            model = (TransformerModel.from_pretrained(
                model_name_or_path=load_dict.path,
                checkpoint_file=f"{self.config.n_model}.pt",
                data_name_or_path=load_dict.dict_path,
                source_lang=load_dict.src_dict,
                target_lang=load_dict.tgt_dict,
            ).eval().to(device))

            if self.config.lang == "ko":
                tagger = PororoPosFactory(
                    task="pos",
                    model="mecab-ko",
                    lang=self.config.lang,
                ).load(device)
                return PororoTransConstKo(model, tagger, self.config)

            if self.config.lang == "en":
                tagger = PororoPosFactory(
                    task="pos",
                    model="nltk",
                    lang=self.config.lang,
                ).load(device)
                return PororoTransConstEn(model, tagger, self.config)

            if self.config.lang == "zh":
                tagger = PororoPosFactory(
                    task="pos",
                    model="jieba",
                    lang=self.config.lang,
                ).load(device)
                return PororoTransConstZh(model, tagger, self.config)
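
Each language pairs the same transformer with a language-appropriate POS tagger, reusing the PororoPosFactory pattern shown above. A hedged usage sketch; the task name is an assumption:

from pororo import Pororo

const = Pororo(task="const", lang="en")  # task name is an assumption
print(const("I love apples."))  # a bracketed parse tree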
Example #9
    def load_model(cls, model_name: str, lang: str, **kwargs):
        """
        Load pre-trained model as RobertaHubInterface.
        :param model_name: model name from available_models
        :return: pre-trained model
        """
        from fairseq import hub_utils

        ckpt_dir = download_or_load(model_name, lang)
        tok_path = download_or_load(f"tokenizers/bpe32k.{lang}.zip", lang)

        x = hub_utils.from_pretrained(
            ckpt_dir,
            "model.pt",
            ckpt_dir,
            load_checkpoint_heads=True,
            **kwargs,
        )
        return BrainRobertaHubInterface(
            x["args"],
            x["task"],
            x["models"][0],
            tok_path,
        )
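
Note that ckpt_dir is passed to hub_utils.from_pretrained twice: once as the model root and once as the data root, so fairseq resolves its dictionaries from the same cache directory. A hedged sketch of a call site, reusing a model name that appears in Example #16 below; any extra keyword arguments are forwarded to from_pretrained:

from pororo.models.brainbert import BrainRobertaModel

model = BrainRobertaModel.load_model(
    "bert/brainbert.base.ko.korquad",
    "ko",
).eval()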
Example #10
    def load_model(cls, model_name: str, lang: str, **kwargs):
        """
        Load pre-trained model as RobertaHubInterface.
        :param model_name: model name from available_models
        :return: pre-trained model
        """
        from fairseq import hub_utils

        ckpt_dir = download_or_load(model_name, lang)
        x = hub_utils.from_pretrained(
            ckpt_dir,
            "model.pt",
            bpe="gpt2",
            load_checkpoint_heads=True,
            **kwargs,
        )
        return CustomRobertaHubInterface(x["args"], x["task"], x["models"][0])
Example #11
    def load(self, device: str):
        """
        Load user-selected task-specific model

        Args:
            device (str): device information

        Returns:
            object: User-selected task-specific model

        """
        from sentence_transformers import SentenceTransformer

        model_path = self.config.n_model

        if self.config.lang != "en":
            model_path = download_or_load(
                f"sbert/{self.config.n_model}",
                self.config.lang,
            )
        model = SentenceTransformer(model_path).eval().to(device)
        return PororoSBertSentence(model, self.config)
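
When lang is "en", the model name goes straight to sentence-transformers and resolves from its own model hub; other languages are served from the Pororo cache. A hedged usage sketch; the task name is an assumption:

from pororo import Pororo

se = Pororo(task="sentence_embedding", lang="ko")  # task name is an assumption
embedding = se("문장 임베딩을 계산한다")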
Example #12
    def load_model(cls, model_name: str, lang: str, **kwargs):
        """
        Load pre-trained model as RobertaHubInterface.
        :param model_name: model name from available_models
        :return: pre-trained model
        """
        from fairseq import hub_utils

        # cache directory is treated as the home directory for both model and data files
        ckpt_dir = download_or_load(model_name, lang)
        x = hub_utils.from_pretrained(
            ckpt_dir,
            "model.pt",
            ckpt_dir,
            load_checkpoint_heads=True,
            **kwargs,
        )
        return SegmentBertHubInterface(
            x["args"],
            x["task"],
            x["models"][0],
            lang,
        )
Example #13
    def __init__(self, args, task, model):
        # point fairseq's GPT-2 BPE at the cached encoder/vocab files
        args.gpt2_encoder_json = download_or_load("misc/encoder.json", "en")
        args.gpt2_vocab_bpe = download_or_load("misc/vocab.bpe", "en")
        super().__init__(args, task, model)
        self.softmax = nn.Softmax(dim=1)
Example #14
    def load(self, device: str):
        """
        Load user-selected task-specific model

        Args:
            device (str): device information

        Returns:
            object: User-selected task-specific model

        """
        if "bart" in self.config.n_model:
            from whoosh import index

            from pororo.models.bart.KoBART import KoBartModel
            from pororo.models.wikipedia2vec import Wikipedia2Vec
            from pororo.tasks import PororoTokenizationFactory

            vec_map = {
                "ko": "kowiki_20200720_100d.pkl",
                "en": "enwiki_20180420_100d.pkl",
                "ja": "jawiki_20180420_100d.pkl",
                "zh": "zhwiki_20180420_100d.pkl",
            }

            f_wikipedia2vec = download_or_load(
                f"misc/{vec_map[self.config.lang]}",
                self.config.lang,
            )

            f_index = download_or_load(
                f"misc/{self.config.lang}_indexdir.zip",
                self.config.lang,
            )

            model = Wikipedia2Vec(model_file=f_wikipedia2vec, device=device)
            idx = index.open_dir(f_index)

            sim_words = SimilarWords(model, idx)

            model_path = download_or_load(
                f"bart/{self.config.n_model}",
                self.config.lang,
            )

            model = KoBartModel.from_pretrained(
                device=device,
                model_path=model_path,
            )

            sent_tok = (lambda text: PororoTokenizationFactory(
                task="tokenization",
                lang=self.config.lang,
                model=f"sent_{self.config.lang}",
            ).load(device).predict(text))

            return PororoKoBartQuestionGeneration(
                model,
                sim_words,
                sent_tok,
                self.config,
            )
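
This loader stitches four parts together: a Wikipedia2Vec model plus a whoosh index for similar-word lookup, a KoBART generator, and a sentence tokenizer. A hedged usage sketch; both the task name and the argument order are assumptions:

from pororo import Pororo

qg = Pororo(task="qg", lang="ko")  # task name is an assumption
# (answer, context) argument order is an assumption
print(qg("이순신", "이순신은 조선 중기의 무신이다."))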
Example #15
    def load(self, device: str):
        """
        Load user-selected task-specific model

        Args:
            device (str): device information

        Returns:
            object: User-selected task-specific model

        """
        if "sent" in self.config.n_model:
            import nltk

            try:
                nltk.data.find("tokenizers/punkt")
            except LookupError:
                nltk.download("punkt")

            from nltk.tokenize import sent_tokenize

            return PororoSentTokenizer(sent_tokenize, self.config)

        if self.config.n_model == "mecab_ko":
            try:
                import mecab
            except ModuleNotFoundError as error:
                raise error.__class__(
                    "Please install python-mecab-ko with: `pip install python-mecab-ko`"
                )
            model = mecab.MeCab()
            return PororoMecabKoTokenizer(model, self.config)

        if self.config.n_model == "char":
            return PororoCharTokenizer(self.config)

        if self.config.n_model == "jamo":
            return PororoJamoTokenizer(self.config)

        if self.config.n_model == "word":
            return PororoWordTokenizer(self.config)

        if self.config.n_model == "roberta":
            from fairseq.data.encoders.gpt2_bpe import get_encoder

            encoder = download_or_load("misc/encoder.json", self.config.lang)
            vocab = download_or_load("misc/vocab.bpe", self.config.lang)
            model = get_encoder(encoder, vocab)

            with open(encoder, "r") as f_vocab:
                vocab = json.load(f_vocab)
                inv_dict = {v: k for k, v in vocab.items()}

            return PororoRoBERTaTokenizer(model, vocab, inv_dict, self.config)

        if self.config.n_model == "moses":
            try:
                from sacremoses import MosesDetokenizer, MosesTokenizer
            except ModuleNotFoundError as error:
                raise error.__class__(
                    "Please install sacremoses with: `pip install sacremoses`")
            model = MosesTokenizer(lang="en")
            detok = MosesDetokenizer(lang="en")
            return PororoMosesTokenizer(model, detok, self.config)

        if self.config.n_model == "jieba":
            try:
                import jieba
            except ModuleNotFoundError as error:
                raise error.__class__(
                    "Please install jieba with: `pip install jieba`")
            model = jieba.cut
            return PororoJiebaTokenizer(model, self.config)

        if self.config.n_model == "mecab":
            try:
                import fugashi
            except ModuleNotFoundError as error:
                raise error.__class__(
                    "Please install fugashi with: `pip install fugashi`")

            try:
                import ipadic
            except ModuleNotFoundError as error:
                raise error.__class__(
                    "Please install ipadic with: `pip install ipadic`")

            dic_dir = ipadic.DICDIR
            mecabrc = os.path.join(dic_dir, "mecabrc")
            mecab_option = "-d {} -r {} ".format(
                dic_dir,
                mecabrc,
            )
            model = fugashi.GenericTagger(mecab_option)
            return PororoMecabTokenizer(model, self.config)
        else:
            from pororo.tasks.utils.tokenizer import CustomTokenizer

            path = download_or_load(
                f"tokenizers/{self.config.n_model}.zip",
                self.config.lang,
            )

            ext = "json" if "unigram" not in self.config.n_model else "txt"
            merges_filename = (f"{path}/merges.txt" if "unigram"
                               not in self.config.n_model else None)

            model = CustomTokenizer.from_file(
                vocab_filename=f"{path}/vocab.{ext}",
                merges_filename=merges_filename,
                normalize="jpe" not in self.config.n_model,
            )
            if "jpe" in self.config.n_model:
                return PororoJamoPairTokenizer(model, self.config)
            if "mecab.bpe" in self.config.n_model:
                return PororoMecabSPTokenizer(model, self.config)
            return PororoSPTokenizer(model, self.config)
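
The final else branch covers subword tokenizers shipped as zip archives: unigram models provide only a vocab file, while BPE models also ship merges.txt. The factory itself can be used directly, mirroring the sent_tokenizer lambda in Example #5:

from pororo.tasks import PororoTokenizationFactory

tokenizer = PororoTokenizationFactory(
    task="tokenization",
    lang="ko",
    model="mecab_ko",
).load("cuda")  # the device string is illustrative
print(tokenizer.predict("아버지가 방에 들어가신다"))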
Example #16
import mecab
import torch
from fairseq import hub_utils

from pororo.models.brainbert.BrainRoBERTa import BrainRobertaHubInterface
from pororo.tasks.machine_reading_comprehension import PororoBertMrc
from pororo.tasks.utils.base import TaskConfig
from pororo.tasks.utils.download_utils import download_or_load
from pororo.utils import postprocess_span

# download (or fetch from cache) the Korean MRC checkpoint and its BPE tokenizer
ckpt_dir = download_or_load("bert/brainbert.base.ko.korquad", "ko")
tok_path = download_or_load("tokenizers/bpe32k.ko.zip", "ko")

# the checkpoint directory doubles as the data root, as in the load_model examples above
x = hub_utils.from_pretrained(
    ckpt_dir,
    "model.pt",
    ckpt_dir,
    load_checkpoint_heads=True,
)
model = BrainRobertaHubInterface(
    x["args"],
    x["task"],
    x["models"][0],
    tok_path,
).to(torch.device("cuda"))

tagger = mecab.MeCab()
final = PororoBertMrc(
    model,
    tagger,
    postprocess_span,
    TaskConfig("mrc", "ko", "brainbert.base.ko.korquad"),
)

print(final("이름이 뭐야?", "이름은 시리야."))
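
This script pins the model to CUDA and will fail on CPU-only machines; a small hedged variant of the device line degrades gracefully:

import torch

# prefer the GPU when available instead of hard-coding "cuda"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# ...then move the interface with .to(device) as above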
Example #17
    def _apply_wsd(self, tags: List[Tuple[str, str]]):
        """
        Apply Word Sense Disambiguation to get detail tag info

        Args:
            tags (List[Tuple[str, str]]): inference word-tag pair result

        Returns:
            List[Tuple[str, str]]: wsd-applied result

        """
        if self._wsd is None:
            from pororo.tasks import PororoWsdFactory
            self._wsd = PororoWsdFactory(
                task="wsd",
                lang="ko",
                model="transformer.large.ko.wsd",
            ).load(self._device)

        if self._cls2cat is None:
            self._cls2cat = dict()
            lines = (open(
                download_or_load(
                    "misc/wsd.cls.txt",
                    self.config.lang,
                ),
                "r",
                encoding="utf8",
            ).read().strip().splitlines())
            for line in lines:
                morph, homonymno, category = line.split()
                classifier = f"{morph}__NNB__{homonymno}"  # bound noun
                self._cls2cat[classifier] = category

        if self._quant2cat is None:
            self._quant2cat = dict()
            self._term2cat = dict()
            lines = (open(
                download_or_load(
                    "misc/re.templates.txt",
                    self.config.lang,
                ),
                "r",
            ).read().strip().splitlines())

            for line in lines:
                category, ner_category, expression = line.split(" ", 2)
                if ner_category == "QUANTITY":
                    self._quant2cat[expression] = category
                elif ner_category == "TERM":
                    self._term2cat[expression] = category

        input_text_with_markers = str()
        target_token_ids = []

        for idx, ner_token in enumerate(tags):
            surface, tag = ner_token
            # replace literal braces in the surface form, since ASCII { and }
            # are used as special marker symbols below (the no-op replace in the
            # original text looks like an encoding artifact; fullwidth braces assumed)
            surface = surface.replace("{", "｛")
            surface = surface.replace("}", "｝")

            if tag == "TERM":
                cat = self._template_match(surface, self._term2cat)
                if cat is not None:
                    tags[idx] = (surface, cat)
                input_text_with_markers += surface
            elif tag == "QUANTITY":
                cat = self._template_match(surface, self._quant2cat)
                if cat is not None:
                    tags[idx] = (surface, cat)
                    input_text_with_markers += surface
                else:
                    target_token_ids.append(idx)
                    input_text_with_markers += "{" + surface + "}"
            else:
                input_text_with_markers += surface

        wsd_results = self._wsd(input_text_with_markers)
        action = False
        has_category = False
        categories = []

        for wsd_token in wsd_results:
            morph, tag, homonymno = wsd_token[:3]
            if morph == "{":
                has_category = False
                action = True
            elif morph == "}":
                if has_category is False:
                    categories.append("QUANTITY")  # original category
                has_category = False
                action = False

            if action:
                if homonymno is None:
                    homonymno = "00"

                query = f"{morph}__{tag}__{homonymno}"
                if query in self._cls2cat:
                    category = self._cls2cat[query]
                    categories.append(category)
                    has_category = True
                    action = False

        assert len(target_token_ids) == len(categories)

        for target_token_id, cat in zip(target_token_ids, categories):
            tags[target_token_id] = (tags[target_token_id][0], cat)

        return tags
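
_template_match is not shown in this excerpt. Given that re.templates.txt pairs each category with a regular-expression template, a plausible, purely hypothetical reconstruction is a full-match scan over the template dictionary:

import re
from typing import Dict, Optional

def _template_match(surface: str, templates: Dict[str, str]) -> Optional[str]:
    # hypothetical sketch: return the category of the first template
    # whose regex fully matches the surface form, else None
    for expression, category in templates.items():
        if re.fullmatch(expression, surface):
            return category
    return None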
Example #18
    def load(self, device: str):
        """
        Load user-selected task-specific model

        Args:
            device (str): device information

        Returns:
            object: User-selected task-specific model

        """
        translator = None

        if "transformer" in self.config.n_model:
            from transformers import BertTokenizer

            from pororo.models.caption import Caption, Detr

            load_dict = download_or_load(
                f"transformer/{self.config.n_model}",
                "en",
            )
            tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

            pad_token_id = tokenizer.pad_token_id
            vocab_size = tokenizer.vocab_size

            transformer = Caption(pad_token_id, vocab_size)
            transformer.load_state_dict(
                torch.load(
                    os.path.join(
                        load_dict.path,
                        f"{self.config.n_model}.pt",
                    ),
                    map_location=device,
                )["model"])
            transformer.eval().to(device)

            detr = Detr(device)

            if self.config.lang != "en":
                assert self.config.lang in [
                    "ko",
                    "ja",
                    "zh",
                ], "Unsupported language code is selected!"
                from pororo.tasks import PororoTranslationFactory

                translator = PororoTranslationFactory(
                    task="mt",
                    lang="multi",
                    model="transformer.large.multi.mtpg",
                )
                translator = translator.load(device)

            return PororoCaptionBrainCaption(
                detr,
                transformer,
                tokenizer,
                translator,
                device,
                self.config,
            )
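
The caption pipeline pairs DETR image features with a transformer decoder, and for non-English output it simply post-translates the English caption with the multilingual MT model. A hedged usage sketch; the task name and input form are assumptions:

from pororo import Pororo

caption = Pororo(task="caption", lang="ko")  # task name is an assumption
print(caption("path/to/image.jpg"))  # the image path is illustrative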
Example #19
    def load(self, device: str):
        """
        Load user-selected task-specific model

        Args:
            device (str): device information

        Returns:
            object: User-selected task-specific model

        """
        if "roberta" in self.config.n_model:
            from pororo.models.brainbert import CustomRobertaModel

            model = (CustomRobertaModel.load_model(
                f"bert/{self.config.n_model}",
                self.config.lang,
            ).eval().to(device))
            return PororoBertNerEn(model, self.config)

        if "charbert" in self.config.n_model:
            from pororo.models.brainbert import CharBrainRobertaModel
            from pororo.tasks.tokenization import PororoTokenizationFactory

            model = (CharBrainRobertaModel.load_model(
                f"bert/{self.config.n_model}",
                self.config.lang,
            ).eval().to(device))

            sent_tokenizer = PororoTokenizationFactory(
                task="tokenization",
                model="sent_ko",
                lang=self.config.lang,
            ).load(device)

            f_wsd_dict = open(
                download_or_load(
                    f"misc/wiki.{self.config.lang}.items",
                    self.config.lang,
                ),
                "r",
            )
            wsd_dict = defaultdict(dict)
            for line in f_wsd_dict.readlines():
                origin, target, word = line.strip().split("\t")
                wsd_dict[origin][word] = target

            return PororoBertCharNer(
                model,
                sent_tokenizer,
                wsd_dict,
                device,
                self.config,
            )

        if "zhberta" in self.config.n_model:
            from pororo.models.brainbert import ZhbertaModel

            model = (ZhbertaModel.load_model(
                f"bert/{self.config.n_model}",
                self.config.lang,
            ).eval().to(device))
            return PororoBertNerZh(model, self.config)

        if "jaberta" in self.config.n_model:
            from pororo.models.brainbert import JabertaModel

            model = (JabertaModel.load_model(
                f"bert/{self.config.n_model}",
                self.config.lang,
            ).eval().to(device))
            return PororoBertNerJa(model, self.config)
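
Four model families share this task; the Korean charbert branch additionally wires in a sentence tokenizer and a Wikipedia-derived disambiguation dictionary for post-processing. A hedged usage sketch:

from pororo import Pororo

ner = Pororo(task="ner", lang="ko")  # task name is an assumption
print(ner("마이크로소프트는 레드먼드에 있다"))  # a list of (token, tag) pairs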
Example #20
    def load(self, device: str):
        """
        Load user-selected task-specific model

        Args:
            device (str): device information

        Returns:
            object: User-selected task-specific model

        """
        if "multi" in self.config.n_model:
            from fairseq.models.transformer import TransformerModel

            from pororo.tasks.utils.tokenizer import CustomTokenizer

            load_dict = download_or_load(
                f"transformer/{self.config.n_model}",
                "multi",
            )

            model = (TransformerModel.from_pretrained(
                model_name_or_path=load_dict.path,
                checkpoint_file=f"{self.config.n_model}.pt",
                data_name_or_path=load_dict.dict_path,
                source_lang=load_dict.src_dict,
                target_lang=load_dict.tgt_dict,
            ).eval().to(device))

            tokenizer = CustomTokenizer.from_file(
                vocab_filename=f"{load_dict.src_tok}/vocab.json",
                merges_filename=f"{load_dict.src_tok}/merges.txt",
            )

            return PororoTransformerTransMulti(
                model,
                self.config,
                tokenizer,
            )

        if "transformer" in self.config.n_model:
            from fairseq.models.transformer import TransformerModel

            load_dict = download_or_load(
                f"transformer/{self.config.n_model}",
                self.config.lang,
            )

            tokenizer = None
            model = (TransformerModel.from_pretrained(
                model_name_or_path=load_dict.path,
                checkpoint_file=f"{self.config.n_model}.pt",
                data_name_or_path=load_dict.dict_path,
                source_lang=load_dict.src_dict,
                target_lang=load_dict.tgt_dict,
            ).eval().to(device))

            if self.config.lang != "zh":
                from pororo.tasks.utils.tokenizer import CustomTokenizer

                tokenizer = CustomTokenizer.from_file(
                    vocab_filename=f"{load_dict.src_tok}/vocab.json",
                    merges_filename=f"{load_dict.src_tok}/merges.txt",
                )

            return PororoTransformerParaphrase(model, self.config, tokenizer)
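
Chinese models skip the subword tokenizer (their dictionaries are character-level), while other languages attach a BPE CustomTokenizer. A hedged usage sketch; the task name is an assumption:

from pororo import Pororo

paraphrase = Pororo(task="pg", lang="ko")  # task name is an assumption
print(paraphrase("비가 와서 우산을 챙겼다"))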