def construct_seq_class_transformer(
        options: KaggleEvaluationOptions) -> Reranker:
    """Build a MonoBERT reranker from Kaggle evaluation options.

    The model is loaded through a three-step cascade:

    1. ``MonoBERT.get_model(options.model_name, device=options.device)``.
    2. If that raises ``OSError``, the same call is retried with
       ``from_tf=True``.
    3. If the retry raises ``AttributeError``, the BioBERT MS MARCO hotfix
       is applied: zero-initialised ``bias`` (2) and ``weight`` (2 x 768)
       parameters are attached to the ``BertForSequenceClassification``
       class, the model is loaded with ``from_tf=True``, those parameters
       are installed on ``model.classifier``, and the model is moved to
       ``options.device`` and put in eval mode.

    Note that the hotfix leaves ``bias`` and ``weight`` set on the
    ``BertForSequenceClassification`` class itself; nothing here removes
    them afterwards.

    Args:
        options: Supplies ``model_name``, ``device``, ``tokenizer_name``
            and ``do_lower_case``.

    Returns:
        ``MonoBERT(model, tokenizer)``.
    """

    def _load_model():
        # Handlers stay nested so each fallback only runs while the
        # previous failure is being handled, exactly like a flat
        # try/except/try/except chain would.
        try:
            return MonoBERT.get_model(options.model_name,
                                      device=options.device)
        except OSError:
            try:
                return MonoBERT.get_model(options.model_name,
                                          from_tf=True,
                                          device=options.device)
            except AttributeError:
                # Hotfix for BioBERT MS MARCO. Refactor.
                zero_bias = torch.nn.Parameter(torch.zeros(2))
                BertForSequenceClassification.bias = zero_bias
                zero_weight = torch.nn.Parameter(torch.zeros(2, 768))
                BertForSequenceClassification.weight = zero_weight
                patched = BertForSequenceClassification.from_pretrained(
                    options.model_name, from_tf=True)
                # Read the parameters back from the class rather than the
                # locals, so whatever the class holds after loading is
                # what ends up on the classifier head.
                patched.classifier.weight = (
                    BertForSequenceClassification.weight)
                patched.classifier.bias = BertForSequenceClassification.bias
                target = torch.device(options.device)
                patched = patched.to(target)
                return patched.eval()

    model = _load_model()
    tokenizer = MonoBERT.get_tokenizer(
        options.tokenizer_name, do_lower_case=options.do_lower_case)
    return MonoBERT(model, tokenizer)
def construct_seq_class_transformer(
        options: DocumentRankingEvaluationOptions) -> Reranker:
    """Build a MonoBERT reranker from document-ranking evaluation options.

    Args:
        options: Supplies ``model``, ``from_tf`` and ``device`` for
            ``MonoBERT.get_model`` and ``tokenizer_name`` for
            ``MonoBERT.get_tokenizer``.

    Returns:
        ``MonoBERT(model, tokenizer)``.
    """
    # The model is loaded before the tokenizer (arguments evaluate left to
    # right).
    return MonoBERT(
        MonoBERT.get_model(
            options.model, from_tf=options.from_tf, device=options.device),
        MonoBERT.get_tokenizer(options.tokenizer_name),
    )
Exemplo n.º 3
0
def build_bert_reranker(
    name_or_path: str = "castorini/monobert-large-msmarco-finetune-only",
    device: str = None,
):
    """Create a MonoBERT reranker whose model and tokenizer share one source.

    Args:
        name_or_path: Model name or path handed to both
            ``MonoBERT.get_model`` and ``MonoBERT.get_tokenizer``.
        device: Forwarded unchanged to ``MonoBERT.get_model``.

    Returns:
        A ``MonoBERT`` wrapping the loaded model and tokenizer.
    """
    # Arguments evaluate left to right, so the model is loaded first and the
    # tokenizer second.
    return MonoBERT(
        MonoBERT.get_model(name_or_path, device=device),
        MonoBERT.get_tokenizer(name_or_path),
    )
Exemplo n.º 4
0
def construct_seq_class_transformer(
        options: DocumentRankingEvaluationOptions) -> Reranker:
    """Build a MonoBERT reranker using the ``Auto*`` loader classes.

    The sequence-classification model is loaded from ``options.model``
    (passing ``options.from_tf`` through), moved to ``options.device`` and
    switched to eval mode; the tokenizer is loaded from
    ``options.tokenizer_name``.

    Args:
        options: Supplies ``model``, ``from_tf``, ``device`` and
            ``tokenizer_name``.

    Returns:
        ``MonoBERT(model, tokenizer)``.
    """
    classifier = AutoModelForSequenceClassification.from_pretrained(
        options.model, from_tf=options.from_tf)
    # The device is resolved only after the weights have been loaded.
    target = torch.device(options.device)
    classifier = classifier.to(target)
    classifier = classifier.eval()
    return MonoBERT(
        classifier, AutoTokenizer.from_pretrained(options.tokenizer_name))
Exemplo n.º 5
0
def construct_seq_class_transformer(
        options: PassageRankingEvaluationOptions) -> Reranker:
    """Build a MonoBERT reranker from passage-ranking evaluation options.

    The model is first loaded with
    ``AutoModelForSequenceClassification.from_pretrained``. If that raises
    ``AttributeError``, the BioBERT MS MARCO hotfix runs instead:
    zero-initialised ``bias`` (2) and ``weight`` (2 x 768) parameters are
    attached to the ``BertForSequenceClassification`` class, the model is
    loaded through that class, and the parameters are installed on
    ``model.classifier``. Either way the model is then moved to
    ``options.device`` and put in eval mode.

    Note that the hotfix leaves ``bias`` and ``weight`` set on the
    ``BertForSequenceClassification`` class itself; nothing here removes
    them afterwards.

    Args:
        options: Supplies ``model``, ``from_tf``, ``device`` and
            ``tokenizer_name``.

    Returns:
        ``MonoBERT(model, tokenizer)``.
    """
    try:
        classifier = AutoModelForSequenceClassification.from_pretrained(
            options.model, from_tf=options.from_tf)
    except AttributeError:
        # Hotfix for BioBERT MS MARCO. Refactor.
        zero_bias = torch.nn.Parameter(torch.zeros(2))
        BertForSequenceClassification.bias = zero_bias
        zero_weight = torch.nn.Parameter(torch.zeros(2, 768))
        BertForSequenceClassification.weight = zero_weight
        classifier = BertForSequenceClassification.from_pretrained(
            options.model, from_tf=options.from_tf)
        # Read the parameters back from the class rather than the locals,
        # so whatever the class holds after loading lands on the head.
        classifier.classifier.weight = BertForSequenceClassification.weight
        classifier.classifier.bias = BertForSequenceClassification.bias
    target = torch.device(options.device)
    classifier = classifier.to(target)
    classifier = classifier.eval()
    return MonoBERT(
        classifier, AutoTokenizer.from_pretrained(options.tokenizer_name))
Exemplo n.º 6
0
import nltk
from bs4 import BeautifulSoup
from nltk.tokenize import word_tokenize
from collections import Counter
from nltk.corpus import stopwords
from pygaggle.rerank.base import Query, Text
from pygaggle.rerank.transformer import MonoT5
from nltk.tokenize import word_tokenize

from pygaggle.rerank.transformer import MonoBERT
from pygaggle.rerank.base import hits_to_texts

from expanders.relevancefeedback import RelevanceFeedback
from cmn import utils

# Module-level MonoBERT reranker, constructed with no arguments when this
# module is imported.
# NOTE(review): presumably this loads default pretrained weights at import
# time — confirm against the installed pygaggle version.
reranker = MonoBERT()

#@inproceedings{zheng-etal-2020-bert,
#    title = "{BERT-QE}: {C}ontextualized {Q}uery {E}xpansion for {D}ocument {R}e-ranking",
#    author = "Zheng, Zhi  and Hui, Kai  and  He, Ben  and Han, Xianpei  and  Sun, Le  and Yates, Andrew",
#    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2020",
#    month = nov,
#    year = "2020",
#    address = "Online",
#    publisher = "Association for Computational Linguistics",
#    url = "https://www.aclweb.org/anthology/2020.findings-emnlp.424",
#    pages = "4718--4728",
#}


class BertQE(RelevanceFeedback):