Exemplo n.º 1
0
from capreolus import ConfigOption, Dependency, evaluator
from capreolus.task import Task
from capreolus.utils.loginit import get_logger

# Module-level logger obtained from capreolus' get_logger helper, keyed by this module's __name__.
logger = get_logger(__name__)  # pylint: disable=invalid-name


@Task.register
class TutorialTask(Task):
    """Tutorial task that declares one benchmark and two searcher dependencies.

    The class is registered via ``Task.register`` under the module name
    ``"tutorial"``. Its command list is ``"run"`` followed by
    ``Task.help_commands``, and ``"run"`` is the default command.
    """

    module_name = "tutorial"
    # One config option, "optimize", described as the metric to maximize on
    # the validation set. NOTE(review): "map" is presumably its default
    # value -- confirm against the ConfigOption signature.
    config_spec = [
        ConfigOption("optimize", "map",
                     "metric to maximize on the validation set")
    ]
    # Modules this task depends on: a benchmark ("nf") and two searchers
    # ("BM25RM3" and "SDM"), reachable under the keys "benchmark",
    # "searcher1" and "searcher2".
    # NOTE(review): provide_this / provide_children presumably expose the
    # benchmark and its "collection" child to the other dependencies --
    # confirm against the Dependency documentation.
    dependencies = [
        Dependency(key="benchmark",
                   module="benchmark",
                   name="nf",
                   provide_this=True,
                   provide_children=["collection"]),
        Dependency(key="searcher1", module="searcher", name="BM25RM3"),
        Dependency(key="searcher2", module="searcher", name="SDM"),
    ]

    # Commands exposed by this task: "run" plus the help commands from Task.
    commands = ["run"] + Task.help_commands
    default_command = "run"

    def run(self):
        """Execute the task (presumably the handler for the ``run`` command).

        Begins by looking up the results path with ``self.get_results_path()``.
        """
        output_dir = self.get_results_path()

        # read the title queries from the chosen benchmark's topic file
Exemplo n.º 2
0
import contextlib
import numpy as np
import torch
from torch import nn
from transformers import BertForNextSentencePrediction

from capreolus import ConfigOption, Dependency
from capreolus.reranker import Reranker
from capreolus.utils.loginit import get_logger

# Module-level logger obtained from capreolus' get_logger helper, keyed by this module's __name__.
logger = get_logger(__name__)

# official weights converted with:
# def convert(name):
#     from transformers import BertTokenizer, BertForNextSentencePrediction, TFBertForNextSentencePrediction

#     tokenizer = BertTokenizer.from_pretrained("bert-large-uncased")

#     state = torch.load(f"/GW/NeuralIR/nobackup/birch-emnlp_bert4ir_v2/models/saved.{name}_1", map_location="cpu")

#     model = BertForNextSentencePrediction.from_pretrained("bert-large-uncased")
#     model.load_state_dict(state["model"].state_dict())

#     output = f"/GW/NeuralIR/nobackup/birch-emnlp_bert4ir_v2/models/export/birch-bert-large-{name}"
#     os.makedirs(output, exist_ok=True)
#     model.save_pretrained(output)
#     tokenizer.save_pretrained(output)

#     # tf2 support
#     tf_model = TFBertForNextSentencePrediction.from_pretrained(output, from_pt=True)
#     tf_model.save_pretrained(output)