Exemplo n.º 1
0
    def __init__(self, args, logger=None) -> None:
        """Load the NER model, the encoders and the candidate entity data.

        Args:
            args: Namespace-like object. Attributes read here are
                ``biencoder_config``, ``biencoder_model``, ``fast``,
                ``entity_catalogue``, ``entity_encoding``, ``faiss_index``
                and ``index_path``. ``crossencoder_config`` and
                ``crossencoder_model`` are read only when ``args.fast`` is
                falsy.
            logger: Optional logger. When truthy, progress messages are sent
                through ``logger.info`` and the logger is forwarded to
                ``_load_candidates``.
        """
        self.args = args
        self.logger = logger

        # Load NER model
        self.ner_model = NER.get_model()

        # Load biencoder model. The config is JSON, so decode it as UTF-8
        # explicitly instead of relying on the platform's locale encoding.
        if logger:
            logger.info("loading biencoder model")
        with open(args.biencoder_config, encoding="utf-8") as json_file:
            self.biencoder_params = json.load(json_file)
        self.biencoder_params["path_to_model"] = args.biencoder_model
        self.biencoder = load_biencoder(self.biencoder_params)

        # The crossencoder is skipped entirely in "fast" mode; both
        # attributes then stay None.
        self.crossencoder = None
        self.crossencoder_params = None
        if not args.fast:
            if logger:
                logger.info("loading crossencoder model")
            with open(args.crossencoder_config, encoding="utf-8") as json_file:
                self.crossencoder_params = json.load(json_file)
            self.crossencoder_params["path_to_model"] = args.crossencoder_model
            self.crossencoder = load_crossencoder(self.crossencoder_params)

        # Load candidate entities
        if logger:
            logger.info("loading candidate entities")
        (
            self.candidate_encoding,
            self.title2id,
            self.id2title,
            self.id2text,
            self.wikipedia_id2local_id,
            self.faiss_indexer,
        ) = _load_candidates(
            args.entity_catalogue,
            args.entity_encoding,
            faiss_index=args.faiss_index,
            index_path=args.index_path,
            logger=logger,
        )

        # Invert wikipedia_id2local_id: each value becomes a key that maps to
        # a Wikipedia URL whose ``curid`` is the original key.
        self.id2url = {
            v: "https://en.wikipedia.org/wiki?curid=%s" % k
            for k, v in self.wikipedia_id2local_id.items()
        }
Exemplo n.º 2
0
def load_models(args, logger=None):
    """Load the biencoder, the optional crossencoder and the candidate data.

    Side effect: on every call, the inverse of ``wikipedia_id2local_id`` is
    written to ``local_id2wikipedia_id.json`` in the current working
    directory.

    Args:
        args: Namespace-like object. Attributes read here are
            ``biencoder_config``, ``biencoder_model``, ``fast``,
            ``entity_catalogue``, ``entity_encoding``, ``faiss_index`` and
            ``index_path``. ``crossencoder_config`` and
            ``crossencoder_model`` are read only when ``args.fast`` is falsy.
        logger: Optional logger. When truthy, progress messages are sent
            through ``logger.info`` and the logger is forwarded to
            ``_load_candidates``.

    Returns:
        A 10-tuple ``(biencoder, biencoder_params, crossencoder,
        crossencoder_params, candidate_encoding, title2id, id2title,
        id2text, wikipedia_id2local_id, faiss_indexer)``. ``crossencoder``
        and ``crossencoder_params`` are ``None`` when ``args.fast`` is
        truthy.
    """
    # Load biencoder model. The config is JSON, so decode it as UTF-8
    # explicitly instead of relying on the platform's locale encoding.
    if logger:
        logger.info("loading biencoder model")
    with open(args.biencoder_config, encoding="utf-8") as json_file:
        biencoder_params = json.load(json_file)
    biencoder_params["path_to_model"] = args.biencoder_model
    biencoder = load_biencoder(biencoder_params)

    # The crossencoder is skipped entirely in "fast" mode.
    crossencoder = None
    crossencoder_params = None
    if not args.fast:
        if logger:
            logger.info("loading crossencoder model")
        with open(args.crossencoder_config, encoding="utf-8") as json_file:
            crossencoder_params = json.load(json_file)
        crossencoder_params["path_to_model"] = args.crossencoder_model
        crossencoder = load_crossencoder(crossencoder_params)

    # Load candidate entities
    if logger:
        logger.info("loading candidate entities")
    (
        candidate_encoding,
        title2id,
        id2title,
        id2text,
        wikipedia_id2local_id,
        faiss_indexer,
    ) = _load_candidates(
        args.entity_catalogue,
        args.entity_encoding,
        faiss_index=args.faiss_index,
        index_path=args.index_path,
        logger=logger,
    )

    # Persist the reverse mapping (value -> key of wikipedia_id2local_id).
    # NOTE(review): the path is relative to the working directory and is
    # overwritten on each call - confirm this dump is still wanted.
    local_id2wikipedia_id = {v: k for k, v in wikipedia_id2local_id.items()}
    with open('local_id2wikipedia_id.json', 'w', encoding="utf-8") as json_file:
        json.dump(local_id2wikipedia_id, json_file, indent=4)

    return (
        biencoder,
        biencoder_params,
        crossencoder,
        crossencoder_params,
        candidate_encoding,
        title2id,
        id2title,
        id2text,
        wikipedia_id2local_id,
        faiss_indexer,
    )
Exemplo n.º 3
0
def load_models(args, logger=None):
    """Load the biencoder, the optional crossencoder and the candidate data.

    Args:
        args: Namespace-like object. Attributes read here are
            ``biencoder_config``, ``biencoder_model``, ``fast``,
            ``entity_catalogue`` and ``entity_encoding``.
            ``crossencoder_config`` and ``crossencoder_model`` are read only
            when ``args.fast`` is falsy. ``faiss_index`` and ``index_path``
            are optional and default to ``None`` when absent.
        logger: Optional logger. When truthy, progress messages are sent
            through ``logger.info`` and the logger is forwarded to
            ``_load_candidates``.

    Returns:
        A 10-tuple ``(biencoder, biencoder_params, crossencoder,
        crossencoder_params, candidate_encoding, title2id, id2title,
        id2text, wikipedia_id2local_id, faiss_indexer)``. ``crossencoder``
        and ``crossencoder_params`` are ``None`` when ``args.fast`` is
        truthy.
    """
    # Load biencoder model. The config is JSON, so decode it as UTF-8
    # explicitly instead of relying on the platform's locale encoding.
    if logger:
        logger.info("loading biencoder model")
    with open(args.biencoder_config, encoding="utf-8") as json_file:
        biencoder_params = json.load(json_file)
    biencoder_params["path_to_model"] = args.biencoder_model
    biencoder = load_biencoder(biencoder_params)

    # The crossencoder is skipped entirely in "fast" mode.
    crossencoder = None
    crossencoder_params = None
    if not args.fast:
        if logger:
            logger.info("loading crossencoder model")
        with open(args.crossencoder_config, encoding="utf-8") as json_file:
            crossencoder_params = json.load(json_file)
        crossencoder_params["path_to_model"] = args.crossencoder_model
        crossencoder = load_crossencoder(crossencoder_params)

    # Load candidate entities. The faiss options are looked up with a
    # default so that args objects lacking them are still accepted.
    if logger:
        logger.info("loading candidate entities")
    (
        candidate_encoding,
        title2id,
        id2title,
        id2text,
        wikipedia_id2local_id,
        faiss_indexer,
    ) = _load_candidates(
        args.entity_catalogue,
        args.entity_encoding,
        faiss_index=getattr(args, 'faiss_index', None),
        index_path=getattr(args, 'index_path', None),
        logger=logger,
    )

    return (
        biencoder,
        biencoder_params,
        crossencoder,
        crossencoder_params,
        candidate_encoding,
        title2id,
        id2title,
        id2text,
        wikipedia_id2local_id,
        faiss_indexer,
    )