Пример #1
0
    def __type_centric(self, query):
        """Performs type-centric TTI with the configured retrieval model.

        For the BM25 model the types are retrieved directly; for the LM
        model a second-pass scorer is built and handed to the retrieval.
        Any other model value yields an empty dict.

        :param query: query string
        :type query: str
        :return: whatever ``Retrieval.retrieve`` returns for the chosen
            model, or an empty dict if the model is not handled here
        """
        model = self.__config.get("model", TTI_MODEL_BM25)
        index_name = self.__tc_config.get("index", DEFAULT_TTI_TC_INDEX)
        elastic = ElasticCache(index_name)

        if model == TTI_MODEL_BM25:
            PLOGGER.info("TTI, TC, BM25")
            self.__tc_config["model"] = "bm25"
            return Retrieval(self.__tc_config).retrieve(query)

        if model == TTI_MODEL_LM:
            PLOGGER.debug("TTI, TC, LM")
            # "model" and "field" are needed for the 2nd-pass scoring.
            self.__tc_config["model"] = "lm"
            self.__tc_config["field"] = "content"
            second_pass = {"field": "content"}
            self.__tc_config["second_pass"] = second_pass
            # Forward only the smoothing settings that are actually set.
            for param in ("smoothing_method", "smoothing_param"):
                value = self.__config.get(param, None)
                if value is not None:
                    second_pass[param] = value

            scorer = Scorer.get_scorer(elastic, query, self.__tc_config)
            lm_types = Retrieval(self.__tc_config).retrieve(query, scorer)

            PLOGGER.info(lm_types)
            return lm_types

        return dict()
Пример #2
0
    def _first_pass_scoring(self, analyzed_query):
        """Runs the first-pass search and hands back its results.

        The search is issued against the first-pass field, limited to the
        configured number of documents, and asks for the configured
        return fields.

        :param analyzed_query: analyzed query
        :return: first-pass results, exactly as returned by the search call
        """
        PLOGGER.debug("\tFirst pass scoring... ")
        return self.__elastic.search(
            analyzed_query,
            self.__first_pass_field,
            num=self.__first_pass_num_docs,
            fields_return=self.__first_pass_fields_return,
        )
Пример #3
0
def entity_linking():
    """Links entities in the query given by the ``q`` request argument.

    Request arguments read:
      - ``q``: query string (required)
      - ``method``: passed through to the linker as given (None if absent)
      - ``threshold``: parsed as a float (0.1 if absent or not parseable)

    :return: an error response when ``q`` is missing, otherwise the
        linking result passed through ``jsonify``
    """
    query = request.args.get("q", None)
    if query is None:
        return error("Query is not specified.")

    config = {
        "method": request.args.get("method", None),
        # Query-string values arrive as str, while the default is a float.
        # Converting here gives the linker a float in both cases; a value
        # that cannot be parsed falls back to the default.
        "threshold": request.args.get("threshold", 0.1, type=float),
    }
    el = EL(config, __entity, __elastic, __fcache)
    res = el.link(query)
    PLOGGER.debug(res)
    return jsonify(**res)
Пример #4
0
    def _second_pass_scoring(self, res1, scorer):
        """Re-scores the first-pass documents with the given scorer.

        :param res1: first pass results, keyed by document ID
        :param scorer: scorer object; its ``score_doc(doc_id)`` supplies
            the new score of each document
        :return: dict mapping each document ID to its new "score" and to
            the "fields" carried over from the first pass (empty dict when
            the first-pass entry has none)
        """
        PLOGGER.debug("\tSecond pass scoring... ")
        # Request the term vectors of all first-pass documents, one field
        # at a time, before any document is scored.
        for field_name in self.__get_fields():
            doc_ids = list(res1.keys())
            self.__elastic.multi_termvector(doc_ids, field_name)

        rescored = {
            doc_id: {
                "score": scorer.score_doc(doc_id),
                "fields": res1[doc_id].get("fields", {}),
            }
            for doc_id in res1.keys()
        }
        PLOGGER.debug("done")
        return rescored
Пример #5
0
 def __get_top_n(self, fields_freq, n):
     """Sorts fields and returns the top-n that are not FSDM fields.

     Fields are ranked by frequency, highest first; ties are broken by
     field name in descending order. Fields contained in
     ``self.__fsdm_fields`` are skipped and do not count towards ``n``.

     :param fields_freq: dict mapping field name to frequency
     :param n: maximum number of fields to return
     :return: dict mapping the selected field names to their frequencies
     """
     sorted_fields = sorted(fields_freq.items(),
                            key=lambda item: (item[1], item[0]),
                            reverse=True)
     top_fields = dict()
     for field, freq in sorted_fields:
         # Field names are unique keys, so the dict size is the number of
         # fields selected so far.
         if len(top_fields) >= n:
             break
         if field in self.__fsdm_fields:
             continue
         top_fields[field] = freq
         if self.DEBUG:
             print("(" + field + ", " + str(freq) + ")")
     if self.DEBUG:
         # Logger calls take a format string plus arguments (not
         # print-style positional values); without a placeholder the
         # message fails to format and is never emitted.
         PLOGGER.debug("\nNumber of fields: %s \n", len(top_fields))
     return top_fields
Пример #6
0
    def get_scorer(elastic, query, config):
        """Returns Scorer object (Scorer factory).

        :param elastic: Elastic object
        :param query: raw query (to be analyzed)
        :param config: dict with models parameters; its "model" entry
            selects the scorer ("lm", "mlm" or "prms")
        :return: the scorer instance for the configured model, or None
            when no model is configured
        :raises Exception: if a model is configured but not recognized
        """
        model = config.get("model", None)
        if model is None:
            return None

        if model == "lm":
            PLOGGER.debug("\tLM scoring ... ")
            return ScorerLM(elastic, query, config)
        if model == "mlm":
            PLOGGER.debug("\tMLM scoring ...")
            return ScorerMLM(elastic, query, config)
        if model == "prms":
            PLOGGER.debug("\tPRMS scoring ...")
            return ScorerPRMS(elastic, query, config)

        # str() keeps the intended error for non-string model values, where
        # plain concatenation would fail with a TypeError instead.
        raise Exception("Unknown model " + str(model))