Example #1
 def get_auto_evidences(name, abbreviations, abbrvs_trie):
     # Match every known abbreviation fragment inside the normalized name.
     frags = EvidenceFinder.find_names(normalize_dataset_ws(name),
                                       abbrvs_trie)
     # Expand each fragment to its evidences and deduplicate.
     evidences = []
     for f in frags:
         evidences.extend(abbreviations[f])
     return list(set(evidences))
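For orientation, a self-contained sketch of the same lookup-and-deduplicate pattern. `find_names` here is a hypothetical stand-in for `EvidenceFinder.find_names` (a token scan instead of the trie), and the abbreviation map is made up:

 # Hypothetical stand-in for EvidenceFinder.find_names: return every
 # abbreviation key that occurs in the (already normalized) name.
 def find_names(name, keys):
     return {k for k in keys if k in name.split()}

 # Made-up abbreviation map for illustration only.
 abbreviations = {
     "sst": ["Stanford Sentiment Treebank"],
     "squad": ["Stanford Question Answering Dataset"],
 }

 frags = find_names("sst dev set", abbreviations.keys())
 evidences = []
 for f in frags:
     evidences.extend(abbreviations[f])
 print(list(set(evidences)))  # ['Stanford Sentiment Treebank']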
Example #2
 def __call__(self, text):
     text = normalize_cell_ws(normalize_dataset_ws(text))
     ds = self.evidence_finder.find_datasets(text)
     ts = self.evidence_finder.find_tasks(text)
     ms = self.evidence_finder.find_metrics(text)
     # Anything that also matched as a task or metric is not counted
     # as dataset evidence.
     ds -= ts
     ds -= ms
     return ts, ds, ms
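The two subtractions are the disambiguation step: any evidence that also matched as a task or metric is dropped from the dataset side. A standalone illustration with made-up evidence sets (the real finder may return counters, for which `-=` behaves analogously):

 ts = {"question answering"}
 ms = {"accuracy"}
 ds = {"squad", "accuracy"}  # "accuracy" matched as both dataset and metric

 ds -= ts
 ds -= ms
 assert ds == {"squad"}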
Example #3
 def get_basic_dicts(taxonomy):
     # Tasks keep a single normalized evidence; dataset and metric names
     # are additionally split into per-word evidences.
     tasks = {ts: [normalize_dataset_ws(ts)] for ts in taxonomy.tasks}
     datasets = {
         ds: EvidenceFinder.evidences_from_name(ds)
         for ds in taxonomy.datasets
     }
     metrics = {
         ms: EvidenceFinder.evidences_from_name(ms)
         for ms in taxonomy.metrics
     }
     return tasks, datasets, metrics
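Assuming the snippets above are importable, a taxonomy stub is enough to exercise this. The `Taxonomy` namedtuple below is hypothetical; the real object only needs `.tasks`, `.datasets` and `.metrics` iterables:

 from collections import namedtuple

 # Hypothetical stand-in for the real taxonomy object.
 Taxonomy = namedtuple("Taxonomy", "tasks datasets metrics")
 tax = Taxonomy(tasks=["Image Classification"],
                datasets=["ImageNet"],
                metrics=["Top-1 Accuracy"])

 tasks, datasets, metrics = get_basic_dicts(tax)
 # tasks maps each task to one normalized evidence string; datasets and
 # metrics additionally carry per-word evidences via evidences_from_name.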
Example #4
    def compute_context_logprobs(self, context, noise, ms_noise, ts_noise,
                                 logprobs, axes_logprobs):
        if isinstance(context, str) or context is None:
            # Free-text context: extract the evidences ourselves.
            context = context or ""
            context = normalize_cell_ws(normalize_dataset_ws(context))
            dss = self.evidence_finder.find_datasets(context)
            mss = self.evidence_finder.find_metrics(context)
            tss = self.evidence_finder.find_tasks(context)

            # Anything that also matched as a metric or task is not
            # counted as dataset evidence.
            dss -= mss
            dss -= tss
        else:
            tss, dss, mss = context

        # Normalize evidence keys, then convert the plain dicts into
        # numba typed dicts for compute_logprobs.
        dss = {normalize_cell(ds): count for ds, count in dss.items()}
        mss = {normalize_cell(ms): count for ms, count in mss.items()}
        tss = {normalize_cell(ts): count for ts, count in tss.items()}
        dss = self._numba_extend_dict(dss)
        mss = self._numba_extend_dict(mss)
        tss = self._numba_extend_dict(tss)

        # Cache on a hashable digest of the evidence dicts plus the
        # noise parameters, so repeated contexts skip compute_logprobs.
        key = (self._hash_counter(tss), self._hash_counter(dss),
               self._hash_counter(mss), noise, ms_noise, ts_noise)
        if key not in self.logprobs_cache:
            lp, alp = compute_logprobs(
                self._taxonomy, self._taxonomy_tasks, self._taxonomy_datasets,
                self._taxonomy_metrics, self.reverse_merged_p,
                self.reverse_metrics_p, self.reverse_tasks_p, dss, mss, tss,
                noise, ms_noise, ts_noise, self.ds_pb, self.ms_pb, self.ts_pb,
                self.max_repetitions)
            self.logprobs_cache[key] = (lp, alp)
        else:
            lp, alp = self.logprobs_cache[key]
        # Accumulate into the caller-provided totals (overall and per-axis).
        logprobs += lp
        axes_logprobs[0] += alp[0]
        axes_logprobs[1] += alp[1]
        axes_logprobs[2] += alp[2]
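The caching step is the part worth isolating: the three evidence dicts are reduced to a hashable key so that repeated contexts never re-run `compute_logprobs`. A minimal sketch of that memoization pattern, with `frozenset(d.items())` as a hypothetical stand-in for `_hash_counter` and a dummy scoring function in place of `compute_logprobs`:

 cache = {}

 def hash_counter(d):
     # Any order-independent, hashable digest of the dict works.
     return frozenset(d.items())

 def dummy_logprobs(dss, mss, tss, noise):
     # Placeholder for the expensive compute_logprobs call.
     return sum(dss.values()) + sum(mss.values()) + sum(tss.values()) - noise

 def cached_logprobs(dss, mss, tss, noise):
     key = (hash_counter(tss), hash_counter(dss), hash_counter(mss), noise)
     if key not in cache:
         cache[key] = dummy_logprobs(dss, mss, tss, noise)
     return cache[key]

 lp = cached_logprobs({"squad": 2}, {"f1": 1}, {"qa": 1}, 0.1)
 assert cached_logprobs({"squad": 2}, {"f1": 1}, {"qa": 1}, 0.1) == lp  # cache hit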
Example #5
 def evidences_from_name(key):
     # The full normalized name always counts as evidence; multi-word
     # names also contribute their individual non-stop-word tokens.
     x = normalize_dataset_ws(key)
     y = [w for w in x.split() if w not in manual_dicts.stop_words]
     return ([x] + y) if len(y) > 1 else [x]
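A quick check of the splitting rule, assuming the helpers above are importable, that `normalize_dataset_ws` lower-cases and collapses whitespace, and that none of these words appear in `manual_dicts.stop_words`:

 evidences_from_name("Stanford Sentiment Treebank")
 # -> ['stanford sentiment treebank', 'stanford', 'sentiment', 'treebank']
 evidences_from_name("ImageNet")
 # -> ['imagenet']  (a single surviving word adds no per-word evidences)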