def load_train_vectors(self, embeddings_infile, prune=True):
    """Load training embeddings from disk in fast format.

    When `prune` is True, the loaded space is restricted to the keys of
    `self.exemplar_to_concept`, which must already be populated.
    """
    print('Loading vectors...')
    vectors = Reach.load_fast_format(embeddings_infile)
    self.train_vectors = vectors
    if prune:
        # Restrict the embedding space to the selected target ontology.
        assert self.exemplar_to_concept
        vectors.prune(list(self.exemplar_to_concept.keys()))
    print(len(vectors.items), len(self.exemplar_to_concept))
# Example #2
# 0
    # NOTE(review): scraped fragment — the enclosing `def` line and the closing
    # arguments of the final call are missing, so this span is not runnable as-is.
    # When True, evaluate against gold (perfect) chunks instead of UIMA output.
    perfect = False

    # Load gold annotations; sort by key and keep only the values so the
    # ordering is deterministic.
    gold = json.load(open("data/test_gold.json"))
    gold = list(zip(*sorted(gold.items())))[1]

    if perfect:
        data = json.load(open("data/test_gold.json"))
    else:
        data = json.load(open("data/test_uima.json"))
    data = list(zip(*sorted(data.items())))[1]

    # Each item appears to be a (text, BIO-tags) pair — TODO confirm schema.
    txt, gold_bio = zip(*gold)
    _, data_bio = zip(*data)

    # Word embeddings (empty path here — presumably elided by the scraper),
    # precomputed concept vectors, and the concept-name -> label mapping.
    embeddings = Reach.load("", unk_word="UNK")
    concept_reach = Reach.load_fast_format("data/concept_vectors")
    concept_labels = json.load(open("data/concept_names2label.json"))

    # Flatten per-sentence BIO tags into one flat sequence.
    gold_bio = list(chain.from_iterable(gold_bio))

    results_bio = {}

    # Build phrase representations from the word embeddings using mean
    # pooling and reciprocal context weighting.
    r_phrases = compose(data,
                        f1=np.mean,
                        f2=np.mean,
                        window=0,
                        embeddings=embeddings,
                        context_function=reciprocal)

    # NOTE(review): call truncated in the source — the remaining arguments and
    # closing parenthesis are missing.
    pred_bio_focus = eval_extrinsic(list(chain.from_iterable(data_bio)),
                                    r_phrases, concept_reach, concept_labels,
# Example #3
# 0
    # NOTE(review): scraped fragment — the enclosing `def` line is missing.
    # Set this flag to true to replicate the perfect chunking setting
    # in experiment 3.
    perfect = True

    # Load gold annotations; sort by key and keep only the values so the
    # ordering is deterministic.
    gold = json.load(open("data/test_gold.json"))
    gold = list(zip(*sorted(gold.items())))[1]

    # NOTE(review): `data` is only assigned when `perfect` is True; flipping
    # the flag to False makes the next line raise NameError. An `else` branch
    # was likely lost in scraping — compare Example #2 above.
    if perfect:
        data = json.load(open("data/test_gold.json"))
    data = list(zip(*sorted(data.items())))[1]

    txt, gold_bio = zip(*gold)
    # Word embeddings; judging by the filename these were trained on
    # MIMIC-III (min count 5, window 5, 100 dims) — TODO confirm.
    r = Reach.load("../../corpora/mimiciii-min5-neg3-w5-100.vec",
                   unk_word="<UNK>")

    # NOTE(review): f-string has no placeholders; a plain literal would do.
    r_concept = Reach.load_fast_format(f"data/concept_vectors")
    concept_labels = json.load(open("data/names2label.json"))

    # Group the concept vectors by their label.
    grouped = defaultdict(list)
    for k, v in concept_labels.items():
        grouped[v].append(r_concept[k])

    # Drop the "np" group before clustering — TODO confirm why it is excluded.
    grouped.pop("np")

    # For each remaining label, cluster its vectors into 10 centroids and
    # keep the centroids as that label's prototype memory.
    memory = {}
    for k, v in tqdm(grouped.items()):

        km = KMeans(10)
        km.fit(v)
        memory[k] = km.cluster_centers_
 def load_test_vectors(self, embeddings_infile):
     """Load test embeddings from disk in fast format.

     Unlike load_train_vectors, no pruning is applied here.
     """
     # load vectors
     print('Loading vectors...')
     self.test_vectors = Reach.load_fast_format(embeddings_infile)