def extract_negative_mappings(self):
        """
        Add sampled negative mappings to the positive training mappings of
        every KB pair and write the combined data to disk.

        For each ``kb_names`` key in ``self.umls_training_data`` (indexed as a
        pair of KB names), the two KBs are loaded from ``kb-<name>.json``
        under ``self.OUTPUT_KB_DIR`` and passed, together with the pair's
        positive mappings, to ``self.sample_negative_mappings``. If that call
        returns a non-empty result, positives + negatives are written to
        ``<self.OUTPUT_DIR>/training/<kb1>-<kb2>.tsv`` and that path is
        appended as one line to ``self.done_file``. Pairs for which no
        negatives are returned are skipped entirely.
        :return: None
        """
        for kb_names, positives in self.umls_training_data.items():
            first_name = kb_names[0]
            second_name = kb_names[1]

            # Locations of the two serialized KBs and of the output file
            source_path = os.path.join(
                self.OUTPUT_KB_DIR, 'kb-{}.json'.format(first_name))
            target_path = os.path.join(
                self.OUTPUT_KB_DIR, 'kb-{}.json'.format(second_name))
            training_path = os.path.join(
                self.OUTPUT_DIR, 'training',
                '{}-{}.tsv'.format(first_name, second_name))

            # Create empty KBs, then replace them with the loaded ones
            source_kb = KnowledgeBase()
            target_kb = KnowledgeBase()

            sys.stdout.write("\tLoading %s and %s\n" % kb_names)
            source_kb = source_kb.load(source_path)
            target_kb = target_kb.load(target_path)

            sys.stdout.write(
                "\t\tSampling negatives between %s and %s\n" % kb_names)
            negatives = self.sample_negative_mappings(
                source_kb, target_kb, positives)

            # Nothing is written (and the pair is not marked done) when
            # sampling produced no negatives
            if not negatives:
                continue

            # Positive and negative mappings go into the same training file
            self.write_mapping_to_file(training_path, positives + negatives)

            # Record the finished pair in the done file
            with open(self.done_file, 'a') as done_log:
                done_log.write('%s\n' % training_path)
Пример #2
0
    def load_kb(kb_path) -> KnowledgeBase:
        """
        Load the KnowledgeBase specified at kb_path.

        The loader is chosen from the suffix of kb_path:
        ``.json``/``.pickle``/``.pkl`` go through ``KnowledgeBase.load``,
        ``.obo``/``.OBO`` through ``KBLoader.import_obo_kb``,
        ``.owl``/``.rdf``/``.OWL``/``.RDF`` through ``KBLoader.import_owl_kb``,
        ``.msh`` through ``KBLoader.load_mesh`` and ``.nci`` through
        ``KBLoader.load_nci``. Any other value is validated as a URL,
        downloaded to a temporary file in the current working directory and
        imported as OWL.
        :param kb_path: path to knowledge base file, or URL of an OWL file
        :return: the loaded KnowledgeBase
        :raises AssertionError: if kb_path is None or the empty string
        :raises NotImplementedError: if kb_path ends with ``.ttl`` or ``.n3``
        Errors raised by ``URLValidator`` for an invalid URL and by
        ``response.raise_for_status()`` for a failed download propagate
        unchanged.
        """
        sys.stdout.write("\tLoading %s...\n" % kb_path)

        assert kb_path is not None
        assert kb_path != ''

        kb_name = os.path.basename(kb_path)

        # load kb
        if kb_path.endswith(('.json', '.pickle', '.pkl')):
            # NOTE(review): presumably KnowledgeBase.load unpickles
            # .pickle/.pkl files -- only pass trusted paths; confirm.
            kb = KnowledgeBase().load(kb_path)
        elif kb_path.endswith(('.obo', '.OBO')):
            kb = KBLoader.import_obo_kb(kb_name, kb_path)
        elif kb_path.endswith(('.owl', '.rdf', '.OWL', '.RDF')):
            kb = KBLoader.import_owl_kb(kb_name, kb_path)
        elif kb_path.endswith('.msh'):
            kb = KBLoader.load_mesh(kb_name, kb_path)
        elif kb_path.endswith('.nci'):
            kb = KBLoader.load_nci(kb_name, kb_path)
        elif kb_path.endswith(('.ttl', '.n3')):
            sys.stdout.write('This program cannot parse your file type.\n')
            raise NotImplementedError()
        else:
            # Not a recognised file suffix: treat kb_path as a URL.
            # An invalid URL makes the validator raise, which propagates.
            URLValidator()(kb_path)

            response = requests.get(kb_path, stream=True)
            try:
                response.raise_for_status()
                temp_file = 'temp_file_ontoemma.owl'
                try:
                    with open(temp_file, 'wb') as outf:
                        for block in response.iter_content(1024):
                            outf.write(block)
                    kb = KBLoader.import_owl_kb('', temp_file)
                finally:
                    # Remove the download even when writing or parsing
                    # fails, so a failed import does not leave it behind.
                    if os.path.exists(temp_file):
                        os.remove(temp_file)
            finally:
                # A streamed response holds its connection until closed.
                response.close()

        sys.stdout.write("\tEntities: %i\n" % len(kb.entities))

        return kb
Пример #3
0
    def import_kb(kb_name, kb_filename):
        """
        Returns a KnowledgeBase object loaded from kb_filename. The KB
        must be one of the supported ones dispatched on below.

        When kb_filename starts with ``'s3'`` it is first copied locally via
        ``file_util.cache_file``; that local copy is removed again before
        this function returns or raises.
        :param kb_name: one of the ``KBLoader`` KB-name constants; selects
            which importer is used
        :param kb_filename: path of the raw KB file (local, or starting
            with ``'s3'``)
        :return: the imported KnowledgeBase
        :raises LookupError: if kb_name is not one of the supported names
        :raises AssertionError: if the selected importer returned None
        """
        # if needed, copy the file locally and update kb_filename.
        delete_local_copy = False
        if kb_filename.startswith('s3'):
            kb_filename = file_util.cache_file(kb_filename)
            delete_local_copy = True

        kb = None
        try:
            if kb_name in {
                    KBLoader.SEQUENCE_ONTOLOGY, KBLoader.NCBI_TAXONOMY,
                    KBLoader.CHEBI_TAXONOMY, KBLoader.GO_TAXONOMY,
                    KBLoader.PR_TAXONOMY, KBLoader.CL_TAXONOMY,
                    KBLoader.UNK_OBO_TAXONOMY
            }:
                kb = KBLoader.import_obo_kb(kb_name, kb_filename)
            elif kb_name == KBLoader.MESH_TAXONOMY:
                kb = KBLoader.import_mesh(kb_name, kb_filename)
            elif kb_name == KBLoader.DBPEDIA:
                kb = KBLoader.import_dbpedia(kb_name, kb_filename)
            elif kb_name == KBLoader.MERGED:
                kb = KnowledgeBase.load(kb_filename)
            else:
                raise LookupError("Unknown kb_name: {}".format(kb_name))
        finally:
            # remove the local copy of the raw kb file(s), also when the
            # import failed or kb_name was unknown, so it is not leaked.
            if delete_local_copy:
                os.remove(kb_filename)

        # return the imported kb.
        assert (kb is not None)
        return kb