Exemplo n.º 1
0
    def __init__(self, hpo_tsv_path=None, mesh_tsv_path=None):
        '''
        Constructor: load two UMLS exports and infer HPO <-> MeSH mappings.

        Both input files are tab-separated; columns 0, 2 and 3 of every row
        are read as CUI, vocabulary code and term name. Rows starting with
        'CUI' (presumably the header row) are skipped.

        An HPO code and a MeSH code are mapped to each other when they are
        attached to exactly the same set of CUIs.

        :param hpo_tsv_path: UMLS-HPO table; defaults to the original
            hard-coded location when None.
        :param mesh_tsv_path: UMLS-MeSH table; defaults to the original
            hard-coded location when None.
        '''
        if hpo_tsv_path is None:
            hpo_tsv_path = '/home/nuria/workspace/repurposing-hetio/rephetio-dhimmelstein/hetionet+hpo/data/umls-hpo-2016aa.tsv'
        if mesh_tsv_path is None:
            mesh_tsv_path = '/home/nuria/workspace/repurposing-hetio/rephetio-dhimmelstein/hetionet+hpo/data/umls-mesh-2016aa.tsv'

        # VARIABLES
        self.umls_hpo2umls_d = {}      # HPO code  -> CUIs
        self.umls_mesh2umls_d = {}     # MeSH code -> CUIs
        self.inferred_hpo2mesh_d = {}  # HPO code  -> MeSH codes
        self.inferred_mesh2hpo_d = {}  # MeSH code -> HPO codes
        self.hp2name_d = {}            # HPO code  -> term name
        self.mesh2name_d = {}          # MeSH code -> term name

        def load_umls_tsv(tsv_path, code2cui_d, code2name_d):
            # Fill code -> CUIs and code -> name from one UMLS table.
            # The context manager closes the file even if a row is malformed.
            with open(tsv_path, 'r') as tsv_f:
                for line in tsv_f:
                    if line.startswith('CUI'):
                        continue
                    line_l = line.strip('\n').split('\t')
                    cui = line_l[0]
                    code = line_l[2]
                    term = line_l[3]
                    util.add_elem_with_dictionary(code2cui_d, code, cui)
                    code2name_d[code] = term

        # ALGORITHM
        # Create hpo2umls and mesh2umls dicts from UMLS mappings
        load_umls_tsv(hpo_tsv_path, self.umls_hpo2umls_d, self.hp2name_d)
        load_umls_tsv(mesh_tsv_path, self.umls_mesh2umls_d, self.mesh2name_d)

        # Index MeSH codes by their CUI set. The previous pairwise test
        # ("larger set minus smaller set is empty") can only succeed when
        # both sets are equal, so an exact-match lookup yields the same
        # pairs, in the same order, without the HPO x MeSH nested scan.
        # NOTE(review): if a subset match was intended, the criterion itself
        # needs revisiting - behaviour is deliberately left unchanged here.
        mesh_by_cuis_d = {}
        for mesh in self.umls_mesh2umls_d:
            cuis_key = frozenset(self.umls_mesh2umls_d[mesh])
            mesh_by_cuis_d.setdefault(cuis_key, []).append(mesh)

        # Infer and create hpo2mesh / mesh2hpo dicts
        for hp in self.umls_hpo2umls_d:
            hp_key = frozenset(self.umls_hpo2umls_d[hp])
            for mesh in mesh_by_cuis_d.get(hp_key, ()):
                util.add_elem_with_dictionary(self.inferred_hpo2mesh_d, hp,
                                              mesh)
                util.add_elem_with_dictionary(self.inferred_mesh2hpo_d,
                                              mesh, hp)
Exemplo n.º 2
0
def map_do_orpha2do(path, orpha_dis_phe_path=None, do_owl_path=None):
    '''
    Map Orphanet disease identifiers (OrphaNumbers) to DOIDs using the
    cross-references extracted from the Disease Ontology.

    Three files are written, each named by appending a fixed file name to
    `path` (so `path` must already end with a separator or prefix):

    * orphanet-do-doid_orphanum-hp.tsv: every disease-symptom row, repeated
      once per mapped DOID; unmapped diseases keep their OrphaNumber and get
      cardinality "1:0".
    * do-orpha2do-mappings.tsv: every orphanet -> do pair known to DO.
    * orphanet-diseases-do-orpha2do.tsv: the pairs restricted to
      OrphaNumbers present in the disease-symptom table.

    A short mapped / not-mapped summary is printed at the end.

    :param path: output prefix the three file names are appended to.
    :param orpha_dis_phe_path: tab-separated Orphanet disease-symptom table
        (columns 0-3 are read; rows starting with 'orphanet_code' are
        skipped); defaults to the original hard-coded location when None.
    :param do_owl_path: Disease Ontology OWL file handed to `do.term`;
        defaults to the original hard-coded location when None.
    '''
    if orpha_dis_phe_path is None:
        orpha_dis_phe_path = '/home/nuria/workspace/repurposing-hetio/rephetio-dhimmelstein/hetionet+hpo/data/orphanet-disease-symptom.tsv'
    if do_owl_path is None:
        do_owl_path = "/home/nuria/workspace/repurposing-hetio/rephetio-dhimmelstein/hetionet+hpo/data/doid.owl"

    # IN
    # Inputs are loaded before any output file is opened, so a missing or
    # broken input no longer leaves truncated output files behind.
    with open(orpha_dis_phe_path, 'r') as orphaDisPhe_f:
        orphaDisPhe_l = orphaDisPhe_f.readlines()

    # import do doid2orpha mappings dictionary
    do_term = do.term(do_owl_path)
    do_do2orpha_dct = do_term.do2orpha

    # VARIABLES
    orpha2do_dct = {}     # OrphaNumber -> DOIDs
    do_orpha_dct = {}     # OrphaNumbers seen in DO (dict used as ordered set)
    orpha_orpha_dct = {}  # OrphaNumbers seen in Orphanet (same)

    # ALGORITHM
    # define reverse mappings, orpha to do
    for doid, orphanumber_l in do_do2orpha_dct.items():
        for orpha in orphanumber_l:
            util.add_elem_with_dictionary(orpha2do_dct, orpha, doid)
            do_orpha_dct[orpha] = 1

    # OUT
    # Context managers guarantee all three files are flushed and closed even
    # if a malformed row raises part-way through.
    with open("{}orphanet-do-doid_orphanum-hp.tsv".format(path), 'w') as rd_sym_f, \
            open("{}do-orpha2do-mappings.tsv".format(path), 'w') as rd_mappings_f, \
            open("{}orphanet-diseases-do-orpha2do.tsv".format(path), 'w') as orpha_mappings_f:
        rd_sym_f.write(
            'orphanet_code\tdo_do:orphanet_cardinality\tdo_do_mapping\torphanet_term\thp_code\thp_term\n'
        )
        rd_mappings_f.write("orphanet\tdo\n")
        orpha_mappings_f.write('orphanumber\tdoid\n')

        # write down the do mappings orpha2do
        for orpha in orpha2do_dct:
            for doid in orpha2do_dct[orpha].keys():
                rd_mappings_f.write("{}\t{}\n".format(orpha, doid))

        # map orphanet rare diseases from orphanumber to doid
        for line in orphaDisPhe_l:
            if line.startswith('orphanet_code'):
                continue
            line_l = line.strip('\n').split('\t')
            orpha_id = line_l[0]
            orpha_orpha_dct[orpha_id] = 1

            if orpha_id not in orpha2do_dct:
                # No DO cross-reference: keep the OrphaNumber as disease id.
                disease_id_l = [orpha_id]
                mapping_cardinality = 0
            else:
                disease_id_l = list(orpha2do_dct[orpha_id].keys())
                mapping_cardinality = len(disease_id_l)
                if mapping_cardinality > 1:
                    print('orpha_id: {} has more than one doid'.format(
                        orpha_id))

            for disease_id in disease_id_l:
                rd_sym_f.write('{}\t1:{}\t{}\t{}\t{}\t{}\n'.format(
                    orpha_id, mapping_cardinality, disease_id, line_l[1],
                    line_l[2], line_l[3]))

        # mappings restricted to the diseases present in the Orphanet table
        for orpha in orpha_orpha_dct:
            if orpha in orpha2do_dct:
                for doid in orpha2do_dct[orpha].keys():
                    orpha_mappings_f.write('{}\t{}\n'.format(orpha, doid))

    # Coverage summary
    do_orpha_set = set(do_orpha_dct)
    orpha_orpha_set = set(orpha_orpha_dct)
    print("")
    print('From do, "xref" mappings DO to Orphanet:')
    print('orphanet_orpha: {}; do_orpha: {} OrphaNumbers'.format(
        len(orpha_orpha_set), len(do_orpha_set)))
    commons = list(do_orpha_set & orpha_orpha_set)
    differs = list(orpha_orpha_set - do_orpha_set)
    print('mapped: {}'.format(len(commons)))
    # Only a small sample is shown; set order makes the sample arbitrary.
    print('mapped: {}'.format(commons[0:4]))
    print('not mapped: {}'.format(len(differs)))
    print('not mapped: {}'.format(differs[0:4]))
Exemplo n.º 3
0
def get_hpo_mesh_mappings(hpo_tsv_path=None, mesh_tsv_path=None):
    '''
    Infer hpo2mesh and mesh2hpo mappings from two UMLS metathesaurus exports.

    Both input files are tab-separated; columns 0, 2 and 3 of every row are
    read as CUI, vocabulary code and term name. Rows starting with 'CUI'
    (presumably the header row) are skipped.

    An HPO code and a MeSH code are mapped to each other when they are
    attached to exactly the same set of CUIs. Every inferred pair is also
    printed to stdout.

    :param hpo_tsv_path: UMLS-HPO table; defaults to the original hard-coded
        location when None.
    :param mesh_tsv_path: UMLS-MeSH table; defaults to the original
        hard-coded location when None.
    :return: hpo2mesh_dict
    :return: mesh2hpo_dict
    :return: hp2name_dict
    :return: mesh2name_dict
    '''
    if hpo_tsv_path is None:
        hpo_tsv_path = '/home/nuria/workspace/repurposing-hetio/rephetio-dhimmelstein/hetionet+hpo/data/umls-hpo-2016aa.tsv'
    if mesh_tsv_path is None:
        mesh_tsv_path = '/home/nuria/workspace/repurposing-hetio/rephetio-dhimmelstein/hetionet+hpo/data/umls-mesh-2016aa.tsv'

    # VARIABLES
    umls_hpo2umls_d = {}      # HPO code  -> CUIs
    umls_mesh2umls_d = {}     # MeSH code -> CUIs
    inferred_hpo2mesh_d = {}  # HPO code  -> MeSH codes
    inferred_mesh2hpo_d = {}  # MeSH code -> HPO codes
    hp2name_d = {}            # HPO code  -> term name
    mesh2name_d = {}          # MeSH code -> term name

    def load_umls_tsv(tsv_path, code2cui_d, code2name_d):
        # Fill code -> CUIs and code -> name from one UMLS table.
        # The context manager closes the file even if a row is malformed.
        with open(tsv_path, 'r') as tsv_f:
            for line in tsv_f:
                if line.startswith('CUI'):
                    continue
                line_l = line.strip('\n').split('\t')
                cui = line_l[0]
                code = line_l[2]
                term = line_l[3]
                util.add_elem_with_dictionary(code2cui_d, code, cui)
                code2name_d[code] = term

    # ALGORITHM
    # Create hpo2umls and mesh2umls dicts from UMLS mappings
    load_umls_tsv(hpo_tsv_path, umls_hpo2umls_d, hp2name_d)
    load_umls_tsv(mesh_tsv_path, umls_mesh2umls_d, mesh2name_d)

    # Index MeSH codes by their CUI set. The previous pairwise test ("larger
    # set minus smaller set is empty") can only succeed when both sets are
    # equal, so an exact-match lookup yields the same pairs, in the same
    # order, without the HPO x MeSH nested scan.
    # NOTE(review): if a subset match was intended, the criterion itself
    # needs revisiting - behaviour is deliberately left unchanged here.
    mesh_by_cuis_d = {}
    for mesh in umls_mesh2umls_d:
        cuis_key = frozenset(umls_mesh2umls_d[mesh])
        mesh_by_cuis_d.setdefault(cuis_key, []).append(mesh)

    # Infer and create hpo2mesh / mesh2hpo dicts
    for hp in umls_hpo2umls_d:
        hpCuis_s = set(umls_hpo2umls_d[hp])
        for mesh in mesh_by_cuis_d.get(frozenset(hpCuis_s), ()):
            # Rebuilt only for actual matches, to keep the log line's
            # arguments the same as before.
            meshCuis_s = set(umls_mesh2umls_d[mesh])
            resta = hpCuis_s - meshCuis_s  # always empty for a match
            util.add_elem_with_dictionary(inferred_hpo2mesh_d, hp, mesh)
            util.add_elem_with_dictionary(inferred_mesh2hpo_d, mesh, hp)
            print('{} {} {} {} {} {} {}'.format(resta, hp, hp2name_d[hp],
                                                hpCuis_s, mesh,
                                                mesh2name_d[mesh],
                                                meshCuis_s))

    return inferred_hpo2mesh_d, inferred_mesh2hpo_d, hp2name_d, mesh2name_d