Python Network.add_node示例，model.network.Network.add_node Python示例

示例#1

0

显示文件

文件： fusion.py 项目： AstrorEnales/GenCoNet

 # Fusion: load every serialized sub-graph into the shared network
 print('[INFO] Network fusion')
 for graph in graphs:
     print('[INFO] Add network', graph)
     with io.open(graph, 'r', encoding='utf-8', newline='') as f:
         network.load_from_dict(json.load(f))
 # Mapping: gather the ids of all disease nodes first, then add one Disease
 # node for every id the mapper can translate
 print('[INFO] Add disease mappings')
 all_disease_ids = {
     disease_id
     for node in network.get_nodes_by_label('Disease')
     for disease_id in node.ids
 }
 for disease_id in all_disease_ids:
     mapped_ids, mapped_names = mondo_mapper.map_from(disease_id)
     if not mapped_ids:
         # nothing mapped for this id
         continue
     network.add_node(Disease(mapped_ids, mapped_names))
 # Cleanup
 print('[INFO] Prune network')
 network.prune()
 print('[INFO] Merge duplicate node names')
 merge_duplicate_node_names(network)
 print('[INFO] Merge duplicate edges')
 network.merge_duplicate_edges()
 # Export: write the fused network as compact JSON into a fresh output directory
 print('[INFO] Export network')
 directory_utils.create_clean_directory(config['output-path'])
 output_file = os.path.join(config['output-path'], 'graph.json')
 with io.open(output_file, 'w', encoding='utf-8', newline='') as f:
     serialized = json.dumps(network.to_dict(), separators=(',', ':'))
     f.write(serialized)

示例#2

0

显示文件

        reader = csv.reader(f, delimiter='\t', quotechar='"')
        next(reader, None)
        for row in reader:
            if row[3] == 'H**o sapiens' and row[6] == 'H**o sapiens' and float(
                    row[7]) > 0.9:
                interactor_a_name = row[1]
                interactor_a_type = row[2]
                interactor_b_name = row[4]
                interactor_b_type = row[5]
                interactor_a = add_rna(interactor_a_name, interactor_a_type,
                                       node_lookup)
                interactor_b = add_rna(interactor_b_name, interactor_b_type,
                                       node_lookup)

                if interactor_a is not None and interactor_b is not None:
                    if interactor_a_type == 'mRNA':
                        gene = Gene([interactor_a.id], [])
                        network.add_node(gene)
                        e = Edge(gene, interactor_a, 'TRANSCRIBES', {})
                        network.add_edge(e)
                    elif interactor_b_type == 'mRNA':
                        gene = Gene([interactor_b.id], [])
                        network.add_node(gene)
                        e = Edge(gene, interactor_b, 'TRANSCRIBES', {})
                        network.add_edge(e)
                    e = Edge(interactor_a, interactor_b, 'REGULATES',
                             {'source': 'RNAInter'})
                    network.add_edge(e)

    network.save('../data/RNAInter/graph.json')

示例#3

0

显示文件

 mirna_rnacentral_id = mirna_rnacentral[1]
 # Look the RNAcentral id up in the miRNA mapping file; the literal string
 # 'None' is the "no HGNC id found" marker
 mirna_hgnc_id = 'None'
 with io.open(mirna_mapping_file, 'r', encoding='utf-8',
              newline='') as mm:
     mirna_mapping_reader = csv.reader(mm, delimiter='\t')
     next(mirna_mapping_reader, None)
     for mirna_mapping_row in mirna_mapping_reader:
         if mirna_mapping_row[0] != mirna_rnacentral_id:
             continue
         mirna_hgnc_id = mirna_mapping_row[2]
         break
 # The name is the fifth token when column 4 is split on quotes and spaces
 mirna_name = re.split('[" ]', row[4])[4]
 # The HGNC id is only attached to the node when the lookup succeeded
 mirna_ids = [mirna_rnacentral_id]
 if mirna_hgnc_id != 'None':
     mirna_ids.append(mirna_hgnc_id)
 mirna = MiRNA(mirna_ids, [mirna_name])
 network.add_node(mirna)
 # genes
 gene_ensembl = row[1].split(':')
 gene_ensembl_id = gene_ensembl[1]
 # Same lookup scheme for the gene: first row whose third column equals the
 # Ensembl id provides the HGNC id
 gene_hgnc_id = 'None'
 with io.open(gene_mapping_file, 'r', encoding='utf-8',
              newline='') as gm:
     gene_mapping_reader = csv.reader(gm, delimiter='\t')
     next(gene_mapping_reader, None)
     for gene_mapping_row in gene_mapping_reader:
         if gene_mapping_row[2] != gene_ensembl_id:
             continue
         gene_hgnc_id = 'HGNC:' + gene_mapping_row[1]
         break

示例#4

0

显示文件

def _strip_single_quotes(text):
    """Return text with every single-quote character removed."""
    return text.replace('\'', '')


def _format_id_set(ids):
    """Render a set for the report: its repr without braces and quotes."""
    return _strip_single_quotes(str(ids).replace('{', '').replace('}', ''))


def _format_sequence(items):
    """Render a list or set for the report: repr minus outer brackets and quotes."""
    return _strip_single_quotes(str(items)[1:-1])


def _rna_node_for_label(label, rna_id):
    """Build the RNA node whose class matches a node label, or None if unknown."""
    rna_classes = {
        'CircRNA': CircRNA,
        'ERNA': ERNA,
        'LncRNA': LncRNA,
        'MiRNA': MiRNA,
        'NcRNA': NcRNA,
        'PiRNA': PiRNA,
        'Pseudogene': Pseudogene,
        'Ribozyme': Ribozyme,
        'RRNA': RRNA,
        'ScaRNA': ScaRNA,
        'ScRNA': ScRNA,
        'SnoRNA': SnoRNA,
        'SnRNA': SnRNA,
    }
    node_class = rna_classes.get(label)
    if node_class is None:
        return None
    return node_class([rna_id], [])


def _add_variant_pmids(totals, results):
    """Add each record's num_pmids to totals[variant id]; a missing count is 0."""
    for record in results:
        num_pmids = record['(a.num_pmids)']
        if num_pmids is None:
            num_pmids = 0
        var_id = record['v.`_id`']
        totals[var_id] = totals.get(var_id, 0) + num_pmids


def get_given_drugs_related_info(disease_pairs, drugs):   # first disease pair with first drug array
    """Build one sub-network and one text report per (disease pair, drug).

    For every disease pair and each drug listed for it, the graph database is
    queried (through the module-level ``session``) for drug-disease edges,
    drug-targeted genes, common genes, variants and regulating RNAs. The
    findings are written to ``<pair>_<drug>_results.txt``, the sub-network is
    saved as ``<pair>_<drug>_graph.json`` in
    ``../analysis/disease_pairs/<pair>/`` and passed to ``draw_drug_subgraph``.

    Args:
        disease_pairs: sequence of pairs; element 0 and 1 are the disease ids.
        drugs: sequence aligned with ``disease_pairs``; ``drugs[i]`` holds the
            drug ids to analyse for ``disease_pairs[i]``.

    Returns:
        A list with one entry per disease pair, each a list with one Network
        per drug.
    """
    top_n = 10  # threshold used for every "most relevant" selection
    all_networks = []   # contains an array for each disease pair
    for index, disease_pair in enumerate(disease_pairs):
        networks_per_drug = []  # contains a network for each drug
        d1_id = disease_pair[0]
        d2_id = disease_pair[1]
        temp_id1 = d1_id.replace(':', '-')
        temp_id2 = d2_id.replace(':', '-')
        path = '../analysis/disease_pairs/' + temp_id1 + '_' + temp_id2
        for drug_id in drugs[index]:
            # Also creates missing parent directories; an existing directory
            # is fine.
            os.makedirs(path, exist_ok=True)

            network = Network()
            d1 = Disease([d1_id], [])
            network.add_node(d1)
            d2 = Disease([d2_id], [])
            network.add_node(d2)
            drug = Drug([drug_id], [])
            network.add_node(drug)
            temp_drug_id = drug_id.replace(':', '-')
            file_prefix = path + '/' + temp_id1 + '_' + temp_id2 + '_' + temp_drug_id
            # (disease node, disease id, query parameter name)
            diseases = ((d1, d1_id, 'd1_id'), (d2, d2_id, 'd2_id'))
            with io.open(file_prefix + '_results.txt', 'w', encoding='utf-8', newline='') as results_file:
                results_file.write('In this file all information about the connection between ' + d1_id +
                                   ' and ' + d2_id + ' and the drug ' + drug_id + ' is summarized:\n')

                # the drug INDICATES, CONTRAINDICATES or INDUCES the disease
                for disease, disease_id, param in diseases:
                    query = """ MATCH (d:Disease)-[a]-(n:Drug) WHERE {%s} IN d.ids AND {n_id} in n.ids RETURN distinct(type(a)) """ % param
                    results = session.run(query, parameters={param: disease_id, 'n_id': drug_id})
                    for result in results:
                        edge_type = result['(type(a))']
                        results_file.write(drug_id + ' ' + edge_type + ' ' + disease_id + '\n')
                        network.add_edge(Edge(drug, disease, edge_type, {}))

                # the drug targets a gene which is associated to the disease
                targeted_genes = []
                for disease, disease_id, param in diseases:
                    query = """ MATCH (n:Drug)-[:TARGETS]-(g:Gene)-[:ASSOCIATES_WITH]-(d:Disease) WHERE {%s} IN d.ids AND {n_id} in n.ids RETURN g.`_id` """ % param
                    results = session.run(query, parameters={param: disease_id, 'n_id': drug_id})
                    gene_ids = set()
                    for result in results:
                        gene_id = result['g.`_id`']
                        gene_ids.add(gene_id)
                        gene_node = Gene([gene_id], [])
                        network.add_node(gene_node)
                        network.add_edge(Edge(drug, gene_node, 'TARGETS', {'actions': []})) #TODO
                        network.add_edge(Edge(gene_node, disease, 'ASSOCIATES_WITH', {}))
                    targeted_genes.append(gene_ids)
                d1_genes, d2_genes = targeted_genes

                common_drug_genes = d1_genes.intersection(d2_genes) # genes associated to the drug and both diseases
                # relevant_genes are all genes associated to at least one disease and the drug, below the common genes
                # with the most disease associated references are added
                relevant_genes = d1_genes.union(d2_genes)
                for disease_id, gene_ids in ((d1_id, d1_genes), (d2_id, d2_genes)):
                    if gene_ids:
                        results_file.write(drug_id + ' targets following ' + str(len(gene_ids)) + ' genes which are associated to ' + disease_id + ': ' + _format_id_set(gene_ids) + '\n')
                if common_drug_genes:
                    results_file.write('The disease pair has ' + str(len(common_drug_genes)) + ' common genes which are targeted by the drug: ' + _format_id_set(common_drug_genes) + '\n')

                # add the common genes with the most disease associated references
                # no given num_pmids is similar to num_pmids = 0
                all_d1_genes, all_d2_genes = get_genes(disease_pair)
                all_common_genes = all_d1_genes.intersection(all_d2_genes)
                if all_common_genes:
                    results_file.write('The disease pair has ' + str(len(all_common_genes)) + ' common genes, not considering the connection to the drug.'
                                        ' Following genes have the most references regarding their connection to both diseases:\n')
                    gene_pmid_queries = (
                        (""" MATCH (d1:Disease)-[a]-(g:Gene) WHERE {g_id} IN g.ids AND {d1_id} IN d1.ids RETURN a.num_pmids """, 'd1_id', d1_id),
                        (""" MATCH (d2:Disease)-[a]-(g:Gene) WHERE {g_id} IN g.ids AND {d2_id} IN d2.ids RETURN a.num_pmids """, 'd2_id', d2_id),
                    )
                    relevant_common_genes = []  # [gene id, summed num_pmids over both diseases]
                    for gene_id in all_common_genes:
                        num_pmids = 0
                        for query, param, disease_id in gene_pmid_queries:
                            results = session.run(query, parameters={'g_id': gene_id, param: disease_id})
                            for result in results:  # multiple edges to the same gene
                                count = result['a.num_pmids']
                                if count is not None:
                                    num_pmids = num_pmids + count
                        relevant_common_genes.append([gene_id, num_pmids])
                    # sort by number of pmids and keep the most cited ones
                    relevant_common_genes = sorted(relevant_common_genes, key=lambda item: item[1], reverse=True)
                    relevant_common_genes = relevant_common_genes[:top_n]  # threshold
                    results_file.write(_format_sequence(relevant_common_genes) + '\n')
                    for gene_id, _ in relevant_common_genes:
                        gene_node = Gene([gene_id], [])
                        network.add_node(gene_node)
                        network.add_edge(Edge(gene_node, d1, 'ASSOCIATES_WITH', {}))
                        network.add_edge(Edge(gene_node, d2, 'ASSOCIATES_WITH', {}))
                        relevant_genes.add(gene_id)

                # add the common disease associated variants with most references
                # no given num_pmids is similar to num_pmids = 0
                disease_variants = {}   # variant id -> summed num_pmids
                variant_params = {'d1_id': d1_id, 'd2_id': d2_id}
                query = """ MATCH (d1:Disease)-[a]-(v:Variant)--(d2:Disease) WHERE {d1_id} in d1.ids AND {d2_id} in d2.ids RETURN distinct(a.num_pmids), v.`_id` """
                _add_variant_pmids(disease_variants, session.run(query, parameters=variant_params))
                query = """ MATCH (d2:Disease)-[a]-(v:Variant)--(d1:Disease) WHERE {d1_id} in d1.ids AND {d2_id} in d2.ids RETURN distinct(a.num_pmids), v.`_id` """
                _add_variant_pmids(disease_variants, session.run(query, parameters=variant_params))
                # Same threshold as the gene and RNA selections (this section
                # previously kept only 9 entries).
                top_disease_variants = sorted(disease_variants.items(), key=lambda item: item[1], reverse=True)[:top_n]
                dvs_parts = []
                for var_id, num_pmids in top_disease_variants:
                    variant = Variant([var_id], [])
                    network.add_node(variant)
                    network.add_edge(Edge(variant, d1, 'ASSOCIATES_WITH', {}))
                    network.add_edge(Edge(variant, d2, 'ASSOCIATES_WITH', {}))
                    dvs_parts.append(var_id + ':' + str(num_pmids) + ' PMIDs')
                dvs = ', '.join(dvs_parts)

                # add the gene associated variants with smallest pvalues
                # if no pvalue is given, pvalue is set to 1
                # Each candidate keeps the variant id and gene id separately so
                # ids that contain '-' themselves are never split apart.
                candidates = []     # (label, pvalue, edge type, variant id, gene id)
                for gene_id in relevant_genes:
                    query = """ MATCH (g:Gene)-[a]-(v:Variant) WHERE {g_id} in g.ids RETURN v.`_id`, a.pvalue, type(a) """
                    results = session.run(query, parameters={'g_id': gene_id})
                    for result in results:
                        pvalue = result['a.pvalue']
                        pvalue = 1 if pvalue is None else float(pvalue)
                        var_id = result['v.`_id`']
                        candidates.append((var_id + '-' + gene_id, pvalue, result['type(a)'], var_id, gene_id))
                candidates = sorted(candidates, key=lambda item: item[1])[:top_n]  # threshold
                gene_variants = []  # [label, pvalue, edge type], the form written to the report
                for label, pvalue, edge_type, var_id, gene_id in candidates:
                    variant = Variant([var_id], [])
                    network.add_node(variant)
                    gene_node = Gene([gene_id], [])
                    network.add_node(gene_node)
                    network.add_edge(Edge(gene_node, variant, edge_type, {'pvalue': pvalue}))
                    gene_variants.append([label, pvalue, edge_type])
                gvs = _format_sequence(gene_variants)   # empty string for an empty list

                if len(disease_variants) > 0 or len(gene_variants) > 0:
                    results_file.write('The disease pair has at least ' + str(len(top_disease_variants)) + ' variants associated to both diseases: ' +
                                           dvs + ' and at least ' + str(len(gene_variants)) + ' gene associated variants: ' + gvs + '\n')

                # dict with RNA id as key and the set of regulated relevant genes as value
                relevant_rnas = {}
                for gene_id in relevant_genes:
                    query = """ MATCH (r:RNA)--(g:Gene) WHERE {g_id} in g.ids AND NOT r.label_id CONTAINS "MRNA" return r.`_id` """
                    results = session.run(query, parameters={'g_id': gene_id})
                    for result in results:
                        relevant_rnas.setdefault(result['r.`_id`'], set()).add(gene_id)

                if relevant_rnas:
                    # sort by the number of regulated genes only; the sort is
                    # stable, so ties keep their insertion order
                    ranked_rnas = sorted(relevant_rnas.items(), key=lambda item: len(item[1]), reverse=True)
                    written = 0
                    for rna_id, gene_ids in ranked_rnas:
                        # only add and print RNAs which regulate more than one gene;
                        # the ranking is descending, so the first RNA with a single
                        # gene ends the selection
                        if written >= top_n or len(gene_ids) <= 1:
                            break
                        if written == 0:
                            results_file.write('RNAs with the number and names of the genes they regulate: \n')
                        for gene_id in gene_ids:
                            rna = RNA([rna_id], [])
                            network.add_node(rna)
                            gene_node = Gene([gene_id], [])
                            network.add_node(gene_node)
                            network.add_edge(Edge(rna, gene_node, 'REGULATES', {}))
                        results_file.write(rna_id + '\t' + str(len(gene_ids)) + '\t' + _format_sequence(gene_ids) + '\n')
                        written += 1

                    # append regulating RNAs to one RNA which regulates the most genes, MRNAs are not added
                    top_rna_id, top_gene_ids = ranked_rnas[0]
                    if len(top_gene_ids) > 1:
                        most_relevant_rna = RNA([top_rna_id], [])
                        network.add_node(most_relevant_rna)
                        query = """ MATCH (r:RNA)--(n:RNA) WHERE {r_id} in r.ids AND NOT n.label_id CONTAINS "MRNA" RETURN n.`_id`, labels(n) """
                        results = session.run(query, parameters={'r_id': top_rna_id})
                        regulators = []
                        for result in results:
                            regulator_id = result['n.`_id`']
                            for label in result['labels(n)']:
                                if label == 'RNA':
                                    continue
                                regulator = _rna_node_for_label(label, regulator_id)
                                if regulator is None:
                                    # unknown label: no node class to build
                                    continue
                                network.add_node(regulator)
                                network.add_edge(Edge(regulator, most_relevant_rna, 'REGULATES', {}))
                                regulators.append(regulator_id)
                        results_file.write(top_rna_id + ' is the RNA which regulates the most genes in this subgraph. It is regulated by ' + ', '.join(regulators) + '.\n')
            json_file = file_prefix + '_graph.json'
            network.save(json_file)
            draw_drug_subgraph(json_file)
            networks_per_drug.append(network)
        all_networks.append(networks_per_drug)
    return all_networks

示例#5

0

显示文件

文件： sider.py 项目： AstrorEnales/GenCoNet

network = Network()

# Lookup from the first column of the drug file to the second column,
# both stripped of surrounding whitespace
with io.open(drug_file, 'r', encoding='utf-8', newline='') as f:
    drug_lookup = {
        row[0].strip(): row[1].strip()
        for row in csv.reader(f, delimiter='\t', quotechar='"')
    }

# 1: STITCH compound id (flat, see above)
# 2: UMLS concept id as it was found on the label
# 3: method of detection: NLP_indication / NLP_precondition / text_mention
# 4: concept name
# 5: MedDRA concept type (LLT = lowest level term, PT = preferred term; in a few cases the term is neither LLT nor PT)
# 6: UMLS concept id for MedDRA term
# 7: MedDRA concept name

# All side effects found on the labels are given as LLT. Additionally, the PT is shown. There is at least one
# PT for every LLT, but sometimes the PT is the same as the LLT.
with io.open(file, 'r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f, delimiter='\t', quotechar='"')
    for row in reader:
        compound_id = row[0]
        # Drop the four-character prefix and the zero padding of the id
        pubchem_id = compound_id[4::].lstrip('0')
        drug_names = []
        if compound_id in drug_lookup:
            drug_names.append(drug_lookup[compound_id])
        drug = Drug(['PubChem:CID' + pubchem_id], drug_names)
        network.add_node(drug)
        # Both UMLS ids and both concept names describe the same disease node
        disease_ids = ['UMLS:' + row[1], 'UMLS:' + row[5]]
        disease_names = [row[3], row[6]]
        disease = Disease(disease_ids, disease_names)
        network.add_node(disease)
        network.add_edge(Edge(drug, disease, 'INDICATES', {'source': 'SIDER'}))

network.save('../data/SIDER/graph.json')

示例#6

0

显示文件


def value_empty(s: str) -> bool:
    """Return True when a table cell carries no value.

    A cell counts as empty when it is falsy (``None`` or ``''``), consists
    only of whitespace, or is the placeholder ``'-'`` (optionally surrounded
    by whitespace).
    """
    return not s or s.strip() in ('', '-')


network = Network()

# Cis table: every usable row yields one variant that is linked by an EQTL
# edge to each gene listed (comma separated) in column 13
with io.open(file_cis, 'r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f, delimiter='\t', quotechar='"')
    next(reader, None)  # skip the header row
    for row in reader:
        # Rows without a SNP id or without genes carry nothing to add
        if not value_empty(row[1]) and not value_empty(row[13]):
            variant = Variant(['dbSNP:' + row[1]], [])
            network.add_node(variant)
            for gene_id in row[13].split(','):
                gene = Gene(['HGNC:' + gene_id], [])
                network.add_node(gene)
                # A fresh attribute dict per edge
                rel = {
                    'source': 'PMID:24013639',
                    'pvalue': row[0],
                    'snp_chr': row[2],
                    'cis_trans': row[7]
                }
                network.add_edge(Edge(gene, variant, 'EQTL', rel))

with io.open(file_trans, 'r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f, delimiter='\t', quotechar='"')
    next(reader, None)
    for row in reader:

示例#7

0

显示文件

network = Network()

# Column layout of filtered_associations.csv:
# 0 Location
# 1 Phenotype
# 2 Phenotype MIM number
# 3 Inheritance
# 4 Phenotype mapping key
# 5 Gene/Locus
# 6 Gene/Locus MIM number
with io.open('../data/OMIM/filtered_associations.csv',
             'r',
             encoding='utf-8',
             newline='') as f:
    reader = csv.reader(f, delimiter=',', quotechar='"')
    next(reader, None)  # skip the header row
    for row in reader:
        # One disease node per phenotype MIM number, one gene node per locus,
        # connected by an ASSOCIATES_WITH edge carrying the remaining columns
        disease = Disease(['OMIM:%s' % row[2]], [])
        network.add_node(disease)
        gene = Gene(['HGNC:%s' % row[5]], [])  # , 'OMIM:%s' % row[6]
        network.add_node(gene)
        rel = {
            'source': 'OMIM',
            'location': row[0],
            'phenotype': row[1],
            # Inheritance is column 3; column 2 is the phenotype MIM number
            # that already identifies the disease node
            'inheritance': row[3],
            'phenotype_mapping_key': row[4]
        }
        network.add_edge(Edge(gene, disease, 'ASSOCIATES_WITH', rel))

network.save('../data/OMIM/graph.json')

示例#8

0

显示文件

文件： mirtarbase.py 项目： ckoenigs/master_database_change

    # Keep only rows whose column 7 does not contain 'Weak' and whose columns
    # 2 and 5 both name the species 'Homo sapiens'. The literal must be
    # spelled out exactly; a mangled spelling never equals the file content
    # and drops every row.
    if 'Weak' not in row[7] and row[2] == 'Homo sapiens' and row[5] == 'Homo sapiens':
        mirna_name = row[1]
        gene_hgnc_id = 'HGNC:' + row[3]
        # int() round trip validates the cell and normalises its formatting
        gene_entrez_id = int(row[4])
        gene_entrez_id = 'Entrez:' + str(gene_entrez_id)
        pmid = int(row[8])
        pmid = str(pmid)

        # NOTE(review): the mapping file is re-read for every accepted row;
        # loading it once into a dict outside the row loop would avoid that.
        with io.open(mirna_to_URS_mapping_file, 'r', encoding='utf-8', newline='') as mapping_file:
            mapping_reader = csv.reader(mapping_file, delimiter='\t')
            next(mapping_reader, None)  # skip the header row
            for mapping_row in mapping_reader:
                # The first mapping row whose third column equals the miRNA
                # name supplies the RNAcentral id
                if mirna_name == mapping_row[2]:
                    mirna_rnacentral_id = mapping_row[0]
                    mirna = MiRNA([mirna_rnacentral_id], [mirna_name])
                    network.add_node(mirna)
                    gene = Gene([gene_hgnc_id, gene_entrez_id], [])
                    network.add_node(gene)
                    if (mirna_rnacentral_id + '$' + gene_hgnc_id) in edge_source_target_lookup:
                        # The pair was seen before: replace each existing
                        # REGULATES edge by one whose pmid attribute also
                        # lists the current PMID
                        edges = network.get_edges_from_to(mirna, gene, 'REGULATES')
                        for edge in edges:
                            pmid = edge.attributes['pmid'] + ', ' + str(pmid)
                            network.delete_edge(edge)
                            e = Edge(mirna, gene, 'REGULATES', {'source': 'miRTarBase', 'pmid': pmid})
                            network.add_edge(e)
                            edge_source_target_lookup.append(mirna_rnacentral_id + '$' + gene_hgnc_id)
                    else:
                        # First occurrence of the pair: add the edge and remember it
                        e = Edge(mirna, gene, 'REGULATES', {'source': 'miRTarBase', 'pmid': pmid})
                        network.add_edge(e)
                        edge_source_target_lookup.append(mirna_rnacentral_id + '$' + gene_hgnc_id)
                    break

示例#9

0

显示文件

文件： dgidb.py 项目： AstrorEnales/GenCoNet

        f.write(response.read())

network = Network()

with io.open(file, 'r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f, delimiter='\t', quotechar='"')
    next(reader, None)  # skip the header row
    for row in reader:
        row = [cell.strip() for cell in row]
        # Columns 0, 7 and 8 are all required to build the gene and the drug
        if not (row[0] and row[7] and row[8]):
            continue
        gene_ids = {'HGNC:' + row[0]}
        if row[2]:
            gene_ids.add('Entrez:' + row[2])
        gene = Gene(gene_ids, [])
        network.add_node(gene)
        # The drug name is column 7 with the id from column 8 (with or
        # without parentheses) removed
        chembl_id = row[8]
        drug_name = row[7].replace('(%s)' % chembl_id, '')
        drug_name = drug_name.replace(chembl_id, '').strip()
        drug_names = [drug_name] if drug_name else []
        drug = Drug(['ChEMBL:' + chembl_id], drug_names)
        network.add_node(drug)
        # Comma separated provenance: DGIdb, column 3 and, when present,
        # the PubMed ids from column 9
        sources = ['DGIdb', row[3]]
        if row[9]:
            sources.extend('PMID:' + x for x in row[9].strip().split(','))
        rel = {
            'source': ','.join(sources),
            'actions': [row[4]],
        }
        network.add_edge(Edge(drug, gene, 'TARGETS', rel))

network.save('../data/DGIdb/graph.json')

示例#10

0

显示文件

    reader = csv.reader(f, delimiter='\t', quotechar='"')
    next(reader, None)  # skip the header row
    for row in reader:
        # Only parse drugs and not drug classes for now
        if row[5] != 'Drug':
            continue
        # Collect every known identifier of the drug in one set
        drug_ids = {'PharmGKB:' + row[0]}
        drug_ids.update(process_drug_cross_references(split_list(row[6])))
        drug_ids.update('RxNorm:%s' % x for x in split_list(row[21]))
        drug_ids.update('AtcCode:%s' % x for x in split_list(row[22]))
        drug_ids.update('PubChem:CID%s' % x for x in split_list(row[23]))
        drug = Drug(drug_ids, [row[1]])
        drug.attributes['type'] = row[5]
        network.add_node(drug)

with open_file_in_zip('../data/PharmGKB/genes.zip', 'genes.tsv') as f:
    reader = csv.reader(f, delimiter='\t', quotechar='"')
    next(reader, None)
    #  0 - PharmGKB Accession Id
    #  1 - NCBI Gene ID
    #  2 - HGNC ID
    #  3 - Ensembl Id
    #  4 - Name
    #  5 - Symbol
    #  6 - Alternate Names
    #  7 - Alternate Symbols
    #  8 - Is VIP
    #  9 - Has Variant Annotation
    # 10 - Cross-references

示例#11

0

显示文件

        reader = csv.reader(f, delimiter=',', quotechar='"')
        next(reader, None)
        for row in reader:
            snp_adrs_results.append(row)

# First column -> list of the non-empty remaining columns (external ids)
external_id_lookup = {}
for row in external_id_results:
    external_id_lookup[row[0]] = [x for x in row[1::] if x]

network = Network()
for row in targets_results:
    # Drug node: DrugBank id plus any external ids known for it
    drug_ids = ['DrugBank:%s' % row[0]]
    if row[0] in external_id_lookup:
        drug_ids.extend(external_id_lookup[row[0]])
    drug = Drug(drug_ids, [row[1]])
    network.add_node(drug)
    # Gene node: HGNC id plus the optional second id from column 4
    gene_ids = ['HGNC:%s' % row[2]]
    if row[4]:
        gene_ids.append(row[4])
    gene = Gene(gene_ids, [row[3]])
    network.add_node(gene)
    rel = {
        'source': 'DrugBank',
        # Rows read through csv.reader contain text, so comparing the cell
        # with the integer 1 is always False. Accept the textual spellings of
        # a true flag; an actual int 1 / bool True still yields True.
        # NOTE(review): confirm which spelling the exported file really uses.
        'known_action': str(row[5]).strip().lower() in ('1', 'true', 'yes'),
        'actions': row[6].split(',') if row[6] else [],
        'simplified_action': row[7]
    }
    network.add_edge(Edge(drug, gene, 'TARGETS', rel))
for row in interactions_results:
    drug1 = Drug(['DrugBank:%s' % row[0]], [row[1]])
    network.add_node(drug1)

示例#12

0

显示文件

文件： superdrug2.py 项目： AstrorEnales/GenCoNet

    next(reader, None)
    for row in reader:
        # 0 SUPERDRUG_ID
        # 1 PREFERRED_NAME
        # 2 ATC
        # 3 CHEMBL_ID
        # 4 DRUGBANK_ID
        # 5 KEGG_ID
        # 6 PUBCHEM_CID
        # 7 CASRN
        drug_ids = []  # ['SuperDrug:%s' % row[0]]
        # if row[2] != not_available_text:
        #     drug_ids.extend(['AtcCode:%s' % x for x in row[2].split(';')])
        if row[3] != not_available_text:
            for chembl_id in {x.strip() for x in row[3].split(';')}:
                drug_ids.append('ChEMBL:%s' % chembl_id)
        if row[4] != not_available_text:
            drug_ids.append('DrugBank:%s' % row[4])
        else:
            # For now, only use mappings including DrugBank
            continue
        # if row[5] != not_available_text:
        #     drug_ids.append('Kegg:%s' % row[5])
        if row[6] != not_available_text:
            for pubchem_id in {x.strip() for x in row[6].split(';')}:
                drug_ids.append('PubChem:CID%s' % pubchem_id)
        if len(drug_ids) > 1:
            network.add_node(Drug(drug_ids, [row[1]]))

network.save('../data/SuperDrug2/graph.json')

示例#13

0

显示文件

文件： drugcentral.py 项目： AstrorEnales/GenCoNet

from model.drug import Drug
from model.disease import Disease
from model.edge import Edge

network = Network()

# Mappings: one Drug node per row with its DrugCentral and DrugBank ids and,
# when column 2 is filled, its RxNorm id
with io.open('../data/DrugCentral/drugcentral_mappings.csv',
             'r',
             encoding='utf-8',
             newline='') as f:
    reader = csv.reader(f, delimiter=',', quotechar='"')
    next(reader, None)  # skip the header row
    for row in reader:
        ids = ['DrugCentral:' + row[0], 'DrugBank:' + row[1]]
        rxnorm_id = row[2]
        if rxnorm_id:
            ids.append('RxNorm:' + rxnorm_id)
        drug_name = row[3]
        network.add_node(Drug(ids, [drug_name]))

# Indications: one Disease node per row, linked by an INDICATES edge from the
# drug node looked up through its DrugBank id
with io.open('../data/DrugCentral/drugcentral_indications.csv',
             'r',
             encoding='utf-8',
             newline='') as f:
    reader = csv.reader(f, delimiter=',', quotechar='"')
    next(reader, None)  # skip the header row
    for row in reader:
        disease_ids = ['SnoMedCT:' + row[2], 'UMLS:' + row[3]]
        disease = Disease(disease_ids, [row[1]])
        network.add_node(disease)
        drug = network.get_node_by_id('DrugBank:' + row[0], 'Drug')
        network.add_edge(Edge(drug, disease, 'INDICATES', {'source': 'DrugCentral'}))

示例#14

0

显示文件

        #  3 - DPI
        #  4 - diseaseId
        #  5 - diseaseName
        #  6 - diseaseType
        #  7 - diseaseClass
        #  8 - diseaseSemanticType
        #  9 - score
        # 10 - EI
        # 11 - YearInitial
        # 12 - YearFinal
        # 13 - NofPmids
        # 14 - NofSnps
        # 15 - source
        # Associations backed by fewer PubMed ids than the threshold are ignored
        num_pmids = int(row[13])
        if num_pmids >= PUBMED_COUNT_THRESHOLD:
            gene = Gene(['HGNC:' + row[1]], [])
            network.add_node(gene)
            disease = Disease(['UMLS:' + row[4]], [row[5]])
            network.add_node(disease)
            rel = {
                'source': 'DisGeNet,' + row[15],
                'num_pmids': num_pmids,
                'num_snps': int(row[14]),
                'score': row[9]
            }
            network.add_edge(Edge(gene, disease, 'ASSOCIATES_WITH', rel))

with io.open('../data/DisGeNet/curated_variant_disease_associations.tsv',
             'r',
             encoding='utf-8',
             newline='') as f:
    reader = csv.reader(f, delimiter='\t', quotechar='"')