Пример #1
0
     '../data/UniprotKB/graph.json',
     '../data/GO/graph.json',
     '../data/PharmGKB/graph.json',
     # '../data/PubMed/graph.json',
 ]
 # Fusion
 print('[INFO] Network fusion')
 for graph in graphs:
     print('[INFO] Add network', graph)
     with io.open(graph, 'r', encoding='utf-8', newline='') as f:
         g = json.loads(f.read())
         network.load_from_dict(g)
 # Mapping
 print('[INFO] Add disease mappings')
 all_disease_ids = set()
 for node in network.get_nodes_by_label('Disease'):
     all_disease_ids.update(node.ids)
 for disease_id in all_disease_ids:
     mapped_ids, mapped_names = mondo_mapper.map_from(disease_id)
     if mapped_ids:
         network.add_node(Disease(mapped_ids, mapped_names))
 # Cleanup
 print('[INFO] Prune network')
 network.prune()
 print('[INFO] Merge duplicate node names')
 merge_duplicate_node_names(network)
 print('[INFO] Merge duplicate edges')
 network.merge_duplicate_edges()
 # Export
 print('[INFO] Export network')
 directory_utils.create_clean_directory(config['output-path'])
Пример #2
0
def _known_action_flag(attr):
    """Return 'true'/'false' for the optional ``known_action`` attribute, else None."""
    if 'known_action' not in attr:
        return None
    return 'true' if attr['known_action'] else 'false'


def _joined_actions(attr):
    """Return the optional ``actions`` list joined with ';', or None when absent."""
    # Guarded like every other attribute so that an edge without ``actions``
    # yields an empty CSV cell instead of aborting the export with a KeyError.
    if 'actions' not in attr:
        return None
    return ';'.join(attr['actions'])


# Export description per edge label: [CSV header columns, value getters].
# A getter is either an attribute key (missing keys become empty cells) or a
# callable that receives the edge's attribute mapping and returns the cell.
# NOTE(review): 'pmid' is declared as string for most labels but as int for
# 'CODES' (the original carried the remark "pmid int now not string") -
# confirm which column type is intended.
_EDGE_METADATA = {
    'HAS_MOLECULAR_FUNCTION': [['source:string', 'pmid:string'],
                               ['source', 'pmid']],
    'BELONGS_TO_BIOLOGICAL_PROCESS': [['source:string', 'pmid:string'],
                                      ['source', 'pmid']],
    'IN_CELLULAR_COMPONENT': [['source:string', 'pmid:string'],
                              ['source', 'pmid']],
    'INDICATES': [['source:string'], ['source']],
    'REGULATES': [['source:string', 'pmid:string'], ['source', 'pmid']],
    'TRANSCRIBES': [['source:string'], ['source']],
    'CONTRAINDICATES': [['source:string'], ['source']],
    'INDUCES': [['source:string'], ['source']],
    'CODES': [['source:string', 'pmid:int'], ['source', 'pmid']],
    'EQTL': [[
        'source:string', 'pvalue:string', 'snp_chr:string',
        'cis_trans:string'
    ], ['source', 'pvalue', 'snp_chr', 'cis_trans']],
    'INTERACTS': [['source:string', 'description:string'],
                  ['source', 'description']],
    'TARGETS': [[
        'source:string', 'known_action:boolean', 'actions:string[]',
        'simplified_action:string'
    ], ['source', _known_action_flag, _joined_actions, 'simplified_action']],
    'ASSOCIATES_WITH':
    [['source:string', 'num_pmids:int', 'num_snps:int', 'score:string'],
     ['source', 'num_pmids', 'num_snps', 'score']],
    'HAS_ADR': [['source:string'], ['source']],
    'ASSOCIATED_WITH_ADR': [['source:string'], ['source']],
}


def _edge_attribute_value(attributes, getter):
    """Resolve one CSV cell from an edge's attributes via a key or a callable."""
    if callable(getter):
        return getter(attributes)
    return attributes[getter] if getter in attributes else None


def _write_node_files(network: Network, output_path: str):
    """Write one ``nodes_<label>.csv`` per non-empty node label; return the file names."""
    node_import_files = []
    for label in network.node_labels():
        nodes = set(network.get_nodes_by_label(label))
        if not nodes:
            continue
        # ';' separates multiple labels, so it is replaced in the file name.
        file_name = 'nodes_%s.csv' % label.replace(';', '_')
        node_import_files.append(file_name)
        # Union of all attribute keys so every row has the same columns.
        attribute_keys = sorted(
            {key for n in nodes for key in n.attributes.keys()})
        with io.open(os.path.join(output_path, file_name),
                     'w',
                     encoding='utf-8',
                     newline='') as f:
            writer = csv.writer(f, delimiter=',', quotechar='"')
            writer.writerow([
                'label_id:ID(Node-ID)', '_id:string', 'ids:string[]',
                'names:string[]'
            ] + ['%s:string' % x for x in attribute_keys] + [':LABEL'])
            for n in nodes:
                row = [n.label_id, n.id, ';'.join(n.ids), ';'.join(n.names)]
                # csv.writer renders None as an empty cell.
                row.extend(n.attributes[key] if key in n.attributes else None
                           for key in attribute_keys)
                row.append(n.label)
                writer.writerow(row)
    return node_import_files


def _write_relationship_files(network: Network, output_path: str) -> None:
    """Write one ``rel_<label>.csv`` for every edge label in ``_EDGE_METADATA``."""
    for edge_label, (header_columns, getters) in _EDGE_METADATA.items():
        with io.open(os.path.join(output_path, 'rel_%s.csv' % edge_label),
                     'w',
                     encoding='utf-8',
                     newline='') as f:
            writer = csv.writer(f, delimiter=',', quotechar='"')
            writer.writerow([':START_ID(Node-ID)'] + header_columns +
                            [':END_ID(Node-ID)', ':TYPE'])
            for e in network.get_edges_by_label(edge_label):
                values = [
                    _edge_attribute_value(e.attributes, getter)
                    for getter in getters
                ]
                # Edges reference nodes by (id, label); the CSV needs the
                # node's label_id, which is the ID column of the node files.
                source_id = network.get_node_by_id(
                    e.source_node_id, e.source_node_label).label_id
                target_id = network.get_node_by_id(
                    e.target_node_id, e.target_node_label).label_id
                writer.writerow([source_id] + values + [target_id, e.label])


def _write_index_script(network: Network, output_path: str) -> None:
    """Write ``create_indices.cypher`` with a uniqueness constraint per single label."""
    unique_labels = set()
    for node_label in network.node_labels():
        # Combined labels such as 'A;B' contribute each part separately.
        unique_labels.update(node_label.split(';'))
    with io.open(os.path.join(output_path, 'create_indices.cypher'),
                 'w',
                 encoding='utf-8',
                 newline='') as f:
        for node_label in unique_labels:
            f.write('create constraint on (p:%s) assert p._id is unique;\n' %
                    node_label)


def _write_import_scripts(network: Network, config: Dict, output_path: str,
                          node_import_files) -> None:
    """Write ``import_admin.bat`` and ``import_admin.sh`` that run neo4j-admin import."""
    neo4j = config['Neo4j']
    admin_tool = os.path.join(neo4j['bin-path'], 'neo4j-admin')
    # Built once and shared so that both scripts always import the same files.
    import_arguments = (
        ' import --database %s ' % neo4j['database-name'] +
        ' '.join('--nodes %s' % x for x in node_import_files) + ' ' +
        ' '.join('--relationships rel_%s.csv' % x
                 for x in network.edge_labels()) + ' > import.log\n')

    with io.open(os.path.join(output_path, 'import_admin.bat'),
                 'w',
                 encoding='utf-8',
                 newline='') as f:
        f.write('@echo off\n')
        f.write('net stop neo4j\n')
        f.write('rmdir /s "%s"\n' %
                os.path.join(neo4j['database-path'], neo4j['database-name']))
        f.write('CALL ' + admin_tool)
        f.write(import_arguments)
        f.write('net start neo4j\n')
        f.write(os.path.join(neo4j['bin-path'], 'cypher-shell'))
        f.write(
            ' -u %s -p %s --non-interactive < create_indices.cypher 1>> import.log 2>&1\n'
            % (neo4j['user'], neo4j['password']))

    with io.open(os.path.join(output_path, 'import_admin.sh'),
                 'w',
                 encoding='utf-8',
                 newline='') as f:
        f.write(admin_tool)
        f.write(import_arguments)


def save_network(network: Network, config: Dict):
    """Export the network as CSV files plus Neo4j import helper scripts.

    The following files are written into ``config['output-path']``:

    * ``nodes_<label>.csv`` for every node label that has at least one node,
    * ``rel_<label>.csv`` for every edge label listed in ``_EDGE_METADATA``,
    * ``create_indices.cypher`` with one uniqueness constraint on ``_id``
      per individual node label,
    * ``import_admin.bat`` and ``import_admin.sh`` invoking
      ``neo4j-admin import`` on the generated files.

    Args:
        network: The network to export.
        config: Configuration mapping. Reads ``'output-path'`` and, under
            ``'Neo4j'``, the keys ``'database-path'``, ``'database-name'``,
            ``'bin-path'``, ``'user'`` and ``'password'``.

    Raises:
        KeyError: If one of the configuration keys above is missing.

    NOTE(review): relationship files are produced for the labels in
    ``_EDGE_METADATA`` while the import scripts list
    ``network.edge_labels()``; an edge label without metadata would make the
    scripts reference a file that is never written - confirm that every
    label in use has an entry.
    """
    output_path = config['output-path']
    node_import_files = _write_node_files(network, output_path)
    _write_relationship_files(network, output_path)
    _write_index_script(network, output_path)
    _write_import_scripts(network, config, output_path, node_import_files)
Пример #3
0
        f.write(
            '<li><a href="#loss-distinction-section">Possible loss of distinction</a></li>\n'
        )
        f.write(
            '<li><a href="#single-id-nodes">Nodes with single IDs</a></li>\n')
        f.write('</ul>\n')
        f.write(
            '<h1><a name="indi-contra-section">Drugs indicating and contraindicating same disease:</a></h1>\n'
        )
        f.write(
            '<table border="1">\n<thead>\n<tr><th>Drug</th><th>Indications</th><th>Contraindications</th></tr>\n</thead>\n'
        )
        f.write('<tbody>\n')
        drug_count = 0
        drug_check_failed_count = 0
        for drug in network.get_nodes_by_label('Drug'):
            drug_count += 1
            indications = {
                x.target_node_id
                for x in network.get_node_edges_by_label(drug, 'INDICATES')
            }
            contraindications = {
                x.target_node_id
                for x in network.get_node_edges_by_label(
                    drug, 'CONTRAINDICATES')
            }
            if not indications.isdisjoint(contraindications):
                drug_check_failed_count += 1

                for intersection in indications.intersection(
                        contraindications):