continue if source_uniprot not in nodes: source = { "name": "uniprot:" + source_uniprot, "alt_accession": "", "tax_id": "taxid:99284", "pathways": "", "aliases": "", "topology": "", } nodes[source_uniprot] = source nodes[source_uniprot]["id"] = db_api.insert_node(source) if target_uniprot not in nodes: target = { "name": "uniprot:" + target_uniprot, "alt_accession": "", "tax_id": "taxid:99284", "pathways": "", "aliases": "", "topology": "", } nodes[target_uniprot] = target nodes[target_uniprot]["id"] = db_api.insert_node(target)
# skipping header mapFile.readline() for line in mapFile: uniprot, salmonella = line.strip().split("\t") salmonella_nodes[salmonella] = salmonella_node ={ 'name': "uniprot:" + uniprot, 'alt_accession': "gene symbol:" + salmonella, 'tax_id': "taxid:99284", 'pathways': "", 'aliases': "", 'topology': "" } salmonella_nodes[salmonella]["id"] = db_api.insert_node(salmonella_node) with open(args.source_file) as source_file: # skipping the header source_file.readline() for line in source_file: salm_node_acc, human_node_acc = line.strip().split(';') dictionary_cursor.execute(""" SELECT DISTINCT uniprot.accession FROM uniprot JOIN foreign_ids on uniprot.id == foreign_ids.uniprot_id WHERE foreign_ids.accession = ? AND uniprot.is_swissprot = 1 """, (human_node_acc,))
if source_uniprot not in nodes: source = { 'name' : "uniprot:"+source_uniprot, 'alt_accession' : "", 'tax_id' : "taxid:99284", 'pathways' : "", 'aliases' : "", 'topology' : "" } nodes[source_uniprot] = source nodes[source_uniprot]["id"] = db_api.insert_node(source) if target_uniprot not in nodes: target = { 'name' : "uniprot:"+target_uniprot, 'alt_accession' : "", 'tax_id' : "taxid:99284", 'pathways' : "", 'aliases' : "", 'topology' : "" } nodes[target_uniprot] = target nodes[target_uniprot]["id"] = db_api.insert_node(target)
salmonella_uniprot, salmonella_gene_symbol, human_uniprot, human_gene_symbol = linearr if salmonella_uniprot not in nodes: salmonella_node = { 'name': "uniprot:" + salmonella_uniprot, 'alt_accession': "gene symbol:" + salmonella_gene_symbol, 'tax_id': "taxid:99284", 'pathways': "", 'aliases': "", 'topology': "" } nodes[salmonella_uniprot] = salmonella_node nodes[salmonella_uniprot]["id"] = db_api.insert_node( salmonella_node) if human_uniprot not in nodes: human_node = { 'name': "uniprot:" + human_uniprot, 'alt_accession': "gene symbol:" + human_gene_symbol, 'tax_id': "taxid:9606", 'pathways': "", 'aliases': "", 'topology': "" } nodes[human_uniprot] = human_node nodes[human_uniprot]["id"] = db_api.insert_node(human_node)
# skipping header mapFile.readline() for line in mapFile: uniprot, salmonella = line.strip().split("\t") salmonella_nodes[salmonella] = salmonella_node = { 'name': "uniprot:" + uniprot, 'alt_accession': "gene symbol:" + salmonella, 'tax_id': "taxid:99284", 'pathways': "", 'aliases': "", 'topology': "" } salmonella_nodes[salmonella]["id"] = db_api.insert_node( salmonella_node) with open(args.source_file) as source_file: # skipping the header source_file.readline() for line in source_file: salm_node_acc, human_node_acc = line.strip().split(';') dictionary_cursor.execute( """ SELECT DISTINCT uniprot.accession FROM uniprot JOIN foreign_ids on uniprot.id == foreign_ids.uniprot_id WHERE foreign_ids.accession = ? AND uniprot.is_swissprot = 1
linearr = line.split("\t") # deconstructing the array generated from the line node_id, name, alt_accession, tax_id, pathways, aliases, topology = linearr node_dict = { 'name' : name, 'alt_accession' : alt_accession, 'tax_id' : tax_id, 'pathways' : pathways, 'aliases' : aliases, 'topology' : topology } node_dict['id'] = db_api.insert_node(node_dict) nodes[name] = node_dict print("Parsing nodes done.") # parsing edges with open(args.edge_source_file) as edge_file: # informing the user print "Parsing edges" sum_edges = sum([1 for line in edge_file]) progress = 1 edge_file.seek(0)
source_name = linearr[1] if source_name not in nodes: source = { 'name' : "uniprot:"+source_name, 'alt_accession' : "gene symbol:"+linearr[0], 'tax_id' : "taxid:"+linearr[2], 'pathways' : linearr[7], 'aliases' : "", 'topology' : linearr[4] } nodes[source_name] = source source_id = db_api.insert_node(source) nodes[source_name]["id"] = source_id target_name = linearr[9] if target_name not in nodes: target = { 'name' : "uniprot:"+target_name, 'alt_accession' : "gene symbol:"+linearr[8], 'tax_id' : "taxid:"+linearr[10], 'pathways' : linearr[15], 'aliases' : "", 'topology' : linearr[12]
salmonella_uniprot, salmonella_gene_symbol, human_uniprot, human_gene_symbol = linearr if salmonella_uniprot not in nodes: salmonella_node = { "name": "uniprot:" + salmonella_uniprot, "alt_accession": "gene symbol:" + salmonella_gene_symbol, "tax_id": "taxid:99284", "pathways": "", "aliases": "", "topology": "", } nodes[salmonella_uniprot] = salmonella_node nodes[salmonella_uniprot]["id"] = db_api.insert_node(salmonella_node) if human_uniprot not in nodes: human_node = { "name": "uniprot:" + human_uniprot, "alt_accession": "gene symbol:" + human_gene_symbol, "tax_id": "taxid:9606", "pathways": "", "aliases": "", "topology": "", } nodes[human_uniprot] = human_node nodes[human_uniprot]["id"] = db_api.insert_node(human_node)
def main():
    """Rewrite each source database so its nodes use primary UniProt names.

    For every SQLite file listed in ``args.source_files`` the ``node`` and
    ``edge`` tables are read, each node name (``"uniprot:<accession>"``) is
    passed through ``get_primary`` and the node is re-inserted under the
    resulting name into a fresh ``PsimiSQL`` database.  Edges are then
    re-inserted between the re-mapped nodes and the result is saved to
    ``args.outdir + "/mapped" + <basename of the source file>``.

    Edges whose source or target name is not present in the node table are
    skipped and counted; the count is printed before saving.

    NOTE(review): ``args``, ``PsimiSQL`` and ``get_primary`` are module-level
    names defined outside this block; their exact contracts are assumed from
    how they are called here.
    """
    for db in args.source_files:
        # informing the user
        print("Parsing %s" % db)

        # Read everything up front so the connection can be closed right
        # away instead of leaking one handle per input database.
        connection = sqlite3.connect(db)
        try:
            cursor = connection.cursor()
            cursor.execute("SELECT * FROM node")
            node_rows = cursor.fetchall()
            cursor.execute("SELECT * FROM edge")
            edge_rows = cursor.fetchall()
        finally:
            connection.close()

        # old node name -> new (primary) node name
        nodemap = {}
        # new (primary) node name -> node dict carrying the new database id
        mapped_nodes = {}
        new_db = PsimiSQL()

        # mapping nodes
        print("Mapping nodes")
        num_of_nodes = len(node_rows)
        for current, node_row in enumerate(node_rows, 1):
            # informing user
            if current % 50 == 0:
                print("Mapping nodes %d/%d" % (current, num_of_nodes))

            (row_id, name, alt_accession, tax_id,
             pathways, aliases, topology) = node_row

            old_uniprot = name
            new_uniprot = "uniprot:" + get_primary(old_uniprot.split(':')[1])

            # storing the new uniprot id for every old id
            nodemap[old_uniprot] = new_uniprot

            mapped_node = {
                'name': new_uniprot,
                'alt_accession': alt_accession,
                'tax_id': tax_id,
                'pathways': pathways,
                'aliases': aliases,
                'topology': topology,
            }
            mapped_node['id'] = new_db.insert_node(mapped_node)
            mapped_nodes[new_uniprot] = mapped_node

        # Fewer distinct old names than rows means the node table contained
        # duplicate names.
        if len(nodemap) != num_of_nodes:
            print("Gebasz")

        # mapping edges
        print("Mapping edges")
        num_of_edges = len(edge_rows)
        skipped_edges = 0
        for current, row in enumerate(edge_rows, 1):
            if current % 10 == 0:
                print("Parsing edge %d/%d" % (current, num_of_edges))

            edge_dict = {
                'interaction_detection_method': row[5],
                'first_author': row[6],
                'publication_ids': row[7],
                'interaction_types': row[8],
                'source_db': row[9],
                'interaction_identifiers': row[10],
                'confidence_scores': row[11],
                'layer': "0",
            }

            # The edge row stores the OLD node names, while mapped_nodes is
            # keyed by the NEW names, so translate through nodemap first.
            # Looking the old names up directly would drop every edge whose
            # endpoint accession changed during the mapping.
            new_source_uniprot = nodemap.get(row[3])
            new_target_uniprot = nodemap.get(row[4])

            if new_source_uniprot is None or new_target_uniprot is None:
                skipped_edges += 1
            else:
                new_db.insert_edge(mapped_nodes[new_source_uniprot],
                                   mapped_nodes[new_target_uniprot],
                                   edge_dict)

        # saving the mapped db and informing user
        db_name = os.path.split(db)[1]
        out_path = args.outdir + "/mapped" + db_name
        print("Saving db to %s " % out_path)
        print("SHITCOUNTER %d" % skipped_edges)
        new_db.save_db_to_file(out_path)