continue

        if source_uniprot not in nodes:

            source = {
                "name": "uniprot:" + source_uniprot,
                "alt_accession": "",
                "tax_id": "taxid:99284",
                "pathways": "",
                "aliases": "",
                "topology": "",
            }

            nodes[source_uniprot] = source

            nodes[source_uniprot]["id"] = db_api.insert_node(source)

        if target_uniprot not in nodes:

            target = {
                "name": "uniprot:" + target_uniprot,
                "alt_accession": "",
                "tax_id": "taxid:99284",
                "pathways": "",
                "aliases": "",
                "topology": "",
            }

            nodes[target_uniprot] = target

            nodes[target_uniprot]["id"] = db_api.insert_node(target)
    # skipping header
    mapFile.readline()

    for line in mapFile:
        uniprot, salmonella = line.strip().split("\t")

        salmonella_nodes[salmonella] = salmonella_node ={
            'name': "uniprot:" + uniprot,
            'alt_accession': "gene symbol:" + salmonella,
            'tax_id': "taxid:99284",
            'pathways': "",
            'aliases': "",
            'topology': ""
        }

        salmonella_nodes[salmonella]["id"] = db_api.insert_node(salmonella_node)

with open(args.source_file) as source_file:

    # skipping the header
    source_file.readline()

    for line in source_file:
        salm_node_acc, human_node_acc = line.strip().split(';')

        dictionary_cursor.execute("""
              SELECT DISTINCT uniprot.accession
              FROM uniprot
              JOIN foreign_ids on uniprot.id == foreign_ids.uniprot_id
              WHERE foreign_ids.accession = ? AND uniprot.is_swissprot = 1
            """, (human_node_acc,))

        if source_uniprot not in nodes:

            source = {
                'name' : "uniprot:"+source_uniprot,
                'alt_accession' : "",
                'tax_id' : "taxid:99284",
                'pathways' : "",
                'aliases' : "",
                'topology' : ""
            }

            nodes[source_uniprot] = source

            nodes[source_uniprot]["id"] = db_api.insert_node(source)

        if target_uniprot not in nodes:

            target = {
                'name' : "uniprot:"+target_uniprot,
                'alt_accession' : "",
                'tax_id' : "taxid:99284",
                'pathways' : "",
                'aliases' : "",
                'topology' : ""
            }

            nodes[target_uniprot] = target

            nodes[target_uniprot]["id"] = db_api.insert_node(target)
示例#4
0
        salmonella_uniprot, salmonella_gene_symbol, human_uniprot, human_gene_symbol = linearr

        if salmonella_uniprot not in nodes:

            salmonella_node = {
                'name': "uniprot:" + salmonella_uniprot,
                'alt_accession': "gene symbol:" + salmonella_gene_symbol,
                'tax_id': "taxid:99284",
                'pathways': "",
                'aliases': "",
                'topology': ""
            }

            nodes[salmonella_uniprot] = salmonella_node

            nodes[salmonella_uniprot]["id"] = db_api.insert_node(
                salmonella_node)

        if human_uniprot not in nodes:

            human_node = {
                'name': "uniprot:" + human_uniprot,
                'alt_accession': "gene symbol:" + human_gene_symbol,
                'tax_id': "taxid:9606",
                'pathways': "",
                'aliases': "",
                'topology': ""
            }

            nodes[human_uniprot] = human_node
            nodes[human_uniprot]["id"] = db_api.insert_node(human_node)
    # skipping header
    mapFile.readline()

    for line in mapFile:
        uniprot, salmonella = line.strip().split("\t")

        salmonella_nodes[salmonella] = salmonella_node = {
            'name': "uniprot:" + uniprot,
            'alt_accession': "gene symbol:" + salmonella,
            'tax_id': "taxid:99284",
            'pathways': "",
            'aliases': "",
            'topology': ""
        }

        salmonella_nodes[salmonella]["id"] = db_api.insert_node(
            salmonella_node)

with open(args.source_file) as source_file:

    # skipping the header
    source_file.readline()

    for line in source_file:
        salm_node_acc, human_node_acc = line.strip().split(';')

        dictionary_cursor.execute(
            """
              SELECT DISTINCT uniprot.accession
              FROM uniprot
              JOIN foreign_ids on uniprot.id == foreign_ids.uniprot_id
              WHERE foreign_ids.accession = ? AND uniprot.is_swissprot = 1
示例#6
0
        linearr = line.split("\t")

        # deconstructing the array generated from the line
        node_id, name, alt_accession, tax_id, pathways, aliases, topology = linearr

        node_dict = {
            'name' : name,
            'alt_accession' : alt_accession,
            'tax_id' : tax_id,
            'pathways' : pathways,
            'aliases' : aliases,
            'topology' : topology
        }

        node_dict['id'] = db_api.insert_node(node_dict)

        nodes[name] = node_dict

    print("Parsing nodes done.")

# parsing edges

with open(args.edge_source_file) as edge_file:

    # informing the user
    print "Parsing edges"
    sum_edges = sum([1 for line in edge_file])
    progress = 1
    edge_file.seek(0)
        source_name = linearr[1]

        if source_name not in nodes:

            source = {
                'name' : "uniprot:"+source_name,
                'alt_accession' : "gene symbol:"+linearr[0],
                'tax_id' : "taxid:"+linearr[2],
                'pathways' : linearr[7],
                'aliases' : "",
                'topology' : linearr[4]
            }

            nodes[source_name] = source

            source_id = db_api.insert_node(source)

            nodes[source_name]["id"] = source_id


        target_name = linearr[9]

        if target_name not in nodes:

            target = {
                'name' : "uniprot:"+target_name,
                'alt_accession' : "gene symbol:"+linearr[8],
                'tax_id' : "taxid:"+linearr[10],
                'pathways' : linearr[15],
                'aliases' : "",
                'topology' : linearr[12]
        salmonella_uniprot, salmonella_gene_symbol, human_uniprot, human_gene_symbol = linearr

        if salmonella_uniprot not in nodes:

            salmonella_node = {
                "name": "uniprot:" + salmonella_uniprot,
                "alt_accession": "gene symbol:" + salmonella_gene_symbol,
                "tax_id": "taxid:99284",
                "pathways": "",
                "aliases": "",
                "topology": "",
            }

            nodes[salmonella_uniprot] = salmonella_node

            nodes[salmonella_uniprot]["id"] = db_api.insert_node(salmonella_node)

        if human_uniprot not in nodes:

            human_node = {
                "name": "uniprot:" + human_uniprot,
                "alt_accession": "gene symbol:" + human_gene_symbol,
                "tax_id": "taxid:9606",
                "pathways": "",
                "aliases": "",
                "topology": "",
            }

            nodes[human_uniprot] = human_node
            nodes[human_uniprot]["id"] = db_api.insert_node(human_node)
def main():
    """Re-map every source database onto primary UniProt accessions.

    For each SQLite file listed in ``args.source_files``:

    1. every row of the ``node`` table is read, the accession part of its
       name (the text after the first ``:``) is passed through
       ``get_primary`` and the resulting node is inserted into a fresh
       ``PsimiSQL`` database;
    2. every row of the ``edge`` table is read and re-inserted between the
       mapped versions of its two endpoints; edges with an endpoint that
       cannot be resolved are counted and skipped;
    3. the new database is saved to ``args.outdir + "/mapped" + <basename>``.
    """
    for db in args.source_files:

        # informing the user
        print("Parsing %s" % db)

        # Read everything that is needed up front so that the source
        # connection can be closed deterministically instead of leaking.
        connection = sqlite3.connect(db)
        try:
            cursor = connection.cursor()

            cursor.execute("SELECT * FROM node")
            node_rows = cursor.fetchall()

            cursor.execute("SELECT * FROM edge")
            edge_rows = cursor.fetchall()
        finally:
            connection.close()

        new_db = PsimiSQL()

        # new uniprot name -> node dict as inserted into new_db
        mapped_nodes = {}
        # old uniprot name -> new uniprot name
        nodemap = {}

        # mapping nodes

        print("Mapping nodes")
        num_of_nodes = len(node_rows)

        for current, line in enumerate(node_rows, 1):

            # informing user
            if current % 50 == 0:
                print("Mapping nodes %d/%d" % (current, num_of_nodes))

            _row_id, name, alt_accession, tax_id, pathways, aliases, topology = line

            new_uniprot = "uniprot:" + get_primary(name.split(':')[1])

            # storing the new uniprot id for every old id
            nodemap[name] = new_uniprot

            mapped_node = {
                'name': new_uniprot,
                'alt_accession': alt_accession,
                'tax_id': tax_id,
                'pathways': pathways,
                'aliases': aliases,
                'topology': topology
            }

            mapped_node['id'] = new_db.insert_node(mapped_node)

            mapped_nodes[new_uniprot] = mapped_node

        # Fewer distinct old names than rows means the node table holds
        # duplicate names.
        if len(nodemap) != num_of_nodes:
            print("Gebasz")

        # mapping edges

        print("Mapping edges")
        num_of_edges = len(edge_rows)
        skipped_edges = 0

        for current, row in enumerate(edge_rows, 1):

            if current % 10 == 0:
                print("Parsing edge %d/%d" % (current, num_of_edges))

            # NOTE(review): columns 3 and 4 are assumed to hold the endpoint
            # node names in the same form as node.name -- confirm against the
            # edge table schema.
            old_source_uniprot = row[3]
            old_target_uniprot = row[4]

            # mapped_nodes is keyed by the NEW name, so the old name has to
            # be translated through nodemap first. Names that are not in
            # nodemap are tried as-is.
            source_node = mapped_nodes.get(
                nodemap.get(old_source_uniprot, old_source_uniprot))
            target_node = mapped_nodes.get(
                nodemap.get(old_target_uniprot, old_target_uniprot))

            if source_node is None or target_node is None:
                skipped_edges += 1
                continue

            edge_dict = {
                'interaction_detection_method': row[5],
                'first_author': row[6],
                'publication_ids': row[7],
                'interaction_types': row[8],
                'source_db': row[9],
                'interaction_identifiers': row[10],
                'confidence_scores': row[11],
                'layer': "0"
            }

            new_db.insert_edge(source_node, target_node, edge_dict)

        # saving the mapped db and informing user

        db_name = os.path.split(db)[1]
        out_path = args.outdir + "/mapped" + db_name

        print("Saving db to %s " % out_path)
        print("SHITCOUNTER %d" % skipped_edges)

        new_db.save_db_to_file(out_path)