Exemplo n.º 1
0
    def __init__(self, biosettings, id=15):
        """Register the PharmGKB knowledge base and its three child groups.

        After the base-class initialiser runs, PurgeGroupData(id) and
        LoadAliases() are called on the settings object.  A KnowledgeBase
        named "PharmGKB" is then created under ``id`` and three Pathway
        groups are attached to it with consecutive ids:
        "Pathways" (id + 1), "Diseases" (id + 2) and "Drugs" (id + 3).

        biosettings -- settings object, forwarded to bioloader.BioLoader
        id          -- base group id; id + 1 .. id + 3 are used as well
        """
        bioloader.BioLoader.__init__(self, biosettings, id)

        self.biosettings.PurgeGroupData(id)
        biosettings.LoadAliases()

        self.kb = bioloader.KnowledgeBase(
            id, "PharmGKB", "Pharmacogenomics Knowledge Base")

        # Group holding the individual pathways
        self.pathwayGroups = bioloader.Pathway(
            self.kb.groupTypeID, id + 1, "Pathways", "PharmGKB Pathways")
        self.kb.AddPathway(self.pathwayGroups, "PharmGKB Pathway")
        self.pathways = []

        # Group holding disease associations
        self.diseaseGroups = bioloader.Pathway(
            self.kb.groupTypeID, id + 2, "Diseases",
            "PharmGKB Disease Associations")
        self.kb.AddPathway(self.diseaseGroups, "PharmGKB Disease")

        # Group holding drug associations
        self.drugGroups = bioloader.Pathway(
            self.kb.groupTypeID, id + 3, "Drugs",
            "PharmGKB Drug Associations")
        self.kb.AddPathway(self.drugGroups, "PharmGKB Drug")

        # One (initially empty) lookup table per entity type
        self.gkbentities = {"Gene": {}, "Drug": {}, "Disease": {}}
Exemplo n.º 2
0
    def CommitToDB(self, kb, parentGroup):
        """Commit this entity, and the entities linked to it, as pathways.

        Nothing happens when kb.groupTypeID is already recorded in
        self.alreadyCommitted.  Otherwise the id is recorded, a Pathway is
        built for this entity and associated with ``parentGroup``, every
        linked "Drug" and then every linked "Disease" is associated and
        committed recursively, linked "Gene" items are added to the new
        pathway, and finally the pathway itself is committed.

        kb          -- knowledge base; only its groupTypeID is read here
        parentGroup -- group that receives an association to the new pathway
        """
        if kb.groupTypeID in self.alreadyCommitted:
            return
        self.alreadyCommitted.add(kb.groupTypeID)

        typeID = kb.groupTypeID
        ownID = self.GetID()
        accession = self.acc
        label = "%s (%s)" % (self.name, self.type)
        group = bioloader.Pathway(typeID, ownID, accession, label)
        parentGroup.AddAssociation(group.groupID, "")

        # Drugs are handled before diseases; each linked entity is tied to
        # this group and then asked to commit itself beneath it.
        for relation in ("Drug", "Disease"):
            if relation in self.associations:
                for linked in self.associations[relation]:
                    group.AddAssociation(linked.GetID(), relation)
                    linked.CommitToDB(kb, group)

        # Genes do not recurse; they simply attach themselves to the group.
        if "Gene" in self.associations:
            for gene in self.associations["Gene"]:
                gene.AddToPathway(group)

        group.Commit(self.biosettings)
Exemplo n.º 3
0
    def LoadPathways(self, filename):
        """Parse a pathway listing into self.pathways, then archive the file.

        A line containing ":" starts a new pathway: the text before the
        first colon and the text between the first and second colon (both
        stripped) are passed to bioloader.Pathway together with a fresh id
        from self.biosettings.NextID(), and the new pathway is associated
        with self.pathwayGroups.  Any other line whose first
        whitespace-separated token is "Gene" has its third token resolved
        through AliasToGeneID; the gene is added to the current pathway
        only when exactly one gene ID comes back.

        Lines that cannot be used (fewer than three tokens, or a "Gene"
        line appearing before any pathway header) are skipped instead of
        raising.

        Once the file has been read it is moved to "archives" (relative to
        the current working directory) with the external ``mv`` command;
        its exit status is ignored, as before.

        filename -- path of the listing file to read and archive
        """
        # Local import so this method does not depend on the module's
        # import block.
        import subprocess

        curPathway = None
        with open(filename) as pathwayFile:
            for line in pathwayFile:
                header = line.split(":")
                if len(header) > 1:
                    newID = self.biosettings.NextID()
                    if curPathway:
                        self.pathways.append(curPathway)
                    curPathway = bioloader.Pathway(self.groupID, newID,
                                                   header[0].strip(),
                                                   header[1].strip())
                    self.pathwayGroups.AddAssociation(curPathway.groupID,
                                                      "Pathway")
                    continue

                fields = line.split()
                if len(fields) < 3 or fields[0] != "Gene":
                    continue
                if curPathway is None:
                    # Gene listed before any pathway header: nothing to
                    # attach it to.
                    continue
                geneIDs = self.biosettings.regions.AliasToGeneID(
                    [fields[2].strip()])
                if len(geneIDs) == 1:
                    curPathway.AddGene(geneIDs[0])

        if curPathway:
            self.pathways.append(curPathway)

        # Argument list instead of a shell string, so file names containing
        # spaces or shell metacharacters are moved correctly.
        subprocess.call(["mv", filename, "archives"])
Exemplo n.º 4
0
    def LoadAssociation(self, line):
        """Load one tab-separated interaction line as a two-gene pathway.

        Columns 0 and 1 hold the interactors as "type:id".  The text after
        the last ":" in columns 9 and 10 is taken as each interactor's
        taxonomy, and column 13 is handed to BuildLookup, whose "mint"
        entry names the interaction.

        A pathway is committed only when both taxonomies equal
        "9606(Homo sapiens)", both interactors have type "uniprotkb", each
        resolves to exactly one gene ID via AliasToGeneID, and the two
        genes differ.

        line -- one raw line of the interaction file

        Returns True when a pathway was committed, otherwise False.
        Raises RuntimeError when the interaction's "mint" identifier was
        already seen (previously this aborted through a NameError on an
        undefined name).
        """
        cols = line.split("\t")
        idA = cols[0].split(":")  # type:id
        if len(idA) < 2:
            return False
        idB = cols[1].split(":")
        acceptableTypes = ["uniprotkb"]

        taxA = cols[9].split(":")[-1].strip()
        taxB = cols[10].split(":")[-1].strip()
        idLookup = BuildLookup(cols[13])

        # The literal had been mangled to "9606(H**o sapiens)", which no
        # real taxonomy value can equal, so every line was rejected.
        if not (taxA == taxB and taxA == "9606(Homo sapiens)"):
            return False

        def resolve(typedID):
            # Gene ID for a "type:id" pair, or None when the type is not
            # accepted or the alias does not map to exactly one gene.
            if typedID[0] not in acceptableTypes:
                return None
            ids = self.biosettings.regions.AliasToGeneID([typedID[1]])
            if len(ids) == 1:
                return list(ids)[0]
            # NOTE(review): these assignments overwrite the attribute with
            # the latest identifier rather than accumulating -- confirm
            # whether a collection (.add) was intended.
            if len(ids) == 0:
                self.missingUni = typedID[1]
            else:
                self.ambiguousUni = typedID[1]
            return None

        geneA = resolve(idA)
        geneB = resolve(idB)
        if geneA is None or geneB is None or geneA == geneB:
            return False

        mintID = idLookup["mint"]
        if mintID in self.observedIDs:
            print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
            raise RuntimeError(
                "Duplicate MINT interaction ID: %s" % (mintID,))
        self.observedIDs.add(mintID)

        groupID = self.biosettings.NextID()
        pathway = bioloader.Pathway(self.groupID, groupID, mintID, "")
        pathway.AddGene(geneA)
        pathway.AddGene(geneB)
        pathway.Commit(self.biosettings)
        self.biosettings.RelatePathways(self.groupID, groupID, "mint", "")
        return True
Exemplo n.º 5
0
    def Commit(self, groupTypeID, biosettings):
        """Store this term as a pathway group and record its parent links.

        A Pathway is built from self.groupID, self.term_id and
        self.description, and the first element of every entry in
        self.associations is added to it as a gene.  Each key of
        self.parents is then related to this group, passing the first two
        elements of the stored value to RelatePathways.  When there are no
        parents at all, the group is related to ``groupTypeID`` instead so
        it is not left without a parent.  The pathway is committed last.

        groupTypeID -- group type for the pathway; also the fallback parent
        biosettings -- object providing RelatePathways; passed to Commit
        """
        group = bioloader.Pathway(groupTypeID, self.groupID, self.term_id,
                                  self.description)
        for link in self.associations:
            group.AddGene(link[0])

        isRoot = len(self.parents) == 0
        for parentID in self.parents:
            relation = self.parents[parentID]
            biosettings.RelatePathways(parentID, self.groupID,
                                       relation[0], relation[1])
        if isRoot:
            # No parent recorded: hang the group off its group type.
            biosettings.RelatePathways(groupTypeID, self.groupID, "", "")

        group.Commit(biosettings)
Exemplo n.º 6
0
	def LoadAssociation(self, line):
		"""Load one tab-separated interaction line as a two-gene pathway.

		Columns 0 and 1 are "|"-separated identifier lists; the last entry
		of each, minus its first 10 characters (presumably a "uniprotkb:"
		prefix -- confirm against the data), is resolved via AliasToGeneID.
		Columns 9 and 10, minus their first 6 characters, are compared as
		taxonomy values.  Column 13 is used as the pathway name and column
		6 as its comment.

		A pathway is committed only when the taxonomies match, both
		identifiers resolve to exactly one gene, and the genes differ.
		An identifier that resolves to no gene is added to self.missingUni
		and one that resolves to several is added to self.ambiguousUni.
		An identifier that resolved uniquely is never recorded as
		ambiguous, even if its partner failed to resolve.

		line -- one raw line of the interaction file

		Returns True when a pathway was committed, otherwise False.
		"""
		cols = line.split("\t")
		uniA = cols[0].split("|")[-1][10:]
		uniB = cols[1].split("|")[-1][10:]
		taxA = cols[9][6:]
		taxB = cols[10][6:]
		comment = cols[6]
		name = cols[13]

		if taxA != taxB:
			# Same output as the former ``print >> sys.stdout`` statement.
			sys.stdout.write("Misaligned Taxonomy (%s), %s %s  : %s\n" % (name, taxA, taxB, comment))
			return False

		idsA = self.biosettings.regions.AliasToGeneID([uniA])
		idsB = self.biosettings.regions.AliasToGeneID([uniB])

		# Classify each identifier on its own result.  The old code put a
		# uniquely resolved identifier into ambiguousUni whenever its
		# partner failed.
		for uni, ids in ((uniA, idsA), (uniB, idsB)):
			if len(ids) == 0:
				self.missingUni.add(uni)
			elif len(ids) > 1:
				self.ambiguousUni.add(uni)

		if len(idsA) != 1 or len(idsB) != 1:
			return False

		geneA = list(idsA)[0]
		geneB = list(idsB)[0]
		if geneA == geneB:
			return False

		groupID = self.biosettings.NextID()
		pathway = bioloader.Pathway(self.groupID, groupID, name, comment)
		pathway.AddGene(geneA)
		pathway.AddGene(geneB)
		pathway.Commit(self.biosettings)
		self.biosettings.RelatePathways(self.groupID, groupID, "PFAM", "")
		return True
Exemplo n.º 7
0
    def LoadFile(self, groupID, filename):
        """Load one gene-list file as a pathway and commit it.

        The pathway name is the part of ``filename`` before its second
        "_"; if that text contains "/", its second "/"-separated component
        is used instead.  The first line of the file is skipped.  On every
        other line the third and fourth whitespace-separated tokens (gene
        name and Entrez ID) are passed to AliasToGeneID, and the gene is
        added only when exactly one gene ID comes back.  Blank lines and
        lines with fewer than four tokens are skipped instead of raising
        IndexError.

        groupID  -- id for the new pathway
        filename -- path of the file to read; also stored as the pathway's
                    fourth constructor argument
        """
        name = "_".join(filename.split("_")[:2])
        pathParts = name.split("/")
        if len(pathParts) > 1:
            # NOTE(review): this is the second path component, not the
            # basename -- confirm for paths nested more than one level.
            name = pathParts[1]

        pathway = bioloader.Pathway(self.groupID, groupID, name, filename)

        with open(filename) as geneFile:
            for lineNumber, line in enumerate(geneFile):
                if lineNumber == 0:
                    continue
                words = line.split()
                if len(words) < 4:
                    continue
                geneName = words[2]
                entrezID = words[3]
                geneIDs = self.biosettings.regions.AliasToGeneID(
                    [entrezID, geneName])
                # Zero matches (unknown gene) and several matches
                # (ambiguous) are both ignored.
                if len(geneIDs) == 1:
                    pathway.AddGene(geneIDs.pop())

        pathway.Commit(self.biosettings)
        self.biosettings.RelatePathways(self.groupID, groupID, "", "")
Exemplo n.º 8
0
    def LoadPathway(self, groupID, remotePathway):
        """Fetch the genes of one remote pathway and commit them as a group.

        The pathway's entry id and definition are the first elements of
        remotePathway['entry_id'] and remotePathway['definition'].  The
        gene list comes from self.srv.get_genes_by_pathway; each returned
        identifier, minus its first four characters, is resolved via
        AliasToGeneID and kept only when exactly one gene ID comes back.
        If at least one gene was kept, a Pathway named after the entry id
        minus its first five characters is filled, committed and related
        to self.groupID.  Otherwise nothing is written.

        groupID       -- id for the new pathway
        remotePathway -- mapping with 'definition' and 'entry_id' sequences
        """
        definition = remotePathway['definition'][0]
        entryID = remotePathway['entry_id'][0]

        # Single remote call; any error it raises propagates to the caller.
        remoteGeneList = self.srv.get_genes_by_pathway(pathway_id=entryID)

        genes = set()  # gene IDs identified with the pathway
        for geneHSA in remoteGeneList:
            matches = self.biosettings.regions.AliasToGeneID([geneHSA[4:]])
            # Unknown (0 matches) and ambiguous (>1) genes are dropped.
            if len(matches) == 1:
                genes.add(matches[0])

        if len(genes) == 0:
            return

        pathway = bioloader.Pathway(self.groupID, groupID, entryID[5:],
                                    definition)
        for geneID in genes:
            pathway.AddGene(geneID)
        pathway.Commit(self.biosettings)
        self.biosettings.RelatePathways(self.groupID, groupID, "", "")
Exemplo n.º 9
0
    def LoadAssociation(self, line):
        """Load one tab-separated interaction line as a two-gene pathway.

        Blank lines and lines starting with "#" are ignored.  Columns 1
        and 2 are parsed as integer gene IDs, columns 15 and 16 (stripped)
        are the two taxonomy IDs, column 0 supplies the pathway name
        ("biogrid:<col 0>") and column 12 (stripped) is the relationship
        label passed to RelatePathways.

        A pathway is committed only when both taxonomy IDs are "9606" and
        both gene IDs are present in self.biosettings.regions.genes.  Any
        gene ID not found there is added to self.missing.

        line -- one raw line of the interaction file

        Returns True when a pathway was committed, otherwise False.
        Raises ValueError if column 1 or 2 is not an integer.
        """
        # An empty string used to raise IndexError on line[0]; treat blank
        # lines like header lines.
        if not line.strip() or line.startswith("#"):
            return False

        cols = line.split("\t")
        geneA = int(cols[1])
        geneB = int(cols[2])
        name = "biogrid:%s" % (cols[0])
        taxA = cols[15].strip()
        taxB = cols[16].strip()

        if not (taxA == taxB and taxA == "9606"):
            return False

        # Check both genes, so that each unknown one is recorded.
        bothKnown = True
        for gene in (geneA, geneB):
            if gene not in self.biosettings.regions.genes:
                self.missing.add(gene)
                bothKnown = False
        if not bothKnown:
            return False

        groupID = self.biosettings.NextID()
        pathway = bioloader.Pathway(self.groupID, groupID, name, "")
        pathway.AddGene(geneA)
        pathway.AddGene(geneB)
        pathway.Commit(self.biosettings)
        self.biosettings.RelatePathways(self.groupID, groupID,
                                        cols[12].strip(), "")
        return True