Exemplo n.º 1
0
Arquivo: Plot.py Projeto: wkpalan/aigo
    def add(self, statistics, plotType):
        """Attach the plot function of a statistics to ``self.cls``.

        The statistics must be known to ``rS``; otherwise a warning is logged
        and nothing is recorded.  For the "Multiple" and "Histo3D" plot types
        the function built by the matching ``get...PlotFunction`` factory is
        wrapped with ``new.instancemethod`` and set as an attribute of
        ``self.cls``.  Any other plot type only triggers a warning, but is
        still recorded in ``self.types`` and ``self.allTypes``.

        Returns:
            False when the statistics is not registered, True otherwise.
        """
        logger.info("Registering plot function %s" % statistics)

        isKnown = rS.isRegistered(statistics)
        if not isKnown:
            logger.handleWarning("Caution, the statistics is unknown : %s" % statistics)
            return False

        self.all.append(statistics)

        if plotType == "Multiple":
            rawFunc = self.getMultiPlotFunction(statistics)
            method = new.instancemethod(rawFunc, None, self.cls)
            # Exposed under the generated function's own name
            attrName = method.__name__
            setattr(self.cls, attrName, method)
        elif plotType == "Histo3D":
            rawFunc = self.getHisto3DPlotFunction(statistics)
            method = new.instancemethod(rawFunc, None, self.cls)
            # Exposed under the generated function's name plus a "Histo3D" suffix
            attrName = "%sHisto3D" % method.__name__
            setattr(self.cls, attrName, method)
        else:
            logger.handleWarning("Caution, the statistics plot type is unknown : %s" % plotType)

        knownTypes = self.types.setdefault(statistics, set())
        knownTypes.add(plotType)
        self.allTypes.add(plotType)

        return True
Exemplo n.º 2
0
    def checkValidity(self):
        """Check that ``self.GPtoGO`` and ``self.GOtoGP`` mirror each other.

        For every aspect of ``self.GPtoGO``, each gene product listed for a
        GO term must list that term in return, and vice versa.  Every missing
        counterpart is reported through ``logger.handleWarning``; a consistent
        aspect is reported through ``logger.info``.

        Returns:
            True when every aspect is consistent, False otherwise.
        """
        logger.info("Name :\t%s" % self.name)

        allValid = True
        for aspect in self.GPtoGO:
            gpToGO = self.GPtoGO[aspect]
            # A key missing on the mirror side is precisely the inconsistency
            # this method must report, so look it up without raising KeyError.
            goToGP = self.GOtoGP.get(aspect, {})
            valid = True

            for gp in gpToGO:
                for go in gpToGO[gp]:
                    if gp not in goToGP.get(go, ()):
                        logger.handleWarning ("%s not found in GOtoGP[%s][%s]" % (gp,aspect,go))
                        valid = False

            for go in goToGP:
                for gp in goToGP[go]:
                    if go not in gpToGO.get(gp, ()):
                        logger.handleWarning ("%s not found in GPtoGO[%s][%s]" % (go,aspect,gp))
                        valid = False

            if valid:
                logger.info ("%s : is valid" % (aspect))

            allValid = allValid and valid

        return allValid
Exemplo n.º 3
0
Arquivo: IO.py Projeto: wkpalan/aigo
def extract_Affy(fileName,
                 G,
                 refSet=None,
                 GO_columns=[30, 31, 32],
                 filetype="Affy",
                 delimiter=',',
                 quoting=csv.QUOTE_ALL):
    """Read GO annotations from a delimited annotation file.

    Leading lines starting with '#' are skipped, the next line is taken as
    the header and ignored, and every remaining row is parsed with the csv
    module.  The gene product id is the first field of a row.  Each cell in
    ``GO_columns`` is split on '///' into entries; the part of an entry
    before '//' is read as the numeric GO id.  Entries equal to "---" or
    blank are skipped.

    Args:
        fileName: path of the annotation file (passed through checkForZip).
        G: GO graph; provides ``aspect`` and ``get_GOAlternative``.
        refSet: optional collection of gene product ids; when non-empty,
            rows whose id is not in it are skipped with a warning.
        GO_columns: indices of the columns holding GO entries; at most the
            first three are read.  The default list is never modified.
        filetype: unused by this function.
        delimiter: field delimiter given to the csv dialect.
        quoting: csv quoting mode given to the csv dialect.

    Returns:
        (GenetoGO, GOtoGene): two dicts keyed by aspect, mapping gene product
        id -> set of GO terms and GO term -> set of gene product ids.

    Raises:
        IOError: when the file does not exist.
    """
    fileName = checkForZip(fileName)
    if not os.path.exists(fileName):
        raise IOError(fileName + " does not exist and is required ")

    csvfile = readFile(fileName)

    # Only membership is tested, so a plain set is enough.
    hasRef = set(refSet) if refSet else None

    GenetoGO, GOtoGene = dict(), dict()
    for aspect in G.aspect:
        GenetoGO[aspect], GOtoGene[aspect] = dict(), dict()

    # Skip comments. startswith() is also safe on the empty string that
    # readline() returns at end of file, where row[0] would raise IndexError.
    row = csvfile.readline()
    while row.startswith('#'):
        row = csvfile.readline()
    # `row` now holds the header line, which is not needed any further.

    csv.register_dialect('format', delimiter=delimiter, quoting=quoting)
    rd = csv.reader(csvfile, dialect='format')

    nominalAspects = [
        'biological_process', 'cellular_component', 'molecular_function'
    ]
    for row in rd:
        # Read gene product id
        g = row[0]

        if hasRef and g not in hasRef:
            logger.handleWarning(
                "gene product %s is not in the reference set, skip it " % g)
            continue

        # The nominal aspect of a column is not used: the aspect reported by
        # the GO graph for each term decides where the term is stored.
        for _nominal, i in zip(nominalAspects, GO_columns):
            for item in row[i].split('///'):
                if item == "---" or len(item.strip()) == 0:
                    continue

                go = "GO:%07d" % int(item.split('//')[0].replace('/', ''))

                go, aspect = G.get_GOAlternative(go, nameSpace=True)
                if not aspect:
                    logger.handleWarning(
                        "term %s is not in GO graph, skip it " % go)
                    continue

                GenetoGO[aspect].setdefault(g, set([])).add(go)
                GOtoGene[aspect].setdefault(go, set([])).add(g)

    return GenetoGO, GOtoGene
Exemplo n.º 4
0
Arquivo: IO.py Projeto: wkpalan/aigo
def extract_GO2GP(fileName,
                  G,
                  refSet=None,
                  sep1='\t',
                  sep2=',',
                  comments='#',
                  skiprows=0):
    """
    Read a functional annotation mapping file of the form
    GO_ID sep Gene_ID,Gene_ID
    sep is \\t by default

    Only the first two columns are read.  Rows whose first column does not
    start with 'GO:' are ignored.  Each GO id is resolved through
    ``G.get_GOAlternative`` and the resolved term is the one stored, as in
    the other extract_* readers.

    Args:
        fileName: path of the mapping file (passed through checkForZip).
        G: GO graph; provides ``aspect`` and ``get_GOAlternative``.
        refSet: optional collection of gene product ids; when non-empty,
            gene products not in it are skipped with a warning.
        sep1: delimiter between the GO id and the gene product list.
        sep2: delimiter between gene product ids.
        comments: comment marker given to loadtxt.
        skiprows: number of leading rows skipped by loadtxt.

    Returns:
        (GenetoGO, GOtoGene): two dicts keyed by aspect, mapping gene product
        id -> set of GO terms and GO term -> set of gene product ids.

    Raises:
        IOError: when the file does not exist.
    """

    fileName = str(checkForZip(fileName))
    if not os.path.exists(fileName):
        raise IOError(fileName + " does not exist and is required ")

    # Only membership is tested, so a plain set is enough.
    hasRef = set(refSet) if refSet else None

    GenetoGO, GOtoGene = dict(), dict()
    for aspect in G.aspect:
        GenetoGO[aspect], GOtoGene[aspect] = dict(), dict()

    # ndmin=2 keeps a single-row file as one (go, GP) row; without it the
    # result is squeezed to 1-D and the unpacking below breaks.
    # NOTE(review): assumes loadtxt is numpy's (ndmin exists since 1.6).
    data = loadtxt(fileName,
                   dtype="S",
                   usecols=[0, 1],
                   delimiter=sep1,
                   comments=comments,
                   skiprows=skiprows,
                   ndmin=2)

    for go, GP in data:

        if not go.startswith('GO:'):
            continue

        #Get the alternative term if any and its GO aspect
        term, aspect = G.get_GOAlternative(go, nameSpace=True)

        if not aspect:
            logger.handleWarning("term %s is not in GO graph, skip it " %
                                 (term))
            continue

        for gp in GP.split(sep2):
            gp = gp.strip()

            if hasRef is not None and gp not in hasRef:
                logger.handleWarning(
                    "gene product %s is not in the reference set, skip it " %
                    gp)
                continue

            # Store the resolved term, not the raw id read from the file.
            GenetoGO[aspect].setdefault(gp, set([])).add(term)
            GOtoGene[aspect].setdefault(term, set([])).add(gp)

    return GenetoGO, GOtoGene
Exemplo n.º 5
0
    def save(self, fileName):
        """Store every item of this mapping in a shelve file.

        The file name itself is stored under the 'fileName' key, then each
        (key, value) pair of ``self.items()``.  On success ``self.status`` is
        set to "Saved".  Any failure is reported through
        ``logger.handleWarning`` and leaves ``self.status`` untouched.

        Args:
            fileName: path given to ``shelve.open``.
        """
        import shelve
        try:
            logger.info("File :\t%s" % fileName)
            shelf = shelve.open(fileName, protocol=-1)
            try:
                shelf['fileName'] = fileName
                for k, v in self.items():
                    shelf[k] = v
            finally:
                # Always release the underlying file, even when storing a
                # value fails part-way through.
                shelf.close()

            self.status = "Saved"

        except Exception as e:
            logger.handleWarning("Unable to save project %s: %s" %
                                 (fileName, str(e)))
Exemplo n.º 6
0
    def load(self, fileName):
        """Copy every item of a shelve file into this mapping.

        Each (key, value) pair of the shelf is assigned to ``self[key]``.  On
        success ``self.status`` is set to "Loaded".  Any failure is reported
        through ``logger.handleWarning`` and leaves ``self.status`` untouched.

        Args:
            fileName: path given to ``shelve.open``.
        """

        import shelve
        try:
            logger.info("File :\t%s" % fileName)
            # NOTE(review): shelve unpickles the stored values, so only files
            # from a trusted source should be loaded.
            shelf = shelve.open(fileName, protocol=-1)
            try:
                for k, v in shelf.items():
                    self[k] = v
            finally:
                # Always release the underlying file, even when reading a
                # value fails part-way through.
                shelf.close()

            self.status = "Loaded"

        except Exception as e:
            logger.handleWarning("Unable to load project %s: %s" %
                                 (fileName, str(e)))
Exemplo n.º 7
0
def GOSet_PWSimilarity(G, GO1, GO2, metric="GS2", **kargs):
    """
    Score the semantic similarity between two annotation sets.

    Both sets are converted with G.GOtoInt and handed to the method of G
    selected by `metric` ("GS2", "CzekanowskiDice" or "Resnik").  The Resnik
    metric receives the optional `IC` keyword argument (an empty dict when
    absent).

    Returns the pair (sim, l).  For CzekanowskiDice, l is [sim, sim].  An
    unknown metric logs a warning and yields (None, None).
    """
    if metric == "GS2":
        score, detail = G.GS2([G.GOtoInt(GO1), G.GOtoInt(GO2)])
        return score, detail

    if metric == "CzekanowskiDice":
        score = G.CzekanowskiDice(G.GOtoInt(GO1), G.GOtoInt(GO2))
        return score, [score, score]

    if metric == "Resnik":
        firstSet = G.GOtoInt(GO1)
        secondSet = G.GOtoInt(GO2)
        score, detail = G.Resnik(firstSet, secondSet, kargs.get('IC', dict()))
        return score, detail

    logger.handleWarning ("Sorry, unknown semnatic similarity %s " % metric)
    return None, None
Exemplo n.º 8
0
def GOSet_Similarity(G, GO, metric="GS2", **kargs):
    """
    Score the semantic similarity of every pair of GO terms in one annotation set.

    Each unordered pair of terms is scored once, with each term wrapped in a
    one-element list and converted through G.GOtoInt.  `metric` selects
    "GS2", "CzekanowskiDice" or "Resnik"; Resnik receives the optional `IC`
    keyword argument (an empty dict when absent).

    Returns the list of scores.  A set with fewer than two terms yields
    [1.0]; an unknown metric logs a warning and yields None.
    """
    if len(GO) < 2:
        return list([1.])

    def termPairs():
        # Every unordered pair once, ordered by the position of its first term
        for first, termA in enumerate(GO):
            for second, termB in enumerate(GO):
                if first < second:
                    yield termA, termB

    if metric == "GS2":
        scores = []
        for termA, termB in termPairs():
            scores.append(G.GS2([G.GOtoInt([termA]), G.GOtoInt([termB])])[0])
        return scores

    if metric == "CzekanowskiDice":
        scores = []
        for termA, termB in termPairs():
            scores.append(G.CzekanowskiDice(G.GOtoInt([termA]), G.GOtoInt([termB])))
        return scores

    if metric == "Resnik":
        scores = []
        for termA, termB in termPairs():
            result = G.Resnik(G.GOtoInt([termA]), G.GOtoInt([termB]), kargs.get('IC', dict()))
            scores.append(result[0])
        return scores

    logger.handleWarning ("Sorry, unknown semnatic similarity %s " % metric)
    return None
Exemplo n.º 9
0
Arquivo: IO.py Projeto: wkpalan/aigo
def extract_SCOP(fileName, G, refSet=None):
    """Read GO annotations from a ';'-separated file with a header row.

    The gene product id is read from the 'domScop' column and the GO term
    from the 'termGo' column.  Terms that do not start with 'GO:' are
    ignored; the others are resolved through ``G.get_GOAlternative``.

    Args:
        fileName: path of the annotation file (passed through checkForZip).
        G: GO graph; provides ``aspect`` and ``get_GOAlternative``.
        refSet: optional collection of gene product ids; when non-empty,
            rows whose id is not in it are skipped with a warning.

    Returns:
        (GenetoGO, GOtoGene): two dicts keyed by aspect, mapping gene product
        id -> set of GO terms and GO term -> set of gene product ids.

    Raises:
        IOError: when the file does not exist.
        ValueError: when the header lacks the 'domScop' or 'termGo' column.
    """
    fileName = checkForZip(fileName)
    if not os.path.exists(fileName):
        raise IOError(fileName + " does not exist and is required ")

    # Only membership is tested, so a plain set is enough.
    hasRef = set(refSet) if refSet else None

    GenetoGO, GOtoGene = dict(), dict()
    for aspect in G.aspect:
        GenetoGO[aspect], GOtoGene[aspect] = dict(), dict()

    rd = csv.reader(readFile(fileName), delimiter=";")
    header = next(rd)

    # Resolve the column positions once instead of once per row.
    gpColumn = header.index('domScop')
    goColumn = header.index('termGo')

    for row in rd:
        # csv yields an empty list for a blank line
        if not row:
            continue

        g = row[gpColumn]
        go = row[goColumn]

        if hasRef and g not in hasRef:
            logger.handleWarning(
                "gene product %s is not in the reference set, skip it " % g)
            continue

        if go.startswith('GO:'):
            #Get the alternative term if any and its GO aspect
            go, aspect = G.get_GOAlternative(go, nameSpace=True)

            if not aspect:
                # The original referenced an undefined name `term` here.
                logger.handleWarning("term %s is not in GO graph, skip it " %
                                     go)
                continue

            GenetoGO[aspect].setdefault(g, set([])).add(go)
            GOtoGene[aspect].setdefault(go, set([])).add(g)

    return GenetoGO, GOtoGene
Exemplo n.º 10
0
def _pairwiseMeans(annotationSets, score):
    """Average, for each annotation set, its score against all the others.

    Every unordered pair is scored once, with the lower-index set as first
    argument, and the value is reused for the mirrored pair.

    Returns (mean of the per-set means, list of per-set means).
    """
    cache = dict()
    perSet = list()
    for i, GO1 in enumerate(annotationSets):
        scores = list()
        for j, GO2 in enumerate(annotationSets):
            if j == i:
                continue
            key = (min(i, j), max(i, j))
            if key not in cache:
                cache[key] = score(GO1, GO2)
            scores.append(cache[key])
        perSet.append(mean(scores))

    return mean(perSet), perSet


def GO_Similarity(G, allGO, metric="GS2", **kargs):
    """
    Calculates pairwise semantic similarity scores in a list of annotation sets

    `metric` selects "GS2", "CzekanowskiDice" or "Resnik".  GS2 is computed
    by G.GS2 on all converted sets at once.  For the other two metrics each
    pair of sets is scored, each set gets the mean of its scores against the
    other sets, and the overall score is the mean of those means.  Resnik
    receives the optional `IC` keyword argument (an empty dict when absent).

    Returns the pair (sim, l): the overall score and the per-set scores.
    Fewer than two sets yields (1.0, [1.0]); an unknown metric logs a
    warning and yields (None, None).
    """

    if len(allGO) < 2:
        return 1.0, [1.0]

    if metric == "GS2":
        sim, l = G.GS2([G.GOtoInt(GO) for GO in allGO])

    elif metric == "CzekanowskiDice":
        allGO = [G.GOtoInt(GO) for GO in allGO]
        sim, l = _pairwiseMeans(allGO, G.CzekanowskiDice)

    elif metric == "Resnik":
        allGO = [G.GOtoInt(GO) for GO in allGO]

        def resnikScore(GO1, GO2):
            # G.Resnik returns (sim, l); only the score can be averaged.
            return G.Resnik(GO1, GO2, kargs.get('IC', dict()))[0]

        sim, l = _pairwiseMeans(allGO, resnikScore)

    else:
        logger.handleWarning ("Sorry, unknown semnatic similarity %s " % metric)
        sim, l = None, None

    return sim, l
Exemplo n.º 11
0
Arquivo: IO.py Projeto: wkpalan/aigo
def extract_GAF(fileName, G, refSet=None):
    """Read GO annotations from a GAF file parsed by ``readGAF_2``.

    The gene product id is built as "<taxon>.<DB Object Symbol>", where
    <taxon> is the "Taxon(|taxon)" field without its first six characters
    (presumably the "taxon:" prefix - confirm against the input files).
    Annotations whose Qualifier contains 'NOT' are ignored with a warning.
    Terms that do not start with 'GO:' are ignored; the others are resolved
    through ``G.get_GOAlternative``.

    Args:
        fileName: path of the GAF file (passed through checkForZip).
        G: GO graph; provides ``aspect`` and ``get_GOAlternative``.
        refSet: optional collection of gene product ids; when non-empty,
            rows whose id is not in it are skipped with a warning.

    Returns:
        (GenetoGO, GOtoGene): two dicts keyed by aspect, mapping gene product
        id -> set of GO terms and GO term -> set of gene product ids.

    Raises:
        IOError: when the file does not exist.
    """

    fileName = checkForZip(fileName)
    if not os.path.exists(fileName):
        raise IOError(fileName + " does not exist and is required ")

    # The original stored this under the misspelled name `refRef`, so
    # `hasRef` stayed None and the reference set was never applied.
    hasRef = set(refSet) if refSet else None

    GenetoGO, GOtoGene = dict(), dict()
    for aspect in G.aspect:
        GenetoGO[aspect], GOtoGene[aspect] = dict(), dict()

    data, GAF_col = readGAF_2(fileName)

    # Resolve the column positions once instead of once per row.
    taxonColumn = GAF_col.index("Taxon(|taxon)")
    symbolColumn = GAF_col.index("DB Object Symbol")
    goColumn = GAF_col.index('GO ID')
    qualifierColumn = GAF_col.index('Qualifier')

    for row in data:
        g = ".".join([row[taxonColumn][6:], row[symbolColumn]])

        go = row[goColumn]

        if 'NOT' in row[qualifierColumn]:
            logger.handleWarning(
                "go term %s for gene product %s is qualified as NOT: ignored" %
                (go, g))
            continue

        if hasRef and g not in hasRef:
            logger.handleWarning(
                "gene product %s is not in the reference set, skip it " % g)
            continue

        if go.startswith('GO:'):

            go, aspect = G.get_GOAlternative(go, nameSpace=True)

            if not aspect:
                logger.handleWarning("term %s is not in GO graph, skip it " %
                                     go)
                continue

            GenetoGO[aspect].setdefault(g, set([])).add(go)
            GOtoGene[aspect].setdefault(go, set([])).add(g)

    return GenetoGO, GOtoGene
Exemplo n.º 12
0
    def add(self, fileName, refType="Fasta"):
        """Add the gene product ids found in a file to this reference set.

        The file name and type are first recorded in ``self.fileName`` and
        ``self.refType`` (turned into lists from the second file on).  The
        ids are then read according to ``refType`` and merged with
        ``self.update``:

        * "Fasta": for each record, the part of ``rec.name`` before the first
          ';' and after its last ':'.
        * "Text": first field of each row of a ';'-separated file.
        * "GAF": "<taxon>.<DB Object Symbol>" for each row returned by
          ``readGAF_2``, with the first six characters of the taxon field
          dropped.
        * "AFFY": first field of each csv row after the '#' comments and the
          header line, unless the fifth field is "Control sequence"
          (case-insensitive).

        A missing file, an unknown ``refType`` or any reading error is
        reported through ``logger.handleFatal``; an empty resulting set is
        reported through ``logger.handleWarning``.

        Args:
            fileName: path of the file (passed through checkForZip).
            refType: one of "Fasta", "Text", "GAF" or "AFFY".
        """

        if self.fileName == '':
            self.fileName = fileName
            self.refType = refType
        elif isinstance(self.fileName, list):
            self.fileName.append(fileName)
            self.refType.append(refType)
        else:
            self.fileName = [self.fileName, fileName]
            self.refType = [self.refType, refType]

        fileName = checkForZip(fileName)
        if not os.path.exists(fileName):
            logger.handleFatal(fileName + " does not exist and is required ")

        logger.info("Organism :\t%s" % self.organism)

        logger.info("%s file :\t%s " % (refType, fileName))

        try:

            #Use fasta file to define the reference set
            if refType == "Fasta":
                from Bio import SeqIO
                allID = set(
                    rec.name.split(";")[0].split(":")[-1]
                    for rec in SeqIO.parse(readFile(fileName), "fasta"))

            #Use a simple text file to define the reference set, first column is chosen by default
            elif refType == "Text":
                # csv yields an empty list for a blank line; skip those
                allID = set(
                    r[0]
                    for r in csv.reader(readFile(fileName), delimiter=";")
                    if r)

            #Use a GO annotation file to define the reference set
            elif refType == "GAF":
                from AIGO.IO import readGAF_2
                data, GAF_col = readGAF_2(fileName)

                taxonColumn = GAF_col.index("Taxon(|taxon)")
                symbolColumn = GAF_col.index("DB Object Symbol")
                allID = set(
                    ".".join([row[taxonColumn][6:], row[symbolColumn]])
                    for row in data)

            #Use a Affymetrix annotation file to define the reference set
            elif refType == "AFFY":
                f = readFile(fileName)
                # startswith() is also safe on the empty string that
                # readline() returns at end of file.
                row = f.readline()
                while row.startswith('#'):
                    row = f.readline()
                # `row` now holds the header line, which is not needed.

                allID = set()
                for row in csv.reader(f):
                    #Read gene product id if not control sequence
                    if row[4].upper() != "Control sequence".upper():
                        allID.add(row[0])

            else:
                print("Sorry, unknown file type !!")
                # Carry a message so the fatal report below is not empty
                raise ValueError("unknown reference type %s" % refType)

            self.update(allID)

            if len(self) == 0:
                logger.handleWarning("No gene products loaded")

        except Exception as e:
            logger.handleFatal("Unable to read file %s: %s" %
                               (fileName, str(e)))