Python OrthologyGroup示例，OrthologyGroup Python示例

示例#1

0

显示文件

文件： DomainLevelProtein.py 项目： expectopatronum/orth-scripts

    def initDomainLevelProteins(domainfile):
        """Parse a domain-level orthology table into proteins and groups.

        Everything up to and including the first line that starts with '_'
        is ignored. After that point:

        * a 'Group' line starts a new group via
          OrthologyGroup.getBasicOrthologyGroup(),
        * a 'Bootstrap' line is handed to the current group's addSeeds(),
        * lines starting with 'Score' or '_____' are skipped,
        * blank lines are skipped,
        * any other line is a protein row. A row that starts with a space
          has no A column; the B column is read from the last two fields.

        Args:
            domainfile: path of the table file to read.

        Returns:
            A tuple (proteinsA, proteinsB, orthologGroups). The protein
            dicts map the domain header string to the protein object.
            orthologGroups is the dict passed to getBasicOrthologyGroup()
            (presumably populated there -- confirm in OrthologyGroup).
        """
        proteinsA = {}
        proteinsB = {}
        orthologGroups = {}
        groupsStarted = False
        ort = None
        lineStarts = ['Group', 'Score', 'Boots', '_____']

        def annotate(protein, header):
            # Original note on the parsed header layout:
            # header, protein, start, end.
            info = Helper.retrieveDomainHeaderInformation(header)
            protein.domain = info[1]
            protein.start = int(info[2])
            protein.end = int(info[3])
            protein.header = header

        # The context manager closes the file even when a row fails to parse.
        with open(domainfile, 'r') as handle:
            for line in handle:
                if not groupsStarted:
                    if line.startswith('_'):
                        groupsStarted = True
                    continue

                # Blank lines (e.g. trailing newlines) carry no data; treating
                # them as protein rows would crash on the empty field list.
                if not line.strip():
                    continue

                if line[0:5] not in lineStarts:
                    hasA = not line.startswith(' ')
                    splittedLine = line.split()
                    temp = ort.getBasicProteins(splittedLine)

                    # Re-tag the basic protein objects as domain-level ones.
                    for p in temp:
                        p.__class__ = DomainLevelProtein

                    if hasA:
                        annotate(temp[0], splittedLine[0])
                        proteinsA[temp[0].header] = temp[0]
                        # Score is the text before '%' in the second field.
                        score = float(splittedLine[1].split('%')[0])
                        ort.inparalogsA[temp[0].header] = score

                    if not hasA or len(temp) > 1:
                        annotate(temp[-1], splittedLine[-2])
                        proteinsB[temp[-1].header] = temp[-1]
                        score = float(splittedLine[-1].split('%')[0])
                        ort.inparalogsB[temp[-1].header] = score

                elif line.startswith('Group'):
                    ort = OrthologyGroup.getBasicOrthologyGroup(
                        line, False, orthologGroups)

                elif line.startswith('Bootstrap'):
                    ort.addSeeds(line)

        return proteinsA, proteinsB, orthologGroups

示例#2

0

显示文件

文件： DomainLevelProtein.py 项目： expectopatronum/orth-scripts

    def initDomainLevelProteins(domainfile):
        """Parse a domain-level orthology table into proteins and groups.

        Everything up to and including the first line that starts with '_'
        is ignored. After that point:

        * a 'Group' line starts a new group via
          OrthologyGroup.getBasicOrthologyGroup(),
        * a 'Bootstrap' line is handed to the current group's addSeeds(),
        * lines starting with 'Score' or '_____' are skipped,
        * blank lines are skipped,
        * any other line is a protein row. A row that starts with a space
          has no A column; the B column is read from the last two fields.

        Args:
            domainfile: path of the table file to read.

        Returns:
            A tuple (proteinsA, proteinsB, orthologGroups). The protein
            dicts map the domain header string to the protein object.
            orthologGroups is the dict passed to getBasicOrthologyGroup()
            (presumably populated there -- confirm in OrthologyGroup).
        """
        proteinsA = {}
        proteinsB = {}
        orthologGroups = {}
        groupsStarted = False
        ort = None
        lineStarts = ['Group', 'Score', 'Boots', '_____']

        def annotate(protein, header):
            # Original note on the parsed header layout:
            # header, protein, start, end.
            info = Helper.retrieveDomainHeaderInformation(header)
            protein.domain = info[1]
            protein.start = int(info[2])
            protein.end = int(info[3])
            protein.header = header

        # The context manager closes the file even when a row fails to parse.
        with open(domainfile, 'r') as handle:
            for line in handle:
                if not groupsStarted:
                    if line.startswith('_'):
                        groupsStarted = True
                    continue

                # Blank lines (e.g. trailing newlines) carry no data; treating
                # them as protein rows would crash on the empty field list.
                if not line.strip():
                    continue

                if line[0:5] not in lineStarts:
                    hasA = not line.startswith(' ')
                    splittedLine = line.split()
                    temp = ort.getBasicProteins(splittedLine)

                    # Re-tag the basic protein objects as domain-level ones.
                    for p in temp:
                        p.__class__ = DomainLevelProtein

                    if hasA:
                        annotate(temp[0], splittedLine[0])
                        proteinsA[temp[0].header] = temp[0]
                        # Score is the text before '%' in the second field.
                        score = float(splittedLine[1].split('%')[0])
                        ort.inparalogsA[temp[0].header] = score

                    if not hasA or len(temp) > 1:
                        annotate(temp[-1], splittedLine[-2])
                        proteinsB[temp[-1].header] = temp[-1]
                        score = float(splittedLine[-1].split('%')[0])
                        ort.inparalogsB[temp[-1].header] = score

                elif line.startswith('Group'):
                    ort = OrthologyGroup.getBasicOrthologyGroup(
                        line, False, orthologGroups)

                elif line.startswith('Bootstrap'):
                    ort.addSeeds(line)

        return proteinsA, proteinsB, orthologGroups

示例#3

0

显示文件

文件： GeneLevelProtein.py 项目： expectopatronum/orth-scripts

    def initGeneLevelProteins(filename, tsvfileA, tsvfileB, useDomains):
        """Parse a gene-level orthology table into proteins and groups.

        The domain length cutoff is read from option "domainlengthcutoff"
        in section "Options" of "orthology.cfg" (looked up relative to the
        current working directory). When useDomains is true, domain data
        for both sides is loaded with Helper.getDomainsFromTsv() and
        attached to each protein by accession.

        Table lines are ignored until the first line starting with '_'.
        After that, 'Group' lines start a new group, 'Bootstrap' lines are
        passed to the current group's addSeeds(), lines starting with
        'Score' or '_____' and blank lines are skipped, and every other
        line is a protein row (a leading space means no A column).

        Two summary lines are printed to stdout after parsing.

        Args:
            filename: path of the orthology table to read.
            tsvfileA: TSV file with domains for side A (used only when
                useDomains is true).
            tsvfileB: TSV file with domains for side B (used only when
                useDomains is true).
            useDomains: whether to load and attach domain information.

        Returns:
            (proteinsA, proteinsB, orthologGroups) when useDomains is
            false; (proteinsA, proteinsB, orthologGroups, shortA, shortB)
            when it is true, where shortA/shortB are the second values
            returned by Helper.getDomainsFromTsv(). The protein dicts map
            accession to protein object.
        """
        proteinsA = {}
        proteinsB = {}
        orthologGroups = {}
        groupsStarted = False

        rcp = ConfigParser.RawConfigParser()
        rcp.read("orthology.cfg")
        cutoff = rcp.getint("Options", "domainlengthcutoff")

        if useDomains:
            domainsA, shortA = Helper.getDomainsFromTsv(tsvfileA, cutoff)
            domainsB, shortB = Helper.getDomainsFromTsv(tsvfileB, cutoff)
        ort = None

        lineStarts = ["Group", "Score", "Boots", "_____"]
        # The context manager closes the file even when a row fails to parse.
        with open(filename, "r") as handle:
            for line in handle:
                if not groupsStarted:
                    if line.startswith("_"):
                        groupsStarted = True
                    continue

                # Blank lines (e.g. trailing newlines) carry no data; treating
                # them as protein rows would crash on the empty field list.
                if not line.strip():
                    continue

                if line[0:5] not in lineStarts:
                    hasA = not line.startswith(" ")
                    splittedLine = line.split()
                    temp = ort.getBasicProteins(splittedLine)

                    if hasA:
                        # Re-tag the basic protein as a gene-level one.
                        temp[0].__class__ = GeneLevelProtein
                        proteinsA[temp[0].accession] = temp[0]
                        if useDomains:
                            temp[0].domains = domainsA[temp[0].accession]
                        # Score is the text before '%' in the second field.
                        score = float(splittedLine[1].split("%")[0])
                        ort.inparalogsA[temp[0].accession] = score

                    if not hasA or len(temp) > 1:
                        temp[-1].__class__ = GeneLevelProtein
                        proteinsB[temp[-1].accession] = temp[-1]
                        if useDomains:
                            temp[-1].domains = domainsB[temp[-1].accession]
                        score = float(splittedLine[-1].split("%")[0])
                        ort.inparalogsB[temp[-1].accession] = score

                elif line.startswith("Group"):
                    ort = OrthologyGroup.getBasicOrthologyGroup(line, True, orthologGroups)

                elif line.startswith("Bootstrap"):
                    ort.addSeeds(line)

        # Every A inparalog pairs with every B inparalog of the same group.
        pairsCount = 0
        for group in orthologGroups.values():
            pairsCount += len(group.inparalogsA) * len(group.inparalogsB)

        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("%s should be the amount of pairs" % pairsCount)
        print("%s ortholog groups read from the file" % len(orthologGroups))

        if useDomains:
            return proteinsA, proteinsB, orthologGroups, shortA, shortB
        return proteinsA, proteinsB, orthologGroups