Пример #1
0
def main():
    """Print the sequence slice of every repeat segment, grouped by locus.

    Command line: ``python <script> cdsfile repfile``. With any other
    argument count a usage line is written to stderr and the process exits
    with status 0.

    The repeat table is loaded with ``ctIO.readRep`` and the sequences with
    ``ctIO.readFasta``. For each locus, in sorted order, a ``>locus`` header
    is written to stdout, followed by one line per position dictionary
    stored for that locus. Each line joins ``<slice>:<start>`` items with
    ``#``, where ``<start>`` is the 1-based offset of the slice in the
    sequence. A position pair that yields an empty or inverted slice is
    reported on stderr and ends the run with status 1.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s cdsfile repfile\n' % sys.argv[0])
        sys.exit(0)

    cdsFile, repFile = sys.argv[1], sys.argv[2]
    repeats = {}
    # The return value is ignored, so readRep is expected to fill the
    # dictionary it is handed.
    ctIO.readRep(repFile, repeats)
    loci = sorted(repeats.keys())
    cdsByLocus = ctIO.readFasta(cdsFile, loci)

    for locus in loci:
        sys.stdout.write('>%s\n' % locus)
        cds = cdsByLocus[locus]
        for group in repeats[locus]:
            segments = []
            for span in sorted(group.keys()):
                # Positions are 1-based and scaled by three -- presumably
                # residue coordinates mapped onto the coding sequence
                # (TODO confirm against ctIO.readRep).
                ntStart = 3 * (span[0] - 1)
                ntEnd = 3 * span[1]
                if ntEnd <= ntStart:
                    sys.stderr.write('%s %s\n' % (locus, span))
                    sys.exit(1)
                # The label is the 1-based start of the slice (an earlier
                # revision reported start + 3; fixed 2011-08-25).
                segments.append('%s:%s' % (cds[ntStart:ntEnd], ntStart + 1))
            sys.stdout.write('#'.join(segments) + '\n')
Пример #2
0
def main():
    """Collect repeat pairs from a prospero report and hand them to outputRep.

    Command line: ``python <script> pep prospero outputfile [overlap
    percentage]``. With fewer than three arguments a usage line is written
    to stderr and the process exits with status 0.

    A line starting with ``using sequence1`` selects the current locus (its
    last whitespace-separated token). Every following line starting with
    ``>`` must match ``from A to B ... from C to D``; the two 1-based,
    inclusive ranges are cut out of the locus sequence and stored as one
    dictionary ``{(A, B): slice, (C, D): slice}``.

    When exactly four arguments are given, the fourth is a float threshold:
    a hit is dropped if ``(B - C + 1) / (D - A + 1)`` exceeds it. The
    threshold is parsed once, before any file is read, so a malformed value
    fails immediately.

    Raises:
        AttributeError: if a ``>`` line does not match the expected pattern.
        KeyError: if a locus named in the report is missing from the
            sequence file.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) < 4:
        sys.stderr.write(
            'Using python %s pep prospero outputfile [overlap percentage]\n'
            % sys.argv[0])
        sys.exit(0)

    # The filter is active only for exactly five argv entries, as before.
    maxOverlap = float(sys.argv[4]) if len(sys.argv) == 5 else None

    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    pat = re.compile(r">.+?from (\d+) to (\d+).+?from (\d+) to (\d+) ")
    seqDict = readFasta(sys.argv[1])
    repDict = {}
    # The context manager closes the report even if parsing fails midway.
    with open(sys.argv[2]) as report:
        for line in report:
            if line.startswith('using sequence1'):
                locus = line.strip().split()[-1]
                seq = seqDict[locus]
                repDict[locus] = []
            elif line[0] == '>':
                match = pat.match(line)
                pos1, pos2, pos3, pos4 = [int(g) for g in match.groups()]
                if maxOverlap is not None:
                    overlap = (pos2 - pos3 + 1.0) / (pos4 - pos1 + 1.0)
                    if overlap > maxOverlap:
                        continue
                tmpDict = {}
                tmpDict[(pos1, pos2)] = seq[pos1 - 1:pos2]
                tmpDict[(pos3, pos4)] = seq[pos3 - 1:pos4]
                repDict[locus].append(tmpDict)
    outputRep(repDict, sys.argv[3])
Пример #3
0
def main():
    """Drive the codon comparisons for a sequence file and a repeat file.

    Command line: ``python <script> seq rep``. With any other argument count
    a usage line is written to stderr and the process exits with status 0.

    The helpers are called in a fixed order: ``codonSet``, ``readFasta``,
    ``readRep``, ``originalSta``, ``totalNumberProrep``,
    ``singlRatioProRep`` and finally ``repOrNot``. The repeat file name is
    passed to the last three.
    """
    sys.stderr.write("Print the result to three files\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s seq rep\n' % sys.argv[0])
        sys.exit(0)

    seqFile, repFile = sys.argv[1], sys.argv[2]
    codons = codonSet()
    sequences = readFasta(seqFile)
    repeats = {}
    # The return value is ignored, so readRep is expected to fill the
    # dictionary it is handed.
    readRep(repFile, repeats)
    inRepeats, inSequences = originalSta(repeats, sequences)

    # Comparison within proteins that contain repeats. Per the original
    # author's notes: codons inside repeats versus codons in the rest of
    # the sequence, plotted as a bar graph of counts and a heatmap of
    # per-protein codon ratios.
    codonCounts = totalNumberProrep(inRepeats, inSequences, repFile)
    singlRatioProRep(inRepeats, inSequences, repFile, codons)

    # Comparison of proteins without repeats against proteins whose repeats
    # have been removed (again per the original notes).
    repOrNot(codonCounts, inRepeats, inSequences, repFile, codons)
Пример #4
0
def main():
    """Parse a prospero report into per-locus repeat pairs and write them out.

    Command line: ``python <script> pep prospero outputfile [overlap
    percentage]``. With fewer than three arguments a usage line is written
    to stderr and the process exits with status 0.

    ``using sequence1`` lines switch the current locus to their last
    whitespace-separated token. Each later ``>`` line supplies two 1-based,
    inclusive ranges (``from A to B`` and ``from C to D``); the matching
    slices of the locus sequence are stored together in one dictionary keyed
    by ``(A, B)`` and ``(C, D)``. The collected table is passed to
    ``outputRep`` with the output file name.

    If exactly four arguments are supplied, the fourth is read as a float
    and hits with ``(B - C + 1) / (D - A + 1)`` above it are skipped. It is
    converted once, up front, so a bad value is reported before any input
    is processed.

    Raises:
        AttributeError: if a ``>`` line does not match the expected pattern.
        KeyError: if the report names a locus absent from the sequence file.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) < 4:
        sys.stderr.write(
            'Using python %s pep prospero outputfile [overlap percentage]\n'
            % sys.argv[0])
        sys.exit(0)

    # Only an argv of exactly five entries enables the overlap filter.
    if len(sys.argv) == 5:
        overlapLimit = float(sys.argv[4])
    else:
        overlapLimit = None

    # Raw string keeps the "\d" escapes intact for the regex engine.
    pat = re.compile(r">.+?from (\d+) to (\d+).+?from (\d+) to (\d+) ")
    seqDict = readFasta(sys.argv[1])
    repDict = {}
    # "with" guarantees the report handle is closed, even on a parse error.
    with open(sys.argv[2]) as prospero:
        for line in prospero:
            if line.startswith('using sequence1'):
                locus = line.strip().split()[-1]
                seq = seqDict[locus]
                repDict[locus] = []
            elif line[0] == '>':
                match = pat.match(line)
                pos1 = int(match.group(1))
                pos2 = int(match.group(2))
                pos3 = int(match.group(3))
                pos4 = int(match.group(4))
                if overlapLimit is not None and \
                        (pos2 - pos3 + 1.0) / (pos4 - pos1 + 1.0) > \
                        overlapLimit:
                    continue
                tmpDict = {}
                tmpDict[(pos1, pos2)] = seq[pos1 - 1:pos2]
                tmpDict[(pos3, pos4)] = seq[pos3 - 1:pos4]
                repDict[locus].append(tmpDict)
    outputRep(repDict, sys.argv[3])
Пример #5
0
def main():
    """Write, per locus, the sequence slices covered by each repeat entry.

    Command line: ``python <script> cdsfile repfile``; any other argument
    count produces a usage line on stderr and exit status 0.

    Output on stdout: a ``>locus`` header for every locus (sorted), then one
    line for each position dictionary of that locus. A line is a
    ``#``-joined list of ``<slice>:<start>`` items, with ``<start>`` the
    1-based offset of the slice. If a position pair produces an empty or
    inverted slice, the locus and the pair are written to stderr and the
    process exits with status 1.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s cdsfile repfile\n' % sys.argv[0])
        sys.exit(0)

    repTable = {}
    # readRep's return value is unused; it is expected to populate repTable.
    ctIO.readRep(sys.argv[2], repTable)
    sortedLoci = sorted(repTable.keys())
    cdsTable = ctIO.readFasta(sys.argv[1], sortedLoci)

    for locus in sortedLoci:
        sys.stdout.write('>%s\n' % locus)
        cdsSeq = cdsTable[locus]
        for positions in repTable[locus]:
            parts = []
            for pair in sorted(positions.keys()):
                # 1-based positions scaled by three -- presumably residue
                # coordinates projected onto the CDS (TODO confirm).
                lo = (pair[0] - 1) * 3
                hi = pair[1] * 3
                if lo >= hi:
                    sys.stderr.write('%s %s\n' % (locus, pair))
                    sys.exit(1)
                # Label with the 1-based slice start (was start + 3 before
                # the 2011-08-25 patch).
                parts.append(cdsSeq[lo:hi] + ':' + str(lo + 1))
            sys.stdout.write('#'.join(parts) + '\n')
Пример #6
0
def main():
    """Run the codon statistics pipeline on a sequence file and a repeat file.

    Command line: ``python <script> seq rep``; any other argument count
    produces a usage line on stderr and exit status 0.

    Calls, in order: ``codonSet``, ``readFasta``, ``readRep``,
    ``originalSta``, ``totalNumberProrep``, ``singlRatioProRep``,
    ``repOrNot``. The last three also receive the repeat file name.
    """
    sys.stderr.write("Print the result to three files\n")
    if len(sys.argv) != 3:
        sys.stderr.write('Using python %s seq rep\n' % sys.argv[0])
        sys.exit(0)

    repName = sys.argv[2]
    codonNames = codonSet()
    cdsTable = readFasta(sys.argv[1])
    repTable = {}
    # readRep's return value is unused; it is expected to populate repTable.
    readRep(repName, repTable)
    repCodons, seqCodons = originalSta(repTable, cdsTable)

    # Within proteins that have repeats (original author's notes): compare
    # codons inside the repeats with codons in the remaining sequence; bar
    # graph of the counts, heatmap of each protein's codon ratios.
    seqCodonTotals = totalNumberProrep(repCodons, seqCodons, repName)
    singlRatioProRep(repCodons, seqCodons, repName, codonNames)

    # Proteins without repeats versus proteins with their repeats removed
    # (original author's notes).
    repOrNot(seqCodonTotals, repCodons, seqCodons, repName, codonNames)
Пример #7
0
def main():
    """Print each distinct sequence of a FASTA file once.

    Command line: ``python <script> filename``; any other argument count
    produces a usage line on stderr and exit status 0.

    Records are visited in the iteration order of the mapping returned by
    ``readFasta``. The first record seen with a given sequence is written to
    stdout as ``>name`` followed by the sequence; later records with the
    same sequence are skipped.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    alreadyPrinted = set()
    for name, sequence in readFasta(sys.argv[1]).items():
        if sequence in alreadyPrinted:
            continue
        alreadyPrinted.add(sequence)
        sys.stdout.write('>%s\n%s\n' % (name, sequence))
Пример #8
0
def main():
    """Split a multi-sequence FASTA file into one file per sequence.

    Command line: ``python <script> filename``; any other argument count
    produces a usage line on stderr and exit status 0.

    Every record returned by ``readFasta`` is written to a file in the
    current directory named after the record key, containing ``>key`` and
    the sequence on the following line.
    """
    sys.stderr.write("Print the result to files\n")
    # The original literal used a backslash continuation inside the string,
    # which glued "to" and "multiple" together; the space is restored here.
    sys.stderr.write("Split a multiple sequence fasta file to "
                     "multiple files with one sequence each\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    seqDict = readFasta(sys.argv[1])
    for key, value in seqDict.items():
        # NOTE(review): the key is used verbatim as the output path, so a
        # key containing a path separator writes outside the current
        # directory -- confirm the input is trusted.
        # "with" closes the handle even if the write fails.
        with open(key, 'w') as fh:
            fh.write('>%s\n%s\n' % (key, value))
Пример #9
0
def main():
    """Write every distinct sequence of a FASTA file to stdout once.

    Command line: ``python <script> filename``. With any other argument
    count, a banner and a usage line are written to stderr and the process
    exits with status 0; on a normal run nothing is written to stderr here.

    Records come from ``readFasta`` and are visited in the iteration order
    of the mapping it returns. Only the first record carrying a given
    sequence is printed, as ``>name`` followed by the sequence.
    """
    if len(sys.argv) != 2:
        sys.stderr.write("Print the result to screen\n")
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    emitted = set()
    for header, residues in readFasta(sys.argv[1]).items():
        if residues in emitted:
            continue
        emitted.add(residues)
        sys.stdout.write('>%s\n%s\n' % (header, residues))
Пример #10
0
def main():
    """Write each record of a multi-sequence FASTA file to its own file.

    Command line: ``python <script> filename``; any other argument count
    produces a usage line on stderr and exit status 0.

    For every ``(key, sequence)`` pair returned by ``readFasta`` a file
    named ``key`` is created in the current directory, holding ``>key`` and
    the sequence on the next line.
    """
    sys.stderr.write("Print the result to files\n")
    # A backslash continuation inside the original string literal dropped
    # the space between "to" and "multiple"; adjacent literals keep it.
    sys.stderr.write("Split a multiple sequence fasta file to "
                     "multiple files with one sequence each\n")

    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    seqDict = readFasta(sys.argv[1])
    for key, value in seqDict.items():
        # NOTE(review): the record key doubles as the file path; a key with
        # a path separator would escape the current directory -- confirm the
        # input is trusted.
        # The context manager closes the file even when the write raises.
        with open(key, 'w') as fh:
            fh.write('>%s\n%s\n' % (key, value))
Пример #11
0
def main():
    print >>sys.stderr, "Print the result to screen"
    if len(sys.argv) != 4:
        print >>sys.stderr, 'Using python %s filename subjS atseq' % sys.argv[0]
        sys.exit(0)
    #---------------------------------------------------
    subjSDict = readSubjS(sys.argv[2])
    atDict = readFasta(sys.argv[3])
    at = 1
    for line in open(sys.argv[1]):
        if line[0] == '=':
            group = line[1:].split()[1]
            at = 1  #label the following locus is Arabidopsis
        elif line[0] == '>':
            if at:
                locus = (line[1:].rsplit('.', 1))[0]
                seq = atDict[locus]
                at = 0
            else:
                locus = line[1:-1]
                seq = subjSDict[locus]
            #--------------------------------
        else: