示例#1
0
def main():
    """Collect the paired regions listed in a prospero report and pass them to outputRep.

    Command line (from the usage text): pep prospero outputfile [overlap percentage]

    sys.argv[1] is loaded with readFasta(); the result is indexed by locus
    name and each value is sliced like a sequence.
    sys.argv[2] is read line by line:
      * a line starting with 'using sequence1' selects the current locus
        (its last whitespace-separated token);
      * a line starting with '>' must contain two 'from A to B' ranges; the
        four numbers are used as 1-based inclusive coordinates into the
        current sequence.
    sys.argv[4], when exactly five arguments are given, is a float limit:
    a pair is dropped when (pos2 - pos3 + 1) / (pos4 - pos1 + 1) exceeds it.
    The collected {locus: [{(start, end): subsequence, ...}, ...]} mapping
    is handed to outputRep() together with sys.argv[3].

    Exits with status 0 after printing the usage text when fewer than three
    arguments are supplied.
    """
    sys.stderr.write("Print the result to screen" + "\n")
    if len(sys.argv) < 4:
        usage = ('Using python %s pep prospero '
                 'outputfile [overlap percentage]') % sys.argv[0]
        sys.stderr.write(usage + "\n")
        sys.exit(0)
    #---ori--------------------------------------------
    # Raw string so that \d is a regex escape, not a string escape.
    pat = re.compile(r">.+?from (\d+) to (\d+).+?from (\d+) to (\d+) ")
    # Parse the optional limit once, up front, instead of on every '>' line;
    # a malformed value now fails before any file is read.
    max_overlap = float(sys.argv[4]) if len(sys.argv) == 5 else None
    seqDict = readFasta(sys.argv[1])
    repDict = {}
    # 'with' guarantees the report file is closed, even on error.
    with open(sys.argv[2]) as report:
        for line in report:
            if line.startswith('using sequence1'):
                locus = line.strip().split()[-1]
                seq = seqDict[locus]
                repDict[locus] = []
            elif line[0] == '>':
                match = pat.match(line)
                if match is None:
                    # Previously this crashed with AttributeError on None.
                    sys.stderr.write(
                        "Skipping unparsable line: %s\n" % line.rstrip())
                    continue
                pos1, pos2, pos3, pos4 = [int(g) for g in match.groups()]
                if max_overlap is not None:
                    ratio = (pos2 - pos3 + 1.0) / (pos4 - pos1 + 1.0)
                    if ratio > max_overlap:
                        continue
                # Coordinates are 1-based inclusive, hence the "- 1" on the
                # slice start only.
                repDict[locus].append({
                    (pos1, pos2): seq[pos1 - 1:pos2],
                    (pos3, pos4): seq[pos3 - 1:pos4],
                })
    #------------------------------------------------
    outputRep(repDict, sys.argv[3])
示例#2
0
def main():
    """Split the records loaded by readRep() into two groups by mean si() score.

    Command line: filename threshold

    readRep(sys.argv[1], ...) fills a mapping of locus -> list of dicts.
    For every dict, si() is applied to each distinct value and the scores
    are averaged. Dicts whose average is <= int(sys.argv[2]) are stored via
    saveDict() in the low-complexity mapping, all others in the regular
    mapping. Both mappings are then written with outputRep() to
    '<basename>.LCSs' and '<basename>.HCSs', where <basename> is the part
    of sys.argv[1] after the last '/'.

    Exits with status 0 after printing the usage text unless exactly two
    arguments are supplied.
    """
    banner = ("Using the average shannonIndex value "
              "of a group sequences to represent the last entropy.")
    sys.stderr.write(banner + "\n")
    sys.stderr.write("Print the result to screen" + "\n")
    if len(sys.argv) != 3:
        usage = ('Using python %s filename'
                 ' threshold(2)[threethe more the high complexity]'
                 ) % sys.argv[0]
        sys.stderr.write(usage + "\n")
        sys.exit(0)
    #-----------------------------------
    # All three mappings share the layout produced by readRep().
    source_path = sys.argv[1]
    loaded = {}
    low_complexity = {}   # groups scoring at or below the threshold
    regular = {}          # every other group
    readRep(source_path, loaded)
    threshold = int(sys.argv[2])
    for locus, groups in loaded.items():
        for group in groups:
            # Duplicate values are scored only once.
            distinct = set(group.values())
            total = 0
            for sequence in distinct:
                total += si(sequence)
            # NOTE(review): under Python 2 this floors if si() returns
            # ints -- confirm si() yields floats.
            mean_score = total / len(distinct)
            bucket = low_complexity if mean_score <= threshold else regular
            saveDict(bucket, locus, group)
    #-------------end all-----------------
    base_name = source_path.split('/')[-1]
    outputRep(low_complexity, base_name + '.LCSs')
    outputRep(regular, base_name + '.HCSs')
示例#3
0
def main():
    """Extract the region pairs named in a prospero report and pass them to outputRep.

    Command line (from the usage text): pep prospero outputfile [overlap percentage]

    Steps performed:
      1. readFasta(sys.argv[1]) supplies a mapping indexed by locus whose
         values are sliced like sequences.
      2. sys.argv[2] is scanned line by line. A 'using sequence1' line sets
         the current locus to its last whitespace-separated token. A '>'
         line is expected to carry two 'from A to B' ranges, read as
         1-based inclusive coordinates into the current sequence.
      3. If exactly five arguments are given, float(sys.argv[4]) is a
         limit: a pair is skipped when
         (pos2 - pos3 + 1) / (pos4 - pos1 + 1) is greater than it.
      4. outputRep() receives the resulting
         {locus: [{(start, end): subsequence, ...}, ...]} and sys.argv[3].

    Exits with status 0 after printing the usage text when fewer than three
    arguments are supplied.
    """
    sys.stderr.write("Print the result to screen" + "\n")
    if len(sys.argv) < 4:
        usage = ('Using python %s pep prospero '
                 'outputfile [overlap percentage]') % sys.argv[0]
        sys.stderr.write(usage + "\n")
        sys.exit(0)
    #---ori--------------------------------------------
    # Raw string keeps \d as a regex escape rather than a string escape.
    pat = re.compile(r">.+?from (\d+) to (\d+).+?from (\d+) to (\d+) ")
    # The limit is loop-invariant: convert it once, and fail early on a
    # malformed value.
    if len(sys.argv) == 5:
        max_overlap = float(sys.argv[4])
    else:
        max_overlap = None
    seqDict = readFasta(sys.argv[1])
    repDict = {}
    # Context manager closes the report file; the bare open() leaked it.
    with open(sys.argv[2]) as report:
        for line in report:
            if line.startswith('using sequence1'):
                locus = line.strip().split()[-1]
                seq = seqDict[locus]
                repDict[locus] = []
            elif line[0] == '>':
                match = pat.match(line)
                if match is None:
                    # Report and skip instead of failing on None.group().
                    sys.stderr.write(
                        "Skipping unparsable line: %s\n" % line.rstrip())
                    continue
                pos1, pos2, pos3, pos4 = [int(g) for g in match.groups()]
                if max_overlap is not None and \
                        (pos2 - pos3 + 1.0) / (pos4 - pos1 + 1.0) > max_overlap:
                    continue
                tmpDict = {}
                # 1-based inclusive coordinates -> 0-based half-open slice.
                tmpDict[(pos1, pos2)] = seq[pos1 - 1:pos2]
                tmpDict[(pos3, pos4)] = seq[pos3 - 1:pos4]
                repDict[locus].append(tmpDict)
    #------------------------------------------------
    outputRep(repDict, sys.argv[3])
示例#4
0
def main():
    """Load sys.argv[1] with readRep() and write it back out with outputRep().

    The output name is sys.argv[1] with 'ctIO.test' appended. Exits with
    status 0 after printing the usage text unless exactly one argument is
    supplied.
    """
    sys.stderr.write("Print the result to screen" + "\n")
    if len(sys.argv) != 2:
        usage = 'Using python %s filename' % sys.argv[0]
        sys.stderr.write(usage + "\n")
        sys.exit(0)
    #------------------------------
    source_path = sys.argv[1]
    loaded = {}
    # readRep() fills the mapping passed to it in place.
    readRep(source_path, loaded)
    outputRep(loaded, source_path + 'ctIO.test')
示例#5
0
def main():
    """Round-trip one file: readRep() it, then outputRep() the result.

    Expects exactly one argument, the input filename; otherwise prints the
    usage text and exits with status 0. The result is written under the
    input name with 'ctIO.test' appended.
    """
    sys.stderr.write("Print the result to screen" + "\n")
    argc = len(sys.argv)
    if argc != 2:
        sys.stderr.write('Using python %s filename' % sys.argv[0] + "\n")
        sys.exit(0)
    #------------------------------
    in_name = sys.argv[1]
    out_name = in_name + 'ctIO.test'
    records = {}
    # The mapping is populated by readRep() as a side effect.
    readRep(in_name, records)
    outputRep(records, out_name)