def main():
    """Collect the repeat pairs listed in a prospero report and write them out.

    Command line::

        python <script> pep prospero outputfile [overlap percentage]

    * ``sys.argv[1]`` is handed to ``readFasta``; the returned mapping is
      indexed by locus name (presumably locus -> sequence string; confirm
      against ``readFasta``).
    * ``sys.argv[2]`` is the prospero report, parsed line by line.
    * ``sys.argv[3]`` is passed to ``outputRep`` with the collected repeats.
    * ``sys.argv[4]`` (optional) is an upper bound for
      ``(pos2 - pos3 + 1) / (pos4 - pos1 + 1)``; hits above it are skipped.
      The bound is only applied when exactly five arguments are present.

    With fewer than four arguments a usage message is written to stderr and
    the process exits with status 0.

    Raises:
        ValueError: if a line starting with ``>`` does not match the expected
            ``from A to B ... from C to D`` layout, if such a line appears
            before any ``using sequence1`` line, or if the overlap bound is
            not a valid float.
        KeyError: if a locus named in the report is missing from the
            mapping returned by ``readFasta``.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) < 4:
        # Adjacent literals instead of a backslash continuation inside the
        # string, so source indentation does not leak into the message.
        sys.stderr.write(
            'Using python %s pep prospero '
            'outputfile [overlap percentage]\n' % sys.argv[0])
        sys.exit(0)

    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    hit_pat = re.compile(r">.+?from (\d+) to (\d+).+?from (\d+) to (\d+) ")
    # Parse the optional bound once instead of once per hit line.
    max_overlap = float(sys.argv[4]) if len(sys.argv) == 5 else None

    seqDict = readFasta(sys.argv[1])
    repDict = {}
    locus = None
    seq = None
    # Context manager so the report file is closed even if parsing fails.
    with open(sys.argv[2]) as prospero:
        for line in prospero:
            if line.startswith('using sequence1'):
                # The locus name is the last whitespace-separated token.
                locus = line.strip().split()[-1]
                seq = seqDict[locus]
                repDict[locus] = []
            elif line[0] == '>':
                match = hit_pat.match(line)
                if match is None:
                    raise ValueError(
                        'Unrecognized prospero hit line: %r' % line)
                if locus is None:
                    raise ValueError(
                        "Hit line found before any 'using sequence1' "
                        'line: %r' % line)
                pos1, pos2, pos3, pos4 = [int(g) for g in match.groups()]
                if max_overlap is not None:
                    ratio = (pos2 - pos3 + 1.0) / (pos4 - pos1 + 1.0)
                    if ratio > max_overlap:
                        continue
                # Report positions are 1-based and inclusive, hence the
                # ``- 1`` on the slice start only.
                repDict[locus].append({
                    (pos1, pos2): seq[pos1 - 1:pos2],
                    (pos3, pos4): seq[pos3 - 1:pos4],
                })
    outputRep(repDict, sys.argv[3])
def main():
    """Split repeat groups into low- and high-complexity output files.

    Command line::

        python <script> filename threshold

    ``readRep`` is called with ``sys.argv[1]`` and an empty dict that it is
    expected to fill with ``locus -> list of dicts`` (the loops below rely on
    that shape; confirm against ``readRep``). For every dict in those lists
    the ``si`` score of each distinct value is averaged. Groups whose average
    is less than or equal to ``int(threshold)`` are stored via ``saveDict``
    in the low-complexity collection, all others in the regular collection.

    The two collections are handed to ``outputRep`` under the names
    ``<basename>.LCSs`` and ``<basename>.HCSs``, where ``<basename>`` is the
    part of ``sys.argv[1]`` after the last ``/``.

    With an argument count other than three a usage message is written to
    stderr and the process exits with status 0.

    Raises:
        ValueError: if the threshold argument is not a valid integer.
    """
    # Adjacent literals instead of backslash continuations inside the
    # strings, so source indentation does not leak into the messages.
    sys.stderr.write(
        "Using the average shannonIndex value "
        "of a group sequences to represent the last entropy.\n")
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 3:
        sys.stderr.write(
            'Using python %s filename '
            'threshold(2)[threethe more the high complexity]\n'
            % sys.argv[0])
        sys.exit(0)

    # The three dicts share the same structure.
    repDict = {}
    lcsDict = {}      # low complexity sequences
    regularDict = {}  # regular sequences
    readRep(sys.argv[1], repDict)
    lcs = int(sys.argv[2])

    for locus, groups in repDict.items():
        for group in groups:
            # Identical sequences within one group are scored only once.
            unique_seqs = set(group.values())
            if not unique_seqs:
                # Nothing to average; avoid dividing by zero.
                sys.stderr.write(
                    'Skipping empty repeat group for %s\n' % (locus,))
                continue
            entropy = sum(si(seq) for seq in unique_seqs)
            entropy = entropy / len(unique_seqs)
            if entropy <= lcs:
                saveDict(lcsDict, locus, group)
            else:
                saveDict(regularDict, locus, group)

    # Only the file name is kept, so the outputs are named without the
    # directory part of the input path.
    prefile = sys.argv[1].split('/')[-1]
    outputRep(lcsDict, prefile + '.LCSs')
    outputRep(regularDict, prefile + '.HCSs')
def main():
    """Parse a prospero report and write the repeat pairs it describes.

    Command line::

        python <script> pep prospero outputfile [overlap percentage]

    Arguments read from ``sys.argv``:

    1. input for ``readFasta``; its result is looked up by locus name
       (presumably locus -> sequence string; confirm against ``readFasta``),
    2. the prospero report to parse,
    3. the destination passed to ``outputRep``,
    4. optional upper bound for ``(pos2 - pos3 + 1) / (pos4 - pos1 + 1)``;
       hits with a larger ratio are dropped. The bound is honored only when
       exactly five arguments are given.

    If fewer than four arguments are supplied, a usage message goes to
    stderr and the process exits with status 0.

    Raises:
        ValueError: for a ``>`` line that does not follow the
            ``from A to B ... from C to D`` layout, for a ``>`` line that
            precedes every ``using sequence1`` line, or for an overlap bound
            that is not a valid float.
        KeyError: when the report names a locus that ``readFasta`` did not
            return.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) < 4:
        # Built from adjacent literals: a backslash continuation inside the
        # string would embed the source indentation in the message.
        usage = ('Using python %s pep prospero '
                 'outputfile [overlap percentage]\n')
        sys.stderr.write(usage % sys.argv[0])
        sys.exit(0)

    # Raw string keeps "\d" from being treated as a string escape.
    pat = re.compile(r">.+?from (\d+) to (\d+).+?from (\d+) to (\d+) ")
    # Convert the optional threshold once, up front.
    threshold = None
    if len(sys.argv) == 5:
        threshold = float(sys.argv[4])

    seqDict = readFasta(sys.argv[1])
    repDict = {}
    locus = None
    seq = None
    # ``with`` guarantees the report handle is released on every exit path.
    with open(sys.argv[2]) as report:
        for line in report:
            if line.startswith('using sequence1'):
                # Last whitespace-separated token is the locus name.
                locus = line.strip().split()[-1]
                seq = seqDict[locus]
                repDict[locus] = []
                continue
            if line[0] != '>':
                continue

            match = pat.match(line)
            if match is None:
                raise ValueError(
                    'Unrecognized prospero hit line: %r' % line)
            if locus is None:
                raise ValueError(
                    "Hit line found before any 'using sequence1' "
                    'line: %r' % line)

            pos1 = int(match.group(1))
            pos2 = int(match.group(2))
            pos3 = int(match.group(3))
            pos4 = int(match.group(4))
            if threshold is not None and \
                    (pos2 - pos3 + 1.0) / (pos4 - pos1 + 1.0) > threshold:
                continue

            # Coordinates are 1-based inclusive, so only the slice start
            # is shifted.
            tmpDict = {}
            tmpDict[(pos1, pos2)] = seq[pos1 - 1:pos2]
            tmpDict[(pos3, pos4)] = seq[pos3 - 1:pos4]
            repDict[locus].append(tmpDict)
    outputRep(repDict, sys.argv[3])
def main():
    """Read a repeat file and hand the loaded data straight to ``outputRep``.

    Command line::

        python <script> filename

    ``readRep`` is called with the file name and an empty dict (presumably
    filling the dict in place; confirm against ``readRep``). The dict is then
    passed to ``outputRep`` with the file name suffixed by ``ctIO.test``.

    With an argument count other than two a usage message is written to
    stderr and the process exits with status 0.
    """
    sys.stderr.write("Print the result to screen\n")
    if len(sys.argv) != 2:
        sys.stderr.write('Using python %s filename\n' % sys.argv[0])
        sys.exit(0)

    source_path = sys.argv[1]
    repeats = {}
    readRep(source_path, repeats)
    outputRep(repeats, source_path + 'ctIO.test')
def main():
    """Load repeats from the given file and pass them on to ``outputRep``.

    Usage: ``python <script> filename``

    An empty dict is given to ``readRep`` along with the file name
    (presumably populated in place; confirm against ``readRep``), and is
    afterwards forwarded to ``outputRep`` together with
    ``filename + 'ctIO.test'``.

    Any argument count other than two produces a usage line on stderr and
    ends the process with status 0.
    """
    emit = sys.stderr.write
    emit("Print the result to screen\n")

    args = sys.argv
    if len(args) != 2:
        emit('Using python %s filename\n' % args[0])
        sys.exit(0)

    loaded = {}
    readRep(args[1], loaded)
    target = args[1] + 'ctIO.test'
    outputRep(loaded, target)