def main(args):
    """Print the SSR table in ``args['file']`` with compound entries split up.

    Every line except the header (the line starting with ``"ID\\t"``) is
    parsed with ``MisaSSR``.  Records whose ``type`` is neither ``"c"`` nor
    ``"c*"`` are printed unchanged via ``to_s()``.  For the remaining records,
    each ``(MOTIF)N`` component found in ``pattern`` is printed as its own
    tab-separated row: gene id, SSR number, ``"p" + motif length``, the
    component text, its length in bases, start position and end position.

    Args:
        args: mapping with a ``'file'`` entry naming the input file.
    """
    # One component is "(MOTIF)N", optionally followed by "*".
    component_re = re.compile(r"\(([ATGC]+)\)(\d+)[*]{0,1}")

    # The context manager closes the file even if parsing a line fails.
    with open(args['file']) as fo:
        for line in fo:
            if line.startswith("ID\t"):
                continue
            m = MisaSSR(line)
            if m.type not in ("c", "c*"):
                # Single-argument print(...) is valid as a Python 2 statement too.
                print(m.to_s())
                continue

            # Components are laid out back to back starting at the record's
            # start position; any pattern text between components is ignored.
            startpos = m.startpos
            for component in component_re.finditer(m.pattern):
                motif = component.group(1)
                length = len(motif) * int(component.group(2))
                endpos = startpos + length - 1
                print("\t".join([
                    m.geneid,
                    str(m.ssrnr),
                    "p" + str(len(motif)),
                    component.group(0),
                    str(length),
                    str(startpos),
                    str(endpos),
                ]))
                startpos = endpos + 1
def get_ssrs(file):
    """Read an SSR table and index its records by gene id and start position.

    The header line (starting with ``"ID\\t"``) is skipped; every other line
    is parsed with ``MisaSSR``.

    Args:
        file: path of the file to read.

    Returns:
        dict mapping ``"<geneid>|<startpos>"`` to the parsed record.  If two
        lines produce the same key, the later one replaces the earlier one.
    """
    ssrs_by_key = {}
    # The context manager closes the file even if parsing a line fails.
    with open(file) as fo:
        for line in fo:
            if line.startswith("ID\t"):
                continue
            m = MisaSSR(line)
            ssrs_by_key[m.geneid + "|" + str(m.startpos)] = m
    return ssrs_by_key
def get_ssrs(file):
    """Read an SSR table and group its records by gene id.

    The header line (starting with ``"ID\\t"``) is skipped; every other line
    is parsed with ``MisaSSR``.

    Args:
        file: path of the file to read.

    Returns:
        ``defaultdict(list)`` mapping each gene id to the list of records
        parsed for it, in file order.
    """
    ssrs_by_gene = defaultdict(list)
    # The context manager closes the file even if parsing a line fails.
    with open(file) as fo:
        for line in fo:
            if line.startswith("ID\t"):
                continue
            m = MisaSSR(line)
            ssrs_by_gene[m.geneid].append(m)
    return ssrs_by_gene
def main(args):
    """Write the SSR table from ``args['file']`` to stdout, splitting compounds.

    The header line (starting with ``"ID\\t"``) is skipped and every other
    line is parsed with ``MisaSSR``.  A record whose ``type`` is not ``"c"``
    or ``"c*"`` is echoed through ``to_s()``.  Otherwise one tab-separated row
    is printed per ``(MOTIF)N`` component of ``pattern``, holding: gene id,
    SSR number, ``"p" + motif length``, the component text, its length in
    bases, start position and end position.

    Args:
        args: mapping with a ``'file'`` entry naming the input file.
    """
    # Groups: 1 = motif letters, 2 = repeat count; a trailing "*" is optional.
    part_re = re.compile(r"\(([ATGC]+)\)(\d+)[*]{0,1}")

    # "with" guarantees the handle is released, including on a parse error.
    with open(args['file']) as fo:
        for line in fo:
            if line.startswith("ID\t"):
                continue
            m = MisaSSR(line)
            if m.type not in ("c", "c*"):
                # Parenthesised single-argument print also works in Python 2.
                print(m.to_s())
                continue

            # Each component starts right after the previous one ends; text
            # in the pattern that is not a "(MOTIF)N" component is skipped.
            startpos = m.startpos
            for part in part_re.finditer(m.pattern):
                motif = part.group(1)
                length = len(motif) * int(part.group(2))
                endpos = startpos + length - 1
                fields = [
                    m.geneid,
                    str(m.ssrnr),
                    "p" + str(len(motif)),
                    part.group(0),
                    str(length),
                    str(startpos),
                    str(endpos),
                ]
                print("\t".join(fields))
                startpos = endpos + 1
def get_ssrs(file):
    """Read an SSR table, group its records by gene id, and report the count.

    The header line (starting with ``"ID\\t"``) is skipped; every other line
    is parsed with ``MisaSSR``.  After reading, the number of records parsed
    is written to stderr as ``"read N microsatellites"``.

    Args:
        file: path of the file to read.

    Returns:
        ``defaultdict(list)`` mapping each gene id to the list of records
        parsed for it, in file order.
    """
    ssrs_by_gene = defaultdict(list)
    # Count records, not dict keys: one gene id may carry several records,
    # so the number of keys would under-report what was read.
    total = 0
    # The context manager closes the file even if parsing a line fails.
    with open(file) as fo:
        for line in fo:
            if line.startswith("ID\t"):
                continue
            m = MisaSSR(line)
            ssrs_by_gene[m.geneid].append(m)
            total += 1
    sys.stderr.write("read %s microsatellites\n" % total)
    return ssrs_by_gene