Exemplo n.º 1
0
def main(args):
    """Print SSR records, expanding compound SSRs into their components.

    Reads the file named by ``args['file']`` line by line and parses each
    line with ``MisaSSR``. Lines starting with ``"ID\\t"`` (presumably the
    header row) are skipped. Records whose type is neither ``"c"`` nor
    ``"c*"`` are printed unchanged via ``to_s()``. For the remaining
    (compound) records, every ``(MOTIF)COUNT`` component found in the
    pattern is printed as its own tab-separated row:

        geneid, ssrnr, "p" + motif length, component text,
        component length, start position, end position

    Component positions are assigned back to back, starting at the
    record's ``startpos``.

    Args:
        args: mapping that provides the input path under the key 'file'.
    """
    # One component of a compound pattern: "(MOTIF)COUNT" plus an optional
    # trailing "*". Capturing motif and count avoids re-slicing the match.
    component_re = re.compile(r"\(([ATGC]+)\)(\d+)[*]{0,1}")

    # The context manager closes the input even if parsing or printing fails.
    with open(args['file']) as fo:
        for line in fo:
            if line.startswith("ID\t"):
                continue
            m = MisaSSR(line)
            if m.type not in ("c", "c*"):
                print(m.to_s())
                continue

            startpos = m.startpos
            for component in component_re.finditer(m.pattern):
                motif = component.group(1)
                repeats = int(component.group(2))
                length = len(motif) * repeats
                endpos = startpos + length - 1
                print("\t".join([
                    m.geneid,
                    str(m.ssrnr),
                    "p" + str(len(motif)),
                    component.group(0),
                    str(length),
                    str(startpos),
                    str(endpos),
                ]))
                # The next component starts right after this one.
                startpos = endpos + 1
Exemplo n.º 2
0
def get_ssrs(file):
    """Load SSR records from ``file`` into a dict keyed by gene and position.

    Each line is parsed with ``MisaSSR``; lines starting with ``"ID\\t"``
    (presumably the header row) are skipped. The key is
    ``geneid + "|" + str(startpos)``, so a later record with the same gene
    id and start position replaces an earlier one.

    Args:
        file: path of the input file to read.

    Returns:
        dict mapping "geneid|startpos" strings to MisaSSR objects.
    """
    ssrs = {}
    # `with` guarantees the file is closed even if MisaSSR() raises.
    with open(file) as fo:
        for line in fo:
            if line.startswith("ID\t"):
                continue
            m = MisaSSR(line)
            ssrs[m.geneid + "|" + str(m.startpos)] = m
    return ssrs
Exemplo n.º 3
0
def get_ssrs(file):
  """Load SSR records from ``file`` grouped by gene id.

  Each line is parsed with ``MisaSSR``; lines starting with ``"ID\\t"``
  (presumably the header row) are skipped. Records are appended in file
  order to the list stored under their ``geneid``.

  Args:
    file: path of the input file to read.

  Returns:
    defaultdict(list) mapping each gene id to its list of MisaSSR objects.
  """
  ssrs_by_gene = defaultdict(list)
  # `with` guarantees the file is closed even if MisaSSR() raises.
  with open(file) as fo:
    for line in fo:
      if line.startswith("ID\t"):
        continue
      m = MisaSSR(line)
      ssrs_by_gene[m.geneid].append(m)
  return ssrs_by_gene
Exemplo n.º 4
0
def main( args ):
  """Print SSR records, expanding compound SSRs into their components.

  Reads the file named by ``args['file']`` line by line and parses each
  line with ``MisaSSR``. Lines starting with ``"ID\\t"`` (presumably the
  header row) are skipped. Records whose type is neither ``"c"`` nor
  ``"c*"`` are printed unchanged via ``to_s()``. For the remaining
  (compound) records, every ``(MOTIF)COUNT`` component found in the
  pattern is printed as its own tab-separated row:

    geneid, ssrnr, "p" + motif length, component text,
    component length, start position, end position

  Component positions are assigned back to back, starting at the
  record's ``startpos``.

  Args:
    args: mapping that provides the input path under the key 'file'.
  """
  # One component of a compound pattern: "(MOTIF)COUNT" plus an optional
  # trailing "*". Capturing motif and count avoids re-slicing the match.
  component_re = re.compile(r"\(([ATGC]+)\)(\d+)[*]{0,1}")

  # The context manager closes the input even if parsing or printing fails.
  with open(args['file']) as fo:
    for line in fo:
      if line.startswith("ID\t"):
        continue
      m = MisaSSR(line)
      if m.type not in ("c", "c*"):
        print(m.to_s())
        continue

      startpos = m.startpos
      for component in component_re.finditer(m.pattern):
        motif = component.group(1)
        repeats = int(component.group(2))
        length = len(motif) * repeats
        endpos = startpos + length - 1
        fields = [
          m.geneid,
          str(m.ssrnr),
          "p" + str(len(motif)),
          component.group(0),
          str(length),
          str(startpos),
          str(endpos),
        ]
        print("\t".join(fields))
        # The next component starts right after this one.
        startpos = endpos + 1
Exemplo n.º 5
0
def get_ssrs(file):
    """Load SSR records from ``file`` grouped by gene id.

    Each line is parsed with ``MisaSSR``; lines starting with ``"ID\\t"``
    (presumably the header row) are skipped. Records are appended in file
    order to the list stored under their ``geneid``. After reading, the
    number of records loaded is reported on stderr.

    Args:
        file: path of the input file to read.

    Returns:
        defaultdict(list) mapping each gene id to its list of MisaSSR objects.
    """
    ssrs_by_gene = defaultdict(list)
    record_count = 0
    # `with` guarantees the file is closed even if MisaSSR() raises.
    with open(file) as fo:
        for line in fo:
            if line.startswith("ID\t"):
                continue
            m = MisaSSR(line)
            ssrs_by_gene[m.geneid].append(m)
            record_count += 1
    # Report parsed records, not dict keys: the dict is keyed by gene id,
    # so its length would under-count genes that carry several SSRs.
    sys.stderr.write("read %s microsatellites\n" % record_count)
    return ssrs_by_gene