Пример #1
0
def write_genepred_to_fasta_directionless(gpd_filename, ref_fasta, out_fasta):
  """Write the concatenated exon sequence of each genePred entry as FASTA.

  Every line of ``gpd_filename`` that does not start with ``#`` is parsed
  with ``genepred_line_to_dictionary``.  For entries whose ``chrom`` is
  present in the reference, the reference slices
  ``[exonStarts[i]:exonEnds[i]]`` for ``i`` in ``range(exonCount)`` are
  joined in order and written as ``>name`` followed by the sequence.

  The sequence is taken from the reference as-is: no strand handling is
  applied here.  Entries on chromosomes missing from the reference are
  skipped without any message.

  Args:
    gpd_filename: path of the genePred file to read.
    ref_fasta: path of the reference FASTA, loaded with
      ``sequence_basics.read_fasta_into_hash``.
    out_fasta: path of the FASTA file to create (opened in ``'w'`` mode,
      so an existing file is overwritten).
  """
  # The output is opened first (as before) but inside a context manager, so
  # the handle is released even when loading the reference or parsing a
  # line raises.
  with open(out_fasta, 'w') as ofile:
    ref = sequence_basics.read_fasta_into_hash(ref_fasta)
    with open(gpd_filename) as f:
      for line in f:
        if line.startswith('#'):
          continue
        d = genepred_line_to_dictionary(line)
        if d['chrom'] not in ref:
          continue
        chrom_seq = ref[d['chrom']]
        starts = d['exonStarts']
        ends = d['exonEnds']
        # Join once instead of growing the string exon by exon.
        seq = ''.join(chrom_seq[starts[i]:ends[i]]
                      for i in range(d['exonCount']))
        ofile.write(">" + str(d['name']) + "\n" + seq + "\n")
def get_splice_bases(junctions, genome_filename):
    """Collect the two genomic bases read on each side of every junction.

    Args:
        junctions: mapping of junction id to a record with the keys
            ``chr1``, ``coo1``, ``dir1``, ``chr2``, ``coo2`` and ``dir2``.
            The ``coo*`` values are converted with ``int()``.
        genome_filename: FASTA path handed to
            ``sequence_basics.read_fasta_into_hash``; the result is indexed
            by chromosome name.

    Returns:
        dict mapping each junction id to ``"<bases1> <bases2>"`` in upper
        case.  The index windows are:

        * side 1, ``+``: ``coo1`` and ``coo1 + 1``
        * side 1, ``-``: ``coo1 - 3`` and ``coo1 - 2``, passed through
          ``sequence_basics.rc``
        * side 2, ``+``: ``coo2 - 3`` and ``coo2 - 2``
        * side 2, ``-``: ``coo2`` and ``coo2 + 1``, passed through
          ``sequence_basics.rc``

        A side is reported as ``??`` when its direction is neither ``+``
        nor ``-`` or when the window does not lie entirely inside the
        chromosome sequence.

    Note:
        A chromosome is only looked up when its direction is ``+`` or
        ``-``; a name missing from the genome then raises whatever the
        container raises (presumably ``KeyError`` - confirm against
        ``read_fasta_into_hash``).
    """
    genome = sequence_basics.read_fasta_into_hash(genome_filename)
    unknown = "??"

    def two_bases(chrom, start, reverse):
        # Return the bases at start and start + 1, or the placeholder.
        sequence = genome[chrom]
        # The lower bound matters: a negative start would not fail, it
        # would silently wrap around and read from the chromosome's end.
        if start < 0 or len(sequence) <= start + 1:
            return unknown
        pair = sequence[start] + sequence[start + 1]
        return sequence_basics.rc(pair) if reverse else pair

    bases = {}
    for junction_id in junctions:
        junction = junctions[junction_id]
        chr1 = junction["chr1"]
        coo1 = int(junction["coo1"])
        dir1 = junction["dir1"]
        chr2 = junction["chr2"]
        coo2 = int(junction["coo2"])
        dir2 = junction["dir2"]

        bases1 = unknown
        if dir1 == "+":
            bases1 = two_bases(chr1, coo1, False)
        elif dir1 == "-":
            bases1 = two_bases(chr1, coo1 - 3, True)

        bases2 = unknown
        if dir2 == "+":
            bases2 = two_bases(chr2, coo2 - 3, False)
        elif dir2 == "-":
            bases2 = two_bases(chr2, coo2, True)

        bases[junction_id] = bases1.upper() + " " + bases2.upper()
    return bases
Пример #3
0
def get_splice_bases(junctions, genome_filename):
  """Return the dinucleotides read at both sides of each junction.

  For every id in ``junctions`` the record's ``chr1``/``coo1``/``dir1`` and
  ``chr2``/``coo2``/``dir2`` entries select a two-base window in the
  sequences loaded by ``sequence_basics.read_fasta_into_hash``:

    side 1, '+':  indices coo1, coo1+1
    side 1, '-':  indices coo1-3, coo1-2, passed through sequence_basics.rc
    side 2, '+':  indices coo2-3, coo2-2
    side 2, '-':  indices coo2, coo2+1, passed through sequence_basics.rc

  Args:
    junctions: mapping of junction id to a record holding the six keys
      above; the ``coo*`` values are converted with ``int()``.
    genome_filename: path of the genome FASTA file.

  Returns:
    dict of junction id -> ``"<bases1> <bases2>"`` (upper case).  A side is
    ``'??'`` when its direction is neither '+' nor '-' or when its window
    is not completely inside the chromosome sequence.

  Note:
    Chromosomes are looked up only for '+'/'-' directions; an unknown
    chromosome name is not handled here and propagates the container's
    lookup error (presumably ``KeyError`` - confirm against the loader).
  """
  genome = sequence_basics.read_fasta_into_hash(genome_filename)

  def window(chrom, first, revcomp):
    # Fetch the bases at first and first+1, or '??' if out of range.
    chrom_seq = genome[chrom]
    last = first + 1
    # Reject windows that start before index 0: Python would otherwise
    # wrap the negative index and return bases from the far end.
    if first < 0 or last >= len(chrom_seq):
      return '??'
    found = chrom_seq[first] + chrom_seq[last]
    if revcomp:
      return sequence_basics.rc(found)
    return found

  bases = {}
  for jid in junctions:
    entry = junctions[jid]
    chr1 = entry['chr1']
    coo1 = int(entry['coo1'])
    dir1 = entry['dir1']
    chr2 = entry['chr2']
    coo2 = int(entry['coo2'])
    dir2 = entry['dir2']
    bases1 = '??'
    if dir1 == '+':
      bases1 = window(chr1, coo1, False)
    elif dir1 == '-':
      bases1 = window(chr1, coo1-3, True)
    bases2 = '??'
    if dir2 == '+':
      bases2 = window(chr2, coo2-3, False)
    elif dir2 == '-':
      bases2 = window(chr2, coo2, True)
    bases[jid] = bases1.upper() + " " + bases2.upper()
  return bases