Exemplo n.º 1
0
def make_consensus(bam_file, ref_file, freebayes_vcf):
    """Pair every CHROM id found in the VCF with its processed reference.

    The FASTA records read from ``ref_file`` and the per-CHROM groups of the
    frame loaded from ``freebayes_vcf`` are both ordered by id so that they
    line up one-to-one.  Each reference sequence then has the variants of its
    group folded in with ``swap_base`` and the result is handed to
    ``gap_fill_ref``.

    :param bam_file: forwarded, together with ``ref_file``, to
        ``zero_coverage_positions``
    :param ref_file: FASTA file parsed with ``SeqIO.parse``
    :param freebayes_vcf: VCF file loaded with ``bioframes.load_vcf``
    :return: ``zip`` of the sorted CHROM ids with the value ``gap_fill_ref``
        returns for the matching reference
    """
    records = list(SeqIO.parse(ref_file, 'fasta'))
    vcf_frame = bioframes.load_vcf(freebayes_vcf)
    ids, raw_groups = zip(*vcf_frame.groupby('CHROM'))
    fixed_groups = map(fix_fb_df, raw_groups)

    def record_id(record):
        return record.id

    def first_chrom(frame):
        return frame.CHROM.iloc[0]

    records = sorted(records, key=record_id)
    # The references and the VCF groups must describe exactly the same ids.
    assert list(pluck_attr('id', records)) == sorted(ids)
    sequences = (str(record.seq) for record in records)
    ordered_groups = sorted(fixed_groups, key=first_chrom)

    def process(original_ref, segment):
        # Fold every (POS, REF, ALT) row into the reference, starting from
        # the untouched sequence and an offset of 0.
        variants = zip(segment.POS, segment.REF, segment.ALT)
        new_ref, off = reduce(swap_base, variants, (original_ref, 0))
        # Only inserts (OFF > 0) are handled for now.
        inserts = segment[segment.OFF > 0]
        insert_offs = inserts.OFF
        insert_pos = inserts.POS
        # NOTE(review): called once per segment with identical arguments.
        pileup_positions = zero_coverage_positions(bam_file, ref_file)
        return gap_fill_ref(original_ref, new_ref, pileup_positions,
                            insert_offs, insert_pos, off)

    id_order = sorted(ids)
    processed = map(process, sequences, ordered_groups)
    return zip(id_order, processed)
Exemplo n.º 2
0
 def process(original_ref, df):
     """Apply the variants in ``df`` to ``original_ref`` and gap-fill it.

     Every (POS, REF, ALT) row is folded into the reference with
     ``swap_base``; the rows whose OFF is positive, together with the
     positions reported by ``zero_coverage_positions``, are then passed to
     ``gap_fill_ref``, whose result is returned.  ``bam_file`` and
     ``ref_file`` come from the enclosing scope.
     """
     variants = zip(df.POS, df.REF, df.ALT)
     # Start the fold from the untouched reference and an offset of 0.
     new_ref, off = reduce(swap_base, variants, (original_ref, 0))
     # Only inserts (OFF > 0) are handled for now.
     inserts = df[df.OFF > 0]
     insert_offs = inserts.OFF
     insert_pos = inserts.POS
     pileup_positions = zero_coverage_positions(bam_file, ref_file)
     return gap_fill_ref(original_ref, new_ref, pileup_positions,
                         insert_offs, insert_pos, off)
Exemplo n.º 3
0
def make_dict(classes):
    """Build a lookup from class name to class object.

    :param classes: iterable of classes (any objects exposing ``__name__``)
    :return: dict mapping each item's ``__name__`` to the item itself; if two
        items share a name, the later one wins
    """
    # The previous version passed ``classes`` twice to ``map`` and gave ``zip``
    # a single iterable, so ``dict`` received 1-tuples and could not build a
    # mapping for any non-empty input.
    return {cls.__name__: cls for cls in classes}