def make_consensus(bam_file, ref_file, freebayes_vcf):
    """Build a consensus sequence per reference segment.

    Groups the freebayes VCF rows by CHROM, pairs each group with the
    matching FASTA reference (both sorted by id), applies the substitutions
    via `swap_base` and patches gaps/inserts via `gap_fill_ref`.

    :return: pairs of (CHROM id, consensus string), one per segment
    """
    references = sorted(SeqIO.parse(ref_file, 'fasta'), key=lambda rec: rec.id)
    vcf_df = bioframes.load_vcf(freebayes_vcf)
    chrom_ids, grouped = zip(*vcf_df.groupby('CHROM'))
    segment_frames = [fix_fb_df(frame) for frame in grouped]
    # every VCF CHROM must have a matching reference record, in the same order
    assert list(pluck_attr('id', references)) == sorted(chrom_ids)
    reference_seqs = (str(rec.seq) for rec in references)
    segment_frames = sorted(segment_frames, key=lambda frame: frame.CHROM.iloc[0])

    def build_segment(original_ref, frame):
        # apply each POS/REF/ALT substitution, accumulating the running offset
        patched_ref, offset = reduce(swap_base,
                                     zip(frame.POS, frame.REF, frame.ALT),
                                     (original_ref, 0))
        # currently only handle inserts, > 0
        inserts = frame[frame.OFF > 0]
        pileup_positions = zero_coverage_positions(bam_file, ref_file)
        return gap_fill_ref(original_ref, patched_ref, pileup_positions,
                            inserts.OFF, inserts.POS, offset)

    return zip(sorted(chrom_ids), map(build_segment, reference_seqs, segment_frames))
def _write_all_lines(self, cr, uid, ids, vals_to_write, context=None): stmt_line_obj = self.pool['bank.acc.rec.statement.line'] for stmt in self.browse(cr, uid, ids, context=context): stmt_line_ids = map( lambda x: x.id, stmt.credit_move_line_ids + stmt.debit_move_line_ids) stmt_line_obj.write(cr, uid, stmt_line_ids, vals_to_write, context=context) return True
def onchange_account_id(self, cr, uid, ids, account_id, ending_date,
                        suppress_ending_date_filter,
                        keep_previous_uncleared_entries, context=None):
    """Onchange for the statement's GL account.

    Resets any journal items previously attached to the statement, deletes the
    old statement lines, then re-searches candidate journal items for the
    selected account and returns them grouped into credit/debit one2many
    values, together with the recomputed starting balance.

    :return: dict with a 'value' key holding 'credit_move_line_ids',
             'debit_move_line_ids' and 'starting_balance'
    """
    aml_obj = self.pool['account.move.line']
    stmt_line_obj = self.pool['bank.acc.rec.statement.line']
    val = {'value': {'credit_move_line_ids': [], 'debit_move_line_ids': []}}
    # no account selected: return the emptied one2many values unchanged
    if not account_id:
        return val
    for statement in self.browse(cr, uid, ids, context=context):
        # detach journal items from this statement (raw SQL to bypass ORM write checks)
        cr.execute(''' UPDATE account_move_line SET draft_assigned_to_statement=False, cleared_bank_account=False, bank_acc_rec_statement_id=NULL WHERE bank_acc_rec_statement_id=%s''', (statement.id, ))
        # drop the statement's existing lines; they are rebuilt from the search below
        cr.execute(''' DELETE FROM bank_acc_rec_statement_line WHERE statement_id=%s''', (statement.id, ))
    # Apply filter on move lines to allow
    # 1. credit and debit side journal items in posted
    # state of the selected GL account
    # 2. Journal items which are not assigned to
    # previous bank statements
    # 3. Date less than or equal to ending date provided the
    # 'Suppress Ending Date Filter' is not checked
    # get previous uncleared entries
    domain = [('account_id', '=', account_id),
              ('move_id.state', '=', 'posted'),
              ('cleared_bank_account', '=', False)]
    if not keep_previous_uncleared_entries:
        domain += [('draft_assigned_to_statement', '=', False)]
    if not suppress_ending_date_filter:
        domain += [('date', '<=', ending_date)]
    if keep_previous_uncleared_entries:
        # keep only lines whose previous statement is done
        # (curry pre-binds cr/uid/context; the move line is supplied by filter)
        keep = curry(self.is_stmt_done, cr, uid, context=context)
    else:
        keep = lambda x: True
    aml_search = curry(aml_obj.search, cr, uid, context=context)
    aml_browse = curry(aml_obj.browse, cr, uid, context=context)
    # group key: 'credit_move_line_ids' for 'cr' lines, else 'debit_move_line_ids'
    is_credit = (lambda l: '%s_move_line_ids'
                 % (l['type'] == 'cr' and 'credit' or 'debit'))
    # search -> browse -> filter -> to write-values -> group by credit/debit key
    # (filter/map/groupby here are the curried toolz variants, not the builtins)
    val['value'].update(
        pipe(domain, aml_search, aml_browse, filter(keep),
             map(self._get_move_line_write), groupby(is_credit)))
    val['value'].update({
        'starting_balance': self._get_starting_balance(
            cr, uid, ids and ids[0] or 0, account_id, context=context)
    })
    return val
def zero_coverage_positions(bam_file, ref_file):
    """Return the reference positions (mpileup column 2, 1-based) present in
    the pileup of ``bam_file`` against ``ref_file``.

    NOTE(review): without ``-a``, mpileup emits only positions that have
    coverage, which seems at odds with this function's name — confirm against
    what gap_fill_ref expects.
    """
    pileup_lines = sh.samtools('mpileup', bam_file, f=ref_file, _iter=True)
    return [int(line.split()[1]) for line in pileup_lines]
def make_dict(classes):
    """Map each class's ``__name__`` to the class itself.

    Bug fix: the original read ``dict(zip(map(call('__name__'), classes,
    classes)))`` — the closing parenthesis was misplaced, so ``zip`` received a
    single iterable (yielding 1-tuples) and ``map`` called a unary callable
    with two arguments; the name→class pairing never happened.

    :param classes: iterable of classes (or anything with ``__name__``)
    :return: dict of {name: class}
    """
    return dict((cls.__name__, cls) for cls in classes)
# and the VCFs have multiple references. need to group by reference def fix_fb_df(df): #Freebayes only ever reports one ALT? df.ALT = df.ALT.apply(lambda x: x[0]) # the vcf library reports alts as _Substitution/whatever objects. extract the string. df.REF, df.ALT = df.REF.apply(str), df.ALT.apply(str) '''#TODO: this re-definition of ambiguous bases translates mult-base sections (e.g. AC) into single base alts or something''' ambiguous = ((df.AO / df.DP.apply(float)) < 0.8) #have to use .loc for assignment or else get shallow copy warning #NOTE: temporarily removed the amiguous base business. #df.loc[ambiguous, 'ALT'] = list(map(get_degen, zip(df.loc[ambiguous].REF, df.loc[ambiguous].ALT))) df['OFF'] = df.ALT.apply(len) - df.REF.apply(len) return df pluck_attr = lambda a, A: map(attrgetter(a), A) #string_to_fasta = '>FreebayesConseunsus\n'.__add__ string_to_fasta = '>Freebayes {0}\n{1}'.format def make_consensus(bam_file, ref_file, freebayes_vcf): ''':retrurn str''' refs = list(SeqIO.parse(ref_file, 'fasta')) df = bioframes.load_vcf(freebayes_vcf) ids, raw_segment_dfs = zip(*df.groupby('CHROM')) segment_dfs = map(fix_fb_df, raw_segment_dfs) refs = sorted(refs, key=lambda x: x.id) assert list(pluck_attr('id', refs)) == sorted(ids) ref_seqs = (str(s.seq) for s in refs) #pluck_attr('seq', refs) segment_dfs = sorted(segment_dfs, key=lambda x: x.CHROM.iloc[0]) def process(original_ref, df): new_ref, off = reduce(swap_base, zip(df.POS, df.REF, df.ALT), (original_ref, 0))