def get_consensus_with_trim(c_input):
    """Generate a consensus for a seed from trimmed, aligned reads.

    ``c_input`` is a ``(seqs, seed_id, config)`` triple.  ``seqs[0]`` is the
    seed and every other entry is aligned against it with ``get_alignment``.
    ``config`` unpacks to ``(min_cov, K, local_match_count_window,
    local_match_count_threshold, max_n_read, min_idt, edge_tolerance,
    trim_size)``.

    Reads whose alignment status is ``"none"`` are skipped.  Reads with
    ``aln_score > 1000`` and an aligned span longer than 500 are trimmed by
    ``trim_size`` on both ends and kept.  The kept fragments are ordered
    longest first and capped at ``max_n_read`` before the seed is put back
    in front, so up to ``max_n_read + 1`` sequences reach the native call.

    Returns ``(consensus, seed_id)``.
    """
    seqs, seed_id, config = c_input
    (min_cov, K, local_match_count_window, local_match_count_threshold,
     max_n_read, min_idt, edge_tolerance, trim_size) = config

    seed = seqs[0]
    candidates = []
    for seq in seqs[1:]:
        # Only the query-side coordinates, the score and the status are used.
        s1, e1, _s2, _e2, _aln_size, aln_score, c_status = get_alignment(
            seq, seed, edge_tolerance)
        if c_status == "none":
            continue
        # NOTE(review): the original layout was collapsed; the append is
        # assumed to belong to this filter branch -- confirm.
        if aln_score > 1000 and e1 - s1 > 500:
            e1 -= trim_size
            s1 += trim_size
            candidates.append((e1 - s1, seq[s1:e1]))

    # Use the longest alignments first.
    candidates.sort(key=lambda item: -item[0])
    trim_seqs = [fragment for _length, fragment in candidates]
    if len(trim_seqs) > max_n_read:
        trim_seqs = trim_seqs[:max_n_read]
    trim_seqs = [seed] + trim_seqs

    seqs_ptr = (c_char_p * len(trim_seqs))()
    seqs_ptr[:] = trim_seqs
    consensus_data_ptr = falcon.generate_consensus(
        seqs_ptr, len(trim_seqs), min_cov, K,
        local_match_count_window, local_match_count_threshold, min_idt)
    try:
        # string_at already builds a new Python string, so no extra copy.
        consensus = string_at(consensus_data_ptr[0].sequence)
    finally:
        # Release the native result even if reading it failed; a NULL
        # pointer is falsy and is never handed to the free routine.
        if consensus_data_ptr:
            falcon.free_consensus_data(consensus_data_ptr)
    del seqs_ptr
    return consensus, seed_id
def get_consensus_without_trim(c_input):
    """Generate a consensus from the given reads without trimming them.

    ``c_input`` is a ``(seqs, seed_id, config)`` triple and ``config``
    unpacks to ``(min_cov, K, max_n_read, min_idt, edge_tolerance,
    trim_size, min_cov_aln, max_cov_aln)``.  Only ``min_cov``, ``K``,
    ``max_n_read``, ``min_idt`` and ``max_cov_aln`` are used here; the full
    unpack is kept so a malformed ``config`` still fails early.

    When there are more than ``max_n_read`` sequences, the list is replaced
    by the result of ``get_longest_reads(..., sort=True)``.

    Returns ``(consensus, seed_id)``.
    """
    seqs, seed_id, config = c_input
    (min_cov, K, max_n_read, min_idt,
     edge_tolerance, trim_size, min_cov_aln, max_cov_aln) = config

    if len(seqs) > max_n_read:
        seqs = get_longest_reads(seqs, max_n_read, max_cov_aln, sort=True)

    seqs_ptr = (c_char_p * len(seqs))()
    seqs_ptr[:] = seqs
    consensus_data_ptr = falcon.generate_consensus(
        seqs_ptr, len(seqs), min_cov, K, min_idt)
    try:
        # string_at already builds a new Python string, so no extra copy.
        consensus = string_at(consensus_data_ptr[0].sequence)
    finally:
        # Always hand the native result back, but never pass NULL (falsy).
        if consensus_data_ptr:
            falcon.free_consensus_data(consensus_data_ptr)
    del seqs_ptr
    return consensus, seed_id
def get_consensus_without_trim(c_input):
    """Generate a consensus from the first ``max_n_read`` reads, untrimmed.

    ``c_input`` is a ``(seqs, seed_id, config)`` triple and ``config``
    unpacks to ``(min_cov, K, local_match_count_window,
    local_match_count_threshold, max_n_read, min_idt, edge_tolerance,
    trim_size)``.  ``edge_tolerance`` and ``trim_size`` are not used by this
    function; the full unpack is kept so a malformed ``config`` still fails
    early.

    Returns ``(consensus, seed_id)``.
    """
    seqs, seed_id, config = c_input
    (min_cov, K, local_match_count_window, local_match_count_threshold,
     max_n_read, min_idt, edge_tolerance, trim_size) = config

    # Cap the input by simple truncation; input order decides what is kept.
    if len(seqs) > max_n_read:
        seqs = seqs[:max_n_read]

    seqs_ptr = (c_char_p * len(seqs))()
    seqs_ptr[:] = seqs
    consensus_data_ptr = falcon.generate_consensus(
        seqs_ptr, len(seqs), min_cov, K,
        local_match_count_window, local_match_count_threshold, min_idt)
    try:
        # string_at already builds a new Python string, so no extra copy.
        consensus = string_at(consensus_data_ptr[0].sequence)
    finally:
        # Always hand the native result back, but never pass NULL (falsy).
        if consensus_data_ptr:
            falcon.free_consensus_data(consensus_data_ptr)
    del seqs_ptr
    return consensus, seed_id
def get_consensus_without_trim(c_input):
    """Run the native consensus generator on untrimmed reads.

    Args:
        c_input: ``(seqs, seed_id, config)``.  ``config`` is an 8-tuple
            ``(min_cov, K, max_n_read, min_idt, edge_tolerance, trim_size,
            min_cov_aln, max_cov_aln)``; ``edge_tolerance``, ``trim_size``
            and ``min_cov_aln`` are unpacked but not used here.

    Returns:
        ``(consensus, seed_id)`` -- the sequence read back from the native
        result, paired with the unchanged seed id.
    """
    seqs, seed_id, config = c_input
    (min_cov, K, max_n_read, min_idt,
     edge_tolerance, trim_size, min_cov_aln, max_cov_aln) = config

    too_many = len(seqs) > max_n_read
    if too_many:
        # Delegate the down-selection to the shared helper.
        seqs = get_longest_reads(seqs, max_n_read, max_cov_aln, sort=True)

    n_seqs = len(seqs)
    seqs_ptr = (c_char_p * n_seqs)()
    seqs_ptr[:] = seqs
    consensus_data_ptr = falcon.generate_consensus(
        seqs_ptr, n_seqs, min_cov, K, min_idt)
    try:
        # string_at already builds a new Python string, so no extra copy.
        consensus = string_at(consensus_data_ptr[0].sequence)
    finally:
        # Free on every exit path; a NULL pointer is falsy and is skipped.
        if consensus_data_ptr:
            falcon.free_consensus_data(consensus_data_ptr)
    del seqs_ptr
    return consensus, seed_id
def get_consensus_without_trim(c_input):
    """Run the native consensus generator on untrimmed reads, with logging.

    Args:
        c_input: ``(seqs, seed_id, config)``.  ``config`` is an 8-tuple
            ``(min_cov, K, max_n_read, min_idt, edge_tolerance, trim_size,
            min_cov_aln, max_cov_aln)``; ``edge_tolerance``, ``trim_size``
            and ``min_cov_aln`` are unpacked but not used here.

    Returns:
        ``(consensus, seed_id)``.

    Raises:
        AssertionError: if the native call returns a NULL result pointer.
    """
    seqs, seed_id, config = c_input
    LOG.debug('Starting get_consensus_without_trim(len(seqs)=={}, seed_id={})'.format(
        len(seqs), seed_id))
    (min_cov, K, max_n_read, min_idt,
     edge_tolerance, trim_size, min_cov_aln, max_cov_aln) = config

    if len(seqs) > max_n_read:
        seqs = get_longest_reads(seqs, max_n_read, max_cov_aln, sort=True)

    seqs_ptr = (c_char_p * len(seqs))()
    seqs_ptr[:] = seqs
    consensus_data_ptr = falcon.generate_consensus(
        seqs_ptr, len(seqs), min_cov, K, min_idt)
    assert consensus_data_ptr
    try:
        # string_at already builds a new Python string, so no extra copy.
        consensus = string_at(consensus_data_ptr[0].sequence)
    finally:
        # The pointer is known to be non-NULL here; release it on every
        # exit path so a failed read cannot leak the native result.
        LOG.debug(' Freeing1')
        falcon.free_consensus_data(consensus_data_ptr)
    del seqs_ptr
    LOG.debug(' Finishing get_consensus_without_trim(seed_id={})'.format(seed_id))
    return consensus, seed_id
def get_consensus_without_trim(c_input):
    """Run the native consensus generator on untrimmed reads, with logging.

    Args:
        c_input: ``(seqs, seed_id, config)``.  ``config`` is an 8-tuple
            ``(min_cov, K, max_n_read, min_idt, edge_tolerance, trim_size,
            min_cov_aln, max_cov_aln)``; ``edge_tolerance``, ``trim_size``
            and ``min_cov_aln`` are unpacked but not used here.

    Returns:
        ``(consensus, seed_id)``.
    """
    seqs, seed_id, config = c_input
    LOG.debug('Starting get_consensus_without_trim(len(seqs)=={}, seed_id={})'.
              format(len(seqs), seed_id))
    (min_cov, K, max_n_read, min_idt,
     edge_tolerance, trim_size, min_cov_aln, max_cov_aln) = config

    if len(seqs) > max_n_read:
        seqs = get_longest_reads(seqs, max_n_read, max_cov_aln, sort=True)

    seqs_ptr = (c_char_p * len(seqs))()
    seqs_ptr[:] = seqs
    consensus_data_ptr = falcon.generate_consensus(seqs_ptr, len(seqs),
                                                   min_cov, K, min_idt)
    try:
        # string_at already builds a new Python string, so no extra copy.
        consensus = string_at(consensus_data_ptr[0].sequence)
    finally:
        # Release the native result on every exit path.  A NULL pointer is
        # falsy, so it is neither logged as freed nor passed to the library.
        if consensus_data_ptr:
            LOG.debug(' Freeing1')
            falcon.free_consensus_data(consensus_data_ptr)
    del seqs_ptr
    LOG.debug(
        ' Finishing get_consensus_without_trim(seed_id={})'.format(seed_id))
    return consensus, seed_id
def get_consensus_with_trim(c_input):
    """Generate a consensus for a seed from trimmed, aligned reads.

    ``c_input`` is a ``(seqs, seed_id, config)`` triple.  ``seqs[0]`` is the
    seed and every other entry is aligned against it with ``get_alignment``.
    ``config`` unpacks to ``(min_cov, K, max_n_read, min_idt,
    edge_tolerance, trim_size, min_cov_aln, max_cov_aln)``.

    Reads whose alignment status is ``"none"`` are skipped.  Reads with
    ``aln_score > 1000`` and an aligned span longer than 500 are trimmed by
    ``trim_size`` on both ends and kept, longest first, behind the seed.
    If more than ``max_n_read`` non-seed reads remain, the list is passed to
    ``get_longest_reads(..., sort=False)``.

    Returns ``(consensus, seed_id)``.
    """
    seqs, seed_id, config = c_input
    LOG.debug(
        'Starting get_consensus_with_trim(len(seqs)=={}, seed_id={})'.format(
            len(seqs), seed_id))
    (min_cov, K, max_n_read, min_idt,
     edge_tolerance, trim_size, min_cov_aln, max_cov_aln) = config

    seed = seqs[0]
    candidates = []
    for seq in seqs[1:]:
        # Only the query-side coordinates, the score and the status are used.
        s1, e1, _s2, _e2, _aln_size, aln_score, c_status = get_alignment(
            seq, seed, edge_tolerance)
        if c_status == "none":
            continue
        # NOTE(review): the original layout was collapsed; the append is
        # assumed to belong to this filter branch -- confirm.
        if aln_score > 1000 and e1 - s1 > 500:
            e1 -= trim_size
            s1 += trim_size
            candidates.append((e1 - s1, seq[s1:e1]))

    # Use the longest alignments first.
    candidates.sort(key=lambda item: -item[0])
    trim_seqs = [seed] + [fragment for _length, fragment in candidates]

    # The seed does not count towards the read limit.
    if len(trim_seqs) - 1 > max_n_read:
        # Already ordered above, so the helper must not sort again.
        trim_seqs = get_longest_reads(trim_seqs, max_n_read, max_cov_aln,
                                      sort=False)

    seqs_ptr = (c_char_p * len(trim_seqs))()
    seqs_ptr[:] = trim_seqs
    consensus_data_ptr = falcon.generate_consensus(seqs_ptr, len(trim_seqs),
                                                   min_cov, K, min_idt)
    try:
        # string_at already builds a new Python string, so no extra copy.
        consensus = string_at(consensus_data_ptr[0].sequence)
    finally:
        # Release the native result on every exit path.  A NULL pointer is
        # falsy, so it is neither logged as freed nor passed to the library.
        if consensus_data_ptr:
            LOG.debug(' Freeing2')
            falcon.free_consensus_data(consensus_data_ptr)
    del seqs_ptr
    LOG.debug(' Finishing get_consensus_with_trim(seed_id={})'.format(seed_id))
    return consensus, seed_id
def get_consensus_core(seqs, min_cov, K, min_idt, allow_external_mapping):
    """Generate a consensus string from sequence records.

    Args:
        seqs: records exposing ``seq`` (text, encoded to ASCII here) and, for
            the external-mapping path, ``is_mapped``, ``qstart``, ``qend``,
            ``tstart`` and ``tend``.
        min_cov, K, min_idt: forwarded unchanged to the native library.
        allow_external_mapping: when truthy and every record reports
            ``is_mapped``, the records' own coordinates are passed to
            ``generate_consensus_from_mapping``; otherwise the library maps
            the sequences itself via ``generate_consensus``.

    Returns:
        The consensus decoded as ASCII text, or ``''`` when the native call
        returns a NULL result.
    """
    seqs_ptr = (c_char_p * len(seqs))()
    seqs_ptr[:] = [bytes(val.seq, encoding='ascii') for val in seqs]

    # External coordinates are usable only if allowed AND present on every
    # record; all() stops at the first unmapped record.
    use_external_mapping = bool(allow_external_mapping) and all(
        seq.is_mapped for seq in seqs)

    if use_external_mapping:
        LOG.info('Using external mapping coordinates from input.')
        aln_ranges_ptr = (POINTER(falcon_kit.AlnRange) * len(seqs))()
        for i, seq in enumerate(seqs):
            aln_range = falcon_kit.AlnRange(
                seq.qstart, seq.qend, seq.tstart, seq.tend,
                (seq.qend - seq.qstart))
            aln_ranges_ptr[i] = pointer(aln_range)
        consensus_data_ptr = falcon.generate_consensus_from_mapping(
            seqs_ptr, aln_ranges_ptr, len(seqs), min_cov, K, min_idt)
        del aln_ranges_ptr
    else:
        LOG.info('Internally mapping the sequences.')
        consensus_data_ptr = falcon.generate_consensus(
            seqs_ptr, len(seqs), min_cov, K, min_idt)
    del seqs_ptr

    # A NULL result is falsy: nothing to read and nothing to free.
    if not consensus_data_ptr:
        return ''

    try:
        # string_at already builds a new Python bytes object, so no extra copy.
        consensus = string_at(consensus_data_ptr[0].sequence)
    finally:
        # Release the native result even if reading it failed.
        LOG.debug(' Freeing')
        falcon.free_consensus_data(consensus_data_ptr)
    return consensus.decode('ascii')
def get_consensus_with_trim(c_input):
    """Generate a consensus for a seed from trimmed, aligned reads.

    ``c_input`` is a ``(seqs, seed_id, config)`` triple.  ``seqs[0]`` is the
    seed and every other entry is aligned against it with ``get_alignment``.
    ``config`` unpacks to ``(min_cov, K, max_n_read, min_idt,
    edge_tolerance, trim_size, min_cov_aln, max_cov_aln)``.

    Reads whose alignment status is ``"none"`` are skipped.  Reads with
    ``aln_score > 1000`` and an aligned span longer than 500 are trimmed by
    ``trim_size`` on both ends and kept, longest first, behind the seed.
    If more than ``max_n_read`` non-seed reads remain, the list is passed to
    ``get_longest_reads(..., sort=False)``.

    Returns ``(consensus, seed_id)``.

    Raises:
        AssertionError: if the native call returns a NULL result pointer.
    """
    seqs, seed_id, config = c_input
    LOG.debug('Starting get_consensus_with_trim(len(seqs)=={}, seed_id={})'.format(
        len(seqs), seed_id))
    (min_cov, K, max_n_read, min_idt,
     edge_tolerance, trim_size, min_cov_aln, max_cov_aln) = config

    seed = seqs[0]
    candidates = []
    for seq in seqs[1:]:
        # Only the query-side coordinates, the score and the status are used.
        s1, e1, _s2, _e2, _aln_size, aln_score, c_status = get_alignment(
            seq, seed, edge_tolerance)
        if c_status == "none":
            continue
        # NOTE(review): the original layout was collapsed; the append is
        # assumed to belong to this filter branch -- confirm.
        if aln_score > 1000 and e1 - s1 > 500:
            e1 -= trim_size
            s1 += trim_size
            candidates.append((e1 - s1, seq[s1:e1]))

    # Use the longest alignments first.
    candidates.sort(key=lambda item: -item[0])
    trim_seqs = [seed] + [fragment for _length, fragment in candidates]

    # The seed does not count towards the read limit.
    if len(trim_seqs) - 1 > max_n_read:
        # Already ordered above, so the helper must not sort again.
        trim_seqs = get_longest_reads(
            trim_seqs, max_n_read, max_cov_aln, sort=False)

    seqs_ptr = (c_char_p * len(trim_seqs))()
    seqs_ptr[:] = trim_seqs
    consensus_data_ptr = falcon.generate_consensus(
        seqs_ptr, len(trim_seqs), min_cov, K, min_idt)
    assert consensus_data_ptr
    try:
        # string_at already builds a new Python string, so no extra copy.
        consensus = string_at(consensus_data_ptr[0].sequence)
    finally:
        # The pointer is known to be non-NULL here; release it on every
        # exit path so a failed read cannot leak the native result.
        LOG.debug(' Freeing2')
        falcon.free_consensus_data(consensus_data_ptr)
    del seqs_ptr
    LOG.debug(' Finishing get_consensus_with_trim(seed_id={})'.format(seed_id))
    return consensus, seed_id