def check_division(data_folder, adaID, fragment, seq_run, qual_min=35,
                   reference='HXB2', maxreads=-1, VERBOSE=0,
                   minor_allele=False):
    '''Check division into fragments: coverage, etc.

    Reads the premap reference for the fragment, collects allele counts from
    the divided BAM file and plots the coverage.

    Parameters:
       data_folder: folder passed on to the filename helpers
       adaID: adapter ID of the sample
       fragment: fragment name (e.g. 'F3a')
       seq_run: sequencing run label, only used in the plot title
       qual_min: minimal quality, forwarded to the allele counting function
       reference: currently unused in this function
       maxreads: maximal number of reads, forwarded to the counting function
       VERBOSE: verbosity level, forwarded to the counting function
       minor_allele: forwarded to plot_coverage

    Returns None; the result is the plot produced by plot_coverage.

    NOTE(review): another definition of check_division appears later in this
    file; confirm which one is meant to be kept.
    '''
    ref_fn = get_reference_premap_filename(data_folder, adaID, fragment)

    # FIXME: old nomenclature for F3a
    if not os.path.isfile(ref_fn) and fragment[:2] == 'F3':
        ref_fn = ref_fn.replace('F3a', 'F3')

    refseq = SeqIO.read(ref_fn, 'fasta')

    # Scan reads
    input_filename = get_divided_filename(data_folder, adaID, fragment,
                                          type='bam')

    # FIXME: old nomenclature for F3a
    if not os.path.isfile(input_filename) and fragment[:2] == 'F3':
        input_filename = input_filename.replace('F3a', 'F3')

    # qual_min used to be accepted but silently dropped: forward it so the
    # caller's threshold is actually applied. Insertions are not needed here.
    counts, _ = get_allele_counts_insertions_from_file(input_filename,
                                                       len(refseq),
                                                       qual_min=qual_min,
                                                       maxreads=maxreads,
                                                       VERBOSE=VERBOSE)

    # Plot results
    title_fields = [('run', seq_run),
                    ('adaID', adaID),
                    ('fragment', fragment),
                    ('maxreads', maxreads)]
    title = ', '.join(label + ' ' + str(value)
                      for label, value in title_fields)
    plot_coverage(counts, suptitle=title, minor_allele=minor_allele)
def get_allele_counts(data_folder, adaID, fragment, VERBOSE=0, maxreads=1e10,
                      qual_min=None):
    '''Extract allele and insert counts from a bamfile

    Parameters:
       data_folder: folder passed on to the filename helpers
       adaID: adapter ID of the sample
       fragment: fragment name
       VERBOSE: verbosity level, forwarded to the counting function
       maxreads: maximal number of reads, forwarded to the counting function
       qual_min: minimal quality, forwarded to the counting function. If None,
                 a module-level qual_min is used when defined, else 35.

    Returns the result of get_allele_counts_insertions_from_file unchanged.

    NOTE(review): another definition of get_allele_counts appears later in
    this file; confirm which one is meant to be kept.
    '''
    # qual_min was a free variable here and raised NameError unless the module
    # happened to define it. Keep that module-level value as the fallback.
    if qual_min is None:
        qual_min = globals().get('qual_min', 35)

    # Read reference
    reffilename = get_consensus_filename(data_folder, adaID, fragment,
                                         trim_primers=True)
    refseq = SeqIO.read(reffilename, 'fasta')

    # Open BAM file
    # Note: the reads should already be filtered of unmapped stuff at this point
    bamfilename = get_mapped_filename(data_folder, adaID, fragment,
                                      type='bam', filtered=True)
    if not os.path.isfile(bamfilename):
        # BAM missing: presumably generated from the matching SAM file
        convert_sam_to_bam(bamfilename)

    # Call lower-level function
    return get_allele_counts_insertions_from_file(bamfilename, len(refseq),
                                                  qual_min=qual_min,
                                                  maxreads=maxreads,
                                                  VERBOSE=VERBOSE)
def get_allele_counts(data_folder, adaID, fragment, VERBOSE=0, maxreads=1e10,
                      qual_min=None):
    """Extract allele and insert counts from a bamfile

    Parameters:
       data_folder: folder passed on to the filename helpers
       adaID: adapter ID of the sample
       fragment: fragment name
       VERBOSE: verbosity level, forwarded to the counting function
       maxreads: maximal number of reads, forwarded to the counting function
       qual_min: minimal quality, forwarded to the counting function. If None,
                 a module-level qual_min is used when defined, else 35.

    Returns the result of get_allele_counts_insertions_from_file unchanged.

    NOTE(review): an earlier definition of get_allele_counts appears in this
    file and is shadowed by this one; confirm the duplicate can be removed.
    """
    # qual_min was a free variable here and raised NameError unless the module
    # happened to define it. Keep that module-level value as the fallback.
    if qual_min is None:
        qual_min = globals().get("qual_min", 35)

    # Read reference
    reffilename = get_consensus_filename(data_folder, adaID, fragment,
                                         trim_primers=True)
    refseq = SeqIO.read(reffilename, "fasta")

    # Open BAM file
    # Note: the reads should already be filtered of unmapped stuff at this point
    bamfilename = get_mapped_filename(data_folder, adaID, fragment,
                                      type="bam", filtered=True)
    if not os.path.isfile(bamfilename):
        # BAM missing: presumably generated from the matching SAM file
        convert_sam_to_bam(bamfilename)

    # Call lower-level function
    return get_allele_counts_insertions_from_file(
        bamfilename,
        len(refseq),
        qual_min=qual_min,
        maxreads=maxreads,
        VERBOSE=VERBOSE,
    )
def check_division(data_folder, adaID, fragment, seq_run, qual_min=35,
                   reference='HXB2', maxreads=-1, VERBOSE=0,
                   minor_allele=False):
    '''Check division into fragments: coverage, etc.

    Reads the premap reference for the fragment, collects allele counts from
    the divided BAM file and plots the coverage.

    Parameters:
       data_folder: folder passed on to the filename helpers
       adaID: adapter ID of the sample
       fragment: fragment name (e.g. 'F3a')
       seq_run: sequencing run label, only used in the plot title
       qual_min: minimal quality, forwarded to the allele counting function
       reference: currently unused in this function
       maxreads: maximal number of reads, forwarded to the counting function
       VERBOSE: verbosity level, forwarded to the counting function
       minor_allele: forwarded to plot_coverage

    Returns None; the result is the plot produced by plot_coverage.

    NOTE(review): an earlier definition of check_division appears in this
    file and is shadowed by this one; confirm the duplicate can be removed.
    '''
    ref_fn = get_reference_premap_filename(data_folder, adaID, fragment)

    # FIXME: old nomenclature for F3a
    if not os.path.isfile(ref_fn) and fragment[:2] == 'F3':
        ref_fn = ref_fn.replace('F3a', 'F3')

    refseq = SeqIO.read(ref_fn, 'fasta')

    # Scan reads
    input_filename = get_divided_filename(data_folder, adaID, fragment,
                                          type='bam')

    # FIXME: old nomenclature for F3a
    if not os.path.isfile(input_filename) and fragment[:2] == 'F3':
        input_filename = input_filename.replace('F3a', 'F3')

    # qual_min used to be accepted but silently dropped: forward it so the
    # caller's threshold is actually applied. Insertions are not needed here.
    counts, _ = get_allele_counts_insertions_from_file(input_filename,
                                                       len(refseq),
                                                       qual_min=qual_min,
                                                       maxreads=maxreads,
                                                       VERBOSE=VERBOSE)

    # Plot results
    title_fields = [('run', seq_run),
                    ('adaID', adaID),
                    ('fragment', fragment),
                    ('maxreads', maxreads)]
    title = ', '.join(label + ' ' + str(value)
                      for label, value in title_fields)
    plot_coverage(counts, suptitle=title, minor_allele=minor_allele)