def select_pileup_range_and_truncate_output(
        self, pileup, startpos, endpos,
        pileup_select_range_expected_out, pileup_truncate_expected_out):
    """
    select_pileup_range_and_truncate_output - Checks that truncating the
    output after first selecting a range of positions gives the expected
    pileups at both stages.

    INPUT:
        [2D ARRAY of DICTIONARIES] [pileup]
        [INT] [startpos]
        [INT] [endpos]
        [2D ARRAY OF DICTIONARIES] [pileup_select_range_expected_out]
            # expected pileups right after the range selection
        [2D ARRAY OF DICTIONARIES] [pileup_truncate_expected_out]
            # expected pileups after the subsequent truncation

    RETURN:
        [None]

    POST:
        Asserts that the pileup array matches the expected output after
        select_pileup_range and again after truncate_output.
    """

    pileup_list = Pileup_List(list(map(Pileup, pileup)))

    # Stage 1: positions before startpos and after endpos should have
    # been ignored.
    pileup_list.select_pileup_range(startpos, endpos)
    after_selection = pileup_list.get_pileups_as_array()
    assert after_selection == pileup_select_range_expected_out

    # Stage 2: the already range-limited pileup is truncated.
    pileup_list.truncate_output()
    after_truncation = pileup_list.get_pileups_as_array()
    assert after_truncation == pileup_truncate_expected_out
def test_remove_no_coverage(self, pileup, expected_truncated_pileup,
                            expected_left_pos_truncated,
                            expected_right_pos_truncated):
    """
    test_remove_no_coverage - Checks that after removing the no coverage
    positions from the pileup the output is as expected.

    INPUT:
        [2D ARRAY OF DICTIONARIES] [pileup] # to have positions removed
        [2D ARRAY OF DICTIONARIES] [expected_truncated_pileup]
            # expected pileups after remove_no_coverage
        [expected_left_pos_truncated]
            # number of contiguous left positions that were truncated;
            # compared against get_num_left_positions_truncated()
        [expected_right_pos_truncated]
            # number of contiguous right positions that were truncated;
            # compared against get_num_right_positions_truncated()

    RETURN:
        [None]

    POST:
        Checks that the expected outputs match the actual output
    """

    pileup_list = Pileup_List(list(map(Pileup, pileup)))
    pileup_list.remove_no_coverage()

    assert pileup_list.get_pileups_as_array() == expected_truncated_pileup

    left_truncated = pileup_list.get_num_left_positions_truncated()
    assert left_truncated == expected_left_pos_truncated

    right_truncated = pileup_list.get_num_right_positions_truncated()
    assert right_truncated == expected_right_pos_truncated
def test_select_pileup_range_and_remove_no_coverage(
        self, pileup, startpos, endpos, expected_remove_no_coverage):
    """
    test_select_pileup_range_and_remove_no_coverage - Checks if the program
    works as expected when removing all no coverage regions after first
    selecting a specified range.

    INPUT:
        [2D ARRAY of DICTIONARIES] [pileup]
        [INT] [startpos]
        [INT] [endpos]
        [2D ARRAY OF DICTIONARIES] [expected_remove_no_coverage]

    RETURN:
        [None]

    POST:
        Asserts that the pileup array obtained after select_pileup_range
        followed by remove_no_coverage equals expected_remove_no_coverage.
    """

    pileup_list = Pileup_List(list(map(Pileup, pileup)))

    # Order matters: restrict to the range first, then drop no coverage
    # positions from what is left.
    pileup_list.select_pileup_range(startpos, endpos)
    pileup_list.remove_no_coverage()

    assert pileup_list.get_pileups_as_array() == expected_remove_no_coverage
def test_truncate_output(self, pileup, expected_truncated_pileup,
                         expected_left_pos_truncated,
                         expected_right_pos_truncated):
    """
    test_truncate_output - Checks that the expected truncated outputs
    match the actual output.

    INPUT:
        [2D ARRAY OF DICTIONARIES] [pileup] # to be truncated
        [2D ARRAY OF DICTIONARIES] [expected_truncated_pileup]
        [expected_left_pos_truncated]
            # number of contiguous left positions that were truncated;
            # compared against get_num_left_positions_truncated()
        [expected_right_pos_truncated]
            # number of contiguous right positions that were truncated;
            # compared against get_num_right_positions_truncated()

    RETURN:
        [None]

    POST:
        Checks that the expected outputs match the actual output
    """

    pileup_list = Pileup_List(list(map(Pileup, pileup)))
    pileup_list.truncate_output()

    assert pileup_list.get_pileups_as_array() == expected_truncated_pileup

    left_truncated = pileup_list.get_num_left_positions_truncated()
    assert left_truncated == expected_left_pos_truncated

    right_truncated = pileup_list.get_num_right_positions_truncated()
    assert right_truncated == expected_right_pos_truncated
def build_pileup_from_haplotypes(haplotypes):
    """
    # ========================================================================

    BUILD PILEUP FROM HAPLOTYPES


    PURPOSE
    -------

    Creates a pileup from a list of Haplotype objects. Every position of
    the pileup maps each base seen at that position to the summed count of
    the haplotypes carrying it.


    INPUT
    -----

    [HAPLOTYPE LIST] [haplotypes]
        A list of haplotypes. Each one is read through its `sequence` and
        `count` attributes. The number of positions is taken from the
        sequence of the first haplotype.


    RETURN
    ------

    [PILEUP] [pileup]
        A pileup object. It is built from an empty list when haplotypes is
        empty.

    # ========================================================================
    """

    # Guard clause: nothing to pile up.
    if not haplotypes:
        return Pileup([])

    length = len(haplotypes[0].sequence)

    # One (initially empty) base -> count dictionary per position.
    columns = [{} for _ in range(length)]

    for haplotype in haplotypes:
        for position, column in enumerate(columns):
            base = haplotype.sequence[position]
            column[base] = column.get(base, 0) + haplotype.count

    return Pileup(columns)
def matrix(self, request):
    """
    matrix - test fixture for test_get_similarity_matrix function
             and test_get_distance_matrix function

    INPUT:
        [LIST OF TUPLES]
        request.param[0]---[BOOL] [normalize] # normalized or not
        request.param[1]---[ARRAY] [pileup list]
        request.param[2]---[ARRAY] [pileup_files] # file names
                                                  # corresponding to
                                                  # the pileups
        request.param[3]---[ARRAY] normalized or unnormalized similarity
                                   csv-format output
        request.param[4]---[ARRAY] normalized or unnormalized distance
                                   csv-format output
        request.param[5]---[INT or NONE] [startpos or default if NONE]
        request.param[6]---[INT or NONE] [endpos or default if NONE]

    RETURN:
        [DistanceMatrix] [matrix with the pileup to be used]

    POST:
        self.expected_csv_distance is now a csv representation of the
        expected distance that should be calculated from this matrix.

        self.expected_csv_similarity is now a csv representation of the
        expected similarity that should be calculated from this matrix.
    """

    params = request.param

    pileups = Pileup_List([Pileup(bam) for bam in params[1]])

    # Only restrict the range when both bounds are exactly of type int
    # (i.e. neither is None). The exact type check is deliberate here:
    # isinstance() would also accept booleans.
    start = params[5]
    if type(start) is int:
        end = params[6]
        if type(end) is int:
            pileups.select_pileup_range(start, end)

    # Normalize only when the flag is literally True.
    if params[0] is True:
        pileups.normalize_pileups()

    # Build the matrix from the numerical pileups and their file names.
    numerical_pileups = pileups.get_pileups_as_numerical_array()
    dist = DistanceMatrix(numerical_pileups, params[2])

    # Stash the expected csv outputs for the tests using this fixture.
    self.expected_csv_similarity = params[3]
    self.expected_csv_distance = params[4]

    return dist
def parse_pileup_from_fasta(reads_location, gaps=False):
    """
    # ========================================================================

    PARSE PILEUP FROM FASTA


    PURPOSE
    -------

    Parses an aligned FASTA file and returns a Pileup object corresponding
    to the aligned FASTA file. Every position of the pileup maps each
    character seen at that position to the number of reads carrying it.


    INPUT
    -----

    [(FASTA) FILE LOCATION] [reads_location]
        The file location of the aligned FASTA file.

    [BOOLEAN] [gaps]
        Whether or not to include gaps in the pileup. This is False by
        default.


    RETURN
    ------

    [Pileup]
        A new pileup object constructed from the information in the aligned
        FASTA file.


    NOTE
    ----

    The number of positions is taken from the first read. The first read is
    fetched with next() without a default value, so an input that yields no
    reads raises StopIteration.

    # ========================================================================
    """

    records = Bio.SeqIO.parse(reads_location, "fasta")

    # The first read fixes the number of positions in the pileup.
    record = next(records)
    columns = [{} for _ in range(len(record))]

    while record:

        for index in range(len(record)):
            symbol = record[index]
            column = columns[index]
            column[symbol] = column.get(symbol, 0) + 1

        # None once the reads are exhausted, which ends the loop.
        record = next(records, None)

    # Remove the gaps from the pileup.
    if not gaps:
        for column in columns:
            column.pop(GAP, None)

    return Pileup(columns)
def parse_pileup_from_bam(references, bam_location):
    """
    PARSE PILEUP FROM BAM


    PURPOSE
    -------

    Constructs a Pileup object from reference objects and a BAM file.


    INPUT
    -----

    [LIST (REFERENCE)] [references]
        A list of quasitools Reference objects. Each one is read through
        its `name` (used as the contig) and `seq` (its length is the end
        of the counted region) attributes.

    [BAM FILE LOCATION] [bam_location]
        The file location of the aligned BAM file from which to build the
        pileup object.


    RETURN
    ------

    [Pileup]
        A new pileup object constructed from the information in the
        Reference object(s) and the BAM file. The positions of all
        references are appended one after another, in the order of the
        references list. Each position maps the bases "A", "C", "G" and
        "T" to their coverage; bases with zero coverage are left out.
    """

    # PySam bases: row order of the arrays returned by count_coverage
    # (A = 0, C = 1, G = 2, T = 3).
    bases = ("A", "C", "G", "T")

    pileup = []

    # Open the BAM file in a context manager so that the file handle is
    # closed once the coverage has been read, even if counting raises.
    with pysam.AlignmentFile(bam_location, "rb") as samfile:

        for reference in references:
            coverage = samfile.count_coverage(
                contig=reference.name, start=0, stop=len(reference.seq),
                quality_threshold=0)

            for column in range(len(coverage[0])):
                dictionary = {}

                # Only bases that are actually covered get an entry.
                for row, base in enumerate(bases):
                    count = coverage[row][column]
                    if count > 0:
                        dictionary[base] = count

                pileup.append(dictionary)

    return Pileup(pileup)