示例#1
0
    def select_pileup_range_and_truncate_output(
            self, pileup, startpos, endpos, pileup_select_range_expected_out,
            pileup_truncate_expected_out):
        """
        select_pileup_range_and_truncate_output - Selects a range of
        positions from the pileups and then truncates the output, comparing
        the pileup contents against the expected value after each step.

        INPUT:
            [2D ARRAY OF DICTIONARIES] [pileup]
                One entry per Pileup to be built.
            [INT] [startpos]
                Passed to Pileup_List.select_pileup_range.
            [INT] [endpos]
                Passed to Pileup_List.select_pileup_range.
            [2D ARRAY OF DICTIONARIES] [pileup_select_range_expected_out]
                Expected pileup contents after the range selection.
            [2D ARRAY OF DICTIONARIES] [pileup_truncate_expected_out]
                Expected pileup contents after the subsequent truncation.

        RETURN:
            [None]

        POST:
            An AssertionError is raised if either intermediate result does
            not equal its expected value.
        """
        pileup_list = Pileup_List(list(map(Pileup, pileup)))

        # Step 1: restrict the pileups to the requested range; positions
        # outside of it should no longer be reported.
        pileup_list.select_pileup_range(startpos, endpos)
        after_selection = pileup_list.get_pileups_as_array()
        assert after_selection == pileup_select_range_expected_out

        # Step 2: truncate the already range-restricted pileups.
        pileup_list.truncate_output()
        after_truncation = pileup_list.get_pileups_as_array()
        assert after_truncation == pileup_truncate_expected_out
示例#2
0
    def test_remove_no_coverage(self, pileup, expected_truncated_pileup,
                                expected_left_pos_truncated,
                                expected_right_pos_truncated):
        """
        test_remove_no_coverage - Checks that the pileup contents and the
        reported left/right truncation counts are as expected after calling
        remove_no_coverage on the pileups.

        INPUT:
            [2D ARRAY OF DICTIONARIES] [pileup]
                The pileup data to be processed.
            [2D ARRAY OF DICTIONARIES] [expected_truncated_pileup]
                Expected pileup contents after remove_no_coverage.
            [expected_left_pos_truncated]
                Expected value of get_num_left_positions_truncated()
                (number of contiguous left positions that were truncated).
            [expected_right_pos_truncated]
                Expected value of get_num_right_positions_truncated()
                (number of contiguous right positions that were truncated).

        RETURN:
            [None]

        POST:
            An AssertionError is raised if any actual value differs from
            its expected counterpart.
        """
        pileup_list = Pileup_List(list(map(Pileup, pileup)))
        pileup_list.remove_no_coverage()

        remaining = pileup_list.get_pileups_as_array()
        assert remaining == expected_truncated_pileup

        left_count = pileup_list.get_num_left_positions_truncated()
        assert left_count == expected_left_pos_truncated

        right_count = pileup_list.get_num_right_positions_truncated()
        assert right_count == expected_right_pos_truncated
示例#3
0
    def test_select_pileup_range_and_remove_no_coverage(
            self, pileup, startpos, endpos, expected_remove_no_coverage):
        """
        test_select_pileup_range_and_remove_no_coverage - Checks that the
        pileup contents are as expected when remove_no_coverage is applied
        after first selecting a specified range.

        INPUT:
            [2D ARRAY OF DICTIONARIES] [pileup]
                One entry per Pileup to be built.
            [INT] [startpos]
                Passed to Pileup_List.select_pileup_range.
            [INT] [endpos]
                Passed to Pileup_List.select_pileup_range.
            [2D ARRAY OF DICTIONARIES] [expected_remove_no_coverage]
                Expected pileup contents after both operations.

        RETURN:
            [None]

        POST:
            An AssertionError is raised if the resulting pileup differs
            from expected_remove_no_coverage.
        """
        pileup_list = Pileup_List(list(map(Pileup, pileup)))

        # Order matters: the range is selected first, then the no-coverage
        # removal runs on the reduced pileups.
        pileup_list.select_pileup_range(startpos, endpos)
        pileup_list.remove_no_coverage()

        remaining = pileup_list.get_pileups_as_array()
        assert remaining == expected_remove_no_coverage
示例#4
0
    def test_truncate_output(self, pileup, expected_truncated_pileup,
                             expected_left_pos_truncated,
                             expected_right_pos_truncated):
        """
        test_truncate_output - Checks that the pileup contents and the
        reported left/right truncation counts are as expected after calling
        truncate_output on the pileups.

        INPUT:
            [2D ARRAY OF DICTIONARIES] [pileup]
                The pileup data to be truncated.
            [2D ARRAY OF DICTIONARIES] [expected_truncated_pileup]
                Expected pileup contents after truncate_output.
            [expected_left_pos_truncated]
                Expected value of get_num_left_positions_truncated()
                (number of contiguous left positions that were truncated).
            [expected_right_pos_truncated]
                Expected value of get_num_right_positions_truncated()
                (number of contiguous right positions that were truncated).

        RETURN:
            [None]

        POST:
            An AssertionError is raised if any actual value differs from
            its expected counterpart.
        """
        pileup_list = Pileup_List(list(map(Pileup, pileup)))
        pileup_list.truncate_output()

        remaining = pileup_list.get_pileups_as_array()
        assert remaining == expected_truncated_pileup

        left_count = pileup_list.get_num_left_positions_truncated()
        assert left_count == expected_left_pos_truncated

        right_count = pileup_list.get_num_right_positions_truncated()
        assert right_count == expected_right_pos_truncated
示例#5
0
def parse_pileup_list_from_bam(references, file_list):
    """
    # ========================================================================

    PARSE PILEUP LIST FROM BAM


    PURPOSE
    -------

    Builds one pileup per BAM file by calling parse_pileup_from_bam with the
    shared references, and wraps the resulting pileups, in the order of
    file_list, in a single Pileup_List. The references must correspond to
    every BAM file.


    INPUT
    -----

    [LIST (REFERENCE)] [references]
        A list of quasitools Reference objects, passed unchanged to
        parse_pileup_from_bam for every BAM file.

    [LIST (BAM FILE LOCATIONS)] [file_list]
        A list of BAM file locations, each corresponding to one alignment
        pileup. All BAM files must correspond to the same References
        objects.


    RETURN
    ------

    [Pileup_List]
        A new Pileup_List object holding one parsed pileup per entry of
        file_list.

    # ========================================================================
    """

    return Pileup_List([
        parse_pileup_from_bam(references, bam_location)
        for bam_location in file_list
    ])
示例#6
0
    def matrix(self, request):
        """
        matrix - test fixture for the test_get_similarity_matrix and
                 test_get_distance_matrix functions

        INPUT:
            [LIST OF TUPLES]
            request.param[0]---[BOOL] [normalize] # normalized or not
            request.param[1]---[ARRAY] [pileup list]
            request.param[2]---[ARRAY] [pileup_files]
                               # file names corresponding to pileups
            request.param[3]---[ARRAY] normalized or unnormalized
                               similarity csv-format output
            request.param[4]---[ARRAY] normalized or unnormalized
                               distance csv-format output
            request.param[5]---[INT or NONE] [startpos or default if NONE]
            request.param[6]---[INT or NONE] [endpos or default if NONE]

        RETURN:
            [DistanceMatrix] [matrix built from the prepared pileups]

        POST:
            self.expected_csv_similarity holds request.param[3], the csv
            representation of the similarity expected from this matrix.

            self.expected_csv_distance holds request.param[4], the csv
            representation of the distance expected from this matrix.
        """
        params = request.param
        pileup_list = Pileup_List(list(map(Pileup, params[1])))

        # A range is only selected when BOTH bounds are exactly of type int
        # (i.e. neither of them is None).
        startpos = params[5]
        if type(startpos) is int:
            endpos = params[6]
            if type(endpos) is int:
                pileup_list.select_pileup_range(startpos, endpos)

        # Normalization is applied only for the literal boolean True.
        if params[0] is True:
            pileup_list.normalize_pileups()

        # Build the matrix from the numerical pileup data and file names.
        distance_matrix = DistanceMatrix(
            pileup_list.get_pileups_as_numerical_array(), params[2])

        self.expected_csv_similarity = params[3]
        self.expected_csv_distance = params[4]

        return distance_matrix
示例#7
0
    def test_construct_pileup_list(self):
        """
        test_construct_pileup_list - Checks that the pileup list constructed
        from self.test_cp_files and self.references has the expected number
        of pileups, the expected lengths, and the expected first ten
        positions in each pileup.

        INPUT:
            [None]

        RETURN:
            [None]

        POST:
            An AssertionError is raised if any length or any of the first
            ten positions of either pileup differs from the expected value.
        """

        # Expected base counts for the first ten positions of each pileup.
        expected_first_pileup_head = [
            {'C': 12},
            {'C': 12},
            {'T': 12},
            {'C': 12},
            {'G': 2, 'C': 3, 'T': 1, 'A': 6},
            {'G': 12},
            {'G': 12},
            {'T': 12},
            {'C': 12},
            {'G': 2, 'C': 3, 'T': 1, 'A': 6},
        ]
        expected_second_pileup_head = [
            {'C': 12},
            {'C': 12},
            {'T': 12},
            {'C': 12},
            {'A': 6, 'C': 5, 'G': 1},
            {'G': 12},
            {'A': 4, 'G': 8},
            {'T': 12},
            {'C': 12},
            {'A': 7, 'T': 1, 'C': 3, 'G': 1},
        ]

        pileup_list = Pileup_List.construct_pileup_list(self.test_cp_files,
                                                        self.references)
        as_array = pileup_list.get_pileups_as_array()
        as_numerical_array = pileup_list.get_pileups_as_numerical_array()

        assert len(as_array) == 2
        assert len(as_array[0]) == 2844
        assert len(as_array[1]) == 2844
        # The numerical representation is four times as long as the number
        # of positions.
        assert len(as_numerical_array[0]) == (2844 * 4)
        assert len(as_numerical_array[1]) == (2844 * 4)
        assert as_array[0][0:10] == expected_first_pileup_head
        assert as_array[1][0:10] == expected_second_pileup_head
示例#8
0
def dist(ctx, reference, bam, normalize, output_distance, startpos, endpos,
         output, no_coverage):
    """
    dist - Performs the main part of the program: validates the arguments,
    builds the pileups from the BAM files, modifies them as requested and
    outputs the resulting distance or similarity matrix as CSV.

    INPUT:
        [CONTEXT] [ctx]
        [FASTA FILE LOCATION] [reference]
        [BAM FILE LOCATIONS] [bam]
            At least two locations are required.
        [BOOL] [normalize/dont_normalize]
        [BOOL] [output_distance/output_similarity]
        [INT or NONE] [startpos]
            One-based start position; defaults to 1 when None.
        [INT or NONE] [endpos]
            One-based end position; defaults to the pileup length when None.
        [FILE-LIKE or falsy] [output]
            When truthy, the CSV-formatted matrix is written to it with
            output.write instead of being echoed to the terminal.
        [STRING] [truncate/remove_no_coverage/keep_no_coverage]
            Options to truncate low-coverage regions on the ends of the pileup,
            ignore all low coverage regions, or keep all low coverage regions

    RETURN:
        None.

    RAISES:
        click.UsageError if fewer than two BAM files are given, if a
        position is < 1, out of bounds or start > end, if the pileup is
        empty, or if the whole pileup was removed by the modifications.

    POST:
        The distance matrix is printed out unless an error message was raised.

    """

    if len(bam) < 2:
        raise click.UsageError("At least two bam file locations are required" +
                               " to perform quasispecies distance comparison")

    # Reject positions that are invalid a priori, before any file is parsed.
    # isinstance is the idiomatic type check; None (the "not given" case)
    # skips these checks and is replaced by a default further down.
    start_given = isinstance(startpos, int)
    end_given = isinstance(endpos, int)
    if start_given and startpos < 1:
        raise click.UsageError("Start position must be >= 1.")
    if end_given and endpos < 1:
        raise click.UsageError("End position must be >= 1.")
    if start_given and end_given and startpos > endpos:
        raise click.UsageError("Start position must be <= end position")

    # Build the reference object.
    references = parse_references_from_fasta(reference)

    pileups = Pileup_List.construct_pileup_list(bam, references)

    # The pileups are not modified until modify_pileups is called below, so
    # the length is queried once and reused.
    pileup_length = pileups.get_pileup_length()

    if startpos is None:
        startpos = 1
    if endpos is None:
        endpos = pileup_length

    if pileup_length == 0:
        raise click.UsageError("Empty pileup was produced from BAM files." +
                               "Halting program")

    click.echo("The start position is %d." % startpos)
    click.echo("The end position is %d." % endpos)
    click.echo("Constructed pileup from reference.")
    # click.echo the number of positions in pileup
    click.echo("The pileup covers %d positions before modifications." %
               pileup_length)

    # indicate whether the user-specified start and end position is out
    # of bounds (comparing to actual number of positions in pileup)
    if startpos > pileup_length:
        raise click.UsageError("Start position must be less than or" +
                               " equal to the number of nucleotide base " +
                               "positions in pileup (%s)." % pileup_length)
    if endpos > pileup_length:
        raise click.UsageError("End position must be less than or equal to " +
                               "the number of nucleotide base positions in " +
                               "pileup (%s)." % pileup_length)

    # we convert the start and end positions from one-based indexing to
    # zero-based indexing which is expected by distance.py and pileup.py
    startpos -= 1
    endpos -= 1

    # if there is no errors so far, proceed with running program
    modified = modify_pileups(ctx, normalize, startpos, endpos, no_coverage,
                              pileups)

    # Strings must be compared by value: an identity check against a
    # literal depends on interning and is not reliable.
    # len() is used rather than truthiness because the type of `modified`
    # is not visible here (it may be an array).
    if no_coverage != 'keep_no_coverage' and len(modified) == 0:
        raise click.UsageError("Entire pileup was truncated due to " +
                               "lack of coverage. Halting program")

    # Named distinctly from this function to avoid shadowing `dist`.
    distance_matrix = DistanceMatrix(modified, bam)

    if output_distance:
        click.echo("Outputting an angular cosine distance matrix.")
        csv_text = distance_matrix.get_distance_matrix_as_csv()
    else:
        click.echo("Outputting a cosine similarity matrix.")
        csv_text = distance_matrix.get_similarity_matrix_as_csv()

    if output:
        output.write(csv_text)
    else:
        click.echo(csv_text)