Пример #1
0
    def test_construct_pileup_list(self):
        """
        test_construct_pileup_list - Builds a pileup list from
        self.test_cp_files and self.references, then verifies the number of
        pileups, the length of each pileup (in both representations) and the
        base counts recorded at the first ten positions of each pileup.

        INPUT:
            [None]

        RETURN:
            [None]

        POST:
            [None]
        """

        # Number of positions each pileup is expected to contain.
        expected_positions = 2844
        # The numerical form holds four values per position
        # (presumably one per nucleotide -- confirm against Pileup_List).
        expected_numerical_len = expected_positions * 4

        # Expected base counts at the first ten positions of each pileup.
        expected_first_head = [
            {'C': 12},
            {'C': 12},
            {'T': 12},
            {'C': 12},
            {'G': 2, 'C': 3, 'T': 1, 'A': 6},
            {'G': 12},
            {'G': 12},
            {'T': 12},
            {'C': 12},
            {'G': 2, 'C': 3, 'T': 1, 'A': 6},
        ]
        expected_second_head = [
            {'C': 12},
            {'C': 12},
            {'T': 12},
            {'C': 12},
            {'A': 6, 'C': 5, 'G': 1},
            {'G': 12},
            {'A': 4, 'G': 8},
            {'T': 12},
            {'C': 12},
            {'A': 7, 'T': 1, 'C': 3, 'G': 1},
        ]

        pileup_list = Pileup_List.construct_pileup_list(
            self.test_cp_files, self.references)
        base_counts = pileup_list.get_pileups_as_array()
        numerical_counts = pileup_list.get_pileups_as_numerical_array()

        # Two pileups are expected, each spanning the same positions.
        assert len(base_counts) == 2
        assert len(base_counts[0]) == expected_positions
        assert len(base_counts[1]) == expected_positions
        assert len(numerical_counts[0]) == expected_numerical_len
        assert len(numerical_counts[1]) == expected_numerical_len

        # Spot-check the leading positions of both pileups.
        assert base_counts[0][0:10] == expected_first_head
        assert base_counts[1][0:10] == expected_second_head
Пример #2
0
def dist(ctx, reference, bam, normalize, output_distance, startpos, endpos,
         output, no_coverage):
    """
    dist - Performs the main part of the program: validates the arguments,
    builds the pileups, modifies them as requested and outputs either the
    distance matrix or the similarity matrix in CSV format.

    INPUT:
        [CONTEXT] [ctx]
        [FASTA FILE LOCATION] [reference]
        [BAM FILE LOCATION] [bam]
            At least two locations are required.
        [BOOL] [normalize/dont_normalize]
        [BOOL] [output_distance/output_similarity]
        [INT] [startpos]
            One-based start position; None means the first position.
        [INT] [endpos]
            One-based end position; None means the last pileup position.
        [STRING] [output]
            Output the CSV-formatted matrix output in a file
            instead of in the terminal.
        [STRING] [truncate/remove_no_coverage/keep_no_coverage]
            Options to truncate low-coverage regions on the ends of the pileup,
            ignore all low coverage regions, or keep all low coverage regions

    RETURN:
        None.

    RAISES:
        click.UsageError if fewer than two BAM files are given, if the start
        or end position is invalid or out of bounds, if the pileup is empty,
        or if the whole pileup was removed due to lack of coverage.

    POST:
        The distance matrix is printed out unless an error message was raised.

    """

    if len(bam) < 2:
        raise click.UsageError("At least two bam file locations are required"
                               " to perform quasispecies distance comparison")

    # Reject positions that are invalid regardless of the pileup contents.
    # Positions left unspecified (None) are defaulted further down.
    start_given = isinstance(startpos, int)
    end_given = isinstance(endpos, int)
    if start_given and startpos < 1:
        raise click.UsageError("Start position must be >= 1.")
    if end_given and endpos < 1:
        raise click.UsageError("End position must be >= 1.")
    if start_given and end_given and startpos > endpos:
        raise click.UsageError("Start position must be <= end position")

    # Build the reference object.
    references = parse_references_from_fasta(reference)

    pileups = Pileup_List.construct_pileup_list(bam, references)
    # The pileups are not modified until modify_pileups() below, so the
    # length can be read once and reused for every check and message.
    pileup_length = pileups.get_pileup_length()

    if pileup_length == 0:
        raise click.UsageError("Empty pileup was produced from BAM files. "
                               "Halting program")

    if startpos is None:
        startpos = 1
    if endpos is None:
        endpos = pileup_length

    click.echo("The start position is %d." % startpos)
    click.echo("The end position is %d." % endpos)
    click.echo("Constructed pileup from reference.")
    # click.echo the number of positions in pileup
    click.echo("The pileup covers %d positions before modifications." %
               pileup_length)

    # indicate whether the user-specified start and end position is out
    # of bounds (comparing to actual number of positions in pileup)
    if startpos > pileup_length:
        raise click.UsageError(("Start position must be less than or"
                                " equal to the number of nucleotide base "
                                "positions in pileup (%s).") % pileup_length)
    if endpos > pileup_length:
        raise click.UsageError(("End position must be less than or equal to "
                                "the number of nucleotide base positions in "
                                "pileup (%s).") % pileup_length)

    # we convert the start and end positions from one-based indexing to
    # zero-based indexing which is expected by distance.py and pileup.py
    startpos -= 1
    endpos -= 1

    # if there is no errors so far, proceed with running program
    modified = modify_pileups(ctx, normalize, startpos, endpos, no_coverage,
                              pileups)

    # Compare the option by value: an identity test ("is not") against a
    # string literal depends on interning and is not a reliable check.
    if (no_coverage != 'keep_no_coverage') and (len(modified) == 0):
        raise click.UsageError("Entire pileup was truncated due to "
                               "lack of coverage. Halting program")

    matrix = DistanceMatrix(modified, bam)

    if output_distance:
        click.echo("Outputting an angular cosine distance matrix.")
        csv_text = matrix.get_distance_matrix_as_csv()
    else:
        click.echo("Outputting a cosine similarity matrix.")
        csv_text = matrix.get_similarity_matrix_as_csv()

    # Write to the requested output when one was supplied, else the terminal.
    if output:
        output.write(csv_text)
    else:
        click.echo(csv_text)