Пример #1
    def test_construct_pileup_list(self):
        test_construct_pileup_list - Checks that the pileup length and the
        first few indices of the pileup are correct.




        bamPileup = Pileup_List.construct_pileup_list(self.test_cp_files,
        pileup_as_array = bamPileup.get_pileups_as_array()
        pileup_as_numerical_array = bamPileup.get_pileups_as_numerical_array()

        assert len(pileup_as_array) == 2
        assert len(pileup_as_array[0]) == 2844
        assert len(pileup_as_array[1]) == 2844
        assert len(pileup_as_numerical_array[0]) == (2844 * 4)
        assert len(pileup_as_numerical_array[1]) == (2844 * 4)
        assert pileup_as_array[0][0:10] == [{
            'C': 12
        }, {
            'C': 12
        }, {
            'T': 12
        }, {
            'C': 12
        }, {
            'G': 2,
            'C': 3,
            'T': 1,
            'A': 6
        }, {
            'G': 12
        }, {
            'G': 12
        }, {
            'T': 12
        }, {
            'C': 12
        }, {
            'G': 2,
            'C': 3,
            'T': 1,
            'A': 6
        assert pileup_as_array[1][0:10] == [{
            'C': 12
        }, {
            'C': 12
        }, {
            'T': 12
        }, {
            'C': 12
        }, {
            'A': 6,
            'C': 5,
            'G': 1
        }, {
            'G': 12
        }, {
            'A': 4,
            'G': 8
        }, {
            'T': 12
        }, {
            'C': 12
        }, {
            'A': 7,
            'T': 1,
            'C': 3,
            'G': 1
Пример #2
def dist(ctx, reference, bam, normalize, output_distance, startpos, endpos,
         output, no_coverage):
    dist - Performs the main part of the program

        [CONTEXT] [ctx]
        [FASTA FILE LOCATION] [reference]
        [BAM FILE LOCATION] [bam]
        [BOOL] [normalize/dont_normalize]
        [BOOL] [output_distance/output_similarity]
        [INT] [startpos]
        [INT] [endpos]
        [STRING] [output]
            Output the CSV-formatted matrix output in a file
            instead of in the terminal.
        [STRING] [truncate/remove_no_coverage/keep_no_coverage]
            Options to truncate low-coverage regions on the ends of the pileup,
            ignore all low coverage regions, or keep all low coverage regions


        The distance matrix is printed out unless an error message was raised.


    if len(bam) < 2:
        raise click.UsageError("At least two bam file locations are required" +
                               " to perform quasispecies distance comparison")
    # reject a start or end position that is < 1, or a start position that
    # is greater than the end position
    if type(startpos) == int and int(startpos) < 1:
        raise click.UsageError("Start position must be >= 1.")
    if type(endpos) == int and int(endpos) < 1:
        raise click.UsageError("End position must be >= 1.")
    if (type(startpos) == int and type(endpos) == int and (startpos > endpos)):
        raise click.UsageError("Start position must be <= end position")

    # Build the reference object.
    references = parse_references_from_fasta(reference)

    pileups = Pileup_List.construct_pileup_list(bam, references)

    if startpos is None:
        startpos = 1
    if endpos is None:
        endpos = pileups.get_pileup_length()

    if pileups.get_pileup_length() == 0:
        raise click.UsageError("Empty pileup was produced from BAM files." +
                               "Halting program")

    click.echo("The start position is %d." % startpos)
    click.echo("The end position is %d." % endpos)
    click.echo("Constructed pileup from reference.")
    # click.echo the number of positions in pileup
    click.echo("The pileup covers %d positions before modifications." %

    # indicate whether the user-specified start and end position is out
    # of bounds (comparing to actual number of positions in pileup)
    if startpos > pileups.get_pileup_length():
        raise click.UsageError("Start position must be less than or" +
                               " equal to the number of nucleotide base " +
                               "positions in pileup (%s)." %
    if endpos > pileups.get_pileup_length():
        raise click.UsageError("End position must be less than or equal to " +
                               "the number of nucleotide base positions in " +
                               "pileup (%s)." % pileups.get_pileup_length())

    # we convert the start and end positions from one-based indexing to
    # zero-based indexing which is expected by distance.py and pileup.py
    startpos -= 1
    endpos -= 1

    # if there are no errors so far, proceed with running the program
    modified = modify_pileups(ctx, normalize, startpos, endpos, no_coverage,

    if (no_coverage is not 'keep_no_coverage') and (len(modified) == 0):
        raise click.UsageError("Entire pileup was truncated due to " +
                               "lack of coverage. Halting program")

    dist = DistanceMatrix(modified, bam)

    if output_distance:
        click.echo("Outputting an angular cosine distance matrix.")
        if output:

        click.echo("Outputting a cosine similarity matrix.")
        if output: