Exemplo n.º 1
0
            likely_stutter += nreads[1]

    return array([ sum(nreads), likely_stutter, 1, 1 if likely_stutter else 0 ])


if __name__ == "__main__":
    # Build the CLI: two options owned by this script plus whatever the
    # locus iterator itself wants to expose.
    parser = ArgumentParser()
    parser.add_argument('--error-distn-file', metavar='file', type=str,
        required=True,
        help="File to which STR polymorphism error rates will be written.")
    parser.add_argument('--filter-metrics-file', metavar='file', type=str,
        help='File to store metrics related to locus and read filtering.')
    parser.add_argument('--single-cell', action='store_true', default=False,
        help="Library was generated from a single cell.  Disables the "
             "binomial model for >1 primary alleles.")
    STRLocusIterator.add_parser_args(parser)
    args = parser.parse_args()

    # Most options map straight onto STRLocusWalker parameters; strip the
    # ones that belong to this script before handing the dict over.
    walker_params = dict(vars(args))
    for script_only in ('error_distn_file', 'single_cell', 'filter_metrics_file'):
        del walker_params[script_only]

    # Step 1. Generate an STR length polymorphism error profile and save it.
    error_profile = profile_error_distn(walker_params, is_single_cell=args.single_cell)
    save_error_distn(args.error_distn_file, error_profile)

    # Step 2. Now that we have an empirical distribution of STR polymorphism
    # error rates, genotype the loci.  The results are printed to stdout.
    genotype(walker_params, args.filter_metrics_file, error_profile)
Exemplo n.º 2
0
    should be haploid and thus deviations from expectation should reflect
    experimental errors.
  * maximum mapQ option: --mapq60, only use mapq60 (very high confidence)
    alignments
  * minimum repeat unit option: --min-unit (recommended=3, default=1?).  Only
    consider loci where the number of repeat units, computed as
    (end-start+1) / unit size, is greater than the specified value.  This
    could be useful to remove questionable loci, like 2~3 repeat units of
    tri- or tetranucleotide repeats.
"""

import sys
from argparse import ArgumentParser
from strlocusiterator import STRLocusIterator

# Build the CLI entirely from the iterator's own option definitions.
parser = ArgumentParser()
STRLocusIterator.add_parser_args(parser)
args = parser.parse_args()

locus_iter = STRLocusIterator(**vars(args))

# Exhaust the iterator purely for its side effect: metrics are tallied
# internally as loci stream through the filters.
for (chrom, start, end, unit, region, reads) in locus_iter:
    pass

# Scalar filter counters, one per line.
for description, value in locus_iter.filter_metrics():
    print("%s\t%d" % (description, value))

# Histogram metrics: a header line followed by key/count pairs in key order.
for description, hist in locus_iter.hist_metrics():
    print(description)
    for key in sorted(hist):
        print("%s\t%d" % (key, hist[key]))