Example #1
def action(args):
    if args.inplace and args.infile is sys.stdin:
        log.error('Error: cannot use the --inplace option with stdin')
        return

    if args.rename:
        raise NotImplementedError

    reader = csv.DictReader(args.infile)
    fieldnames = reader.fieldnames or []

    new_fields = parse_extras(args.add) if args.add else {}

    if new_fields:
        # widen the header and merge the constant extra values into every row
        fieldnames.extend(new_fields.keys())
        reader = (dict(row, **new_fields) for row in reader)

    if args.inplace:
        # write to a temporary file; it replaces the original input below
        outfile = tmp(args.infile.name)
    else:
        outfile = args.outfile

    with opener(outfile, 'w') as fout:
        writer = csv.DictWriter(fout, fieldnames, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(reader)

    if args.inplace:
        os.rename(fout.name, args.infile.name)
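The tmp and opener helpers used for the --inplace path are project utilities that are not part of this excerpt. As a rough illustration of the same pattern (write the new CSV to a temporary file next to the input, then replace the input), a stdlib-only sketch might look like this; the function name add_columns_inplace and its arguments are hypothetical:

import csv
import os
import tempfile

def add_columns_inplace(path, extra):
    # Sketch only: rewrite the CSV at `path`, appending the constant-valued
    # columns given in the dict `extra`. Not the project's actual
    # tmp()/opener() implementation.
    with open(path, newline='') as fin:
        reader = csv.DictReader(fin)
        fieldnames = list(reader.fieldnames or []) + list(extra)
        # create the temp file in the same directory so the final replace
        # stays on one filesystem
        fd, tmpname = tempfile.mkstemp(dir=os.path.dirname(path) or '.')
        with os.fdopen(fd, 'w', newline='') as fout:
            writer = csv.DictWriter(fout, fieldnames, extrasaction='ignore')
            writer.writeheader()
            writer.writerows(dict(row, **extra) for row in reader)
    os.replace(tmpname, path)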
Example #2
def build_parser(parser):
    parser.add_argument('aligns',
            nargs = '?',
            default = sys.stdin,
            type = Opener('r'),
            help = 'csvfile of ssearch results')
    parser.add_argument('-o', '--out',
            default = sys.stdout,
            type = Opener('w'),
            help = 'csv file tallying each error category for each read')
    parser.add_argument('-m', '--homopolymer-matrix',
            dest = 'matrix',
            type = lambda f: writer(Opener('w')(f)),
            help = 'csv file containing transition matrix of homopolymer lengths')
    parser.add_argument('-M', '--homopolymer-max',
            default = 6,
            type = int,
            help = 'csv homopolymer length above which counts are binned')
    parser.add_argument('--step',
            action = 'store_true',
            help = 'step through results (for debugging)')
    parser.add_argument('-f', '--extra-fields',
            type = lambda f: parse_extras(f),
            default = {},
            help="extra fields for csv file in form 'name1:val1,name2:val2'")
    parser.add_argument('--output-alignment',
            action='store_true',
            help = 'Include the actual alignment in csv output')
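Opener('r') and Opener('w') are used above as argparse type callables; their implementation is not included in these examples. A minimal, hypothetical equivalent is a factory whose instances open the named file in a fixed mode and map '-' to stdin/stdout:

import sys

class Opener:
    # Sketch of an argparse ``type`` factory: Opener(mode)(filename) returns
    # an open file object. This is an assumption about the interface, not the
    # project's actual Opener.
    def __init__(self, mode='r'):
        self.mode = mode

    def __call__(self, name):
        if name == '-':
            return sys.stdin if 'r' in self.mode else sys.stdout
        return open(name, self.mode)

Used as type=Opener('r'), argparse passes the already-open handle straight through to action() as, for example, args.aligns.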
Example #3
def action(args):
    extras = parse_extras(args.extra_fields) if args.extra_fields else {}

    aligns = islice(parse_ssearch36(args.alignments, False), args.limit)

    if args.min_zscore:
        aligns = (a for a in aligns
                  if float(a['sw_zscore']) >= args.min_zscore)
    aligns = groupby(aligns, key=itemgetter('q_name'))

    if args.top_alignment:
        # keep only the first alignment reported for each query
        aligns = (next(a) for _, a in aligns)
    else:
        aligns = (a for _, i in aligns for a in i)  # flatten groupby iters

    if args.rlefile:
        decoding = {k: v for d in args.rlefile for k, v in d.items()}

        def decode(aligns):
            aligns['t_seq'], aligns['q_seq'] = homodecodealignment(
                aligns['t_seq'], from_ascii(decoding[aligns['t_name']]),
                aligns['q_seq'], from_ascii(decoding[aligns['q_name']]))
            return aligns

        aligns = map(decode, aligns)

    if args.print_one:
        pprint.pprint(next(aligns))
        sys.exit()

    if args.with_diff:
        aligns = map(add_diff, aligns)

    if args.fieldnames:
        fieldnames = args.fieldnames
    else:
        # peek at the first row to recover its column names
        top = next(aligns, {})
        fieldnames = list(top.keys())
        aligns = chain([top], aligns)

    if extras:
        fieldnames += extras.keys()
        aligns = (dict(d, **extras) for d in aligns)

    writer = csv.DictWriter(args.out,
                            extrasaction='ignore',
                            fieldnames=fieldnames)

    if args.header:
        writer.writeheader()

    for a in aligns:
        writer.writerow(a)
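Two details are worth noting about this example: itertools.groupby only groups consecutive records, so the ssearch input is assumed to be ordered by q_name before the --top-alignment selection; and parse_extras is a project helper not shown here. Going by the --extra-fields help text ('name1:val1,name2:val2'), a plausible sketch of it, together with the merge step used above, is:

def parse_extras(spec):
    # Guess at the interface based on the help text: split a string like
    # 'name1:val1,name2:val2' into a dict of extra column names and values.
    return dict(item.split(':', 1) for item in spec.split(',') if item)

# illustrative values only
extras = parse_extras('run:42,sample:A')        # {'run': '42', 'sample': 'A'}
fieldnames = ['q_name', 't_name'] + list(extras)
rows = [{'q_name': 'r1', 't_name': 'ref'}]
rows = (dict(d, **extras) for d in rows)        # every row gains the extras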
Example #4
def build_parser(parser):
    parser.add_argument('aligns',
                        nargs='?',
                        default=sys.stdin,
                        type=Opener('r'),
                        help='csvfile of ssearch results')
    parser.add_argument(
        '-o',
        '--out',
        default=sys.stdout,
        type=Opener('w'),
        help='csv file tallying each error category for each read')
    parser.add_argument(
        '-m',
        '--homopolymer-matrix',
        dest='matrix',
        type=lambda f: writer(Opener('w')(f)),
        help='csv file containing transition matrix of homopolymer lengths')
    parser.add_argument(
        '-M',
        '--homopolymer-max',
        default=6,
        type=int,
        help='csv homopolymer length above which counts are binned')
    parser.add_argument('--step',
                        action='store_true',
                        help='step through results (for debugging)')
    parser.add_argument(
        '-f',
        '--extra-fields',
        type=lambda f: parse_extras(f),
        default={},
        help="extra fields for csv file in form 'name1:val1,name2:val2'")
    parser.add_argument('--output-alignment',
                        action='store_true',
                        help='Include the actual alignment in csv output')
Example #5
                        default=True,
                        dest='show_header')


BASES = set(['A', 'C', 'G', 'T'])


def action(args):

    try:
        from numpy import mean
    except ImportError as e:
        print(e)
        sys.exit(1)

    extras = parse_extras(args.extra_fields) if args.extra_fields else {}

    # TODO: add 'ambiguities'
    fieldnames = ['name', 'length', 'mean', 'ambig']
    fieldnames += extras.keys()

    stats = DictWriter(args.out, fieldnames=fieldnames)

    if args.show_header:
        stats.writeheader()

    for s in islice(SeqIO.parse(args.fastq, 'fastq'), args.limit):
        qual = s.letter_annotations["phred_quality"]
        # count bases that are not unambiguous A/C/G/T
        ambig = sum(1 for b in s.seq if b not in BASES)
        row = [s.name.replace(':', '_'), len(s), mean(qual), ambig]
        row += extras.values()
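The excerpt ends before the assembled row is written. Note that csv.DictWriter.writerow expects a mapping, so the positional row list has to be paired with the field names at some point. A self-contained sketch of the same per-read statistics with the write step included might look like the following (the file name and record limit are illustrative; Biopython and numpy are assumed to be installed):

import csv
import sys
from itertools import islice

from Bio import SeqIO
from numpy import mean

BASES = set('ACGT')
fieldnames = ['name', 'length', 'mean', 'ambig']
stats = csv.DictWriter(sys.stdout, fieldnames=fieldnames)
stats.writeheader()

# 'reads.fastq' and the limit of 10 records are placeholders
for s in islice(SeqIO.parse('reads.fastq', 'fastq'), 10):
    qual = s.letter_annotations['phred_quality']
    stats.writerow({
        'name': s.name.replace(':', '_'),
        'length': len(s),
        'mean': mean(qual),
        'ambig': sum(1 for b in s.seq if b not in BASES),
    })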