Пример #1
0
def assess_sample_primers(sample, fqpair, outfiles, primerlist, primerinfo,
                          args):
    """Build the read-to-primer and primer-to-read reports for one sample.

    Report paths are derived from ``fastq_file_label(sample, args.outdir)``
    with the suffixes ``.read2primers.txt`` and ``.primer2reads.txt``.  The
    log is written to ``<label>.primer_trim.log``, where the label comes from
    ``args.logdir`` when that is set and from ``args.outdir`` otherwise.

    Args:
        sample: Sample name; used for labels, log lines and error context.
        fqpair: Passed through to ``arrange_by_fragments`` and
            ``create_fragment_report``.
        outfiles: Passed through to ``arrange_by_fragments``.
        primerlist: Passed through to ``bin_primer_reads`` and
            ``create_primer_report``.
        primerinfo: Passed through to ``arrange_by_fragments``.
        args: Options object; ``outdir``, ``logdir``, ``force`` and ``debug``
            are read here.

    Raises:
        Exception: Any exception raised while building the reports is
            re-raised with ``sample`` appended to its ``args`` tuple.
    """
    outlabel = fastq_file_label(sample, args.outdir)
    loglabel = fastq_file_label(sample, args.logdir) if args.logdir \
               else outlabel
    frag_primer_file = outlabel + ".read2primers.txt"
    primer_read_file = outlabel + ".primer2reads.txt"
    logfile = loglabel + ".primer_trim.log"
    # The context manager guarantees the log handle is flushed and closed on
    # both the success path and the error path.
    with open(logfile, 'w') as logfh:
        try:
            sys.stderr.write(
                "  Creating primer reports for {}\n".format(sample))
            logfh.write("Sample {}\n".format(sample))
            if have_files([frag_primer_file, primer_read_file],
                          args.force,
                          stderr=logfh):
                # NOTE(review): only a log line is written here; processing
                # continues below even when both reports are reported as
                # present.  Confirm whether an early return was intended.
                logfh.write("  Already have {} and {}\n".format(
                    frag_primer_file, primer_read_file))
            frag2primers = arrange_by_fragments(fqpair, outfiles, primerinfo,
                                                logfh)
            fragcounts = create_fragment_report(fqpair, frag2primers,
                                                frag_primer_file, True, logfh)
            primerreads = bin_primer_reads(frag2primers, primerlist, logfh)
            (primercounts,
             primerkeys) = create_primer_report(primerreads, primerlist,
                                                primer_read_file, logfh, True,
                                                args.debug)
        except Exception as e:
            # Attach the sample name so the failure can be traced to its
            # input, then re-raise with the original traceback.
            e.args += (sample, )
            raise
Пример #2
0
def assess_sample_primers(sample, fqpair, outfiles, primerlist, primerinfo,
                          args):
    """Create the per-sample primer reports and a matching log file.

    Two report paths are derived from the label returned by
    ``fastq_file_label(sample, args.outdir)``: ``<label>.read2primers.txt``
    and ``<label>.primer2reads.txt``.  Progress is logged to
    ``<loglabel>.primer_trim.log``; ``loglabel`` is built from
    ``args.logdir`` when it is set, otherwise the output label is reused.

    Args:
        sample: Sample name; used for labels, log lines and error context.
        fqpair: Forwarded to ``arrange_by_fragments`` and
            ``create_fragment_report``.
        outfiles: Forwarded to ``arrange_by_fragments``.
        primerlist: Forwarded to ``bin_primer_reads`` and
            ``create_primer_report``.
        primerinfo: Forwarded to ``arrange_by_fragments``.
        args: Options object providing ``outdir``, ``logdir``, ``force`` and
            ``debug``.

    Raises:
        Exception: Whatever the report helpers raise, re-raised after
            ``sample`` has been appended to the exception's ``args``.
    """
    outlabel = fastq_file_label(sample, args.outdir)
    loglabel = fastq_file_label(sample, args.logdir) if args.logdir \
               else outlabel
    frag_primer_file = outlabel + ".read2primers.txt"
    primer_read_file = outlabel + ".primer2reads.txt"
    logfile = loglabel + ".primer_trim.log"
    logfh = open(logfile, 'w')
    try:
        sys.stderr.write("  Creating primer reports for {}\n".format(sample))
        logfh.write("Sample {}\n".format(sample))
        if have_files([frag_primer_file, primer_read_file], args.force,
                      stderr=logfh):
            # NOTE(review): this branch only logs; the reports are still
            # rebuilt below.  Confirm that skipping was not intended.
            logfh.write("  Already have {} and {}\n".format(
                frag_primer_file, primer_read_file))
        frag2primers = arrange_by_fragments(fqpair, outfiles, primerinfo,
                                            logfh)
        fragcounts = create_fragment_report(fqpair, frag2primers,
                                            frag_primer_file, True, logfh)
        primerreads = bin_primer_reads(frag2primers, primerlist, logfh)
        (primercounts, primerkeys) = create_primer_report(
            primerreads, primerlist, primer_read_file, logfh, True,
            args.debug)
    except Exception as e:
        # Tag the exception with the sample name before propagating it.
        e.args += (sample, )
        raise
    finally:
        # Release the log handle on success and on failure alike.
        logfh.close()
Пример #3
0
def trim_primers(fqfile, alignout, max_trim_len, primerinfo, outlabel, logfh,
                 args):
    """Write a primer-trimmed FASTQ plus a list of the read ids it contains.

    Output paths are ``outlabel + ".trimmed.fastq"`` and
    ``outlabel + ".seqlist.txt"``.  Each record read from ``fqfile`` is
    handled as follows:

    * id present in the parsed ``alignout`` and the matched primer's
      ``'overlap'`` entry in ``primerinfo`` is truthy: the record is sliced
      from ``end + left`` of its alignment onwards;
    * id present but ``'overlap'`` is falsy: the record is written unchanged;
    * id absent: the record is sliced from ``max_trim_len`` onwards.

    Args:
        fqfile: Input handed to ``FastQParser``.
        alignout: Input handed to ``parse_alignout``.
        max_trim_len: Slice start used for records without an alignment.
        primerinfo: Mapping primer -> mapping with an ``'overlap'`` entry.
        outlabel: Prefix for both output paths.
        logfh: Writable handle that receives progress and debug lines.
        args: Options object; ``force`` and ``debug`` are read.

    Returns:
        Tuple ``(trimmedfq, seqfile)`` of the two output paths.
    """
    trimmedfq = outlabel + ".trimmed.fastq"
    seqfile = outlabel + ".seqlist.txt"

    def debug_line(template, primer_name, read_id):
        # Per-read decisions are only logged when debugging is switched on.
        if args.debug:
            logfh.write(template.format(primer_name, read_id))

    logfh.write("    Trimming fq: {}\n".format(trimmedfq))
    # presumably true when both outputs can be reused -- verify in have_files
    if have_files([trimmedfq, seqfile], args.force, stderr=logfh):
        logfh.write("      Already have {}\n".format(trimmedfq))
        return (trimmedfq, seqfile)

    alignments = parse_alignout(alignout)
    read_ids = []
    with open(trimmedfq, 'w') as fq_out:
        for record in FastQParser(fqfile):
            read_id = record.id
            read_ids.append(read_id)
            if read_id not in alignments:
                # No alignment for this read: apply the default trim length
                # (original note: "trim default max_primer_len+2").
                kept = record[max_trim_len:]
            else:
                hit = alignments[read_id]
                primer_name = hit['primer']
                if primerinfo[primer_name]['overlap']:
                    cut_at = hit['end'] + hit['left']
                    kept = record[cut_at:]
                    debug_line("{}\tTrimming\t{}\n", primer_name, read_id)
                else:
                    debug_line("{}\tNot trimming\t{}\n", primer_name, read_id)
                    kept = record
            fq_out.write("{}\n".format(kept.fastq()))

    logfh.write("    Seq list: {}\n".format(seqfile))
    # NOTE(review): have_file is called with force=True; a true result is
    # treated as fatal and the process exits -- confirm helper semantics.
    if have_file(seqfile, True, stderr=logfh):
        logfh.write("      Still have {}\n".format(seqfile))
        sys.exit()
    with open_file(seqfile, 'w') as list_out:
        list_out.write("\n".join(read_ids) + "\n")
    return (trimmedfq, seqfile)
Пример #4
0
def trim_primers(fqfile, alignout, max_trim_len, primerinfo, outlabel, logfh,
                 args):
    """Trim primers off the reads in ``fqfile`` and record the read ids.

    Writes ``outlabel + ".trimmed.fastq"`` (one FASTQ record per input
    record) and ``outlabel + ".seqlist.txt"`` (one read id per line).  A read
    found in the parsed ``alignout`` is cut at ``end + left`` of its
    alignment when ``primerinfo[primer]['overlap']`` is truthy and is kept
    whole otherwise.  A read with no alignment is cut at ``max_trim_len``.

    Args:
        fqfile: Source passed to ``FastQParser``.
        alignout: Source passed to ``parse_alignout``.
        max_trim_len: Cut position for reads without an alignment.
        primerinfo: Mapping primer -> mapping with an ``'overlap'`` entry.
        outlabel: Common prefix of the two output paths.
        logfh: Writable log handle.
        args: Options object; ``force`` and ``debug`` are read.

    Returns:
        Tuple ``(trimmedfq, seqfile)`` with the two output paths.
    """
    trimmedfq = outlabel + ".trimmed.fastq"
    seqfile = outlabel + ".seqlist.txt"
    result = (trimmedfq, seqfile)

    logfh.write("    Trimming fq: {}\n".format(trimmedfq))
    # presumably signals that the outputs already exist -- verify in helper
    already_done = have_files([trimmedfq, seqfile], args.force, stderr=logfh)
    if already_done:
        logfh.write("      Already have {}\n".format(trimmedfq))
        return result

    hits_by_id = parse_alignout(alignout)
    names = []
    with open(trimmedfq, 'w') as trimmed_out:
        reader = FastQParser(fqfile)
        for entry in reader:
            name = entry.id
            names.append(name)
            if name in hits_by_id:
                hit = hits_by_id[name]
                primer = hit['primer']
                if primerinfo[primer]['overlap']:
                    start = hit['end'] + hit['left']
                    piece = entry[start:]
                    message = "{}\tTrimming\t{}\n"
                else:
                    piece = entry
                    message = "{}\tNot trimming\t{}\n"
                # Decision lines are only emitted in debug mode.
                if args.debug:
                    logfh.write(message.format(primer, name))
            else:
                # Unaligned read: fall back to the default trim length
                # (original note: "trim default max_primer_len+2").
                piece = entry[max_trim_len:]
            trimmed_out.write("{}\n".format(piece.fastq()))

    logfh.write("    Seq list: {}\n".format(seqfile))
    # NOTE(review): with force=True a true result aborts the run -- confirm
    # what have_file reports in that mode.
    still_present = have_file(seqfile, True, stderr=logfh)
    if still_present:
        logfh.write("      Still have {}\n".format(seqfile))
        sys.exit()
    with open_file(seqfile, 'w') as names_out:
        names_out.write("\n".join(names) + "\n")
    return result