Пример #1
0
def bowtie(fastq, outfile, config):
    """
    Use bowtie to map `fastq`, saving the SAM file as `outfile`.

    Ensures that SAM output is requested: if neither '--sam' nor '-S' is in
    ``config['bowtie params']``, '-S' is appended to the parameters.

    :param fastq: path of the FASTQ file handed to bowtie
    :param outfile: path that receives bowtie's stdout; bowtie's stderr is
        written to ``outfile + '.log'``
    :param config: mapping providing 'index' and 'bowtie params' (a
        whitespace-separated string of extra command-line arguments)
    :returns: a Result recording the input, the output, the command line,
        the log file name and whether bowtie exited with a non-zero status
    """
    index = config['index']
    params = config['bowtie params'].split()
    if ('--sam' not in params) and ('-S' not in params):
        params.append('-S')

    cmds = ['bowtie']
    cmds.extend(params)
    cmds.append(index)
    cmds.append(fastq)

    # Single-argument parenthesised form prints the same on Python 2 and 3.
    print(outfile)
    logfn = outfile + '.log'

    # Context managers close both files even if Popen raises (for example
    # when the bowtie executable is missing) instead of leaking the handles.
    with open(outfile, 'w') as sam_out:
        with open(logfn, 'w') as log_out:
            p = subprocess.Popen(cmds,
                                 stdout=sam_out,
                                 stderr=log_out,
                                 bufsize=1)
            # Nothing is piped, so communicate() only waits for bowtie.
            p.communicate()

    return Result(infiles=fastq,
                  outfiles=outfile,
                  cmds=' '.join(cmds),
                  log=logfn,
                  # Report a non-zero exit status the same way count() and
                  # clip() do rather than silently ignoring it.
                  failed=bool(p.returncode))
Пример #2
0
def bam2sam(bam, sam):
    """
    Convert the BAM file `bam` to the SAM file `sam` with
    ``samtools view -h``.

    :param bam: path of the BAM file passed to samtools
    :param sam: path that receives samtools' stdout
    :returns: a Result with `bam` as the input, `sam` as the output, the
        captured stderr, the command line and whether samtools exited with
        a non-zero status
    """
    cmds = ['samtools', 'view', '-h', bam]

    # The context manager closes the output file even if Popen raises.
    with open(sam, 'w') as sam_out:
        p = subprocess.Popen(cmds,
                             stdout=sam_out,
                             stderr=subprocess.PIPE,
                             bufsize=1)
        # stdout goes to the file, so only stderr is captured here.
        _, stderr = p.communicate()

    # Keywords make the direction explicit: the BAM is the input and the
    # SAM is the output (the positional call had them swapped).
    return Result(infiles=bam,
                  outfiles=sam,
                  stderr=stderr,
                  failed=bool(p.returncode),
                  cmds=' '.join(cmds))
Пример #3
0
def clip(fastq, clipped_fastq, config):
    """
    Clip the adapter ``config['adapter']`` from `fastq` using fastx_clipper,
    writing the reads to `clipped_fastq`.

    A report is written to ``clipped_fastq + '.clipping_report'``.  If the
    configured adapter is None, no clipping is done: `fastq` is copied to
    `clipped_fastq` and the report just says so.

    :param fastq: path of the input FASTQ file
    :param clipped_fastq: path of the FASTQ file to create
    :param config: mapping providing 'adapter' (a string, or None)
    :returns: a Result whose outputs are ``(clipped_fastq, clipping_report)``.
        When fastx_clipper is run, `failed` is True if it exited with a
        non-zero status or did not create `clipped_fastq`.
    """
    # Local import so this function does not depend on a module-level
    # import that may not be present.
    import shutil

    adapter = config['adapter']
    clipping_report = clipped_fastq + '.clipping_report'

    if adapter is None:
        # Stream the copy in chunks rather than reading the whole FASTQ into
        # memory, and close both handles deterministically.
        with open(fastq, 'rb') as src:
            with open(clipped_fastq, 'wb') as dst:
                shutil.copyfileobj(src, dst)

        with open(clipping_report, 'w') as report:
            report.write('No adapter specified; %s is a copy of %s' %
                         (clipped_fastq, fastq))
        return Result(fastq, (clipped_fastq, clipping_report))

    cmds = [
        'fastx_clipper',
        '-i',
        fastq,
        '-o',
        clipped_fastq,
        '-n',  # *keep* Ns
        '-a',
        adapter,
        '-v',  # report to stdout
    ]

    # The '-v' report on stdout is what ends up in the clipping report file.
    with open(clipping_report, 'w') as report:
        p = subprocess.Popen(cmds,
                             stdout=report,
                             stderr=subprocess.PIPE,
                             bufsize=1)
        # stdout is redirected to the report file, so communicate() returns
        # None for it; it is still forwarded to Result as before.
        stdout, stderr = p.communicate()

    failed = bool(p.returncode) or not os.path.exists(clipped_fastq)

    return Result(fastq, (clipped_fastq, clipping_report),
                  stdout=stdout,
                  stderr=stderr,
                  failed=failed,
                  cmds=' '.join(cmds))
Пример #4
0
def count(samfile, countfile, config):
    """
    Run htseq-count on `samfile`, saving its stdout to `countfile`.

    The command line is ``htseq-count <htseq params> <samfile> <gff>``.

    :param samfile: path of the SAM file passed to htseq-count
    :param countfile: path that receives htseq-count's stdout
    :param config: mapping providing 'htseq params' (a whitespace-separated
        string of extra arguments) and 'gff' (the annotation file path)
    :returns: a Result with the captured stderr, the command line and
        `failed` set to htseq-count's exit status (non-zero means failure)
    """
    cmds = ['htseq-count']
    cmds += config['htseq params'].split()
    cmds += [samfile, config['gff']]

    # The context manager closes the count file even if Popen raises,
    # instead of leaving the handle open.
    with open(countfile, 'w') as counts_out:
        p = subprocess.Popen(cmds,
                             stdout=counts_out,
                             stderr=subprocess.PIPE,
                             bufsize=1)
        # stdout goes to the file, so only stderr is captured here.
        _, stderr = p.communicate()

    failed = p.returncode
    return Result(infiles=samfile,
                  outfiles=countfile,
                  stderr=stderr,
                  failed=failed,
                  cmds=' '.join(cmds))
Пример #5
0
def filter(sam, outfile, config):
    """
    Filter the alignments in `sam` against ``config['filter bed']`` with
    ``intersectBed -v``, writing the surviving alignments to `outfile`.

    The work is done in three steps: `sam` is converted to a temporary BAM
    with sam2bam(), intersectBed writes a second temporary BAM, and that is
    converted back to SAM with bam2sam().  Both temporary files are removed
    before returning.

    :param sam: path of the input SAM file
    :param outfile: path of the filtered SAM file to create
    :param config: mapping providing 'filter bed' (path of the BED file)
    :returns: a Result whose `cmds` and `stderr` are the newline-joined
        command lines and stderr of the three steps, and whose `failed` is
        intersectBed's exit status
    """
    # Local import so this function does not depend on a module-level
    # import that may not be present.
    import os

    temp_paths = []

    def _new_temp_path():
        # mkstemp() creates the file atomically, avoiding the race that
        # makes tempfile.mktemp() unsafe; only the path is needed here.
        fd, path = tempfile.mkstemp()
        os.close(fd)
        temp_paths.append(path)
        return path

    try:
        bam = _new_temp_path()
        filtered_bam = _new_temp_path()

        result0 = sam2bam(sam, bam)

        cmds1 = ['intersectBed', '-abam', bam, '-b', config['filter bed'],
                 '-v']

        # The context manager closes the intermediate file before bam2sam()
        # reads it.
        with open(filtered_bam, 'w') as bam_out:
            p = subprocess.Popen(cmds1,
                                 stdout=bam_out,
                                 stderr=subprocess.PIPE,
                                 bufsize=1)
            # stdout goes to the file, so only stderr is captured here.
            _, stderr1 = p.communicate()

        result2 = bam2sam(filtered_bam, outfile)
    finally:
        # Always remove the intermediate BAM files, even when a step raises.
        for path in temp_paths:
            if os.path.exists(path):
                os.unlink(path)

    failed = p.returncode
    cmds = '\n'.join([result0.cmds, ' '.join(cmds1), result2.cmds])
    stderr = '\n'.join([result0.stderr, stderr1, result2.stderr])
    return Result(sam, outfile, failed=failed, cmds=cmds, stderr=stderr)
                                      ' ',
                                      progressbar.ETA(),
                                      ' ',
                                      progressbar.AdaptiveETA(),
                                  ])
    bar.start()
    bar_worker = BarWorker(bar, tasks, task_count)
    bar_worker.start()

    for i in range(num_workers):
        tasks.put(None)
    tasks.join()

    result_dict = {}
    while task_count:
        r = Result(results.get())
        permutations = (result_dict.get(r.args['theta_name'], {}).get(
            r.args['main_method_name'], {}).get(r.args['line_search_name'],
                                                []))
        existing = next((p for p in permutations if p.is_same(r)), None)
        if existing:
            existing += r
        else:
            (result_dict.setdefault(r.args['theta_name'], {}).setdefault(
                r.args['main_method_name'],
                {}).setdefault(r.args['line_search_name'], [])).append(r)
        task_count -= 1

    for th in result_dict.values():
        for mm in th.values():
            for ls in mm.values():