Exemplo n.º 1
0
def trimmomatic_se(fastq1,
                   crop_length,
                   crop_length_tol,
                   out_dir,
                   nth=1,
                   java_heap=None):
    """Crop a single-end FASTQ with Trimmomatic and return the output path.

    Trimmomatic is run in ``SE`` mode with ``CROP:<crop_length>`` and
    ``MINLEN:<crop_length - abs(crop_length_tol)>``.

    Args:
        fastq1: Path of the input FASTQ.
        crop_length: Length passed to Trimmomatic's ``CROP`` step.
        crop_length_tol: Tolerance; its absolute value is subtracted from
            ``crop_length`` to obtain the ``MINLEN`` threshold.
        out_dir: Directory in which the cropped FASTQ is written.
        nth: Value passed to Trimmomatic's ``-threads`` option.
        java_heap: Suffix for the JVM ``-Xmx`` option (e.g. ``'4G'``);
            ``-Xmx6G`` is used when this is ``None``.

    Returns:
        Path of the cropped FASTQ, of the form
        ``<out_dir>/<name>.crop_<crop_length>-<tol>bp.fastq.gz`` where
        ``<name>`` is the base name of ``strip_ext_fastq(fastq1)``.
    """
    base_name = os.path.basename(strip_ext_fastq(fastq1))
    out_prefix = os.path.join(out_dir, base_name)

    # The sign of the tolerance is irrelevant; only its magnitude is used.
    tolerance = abs(crop_length_tol)
    shortest_kept = crop_length - tolerance

    out_fastq = '{p}.crop_{cl}-{tol}bp.fastq.gz'.format(
        p=out_prefix, cl=crop_length, tol=tolerance)

    heap_flag = ('-Xmx6G' if java_heap is None
                 else '-Xmx{}'.format(java_heap))

    template = (
        'java -XX:ParallelGCThreads=1 {param} -jar {jar} SE -threads {nth} '
        '{fq1} {cropped} MINLEN:{ml} CROP:{cl}'
    )
    fields = dict(
        param=heap_flag,
        jar=locate_trimmomatic(),
        nth=nth,
        fq1=fastq1,
        cropped=out_fastq,
        ml=shortest_kept,
        cl=crop_length,
    )
    run_shell_cmd(template.format(**fields))

    return out_fastq
Exemplo n.º 2
0
def trimmomatic_pe(fastq1,
                   fastq2,
                   crop_length,
                   out_dir_R1,
                   out_dir_R2,
                   nth=1,
                   java_heap=None):
    """Crop a pair of FASTQs with Trimmomatic and return the output paths.

    Trimmomatic is run in ``PE`` mode with ``MINLEN:<crop_length>`` and
    ``CROP:<crop_length>``. Besides the two returned outputs, the command
    is given one ``.tmp`` output per mate (the 4th and 6th file arguments,
    which Trimmomatic's PE mode uses for unpaired reads); those files are
    deleted before returning.

    Args:
        fastq1: Path of the R1 input FASTQ.
        fastq2: Path of the R2 input FASTQ.
        crop_length: Length used for both ``MINLEN`` and ``CROP``.
        out_dir_R1: Directory in which the cropped R1 FASTQ is written.
        out_dir_R2: Directory in which the cropped R2 FASTQ is written.
        nth: Value passed to Trimmomatic's ``-threads`` option.
        java_heap: Suffix for the JVM ``-Xmx`` option (e.g. ``'4G'``);
            ``-Xmx6G`` is used when this is ``None``.

    Returns:
        Tuple ``(cropped_R1, cropped_R2)`` of output paths, each of the
        form ``<out_dir>/<name>.crop_<crop_length>bp.fastq.gz``.
    """
    prefix_R1 = os.path.join(out_dir_R1,
                             os.path.basename(strip_ext_fastq(fastq1)))
    prefix_R2 = os.path.join(out_dir_R2,
                             os.path.basename(strip_ext_fastq(fastq2)))
    cropped_R1 = '{}.crop_{}bp.fastq.gz'.format(prefix_R1, crop_length)
    cropped_R2 = '{}.crop_{}bp.fastq.gz'.format(prefix_R2, crop_length)
    tmp_cropped_R1 = '{}.tmp'.format(cropped_R1)
    tmp_cropped_R2 = '{}.tmp'.format(cropped_R2)

    if java_heap is None:
        java_heap_param = '-Xmx6G'
    else:
        java_heap_param = '-Xmx{}'.format(java_heap)

    cmd = 'java -XX:ParallelGCThreads=1 {param} -jar {jar} PE -threads {nth} '
    cmd += '{fq1} {fq2} {cropped1} {tmp_cropped1} {cropped2} {tmp_cropped2} '
    cmd += 'MINLEN:{ml} CROP:{cl}'
    cmd = cmd.format(param=java_heap_param,
                     jar=locate_trimmomatic(),
                     nth=nth,
                     fq1=fastq1,
                     fq2=fastq2,
                     cropped1=cropped_R1,
                     tmp_cropped1=tmp_cropped_R1,
                     cropped2=cropped_R2,
                     tmp_cropped2=tmp_cropped_R2,
                     ml=crop_length,
                     cl=crop_length)
    try:
        run_shell_cmd(cmd)
    finally:
        # Remove the scratch outputs even when Trimmomatic fails, so a
        # failed run does not leave partial .tmp files behind.
        # NOTE(review): assumes rm_f tolerates missing files -- confirm.
        rm_f([tmp_cropped_R1, tmp_cropped_R2])

    return cropped_R1, cropped_R2
Exemplo n.º 3
0
def trimmomatic_pe(fastq1,
                   fastq2,
                   crop_length,
                   crop_length_tol,
                   out_dir_R1,
                   out_dir_R2,
                   nth=1,
                   java_heap=None):
    """Crop a pair of FASTQs with Trimmomatic and return the output paths.

    Trimmomatic is run in ``PE`` mode with ``CROP:<crop_length>`` and
    ``MINLEN:<crop_length - abs(crop_length_tol)>``. Besides the two
    returned outputs, the command is given one ``.tmp`` output per mate
    (the 4th and 6th file arguments, which Trimmomatic's PE mode uses for
    unpaired reads); those files are deleted before returning.

    Args:
        fastq1: Path of the R1 input FASTQ.
        fastq2: Path of the R2 input FASTQ.
        crop_length: Length passed to Trimmomatic's ``CROP`` step.
        crop_length_tol: Tolerance; its absolute value is subtracted from
            ``crop_length`` to obtain the ``MINLEN`` threshold.
        out_dir_R1: Directory in which the cropped R1 FASTQ is written.
        out_dir_R2: Directory in which the cropped R2 FASTQ is written.
        nth: Value passed to Trimmomatic's ``-threads`` option.
        java_heap: Suffix for the JVM ``-Xmx`` option (e.g. ``'4G'``);
            ``-Xmx6G`` is used when this is ``None``.

    Returns:
        Tuple ``(cropped_R1, cropped_R2)`` of output paths, each of the
        form ``<out_dir>/<name>.crop_<crop_length>-<tol>bp.fastq.gz``.
    """
    prefix_R1 = os.path.join(out_dir_R1,
                             os.path.basename(strip_ext_fastq(fastq1)))
    prefix_R2 = os.path.join(out_dir_R2,
                             os.path.basename(strip_ext_fastq(fastq2)))

    # Only the magnitude of the tolerance matters.
    crop_length_tol = abs(crop_length_tol)
    min_length = crop_length - crop_length_tol

    cropped_R1 = '{p}.crop_{cl}-{tol}bp.fastq.gz'.format(p=prefix_R1,
                                                         cl=crop_length,
                                                         tol=crop_length_tol)
    cropped_R2 = '{p}.crop_{cl}-{tol}bp.fastq.gz'.format(p=prefix_R2,
                                                         cl=crop_length,
                                                         tol=crop_length_tol)
    tmp_cropped_R1 = '{}.tmp'.format(cropped_R1)
    tmp_cropped_R2 = '{}.tmp'.format(cropped_R2)

    if java_heap is None:
        java_heap_param = '-Xmx6G'
    else:
        java_heap_param = '-Xmx{}'.format(java_heap)

    cmd = 'java -XX:ParallelGCThreads=1 {param} -jar {jar} PE -threads {nth} '
    cmd += '{fq1} {fq2} {cropped1} {tmp_cropped1} {cropped2} {tmp_cropped2} '
    cmd += 'MINLEN:{ml} CROP:{cl}'
    cmd = cmd.format(param=java_heap_param,
                     jar=locate_trimmomatic(),
                     nth=nth,
                     fq1=fastq1,
                     fq2=fastq2,
                     cropped1=cropped_R1,
                     tmp_cropped1=tmp_cropped_R1,
                     cropped2=cropped_R2,
                     tmp_cropped2=tmp_cropped_R2,
                     ml=min_length,
                     cl=crop_length)
    try:
        run_shell_cmd(cmd)
    finally:
        # Remove the scratch outputs even when Trimmomatic fails, so a
        # failed run does not leave partial .tmp files behind.
        # NOTE(review): assumes rm_f tolerates missing files -- confirm.
        rm_f([tmp_cropped_R1, tmp_cropped_R2])

    return cropped_R1, cropped_R2
Exemplo n.º 4
0
def trimmomatic_se(fastq1, crop_length, out_dir, nth=1, java_heap=None):
    """Crop a single-end FASTQ with Trimmomatic and return the output path.

    Trimmomatic is run in ``SE`` mode with ``MINLEN:<crop_length>`` and
    ``CROP:<crop_length>``.

    Args:
        fastq1: Path of the input FASTQ.
        crop_length: Length used for both ``MINLEN`` and ``CROP``.
        out_dir: Directory in which the cropped FASTQ is written.
        nth: Value passed to Trimmomatic's ``-threads`` option.
        java_heap: Suffix for the JVM ``-Xmx`` option (e.g. ``'4G'``);
            ``-Xmx6G`` is used when this is ``None``.

    Returns:
        Path of the cropped FASTQ, of the form
        ``<out_dir>/<name>.crop_<crop_length>bp.fastq.gz`` where ``<name>``
        is the base name of ``strip_ext_fastq(fastq1)``.
    """
    base_name = os.path.basename(strip_ext_fastq(fastq1))
    out_prefix = os.path.join(out_dir, base_name)
    out_fastq = '{}.crop_{}bp.fastq.gz'.format(out_prefix, crop_length)

    heap_flag = ('-Xmx6G' if java_heap is None
                 else '-Xmx{}'.format(java_heap))

    template = (
        'java -XX:ParallelGCThreads=1 {heap} -jar {jar} SE -threads {nth} '
        '{src} {dst} MINLEN:{length} CROP:{length}'
    )
    run_shell_cmd(template.format(
        heap=heap_flag,
        jar=locate_trimmomatic(),
        nth=nth,
        src=fastq1,
        dst=out_fastq,
        length=crop_length,
    ))

    return out_fastq
Exemplo n.º 5
0
def trimmomatic_se(fastq1,
                   crop_length,
                   crop_length_tol,
                   phred_score_format,
                   out_dir,
                   nth=1,
                   java_heap=None):
    """Crop a single-end FASTQ with Trimmomatic and return the output path.

    Trimmomatic is run in ``SE`` mode with ``CROP:<crop_length>`` and
    ``MINLEN:<crop_length - abs(crop_length_tol)>``, optionally with an
    explicit ``-phred33`` / ``-phred64`` flag.

    Args:
        fastq1: Path of the input FASTQ.
        crop_length: Length passed to Trimmomatic's ``CROP`` step.
        crop_length_tol: Tolerance; its absolute value is subtracted from
            ``crop_length`` to obtain the ``MINLEN`` threshold.
        phred_score_format: One of ``'auto'``, ``'phred33'`` or
            ``'phred64'`` (case-insensitive). ``'auto'`` passes no phred
            flag to Trimmomatic.
        out_dir: Directory in which the cropped FASTQ is written.
        nth: Value passed to Trimmomatic's ``-threads`` option.
        java_heap: Suffix for the JVM ``-Xmx`` option (e.g. ``'4G'``);
            ``-Xmx6G`` is used when this is ``None``.

    Returns:
        Path of the cropped FASTQ, of the form
        ``<out_dir>/<name>.crop_<crop_length>-<tol>bp.fastq.gz`` where
        ``<name>`` is the base name of ``strip_ext_fastq(fastq1)``.

    Raises:
        ValueError: If ``phred_score_format`` is not a recognised value.
    """
    base_name = os.path.basename(strip_ext_fastq(fastq1))
    out_prefix = os.path.join(out_dir, base_name)

    # Only the magnitude of the tolerance matters.
    tolerance = abs(crop_length_tol)
    shortest_kept = crop_length - tolerance

    out_fastq = '{p}.crop_{cl}-{tol}bp.fastq.gz'.format(
        p=out_prefix, cl=crop_length, tol=tolerance)

    heap_flag = ('-Xmx6G' if java_heap is None
                 else '-Xmx{}'.format(java_heap))

    # Map each accepted format name to the flag placed on the command line.
    phred_flags = {
        'auto': '',
        'phred33': '-phred33',
        'phred64': '-phred64',
    }
    phred_key = phred_score_format.lower()
    if phred_key not in phred_flags:
        raise ValueError('Wrong phred_score_format!')
    phred_flag = phred_flags[phred_key]

    template = (
        'java -XX:ParallelGCThreads=1 {param} -jar {jar} SE -threads {nth} '
        '{phred_score_param} '
        '{fq1} {cropped} MINLEN:{ml} CROP:{cl}'
    )
    fields = dict(
        param=heap_flag,
        jar=locate_trimmomatic(),
        nth=nth,
        phred_score_param=phred_flag,
        fq1=fastq1,
        cropped=out_fastq,
        ml=shortest_kept,
        cl=crop_length,
    )
    run_shell_cmd(template.format(**fields))

    return out_fastq