Python get_rename_command示例，qiime.parallel.util.get_rename_command Python示例

示例#1

0

显示文件

文件： alpha_diversity.py 项目： Ecogenomics/FrankenQIIME

def get_job_commands(python_exe_fp, alpha_diversity_fp, tree_fp, job_prefix,
                     metrics, input_fps, output_dir, working_dir,
                     command_prefix=None, command_suffix=None):
    """Build one alpha diversity shell command per input file.

    Each path in ``input_fps`` yields a command that runs
    ``alpha_diversity_fp`` with ``python_exe_fp`` and writes to
    ``working_dir/alpha_<input filename>``. The fragment returned by
    ``get_rename_command`` (helper defined elsewhere) is appended to the
    command, followed by ``command_suffix``.

    ``job_prefix`` is accepted but not used by this function.
    ``command_prefix`` / ``command_suffix`` fall back to ``'/bin/bash; '``
    and ``'; exit'`` when they are falsy.

    Returns a ``(commands, result_filepaths)`` tuple of lists.
    """
    if not command_prefix:
        command_prefix = '/bin/bash; '
    if not command_suffix:
        command_suffix = '; exit'

    template = '%s %s %s -i %s -o %s -t %s -m %s %s %s'
    commands = []
    result_filepaths = []

    for input_fp in input_fps:
        # Only the filename part of the input path names the output.
        output_fn = 'alpha_%s' % split(input_fp)[1]

        rename_command, moved_fps = get_rename_command(
            [output_fn], working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        tmp_output_fp = working_dir + '/' + output_fn
        commands.append(template % (command_prefix,
                                    python_exe_fp,
                                    alpha_diversity_fp,
                                    input_fp,
                                    tmp_output_fp,
                                    tree_fp,
                                    metrics,
                                    rename_command,
                                    command_suffix))

    return commands, result_filepaths

示例#2

0

显示文件

def get_job_commands(python_exe_fp, alpha_diversity_fp, tree_fp, job_prefix,
                     metrics, input_fps, output_dir, working_dir,
                     command_prefix=None, command_suffix=None):
    """Build one alpha diversity shell command per input file.

    Each path in ``input_fps`` yields a command that runs
    ``alpha_diversity_fp`` with ``python_exe_fp`` and writes to
    ``working_dir/alpha_<input filename>``. The fragment returned by
    ``get_rename_command`` (helper defined elsewhere) is appended to the
    command, followed by ``command_suffix``.

    ``job_prefix`` is accepted but not used by this function.
    ``command_prefix`` / ``command_suffix`` fall back to ``'/bin/bash; '``
    and ``'; exit'`` when they are falsy.

    Returns a ``(commands, result_filepaths)`` tuple of lists.
    """
    if not command_prefix:
        command_prefix = '/bin/bash; '
    if not command_suffix:
        command_suffix = '; exit'

    template = '%s %s %s -i %s -o %s -t %s -m %s %s %s'
    commands = []
    result_filepaths = []

    for input_fp in input_fps:
        # Only the filename part of the input path names the output.
        output_fn = 'alpha_%s' % split(input_fp)[1]

        rename_command, moved_fps = get_rename_command(
            [output_fn], working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        tmp_output_fp = working_dir + '/' + output_fn
        commands.append(template % (command_prefix,
                                    python_exe_fp,
                                    alpha_diversity_fp,
                                    input_fp,
                                    tmp_output_fp,
                                    tree_fp,
                                    metrics,
                                    rename_command,
                                    command_suffix))

    return commands, result_filepaths

示例#3

0

显示文件

文件： align_seqs_pynast.py 项目： Ecogenomics/FrankenQIIME

def get_job_commands(python_exe_fp, align_seqs_fp, fasta_fps, template_aln_fp,
                     pairwise_alignment_method, output_dir, blast_db,
                     min_length, min_percent_id, job_prefix, working_dir,
                     command_prefix=None, command_suffix=None):
    """Build one PyNAST alignment shell command per input fasta file.

    The i-th file in ``fasta_fps`` is expected to produce three outputs
    named ``<job_prefix>.<i>_aligned.fasta``, ``<job_prefix>.<i>_failures.fasta``
    and ``<job_prefix>.<i>_log.txt``; those names are handed to
    ``get_rename_command`` (helper defined elsewhere) together with
    ``working_dir`` and ``output_dir``.

    ``command_prefix`` / ``command_suffix`` fall back to ``'/bin/bash; '``
    and ``'; exit'`` when they are falsy.

    Returns a ``(commands, result_filepaths)`` tuple of lists.
    """
    # Per-run output name patterns; '%d' is filled with the run index.
    out_filenames = [job_prefix + suffix for suffix in
                     ('.%d_aligned.fasta', '.%d_failures.fasta', '.%d_log.txt')]

    if not command_prefix:
        command_prefix = '/bin/bash; '
    if not command_suffix:
        command_suffix = '; exit'

    # Only pass -d when a blast database was supplied; per the original
    # notes the aligner otherwise builds a temporary one on the fly.
    blast_option = '-d %s' % blast_db if blast_db else ''

    template = \
        '%s %s %s %s -p %1.2f -e %d -m pynast -t %s -a %s -o %s -i %s %s %s'

    commands = []
    result_filepaths = []
    for i, fasta_fp in enumerate(fasta_fps):
        run_outputs = [fn % i for fn in out_filenames]
        rename_command, moved_fps = get_rename_command(
            run_outputs, working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        commands.append(template % (command_prefix,
                                    python_exe_fp,
                                    align_seqs_fp,
                                    blast_option,
                                    min_percent_id,
                                    min_length,
                                    template_aln_fp,
                                    pairwise_alignment_method,
                                    working_dir,
                                    fasta_fp,
                                    rename_command,
                                    command_suffix))

    return commands, result_filepaths

示例#4

0

显示文件

def get_job_commands_single_otu_table(python_exe_fp,
                                      beta_diversity_fp,
                                      tree_fp,
                                      job_prefix,
                                      metrics,
                                      input_fp,
                                      output_dir,
                                      working_dir,
                                      jobs_to_start,
                                      command_prefix=None,
                                      command_suffix=None):
    """Generate beta diversity commands splitting one OTU table across jobs.

    The sample ids read from ``input_fp`` are grouped by
    ``merge_to_n_commands`` using ``jobs_to_start``; every group becomes one
    command restricted to those samples through ``-r``. Group ``i`` writes
    into ``working_dir/<i>/`` and is paired with ``output_dir/<i>`` when
    calling ``get_rename_command`` (both helpers are defined elsewhere).

    Always passes -f to beta_diversity.py.

    ``job_prefix`` is accepted but not used by this function.
    ``command_prefix`` / ``command_suffix`` fall back to ``'/bin/bash; '``
    and ``'; exit'`` when they are falsy.

    Returns a ``(commands, result_filepaths)`` tuple of lists.
    """
    command_prefix = command_prefix or '/bin/bash; '
    command_suffix = command_suffix or '; exit'

    commands = []
    result_filepaths = []

    # Close the OTU table handle deterministically instead of leaking it.
    # NOTE(review): mode 'U' is kept for universal newlines under Python 2;
    # it was removed in Python 3.11 and must become 'r' when porting.
    with open(input_fp, 'U') as otu_table_f:
        sids = parse_otu_table(otu_table_f)[0]

    sample_id_groups = merge_to_n_commands(sids, jobs_to_start, ',', '', '')
    for i, sample_id_group in enumerate(sample_id_groups):
        working_dir_i = os.path.join(working_dir, str(i))
        output_dir_i = os.path.join(output_dir, str(i))
        input_dir, input_fn = split(input_fp)
        # One output file per requested metric, named <metric>_<input file>.
        output_fns = ['%s_%s' % (metric, input_fn)
                      for metric in metrics.split(',')]
        rename_command, current_result_filepaths = get_rename_command(
            output_fns, working_dir_i, output_dir_i)

        result_filepaths += current_result_filepaths

        command = '%s %s %s -i %s -o %s -t %s -m %s -f -r %s %s %s' % (
            command_prefix,
            python_exe_fp,
            beta_diversity_fp,
            input_fp,
            working_dir_i + '/',
            tree_fp,
            metrics,
            sample_id_group,
            rename_command,
            command_suffix)

        commands.append(command)

    return commands, result_filepaths

示例#5

0

显示文件

def get_job_commands_multiple_otu_tables(python_exe_fp,
                                         beta_diversity_fp,
                                         tree_fp,
                                         job_prefix,
                                         metrics,
                                         input_fps,
                                         output_dir,
                                         working_dir,
                                         command_prefix=None,
                                         command_suffix=None,
                                         full_tree=False):
    """Build one beta diversity shell command per input OTU table.

    For every path in ``input_fps`` the expected outputs are named
    ``<metric>_<input filename>`` for each comma-separated entry of
    ``metrics``; the script itself is pointed at ``working_dir/``. The
    expected names go to ``get_rename_command`` (helper defined elsewhere)
    along with ``working_dir`` and ``output_dir``.

    ``full_tree`` adds ``-f`` to each command when truthy. ``job_prefix`` is
    accepted but not used by this function. ``command_prefix`` /
    ``command_suffix`` fall back to ``'/bin/bash; '`` and ``'; exit'`` when
    they are falsy.

    Returns a ``(commands, result_filepaths)`` tuple of lists.
    """
    if not command_prefix:
        command_prefix = '/bin/bash; '
    if not command_suffix:
        command_suffix = '; exit'

    full_tree_str = '-f' if full_tree else ''
    template = '%s %s %s -i %s -o %s -t %s -m %s %s %s %s'

    commands = []
    result_filepaths = []

    for input_fp in input_fps:
        input_fn = split(input_fp)[1]
        output_fns = []
        for metric in metrics.split(','):
            output_fns.append('%s_%s' % (metric, input_fn))

        rename_command, moved_fps = get_rename_command(
            output_fns, working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        commands.append(template % (command_prefix,
                                    python_exe_fp,
                                    beta_diversity_fp,
                                    input_fp,
                                    working_dir + '/',
                                    tree_fp,
                                    metrics,
                                    full_tree_str,
                                    rename_command,
                                    command_suffix))

    return commands, result_filepaths

示例#6

0

显示文件

def get_job_commands(python_exe_fp,
                     pick_otus_fp,
                     fasta_fps,
                     output_dir,
                     blast_db,
                     job_prefix,
                     working_dir,
                     max_e_value,
                     similarity,
                     min_aligned_percent,
                     command_prefix='/bin/bash; ',
                     command_suffix='; exit'):
    """Build one BLAST-based pick_otus shell command per input fasta file.

    The i-th file in ``fasta_fps`` is expected to produce
    ``<job_prefix>.<i>_otus.log`` and ``<job_prefix>.<i>_otus.txt``; those
    names are passed to ``get_rename_command`` (helper defined elsewhere)
    together with ``working_dir`` and ``output_dir``.

    Returns a ``(commands, result_filepaths)`` tuple of lists.
    """
    # Output name patterns; '%d' is filled with the run index below.
    out_filenames = [job_prefix + suffix
                     for suffix in ('.%d_otus.log', '.%d_otus.txt')]

    template = ('%s %s %s -i %s -b %s -m blast -o %s -e %s -s %s '
                '--min_aligned_percent %s %s %s')

    commands = []
    result_filepaths = []

    for i, fasta_fp in enumerate(fasta_fps):
        run_outputs = [fn % i for fn in out_filenames]
        rename_command, moved_fps = get_rename_command(
            run_outputs, working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        commands.append(template % (command_prefix,
                                    python_exe_fp,
                                    pick_otus_fp,
                                    fasta_fp,
                                    blast_db,
                                    working_dir,
                                    max_e_value,
                                    similarity,
                                    min_aligned_percent,
                                    rename_command,
                                    command_suffix))

    return commands, result_filepaths

示例#7

0

显示文件

文件： assign_taxonomy_rdp.py 项目： Ecogenomics/FrankenQIIME

def get_commands(python_exe_fp, assign_taxonomy_fp, confidence, job_prefix,
                 fasta_fps, rdp_jar_fp, output_dir, working_dir,
                 command_prefix=None, command_suffix=None,
                 id_to_taxonomy_fp=None, reference_seqs_fp=None):
    """Build one RDP taxonomy assignment shell command per input fasta file.

    The i-th file in ``fasta_fps`` is expected to produce
    ``<job_prefix>.<i>_tax_assignments.log`` and
    ``<job_prefix>.<i>_tax_assignments.txt``; those names are passed to
    ``get_rename_command`` (helper defined elsewhere) together with
    ``working_dir`` and ``output_dir``.

    When ``command_prefix`` is falsy, a prefix exporting ``RDP_JAR_PATH`` as
    ``rdp_jar_fp`` is used; a falsy ``command_suffix`` becomes ``'; exit'``.
    ``-t`` / ``-r`` are added only when both ``id_to_taxonomy_fp`` and
    ``reference_seqs_fp`` are truthy.

    Returns a ``(commands, result_filepaths)`` tuple of lists.
    """
    out_filenames = [job_prefix + suffix for suffix in
                     ('.%d_tax_assignments.log', '.%d_tax_assignments.txt')]

    if not command_prefix:
        command_prefix = '/bin/bash; export RDP_JAR_PATH=%s; ' % rdp_jar_fp
    if not command_suffix:
        command_suffix = '; exit'

    if id_to_taxonomy_fp and reference_seqs_fp:
        rdp_extra_params = '-t %s -r %s' % (id_to_taxonomy_fp,
                                            reference_seqs_fp)
    else:
        rdp_extra_params = ''

    template = '%s %s %s %s -c %1.2f -m rdp -o %s -i %s %s %s'

    commands = []
    result_filepaths = []

    for i, fasta_fp in enumerate(fasta_fps):
        run_outputs = [fn % i for fn in out_filenames]
        rename_command, moved_fps = get_rename_command(
            run_outputs, working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        commands.append(template % (command_prefix,
                                    python_exe_fp,
                                    assign_taxonomy_fp,
                                    rdp_extra_params,
                                    confidence,
                                    working_dir,
                                    fasta_fp,
                                    rename_command,
                                    command_suffix))

    return commands, result_filepaths

示例#8

0

显示文件

def get_job_commands(python_exe_fp, assign_taxonomy_fp, id_to_taxonomy_fp,
                     e_value, blast_db, job_prefix,
                     blastmat_path, fasta_fps, output_dir, working_dir,
                     command_prefix=None, command_suffix=None):
    """Build one BLAST taxonomy assignment shell command per input fasta file.

    The i-th file in ``fasta_fps`` is expected to produce
    ``<job_prefix>.<i>_tax_assignments.log`` and
    ``<job_prefix>.<i>_tax_assignments.txt``; those names are passed to
    ``get_rename_command`` (helper defined elsewhere) together with
    ``working_dir`` and ``output_dir``.

    When ``command_prefix`` is falsy, a prefix that changes into
    ``working_dir`` and exports ``BLASTMAT`` as ``blastmat_path`` is used;
    a falsy ``command_suffix`` becomes ``'; exit'``.

    Returns a ``(commands, result_filepaths)`` tuple of lists.
    """
    out_filenames = [job_prefix + suffix for suffix in
                     ('.%d_tax_assignments.log', '.%d_tax_assignments.txt')]

    if not command_prefix:
        command_prefix = '/bin/bash; cd %s; export BLASTMAT=%s;' % (
            working_dir, blastmat_path)
    if not command_suffix:
        command_suffix = '; exit'

    template = '%s %s %s -o %s -m blast -e %s -b %s -i %s -t %s %s %s'

    commands = []
    result_filepaths = []

    for i, fasta_fp in enumerate(fasta_fps):
        run_outputs = [fn % i for fn in out_filenames]
        rename_command, moved_fps = get_rename_command(
            run_outputs, working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        commands.append(template % (command_prefix,
                                    python_exe_fp,
                                    assign_taxonomy_fp,
                                    working_dir,
                                    e_value,
                                    blast_db,
                                    fasta_fp,
                                    id_to_taxonomy_fp,
                                    rename_command,
                                    command_suffix))

    return commands, result_filepaths

示例#9

0

显示文件

文件： multiple_rarefactions.py 项目： Ecogenomics/FrankenQIIME

def get_job_commands(python_exe_fp, rarefaction_fp, job_prefix,
                     input_fp, output_dir, working_dir, min_seqs, max_seqs,
                     step, num_reps, lineages_included,
                     command_prefix=None, command_suffix=None):
    """Build one shell command per (depth, replicate) rarefaction run.

    Depths run over ``range(min_seqs, max_seqs + 1, step)`` and each depth
    is repeated ``num_reps`` times. Every run invokes ``rarefaction_fp`` on
    ``input_fp`` with ``-d <depth>`` and writes to
    ``working_dir/rarefaction_<depth>_<rep>.txt``; that filename is passed
    to ``get_rename_command`` (helper defined elsewhere) together with
    ``working_dir`` and ``output_dir``.

    ``lineages_included`` adds ``--lineages_included`` when truthy.
    ``job_prefix`` is accepted but not used by this function.
    ``command_prefix`` / ``command_suffix`` fall back to ``'/bin/bash; '``
    and ``'; exit'`` when they are falsy.

    Returns a ``(commands, result_filepaths)`` tuple of lists.
    """
    # (depth, output filename) for every run, depth-major order.
    run_parameters = [
        (num_seqs, 'rarefaction_%d_%d.txt' % (num_seqs, rep_num))
        for num_seqs in range(min_seqs, max_seqs + 1, step)
        for rep_num in range(num_reps)]

    if not command_prefix:
        command_prefix = '/bin/bash; '
    if not command_suffix:
        command_suffix = '; exit'

    lineages_included_param = \
        '--lineages_included' if lineages_included else ''

    template = '%s %s %s -i %s -o %s %s -d %s %s %s'
    commands = []
    result_filepaths = []

    for depth, output_fn in run_parameters:
        rename_command, moved_fps = get_rename_command(
            [output_fn], working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        tmp_output_fp = working_dir + '/' + output_fn
        commands.append(template % (command_prefix,
                                    python_exe_fp,
                                    rarefaction_fp,
                                    input_fp,
                                    tmp_output_fp,
                                    lineages_included_param,
                                    depth,
                                    rename_command,
                                    command_suffix))

    return commands, result_filepaths

示例#10

0

显示文件

def get_job_commands(python_exe_fp, rarefaction_fp, job_prefix,
                     input_fp, output_dir, working_dir, min_seqs, max_seqs,
                     step, num_reps, lineages_included,
                     command_prefix=None, command_suffix=None):
    """Build one shell command per (depth, replicate) rarefaction run.

    Depths run over ``range(min_seqs, max_seqs + 1, step)`` and each depth
    is repeated ``num_reps`` times. Every run invokes ``rarefaction_fp`` on
    ``input_fp`` with ``-d <depth>`` and writes to
    ``working_dir/rarefaction_<depth>_<rep>.txt``; that filename is passed
    to ``get_rename_command`` (helper defined elsewhere) together with
    ``working_dir`` and ``output_dir``.

    ``lineages_included`` adds ``--lineages_included`` when truthy.
    ``job_prefix`` is accepted but not used by this function.
    ``command_prefix`` / ``command_suffix`` fall back to ``'/bin/bash; '``
    and ``'; exit'`` when they are falsy.

    Returns a ``(commands, result_filepaths)`` tuple of lists.
    """
    # (depth, output filename) for every run, depth-major order.
    run_parameters = [
        (num_seqs, 'rarefaction_%d_%d.txt' % (num_seqs, rep_num))
        for num_seqs in range(min_seqs, max_seqs + 1, step)
        for rep_num in range(num_reps)]

    if not command_prefix:
        command_prefix = '/bin/bash; '
    if not command_suffix:
        command_suffix = '; exit'

    lineages_included_param = \
        '--lineages_included' if lineages_included else ''

    template = '%s %s %s -i %s -o %s %s -d %s %s %s'
    commands = []
    result_filepaths = []

    for depth, output_fn in run_parameters:
        rename_command, moved_fps = get_rename_command(
            [output_fn], working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        tmp_output_fp = working_dir + '/' + output_fn
        commands.append(template % (command_prefix,
                                    python_exe_fp,
                                    rarefaction_fp,
                                    input_fp,
                                    tmp_output_fp,
                                    lineages_included_param,
                                    depth,
                                    rename_command,
                                    command_suffix))

    return commands, result_filepaths

示例#11

0

显示文件

文件： assign_taxonomy_blast.py 项目： Ecogenomics/FrankenQIIME

def get_job_commands(python_exe_fp, assign_taxonomy_fp, id_to_taxonomy_fp,
                     e_value, blast_db, job_prefix,
                     blastmat_path, fasta_fps, output_dir, working_dir,
                     command_prefix=None, command_suffix=None):
    """Build one BLAST taxonomy assignment shell command per input fasta file.

    The i-th file in ``fasta_fps`` is expected to produce
    ``<job_prefix>.<i>_tax_assignments.log`` and
    ``<job_prefix>.<i>_tax_assignments.txt``; those names are passed to
    ``get_rename_command`` (helper defined elsewhere) together with
    ``working_dir`` and ``output_dir``.

    When ``command_prefix`` is falsy, a prefix that changes into
    ``working_dir`` and exports ``BLASTMAT`` as ``blastmat_path`` is used;
    a falsy ``command_suffix`` becomes ``'; exit'``.

    Returns a ``(commands, result_filepaths)`` tuple of lists.
    """
    out_filenames = [job_prefix + suffix for suffix in
                     ('.%d_tax_assignments.log', '.%d_tax_assignments.txt')]

    if not command_prefix:
        command_prefix = '/bin/bash; cd %s; export BLASTMAT=%s;' % (
            working_dir, blastmat_path)
    if not command_suffix:
        command_suffix = '; exit'

    template = '%s %s %s -o %s -m blast -e %s -b %s -i %s -t %s %s %s'

    commands = []
    result_filepaths = []

    for i, fasta_fp in enumerate(fasta_fps):
        run_outputs = [fn % i for fn in out_filenames]
        rename_command, moved_fps = get_rename_command(
            run_outputs, working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        commands.append(template % (command_prefix,
                                    python_exe_fp,
                                    assign_taxonomy_fp,
                                    working_dir,
                                    e_value,
                                    blast_db,
                                    fasta_fp,
                                    id_to_taxonomy_fp,
                                    rename_command,
                                    command_suffix))

    return commands, result_filepaths

示例#12

0

显示文件

文件： pick_otus_blast.py 项目： Ecogenomics/FrankenQIIME

def get_job_commands(python_exe_fp, pick_otus_fp, fasta_fps,
                     output_dir, blast_db, job_prefix, working_dir,
                     max_e_value, similarity, min_aligned_percent,
                     command_prefix='/bin/bash; ', command_suffix='; exit'):
    """Build one BLAST-based pick_otus shell command per input fasta file.

    The i-th file in ``fasta_fps`` is expected to produce
    ``<job_prefix>.<i>_otus.log`` and ``<job_prefix>.<i>_otus.txt``; those
    names are passed to ``get_rename_command`` (helper defined elsewhere)
    together with ``working_dir`` and ``output_dir``.

    Returns a ``(commands, result_filepaths)`` tuple of lists.
    """
    # Output name patterns; '%d' is filled with the run index below.
    out_filenames = [job_prefix + suffix
                     for suffix in ('.%d_otus.log', '.%d_otus.txt')]

    template = ('%s %s %s -i %s -b %s -m blast -o %s -e %s -s %s '
                '--min_aligned_percent %s %s %s')

    commands = []
    result_filepaths = []

    for i, fasta_fp in enumerate(fasta_fps):
        run_outputs = [fn % i for fn in out_filenames]
        rename_command, moved_fps = get_rename_command(
            run_outputs, working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        commands.append(template % (command_prefix,
                                    python_exe_fp,
                                    pick_otus_fp,
                                    fasta_fp,
                                    blast_db,
                                    working_dir,
                                    max_e_value,
                                    similarity,
                                    min_aligned_percent,
                                    rename_command,
                                    command_suffix))

    return commands, result_filepaths

示例#13

0

显示文件

def get_commands(python_exe_fp, assign_taxonomy_fp, confidence, job_prefix,
                 fasta_fps, rdp_jar_fp, output_dir, working_dir,
                 command_prefix=None, command_suffix=None):
    """Build one RDP taxonomy assignment shell command per input fasta file.

    The i-th file in ``fasta_fps`` is expected to produce
    ``<job_prefix>.<i>_tax_assignments.log`` and
    ``<job_prefix>.<i>_tax_assignments.txt``; those names are passed to
    ``get_rename_command`` (helper defined elsewhere) together with
    ``working_dir`` and ``output_dir``.

    When ``command_prefix`` is falsy, a prefix exporting ``RDP_JAR_PATH`` as
    ``rdp_jar_fp`` is used; a falsy ``command_suffix`` becomes ``'; exit'``.

    Returns a ``(commands, result_filepaths)`` tuple of lists.
    """
    out_filenames = [job_prefix + suffix for suffix in
                     ('.%d_tax_assignments.log', '.%d_tax_assignments.txt')]

    if not command_prefix:
        command_prefix = '/bin/bash; export RDP_JAR_PATH=%s; ' % rdp_jar_fp
    if not command_suffix:
        command_suffix = '; exit'

    template = '%s %s %s -c %1.2f -m rdp -o %s -i %s %s %s'

    commands = []
    result_filepaths = []

    for i, fasta_fp in enumerate(fasta_fps):
        run_outputs = [fn % i for fn in out_filenames]
        rename_command, moved_fps = get_rename_command(
            run_outputs, working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        commands.append(template % (command_prefix,
                                    python_exe_fp,
                                    assign_taxonomy_fp,
                                    confidence,
                                    working_dir,
                                    fasta_fp,
                                    rename_command,
                                    command_suffix))

    return commands, result_filepaths

示例#14

0

显示文件

文件： pick_otus_uclust_ref.py 项目： Ecogenomics/FrankenQIIME

def get_job_commands(python_exe_fp, pick_otus_fp, fasta_fps,
                     output_dir, refseqs_fp, job_prefix, working_dir,
                     similarity, enable_rev_strand_match, optimal_uclust,
                     exact_uclust, max_accepts, max_rejects, stepwords,
                     word_length, stable_sort, save_uc_files,
                     command_prefix='/bin/bash; ', command_suffix='; exit'):
    """Build one uclust_ref pick_otus shell command per input fasta file.

    The i-th file in ``fasta_fps`` is expected to produce
    ``<job_prefix>.<i>_otus.log``, ``<job_prefix>.<i>_otus.txt`` and
    ``<job_prefix>.<i>_failures.txt``, plus ``<job_prefix><i>_clusters.uc``
    when ``save_uc_files`` is truthy. Those names are passed to
    ``get_rename_command`` (helper defined elsewhere) together with
    ``working_dir`` and ``output_dir``.

    Boolean arguments map to flags as follows: ``enable_rev_strand_match``
    -> ``-z``, ``optimal_uclust`` -> ``-A``, ``exact_uclust`` -> ``-E``,
    a falsy ``stable_sort`` -> ``--suppress_uclust_stable_sort``, a falsy
    ``save_uc_files`` -> ``-d``.

    Returns a ``(commands, result_filepaths)`` tuple of lists.
    """
    # Output name patterns; the placeholder is filled with the run index.
    out_filenames = [job_prefix + '.%d_otus.log',
                     job_prefix + '.%d_otus.txt',
                     job_prefix + '.%s_failures.txt']

    enable_rev_strand_match_str = '-z' if enable_rev_strand_match else ''
    optimal_uclust_str = '-A' if optimal_uclust else ''
    exact_uclust_str = '-E' if exact_uclust else ''
    stable_sort_str = '' if stable_sort else '--suppress_uclust_stable_sort'

    if save_uc_files:
        save_uc_files_str = ''
        # NOTE(review): unlike the patterns above, this one has no '.'
        # between job_prefix and the index - confirm against the name the
        # picker really writes before relying on the .uc file being moved.
        out_filenames.append(job_prefix + '%d_clusters.uc')
    else:
        save_uc_files_str = '-d'

    template = (
        '%s %s %s -i %s -r %s -m uclust_ref --suppress_new_clusters -o %s -s %s '
        '%s %s %s --max_accepts %s --max_rejects %s --stepwords %d --w %d '
        '%s %s %s %s')

    commands = []
    result_filepaths = []

    for i, fasta_fp in enumerate(fasta_fps):
        run_outputs = [fn % i for fn in out_filenames]
        rename_command, moved_fps = get_rename_command(
            run_outputs, working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        commands.append(template % (command_prefix,
                                    python_exe_fp,
                                    pick_otus_fp,
                                    fasta_fp,
                                    refseqs_fp,
                                    working_dir,
                                    similarity,
                                    enable_rev_strand_match_str,
                                    optimal_uclust_str,
                                    exact_uclust_str,
                                    max_accepts,
                                    max_rejects,
                                    stepwords,
                                    word_length,
                                    stable_sort_str,
                                    save_uc_files_str,
                                    rename_command,
                                    command_suffix))

    return commands, result_filepaths

示例#15

0

显示文件

def get_job_commands(python_exe_fp,
                     pick_otus_fp,
                     fasta_fps,
                     output_dir,
                     refseqs_fp,
                     job_prefix,
                     working_dir,
                     similarity,
                     enable_rev_strand_match,
                     optimal_uclust,
                     exact_uclust,
                     max_accepts,
                     max_rejects,
                     stable_sort,
                     save_uc_files,
                     command_prefix='/bin/bash; ',
                     command_suffix='; exit'):
    """Build one uclust_ref pick_otus shell command per input fasta file.

    The i-th file in ``fasta_fps`` is expected to produce
    ``<job_prefix>.<i>_otus.log``, ``<job_prefix>.<i>_otus.txt`` and
    ``<job_prefix>.<i>_failures.txt``, plus ``<job_prefix><i>_clusters.uc``
    when ``save_uc_files`` is truthy. Those names are passed to
    ``get_rename_command`` (helper defined elsewhere) together with
    ``working_dir`` and ``output_dir``.

    Boolean arguments map to flags as follows: ``enable_rev_strand_match``
    -> ``-z``, ``optimal_uclust`` -> ``-A``, ``exact_uclust`` -> ``-E``,
    ``stable_sort`` -> ``--uclust_stable_sort``, a falsy ``save_uc_files``
    -> ``-d``.

    Returns a ``(commands, result_filepaths)`` tuple of lists.
    """
    # Output name patterns; the placeholder is filled with the run index.
    out_filenames = [job_prefix + '.%d_otus.log',
                     job_prefix + '.%d_otus.txt',
                     job_prefix + '.%s_failures.txt']

    enable_rev_strand_match_str = '-z' if enable_rev_strand_match else ''
    optimal_uclust_str = '-A' if optimal_uclust else ''
    exact_uclust_str = '-E' if exact_uclust else ''
    stable_sort_str = '--uclust_stable_sort' if stable_sort else ''

    if save_uc_files:
        save_uc_files_str = ''
        # NOTE(review): unlike the patterns above, this one has no '.'
        # between job_prefix and the index - confirm against the name the
        # picker really writes before relying on the .uc file being moved.
        out_filenames.append(job_prefix + '%d_clusters.uc')
    else:
        save_uc_files_str = '-d'

    template = (
        '%s %s %s -i %s -r %s -m uclust_ref --suppress_new_clusters -o %s -s %s '
        '%s %s %s --max_accepts %s --max_rejects %s '
        '%s %s %s %s')

    commands = []
    result_filepaths = []

    for i, fasta_fp in enumerate(fasta_fps):
        run_outputs = [fn % i for fn in out_filenames]
        rename_command, moved_fps = get_rename_command(
            run_outputs, working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        commands.append(template % (command_prefix,
                                    python_exe_fp,
                                    pick_otus_fp,
                                    fasta_fp,
                                    refseqs_fp,
                                    working_dir,
                                    similarity,
                                    enable_rev_strand_match_str,
                                    optimal_uclust_str,
                                    exact_uclust_str,
                                    max_accepts,
                                    max_rejects,
                                    stable_sort_str,
                                    save_uc_files_str,
                                    rename_command,
                                    command_suffix))

    return commands, result_filepaths

示例#16

0

显示文件

def get_job_commands(python_exe_fp,
                     identify_chimeric_seqs_fp,
                     fasta_fps,
                     output_dir,
                     ref_seqs_fp,
                     job_prefix,
                     working_dir,
                     aligned_reference_seqs_fp,
                     blast_db,
                     chimera_detection_method,
                     min_div_ratio,
                     num_fragments,
                     taxonomy_depth,
                     max_e_value,
                     id_to_taxonomy_fp,
                     command_prefix='',
                     command_suffix=''):
    """Build one identify_chimeric_seqs shell command per input fasta file.

    The i-th file in ``fasta_fps`` writes to
    ``working_dir/<job_prefix>.<i>_chimeric.txt``; that filename is passed
    to ``get_rename_command`` (helper defined elsewhere) together with
    ``working_dir`` and ``output_dir``.

    ``chimera_detection_method`` selects the command layout:

    * ``'blast_fragments'`` - uses ``id_to_taxonomy_fp``, ``num_fragments``,
      ``taxonomy_depth`` and ``max_e_value``; ``-r`` / ``-b`` are added when
      ``ref_seqs_fp`` / ``blast_db`` are truthy.
    * ``'ChimeraSlayer'`` - uses ``aligned_reference_seqs_fp``;
      ``--min_div_ratio`` / ``-r`` are added when ``min_div_ratio`` /
      ``ref_seqs_fp`` are truthy.

    ``command_prefix`` and ``command_suffix`` both default to the empty
    string.

    Returns a ``(commands, result_filepaths)`` tuple of lists.

    Raises NotImplementedError for any other method, once there is at least
    one input file to process.
    """
    out_filenames = [job_prefix + '.%d_chimeric.txt']

    blast_template = ('%s %s %s -i %s -t %s -m blast_fragments -o %s '
                      '-n %s -d %s -e %s %s %s %s')
    slayer_template = '%s %s %s -i %s -a %s -m ChimeraSlayer -o %s %s %s %s'

    commands = []
    result_filepaths = []

    for i, fasta_fp in enumerate(fasta_fps):
        run_outputs = [fn % i for fn in out_filenames]
        rename_command, moved_fps = get_rename_command(
            run_outputs, working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        # Optional flags are collected as ' -x value' pieces and joined.
        extras = []

        if chimera_detection_method == 'ChimeraSlayer':
            if min_div_ratio:
                extras.append(" --min_div_ratio %s" % min_div_ratio)
            if ref_seqs_fp:
                extras.append(" -r %s" % ref_seqs_fp)
            command = slayer_template % (
                command_prefix,
                python_exe_fp,
                identify_chimeric_seqs_fp,
                fasta_fp,
                aligned_reference_seqs_fp,
                working_dir + "/" + out_filenames[0] % i,
                "".join(extras),
                rename_command,
                command_suffix)
        elif chimera_detection_method == 'blast_fragments':
            if ref_seqs_fp:
                extras.append(" -r %s" % ref_seqs_fp)
            if blast_db:
                extras.append(" -b %s" % blast_db)
            command = blast_template % (
                command_prefix,
                python_exe_fp,
                identify_chimeric_seqs_fp,
                fasta_fp,
                id_to_taxonomy_fp,
                working_dir + "/" + out_filenames[0] % i,
                num_fragments,
                taxonomy_depth,
                max_e_value,
                "".join(extras),
                rename_command,
                command_suffix)
        else:
            raise NotImplementedError

        commands.append(command)

    return commands, result_filepaths

示例#17

0

显示文件

文件： identify_chimeric_seqs.py 项目： Ecogenomics/FrankenQIIME

def get_job_commands(python_exe_fp, identify_chimeric_seqs_fp, fasta_fps,
                     output_dir, ref_seqs_fp, job_prefix, working_dir,
                     aligned_reference_seqs_fp, blast_db,
                     chimera_detection_method, min_div_ratio, num_fragments,
                     taxonomy_depth, max_e_value, id_to_taxonomy_fp,
                     command_prefix='', command_suffix=''):
    """Build one identify_chimeric_seqs shell command per input fasta file.

    The i-th file in ``fasta_fps`` writes to
    ``working_dir/<job_prefix>.<i>_chimeric.txt``; that filename is passed
    to ``get_rename_command`` (helper defined elsewhere) together with
    ``working_dir`` and ``output_dir``.

    ``chimera_detection_method`` selects the command layout:

    * ``'blast_fragments'`` - uses ``id_to_taxonomy_fp``, ``num_fragments``,
      ``taxonomy_depth`` and ``max_e_value``; ``-r`` / ``-b`` are added when
      ``ref_seqs_fp`` / ``blast_db`` are truthy.
    * ``'ChimeraSlayer'`` - uses ``aligned_reference_seqs_fp``;
      ``--min_div_ratio`` / ``-r`` are added when ``min_div_ratio`` /
      ``ref_seqs_fp`` are truthy.

    ``command_prefix`` and ``command_suffix`` both default to the empty
    string.

    Returns a ``(commands, result_filepaths)`` tuple of lists.

    Raises NotImplementedError for any other method, once there is at least
    one input file to process.
    """
    out_filenames = [job_prefix + '.%d_chimeric.txt']

    blast_template = ('%s %s %s -i %s -t %s -m blast_fragments -o %s '
                      '-n %s -d %s -e %s %s %s %s')
    slayer_template = '%s %s %s -i %s -a %s -m ChimeraSlayer -o %s %s %s %s'

    commands = []
    result_filepaths = []

    for i, fasta_fp in enumerate(fasta_fps):
        run_outputs = [fn % i for fn in out_filenames]
        rename_command, moved_fps = get_rename_command(
            run_outputs, working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        # Optional flags are collected as ' -x value' pieces and joined.
        extras = []

        if chimera_detection_method == 'ChimeraSlayer':
            if min_div_ratio:
                extras.append(" --min_div_ratio %s" % min_div_ratio)
            if ref_seqs_fp:
                extras.append(" -r %s" % ref_seqs_fp)
            command = slayer_template % (
                command_prefix,
                python_exe_fp,
                identify_chimeric_seqs_fp,
                fasta_fp,
                aligned_reference_seqs_fp,
                working_dir + "/" + out_filenames[0] % i,
                "".join(extras),
                rename_command,
                command_suffix)
        elif chimera_detection_method == 'blast_fragments':
            if ref_seqs_fp:
                extras.append(" -r %s" % ref_seqs_fp)
            if blast_db:
                extras.append(" -b %s" % blast_db)
            command = blast_template % (
                command_prefix,
                python_exe_fp,
                identify_chimeric_seqs_fp,
                fasta_fp,
                id_to_taxonomy_fp,
                working_dir + "/" + out_filenames[0] % i,
                num_fragments,
                taxonomy_depth,
                max_e_value,
                "".join(extras),
                rename_command,
                command_suffix)
        else:
            raise NotImplementedError

        commands.append(command)

    return commands, result_filepaths