Exemplo n.º 1
0
# Short and long descriptions of the script, shown in its help output.
script_info['brief_description'] = "Split taxonomy by level and match the otu ID to the corresponding nucleotide sequence at each specific taxonomy level"
script_info['script_description'] = "This script splits taxonomy into different taxonomy levels and matches the otu ID to its corresponding nucleotide sequence at a specified taxonomic level. The output file contains the sequence ID and its nucleotide sequence at a specific taxonomy level under its own taxonomy directory "
# Usage examples; each entry is a (title, description, command) tuple.
script_info['script_usage'] = [
    ("Split taxonomy and match the distinct otu ID to its corresponding nucleotide sequence at a specified taxonomy level in one file.",
     "Split taxonomy in a taxonomy file and match the distinct otu ID to its corresponding nucleotide sequence based on the fasta file, which corresponds to the taxonomy file at a specified taxonomy level, which is indicated by numbers 1, 2, 3..., for instance: to split on phylum level pass 6 in the option.  Write the results to each taxonomy file based on each otu level",
     # -l takes an integer, so its value must be separated from -o by a
     # space ("6_-o" is not a valid integer and would hide the -o option).
     "%prog -t ./gg_12_10_otus/taxonomy/61_otu_taxonomy.txt -f ./gg_12_10_otus/rep_set/61_otus.fasta -l 6 -o IDseq"),
    ("Split taxonomy and match distinct otu ID to its corresponding nucleotide sequence at a specified taxonomy level in two files.",
     "Split taxonomy in two taxonomy files and match distinct otu ID to its corresponding nucleotide sequence based on two fasta files, which correspond to the taxonomy files  at a specified taxonomy level, which is indicated by numbers 1, 2, 3..., for instance: to split on phylum level pass 6 in the option. Write the results to each taxonomy file based on each otu level.",
     "%prog -t ./gg_12_10_otus/taxonomy/61_otu_taxonomy.txt,./gg_12_10_otus/taxonomy/64_otu_taxonomy.txt -f ./gg_12_10_otus/rep_set/61_otus.fasta,./gg_12_10_otus/rep_set/64_otus.fasta -o IDseq"),
    ("Split taxonomy and match distinct otu ID to its corresponding nucleotide sequence at a specified taxonomy level in more than two files.",
     "Split taxonomy in more than two taxonomy files and match distinct otu ID to its corresponding nucleotide sequence based on more than two fasta file, which correspond to their taxonomy file at a specified taxonomy level, which is indicated by numbers 1, 2, 3..., for instance: to split on phylum level pass 6 in the option. Write the results to each taxonomy file based on each otu level",
     "%prog -t \"./gg_12_10_otus/taxonomy/*.txt\" -f \"./gg_12_10_otus/rep_set/*.fasta\" -o IDseq"),
]
# Description of what the script writes.
script_info['output_description'] = "The sequence ID and its nucleotide sequence at specified taxonomy level is written to its specific taxonomy file"
# Options the user must supply on the command line.
script_info['required_options'] = [
    make_option('-t', '--input_taxonomy_fps', type="existing_filepaths",
                # Trailing space keeps "and" and "its" apart once the
                # adjacent literals are joined.
                help='Input taxonomy files containing otuID and '
                     'its corresponding taxonomies'),
    make_option('-f', '--input_fasta_fps', type="existing_filepaths",
                help='Input fasta files, which correspond its taxonomy files'),
    make_option('-l', '--taxonomy_level', type='int',
                # Spaces around the joined level list keep it from running
                # into the surrounding text in the rendered help.
                help='Split input files at the specific taxonomy level.'
                     ' Valid taxonomy levels are: ' +
                     ','.join(taxonomy_levels) + ' [default: %default]',
                default=7),
    make_option('-o', '--output_dir', type="new_dirpath",
                help='the output directory containing classified taxonomy directories')
]
script_info['version'] = __version__


from qcli.util import qcli_system_call
from search_seq_against_HMM import search_HMM,create_temp_test_seq_file
from search_seq_against_HMM import taxonomy_assignment_to_query_seq
from qcli import (parse_command_line_parameters, 
                  make_option)

# Command-line interface description handed to parse_command_line_parameters.
script_info = {
    'brief_description': "Assign specific taxonomy for each query DNA nucleotide sequence",
    'script_description': "Search query DNA nucleotide sequences against HMM profiles and return the specific taxonomy assignment of corresponding query sequences. HMM profiles are built from a series of DNA alignment files, which are in Stockholm format and from greengenes 13.8 database. ",
    # One usage example: (title, description, command).
    'script_usage': [
        (
            "It takes query DNA sequences and HMM database as input and output the taxonomy assignment for each query sequence",
            "It accepts any FASTA file as target HMM profiles input. It also accepts EMBL/UniPort text format, and Genbank format. This script takes query DNA nucleotide sequences, whcih can be single or multiple sequences and HMM database as input. In this case, HMM database has to be created from a database, which is a tree structure.",
            "%prog -i test_seqs.fasta -b HMMprofiles",
        ),
    ],
    'output_description': "",
    # Both the query file and the HMM profile directory are mandatory.
    'required_options': [
        make_option('-i', '--input_query_fp', type="existing_filepath",
                    help='the input query sequences filepath'),
        make_option('-b', '--input_HMM_fp', type="existing_dirpath",
                    help='the input HMM profiles filepath'),
    ],
    'optional_options': [],
    'version': __version__,
}



def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    
    Query_collection=[] 
    rank_collection=[]
    Query_dict=defaultdict(list)
    temp_dir_name=tempfile.mkdtemp(prefix='root_')
Exemplo n.º 3
0
# Module-level metadata; __version__ is reused below as script_info['version'].
__license__ = "GPL"
__version__ = "0.0.0"
__maintainer__ = "Greg Caporaso"
__email__ = "*****@*****.**"
__status__ = "Development"


from qcli import (parse_command_line_parameters, 
                  make_option)

# Command-line interface description handed to parse_command_line_parameters.
script_info = {
    'brief_description': "An example script.",
    'script_description': "This script is just an example, nothing exciting here.",
    # One usage example: (title, description, command).
    'script_usage': [
        ("Example usage", "Run the script in help mode", "%prog -h"),
    ],
    'output_description': "No output is created... it's just an example.",
    'required_options': [
        make_option('-i', '--input_fp', type="existing_filepath",
                    help='the input filepath'),
    ],
    'optional_options': [
        make_option('-o', '--output_dir', type="new_dirpath",
                    help='the output directory [default: %default]'),
    ],
    'version': __version__,
}

def main():
    """Parse the command line as described by ``script_info``.

    The parsed parser, options and positional arguments are not used
    further in this example script.
    """
    parsed = parse_command_line_parameters(**script_info)
    option_parser, opts, args = parsed


# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Exemplo n.º 4
0


# Command-line interface description handed to parse_command_line_parameters.
script_info = {}

script_info['brief_description'] = "Look for .cm files and cmpress them"

script_info['script_description'] = "Before building the profile HMM on each file, each .cm file is needed to perform the cmpress command "

# NOTE: usage entries are (title, description, command) triples; the earlier
# two-element tuple omitted the title, so one is supplied here.
script_info['script_usage'] = [
    ("Example usage",
     "Takes the directory containing .cm files as input",
     "%prog -i ~/Desktop/fasta_by_taxonomy"),
]

# The directory to process is the only (mandatory) option.
script_info['required_options'] = [
    make_option('-i', '--input_dir', type="existing_dirpath",
                help='Input the fasta_by_taxonomy directory')
]
script_info['version'] = __version__



def main():
    """Parse the command line and hand ``input_dir`` to ``search_cmfile_to_cmpress``."""
    _parser, options, _positional = parse_command_line_parameters(**script_info)
    search_cmfile_to_cmpress(options.input_dir)

# Run main() only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Exemplo n.º 5
0
"""
# Usage examples; each entry is a (title, description, command) tuple.
script_info['script_usage'] = []
script_info['script_usage'].append((
    # A backslash-newline inside the literal joins the lines with nothing in
    # between, so the space after "map.txt," has to be written explicitly.
    """General Example: Specify forward read and reverse read \
fasta files, use the metadata mapping file map.txt, \
and output the data to output_dir""",
    """output_dir""",
    # The barcode type option's short form is -b (long form --barcode_type).
    """%prog -i fwd_read.fq,rev_read.fq -m map.txt -o output -b 7"""
))
# Description of what the script writes.
script_info['output_description'] = """The %prog generates:\
 A fasta file called seqs.fna which contains\
 error corrected consensus sequence for the template DNA\
"""
# Options the user must supply on the command line.
script_info['required_options'] = [
    make_option('-i', '--sequence_read_fps', type='existing_filepaths',
                help='the forward and reverse sequence read fastq files '
                '(comma-separated)'),
    make_option('-o', '--output_dir', type='new_dirpath',
                help='directory to store output files'),
    make_option('-m', '--mapping_fp', type='existing_filepath',
                help='metadata mapping file')
]
script_info['optional_options'] = [
    make_option('-b', '--barcode_type', type='string',
                help='the type of barcode used. This can be an integer, e.g. '
                '6 for length 6 barcodes, or golay_12 for golay error-'
                'correcting barcodes. Error correction will only be '
                'applied for golay_12 barcodes [default: %default]',
                default='golay_12'),
    make_option('--max_barcode_errors', type='float',
                help='the maximum allowable number of errors in the barcode '
error-corrected consensus sequence for the initial template molecule.
"""
# Usage examples; each entry is a (title, description, command) tuple.
script_info['script_usage'] = []
script_info['script_usage'].append(
    # A backslash-newline inside the literal joins the lines with nothing in
    # between, so the space after "map.txt," has to be written explicitly.
    ("""General Example: Specify forward read and reverse read \
fasta files, use the metadata mapping file map.txt, \
and output the data to output_dir""", """output_dir""",
     # The barcode type option's short form is -b (long form --barcode_type).
     """%prog -i fwd_read.fq,rev_read.fq -m map.txt -o output -b 7"""))
# Description of what the script writes.
script_info['output_description'] = """The %prog generates:\
 A fasta file called seqs.fna which contains\
 error corrected consensus sequence for the template DNA\
"""
# Options the user must supply on the command line.
script_info['required_options'] = [
    make_option('-i',
                '--sequence_read_fps',
                type='existing_filepaths',
                help='the forward and reverse sequence read fastq files '
                '(comma-separated)'),
    make_option('-o',
                '--output_dir',
                type='new_dirpath',
                help='directory to store output files'),
    make_option('-m',
                '--mapping_fp',
                type='existing_filepath',
                help='metadata mapping file')
]
script_info['optional_options'] = [
    make_option('-b',
                '--barcode_type',
                type='string',
Exemplo n.º 7
0
        "Split taxonomy in two taxonomy files and match destinctive sequence ID to its corresponding nucleotide sequence based on two fasta files, which correspond to their taxonomy files  at specified taxonomy level, like phylum taxonomy level. Write the results to each taxonomy file based on their characteristics.",
        "%prog -t ./gg_12_10_otus/taxonomy/61_otu_taxonomy.txt,./gg_12_10_otus/taxonomy/64_otu_taxonomy.txt -f ./gg_12_10_otus/rep_set/61_otus.fasta,./gg_12_10_otus/rep_set/64_otus.fasta -o IDseq",
    ),
    (
        "Split taxonomy and match destinctive sequence ID to its corresponding nucleotide sequence at specified taxonomy level in more than two files.",
        "Split taxonomy in more than two taxonomy files and match destinctive sequence ID to its corresponding nucleotide sequence based on more than two fasta file, which correspond to their taxonomy file at specified taxonomy level, like phylum taxonomy level. Write the results to each taxonomy file based on their characteristics",
        '%prog -t "./gg_12_10_otus/taxonomy/*.txt" -f "./gg_12_10_otus/rep_set/*.fasta" -o IDseq',
    ),
]
# Description of what the script writes, stored under "output_description".
script_info["output_description"] = (
    "The sequence ID and its nucleotide sequence at specified taxonomy level"
    " is written to its specific taxonomy file"
)
script_info["required_options"] = [
    make_option(
        "-t",
        "--input_taxonomy_fps",
        type="existing_filepaths",
        help="Input taxonomy files containing seqID and" + "its corresponding taxonomies",
    ),
    make_option(
        "-f",
        "--input_fasta_fps",
        type="existing_filepaths",
        help="Input fasta files, which correspond its taxonomy files",
    ),
    make_option(
        "-l",
        "--taxonomy_level",
        type="choice",
        help="Split input files at the specific taxonomy level."
        " Valid taxonomy levels are:" + ",".join(taxonomy_levels) + "[default: %default]",
        choices=taxonomy_levels,
Exemplo n.º 8
0
                  make_option)

# Command-line interface description handed to parse_command_line_parameters.
script_info = {
    'brief_description': "Assign specific taxonomy for each query nucleotide sequence",
    'script_description': "Search the query 16S rRNA nucleotide sequences against the profile HMMs database and return the specific taxonomy classificaiton of the corresponding query sequences. The profile HMMs database are built from a series of 16S rRNA alignment files, which are in Stockholm format and from gg_13_5_otus database. ",
    # One usage example: (title, description, command).
    'script_usage': [
        (
            "It takes the query nucleotide sequences and the HMM database as input and output the taxonomic classificaiton for each query sequence",
            "It accepts any FASTA file as target HMM profiles input. It also accepts EMBL/UniPort text format, and Genbank format. This script takes query DNA nucleotide sequences, whcih can be single or multiple sequences and HMM database as input. In this case, HMM database has to be created from a database, which is a tree structure.",
            "%prog -i test_seqs.fasta -b profileHMMs",
        ),
    ],
    'output_description': "",
    # Query file, profile HMM directory and output file are all mandatory.
    'required_options': [
        make_option('-i', '--input_query_fp', type="existing_filepath",
                    help='the input query sequences filepath'),
        make_option('-b', '--input_HMM_fp', type="existing_dirpath",
                    help='the input profile HMMs filepath'),
        make_option('-o', '--output_fp', type="new_filepath",
                    help='the output file containing classified taxonomy names'),
    ],
    'optional_options': [],
    'version': __version__,
}



def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    
    Query_collection=[]