def test_get_reference_sequences(self):
        """Check the location and checksum of the reference sequences file.

        The path returned by ``get_reference_sequences`` must contain the
        expected relative location, refer to an existing file, and be
        absolute. The digest that ``safe_md5`` reports for the file must
        equal the recorded value.
        """
        ref_seqs_fp = get_reference_sequences()
        expected_tail = os.path.join(
            'gg_13_8_otus', 'rep_set', '97_otus.fasta')

        self.assertIn(expected_tail, ref_seqs_fp)
        self.assertTrue(os.path.isfile(ref_seqs_fp))
        self.assertTrue(os.path.isabs(ref_seqs_fp))

        # The file is opened in binary mode before being handed to safe_md5.
        with open(ref_seqs_fp, 'rb') as handle:
            observed_md5 = safe_md5(handle).hexdigest()
        self.assertEqual(observed_md5, '50b2269712b3738afb41892bed936c29')
    def test_get_reference_sequences(self):
        """Verify the reference sequences path and the file's checksum.

        Asserts that the path from ``get_reference_sequences`` contains the
        expected relative location, that it is an absolute path to an
        existing file, and that the hex digest produced by ``safe_md5``
        matches the recorded value.
        """
        path_parts = ('gg_13_8_otus', 'rep_set', '97_otus.fasta')
        seqs_path = get_reference_sequences()

        self.assertIn(os.path.join(*path_parts), seqs_path)
        for path_check in (os.path.isfile, os.path.isabs):
            self.assertTrue(path_check(seqs_path))

        # Binary mode: the open file object is passed straight to safe_md5.
        with open(seqs_path, 'rb') as seqs_file:
            digest = safe_md5(seqs_file).hexdigest()
        self.assertEqual(digest, '50b2269712b3738afb41892bed936c29')
def get_reference_set():
    """Return the reference files to use for OTU picking.

    When ``ag.is_test_env()`` is true, the paths point at the ``otus.fna``
    and ``otus.txt`` files under ``tests/data`` in the directory returned by
    ``get_repository_dir``. Otherwise the paths come from ``qdr``.

    Returns
    -------
    str
        The file path to the reference sequences.
    str
        The file path to the reference taxonomy.
    """
    # Outside the test environment, defer to the qdr-provided reference.
    if not ag.is_test_env():
        return qdr.get_reference_sequences(), qdr.get_reference_taxonomy()

    repo_dir = get_repository_dir()
    seqs_fp = os.path.join(repo_dir, 'tests/data/otus.fna')
    tax_fp = os.path.join(repo_dir, 'tests/data/otus.txt')
    return seqs_fp, tax_fp
Exemplo n.º 4
0
def get_reference_set():
    """Pick the reference sequences and taxonomy used for OTU picking.

    In the test environment (``ag.is_test_env()`` is true) both paths are
    built relative to the directory returned by ``get_repository_dir``;
    in every other case they are obtained from ``qdr``.

    Returns
    -------
    str
        The file path to the reference sequences.
    str
        The file path to the reference taxonomy.
    """
    if ag.is_test_env():
        repo_root = get_repository_dir()
        # Order matters: sequences first, taxonomy second.
        test_files = ('tests/data/otus.fna', 'tests/data/otus.txt')
        return tuple(os.path.join(repo_root, rel) for rel in test_files)

    default_seqs = qdr.get_reference_sequences()
    default_tax = qdr.get_reference_taxonomy()
    return default_seqs, default_tax
Exemplo n.º 5
0
def defineTaxSeqsFile(uploadedSeqsFile):
    """Compare the uploaded sequences against the ``qdr`` reference set.

    Passes ``uploadedSeqsFile`` to ``compareWithDB`` together with the
    reference sequences and reference taxonomy obtained from ``qdr``, and
    returns whatever ``compareWithDB`` returns.
    """
    # The upload step is currently disabled; the call is kept for reference.
    #    uploadSeqsFile(uploadedSeqsFile)
    reference_seqs = qdr.get_reference_sequences()
    reference_tax = qdr.get_reference_taxonomy()
    return compareWithDB(uploadedSeqsFile, reference_seqs, reference_tax)
def main():
    """Run the closed-reference OTU picking workflow from the command line.

    Parses the options described by ``script_info``, loads the optional
    parameters file, validates the number of jobs to start, prepares the
    output directory and calls ``run_pick_closed_reference_otus``. When the
    commands were actually executed and the reference sequences in use are
    the ones returned by ``get_reference_sequences``, the matching reference
    tree is copied into the output directory.

    Raises
    ------
    IOError
        If the parameters file given on the command line cannot be opened.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_fp = opts.input_fp
    reference_fp = opts.reference_fp
    taxonomy_fp = opts.taxonomy_fp
    output_dir = opts.output_dir
    verbose = opts.verbose
    print_only = opts.print_only
    assign_taxonomy = opts.assign_taxonomy

    if opts.suppress_taxonomy_assignment:
        assign_taxonomy = False
        taxonomy_fp = None

    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    # if parallel: raise_error_on_parallel_unavailable()

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? Do you have read access?"
                          % opts.parameter_fp)
        # Close the handle even when parsing the parameters raises.
        with parameter_f:
            params = parse_qiime_parameters(parameter_f)
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
        try:
            makedirs(output_dir)
        except OSError:
            # The directory could not be created; only continue when the
            # user explicitly asked to overwrite.
            if not opts.force:
                option_parser.error("Output directory already exists. Please choose"
                                    " a different directory, or force overwrite with -f.")

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_pick_closed_reference_otus(
        input_fp,
        reference_fp,
        output_dir,
        taxonomy_fp,
        assign_taxonomy=assign_taxonomy,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        parallel=parallel,
        status_update_callback=status_update_callback)

    # In print-only mode the output directory is not created above, so
    # copying the tree there would fail; only copy after a real run.
    if not print_only and get_reference_sequences() == reference_fp:
        reference_tree_fp = get_reference_tree()
        fn = basename(reference_tree_fp)
        copyfile(reference_tree_fp, join(output_dir, fn))
from os.path import basename, join

from qiime_default_reference import (get_reference_sequences,
                                      get_reference_tree)

from qiime.util import (load_qiime_config, parse_command_line_parameters,
    get_options_lookup, make_option)
from qiime.parse import parse_qiime_parameters
from qiime.workflow.upstream import run_pick_closed_reference_otus
from qiime.workflow.util import (print_commands, call_commands_serially,
    print_to_stdout, no_status_updates, validate_and_set_jobs_to_start)

# Module-level configuration shared by the option definitions and main().
qiime_config = load_qiime_config()
options_lookup = get_options_lookup()

# Start from the short help text and switch to the extended note only when
# the configured OTU-picking reference equals the default reference set.
reference_fp_help = "The reference sequences [default: %default]."
if get_reference_sequences() == qiime_config['pick_otus_reference_seqs_fp']:
    reference_fp_help = (
             "The reference sequences [default: %default]. " +
             "NOTE: If you do not pass -r to this script, you will be using "
             "QIIME's default reference sequences. In this case, QIIME will "
             "copy the corresponding reference tree to the output directory. "
             "This is the tree that should be used to perform phylogenetic "
             "diversity analyses (e.g., with core_diversity_analyses.py).")

script_info = {
    'brief_description':
        "Closed-reference OTU picking/Shotgun UniFrac workflow.",
}
script_info['script_description'] = """
This script picks OTUs using a closed reference and constructs an OTU table.
def main():
    """Run the closed-reference OTU picking workflow from the command line.

    Parses the options described by ``script_info``, loads the optional
    parameters file, validates the number of jobs to start, prepares the
    output directory and calls ``run_pick_closed_reference_otus``. When the
    commands were actually executed and the reference sequences in use are
    the ones returned by ``get_reference_sequences``, the matching reference
    tree is copied into the output directory.

    Raises
    ------
    IOError
        If the parameters file given on the command line cannot be opened.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_fp = opts.input_fp
    reference_fp = opts.reference_fp
    taxonomy_fp = opts.taxonomy_fp
    output_dir = opts.output_dir
    verbose = opts.verbose
    print_only = opts.print_only
    assign_taxonomy = opts.assign_taxonomy

    if opts.suppress_taxonomy_assignment:
        assign_taxonomy = False
        taxonomy_fp = None

    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    # if parallel: raise_error_on_parallel_unavailable()

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError(
                "Can't open parameters file (%s). Does it exist? Do you have read access?"
                % opts.parameter_fp)
        # Close the handle even when parsing the parameters raises.
        with parameter_f:
            params = parse_qiime_parameters(parameter_f)
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params, jobs_to_start,
                                   default_jobs_to_start, parallel,
                                   option_parser)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
        try:
            makedirs(output_dir)
        except OSError:
            # The directory could not be created; only continue when the
            # user explicitly asked to overwrite.
            if not opts.force:
                option_parser.error(
                    "Output directory already exists. Please choose"
                    " a different directory, or force overwrite with -f.")

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_pick_closed_reference_otus(
        input_fp,
        reference_fp,
        output_dir,
        taxonomy_fp,
        assign_taxonomy=assign_taxonomy,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        parallel=parallel,
        status_update_callback=status_update_callback)

    # In print-only mode the output directory is not created above, so
    # copying the tree there would fail; only copy after a real run.
    if not print_only and get_reference_sequences() == reference_fp:
        reference_tree_fp = get_reference_tree()
        fn = basename(reference_tree_fp)
        copyfile(reference_tree_fp, join(output_dir, fn))
from qiime_default_reference import (get_reference_sequences,
                                     get_reference_tree)

from qiime.util import (load_qiime_config, parse_command_line_parameters,
                        get_options_lookup, make_option)
from qiime.parse import parse_qiime_parameters
from qiime.workflow.upstream import run_pick_closed_reference_otus
from qiime.workflow.util import (print_commands, call_commands_serially,
                                 print_to_stdout, no_status_updates,
                                 validate_and_set_jobs_to_start)

# Configuration and option lookup are loaded once, at import time.
qiime_config = load_qiime_config()
options_lookup = get_options_lookup()

# The extended note about the reference tree is only shown when the
# configured OTU-picking reference equals the default reference set.
if get_reference_sequences() != qiime_config['pick_otus_reference_seqs_fp']:
    reference_fp_help = "The reference sequences [default: %default]."
else:
    reference_fp_help = (
        "The reference sequences [default: %default]. " +
        "NOTE: If you do not pass -r to this script, you will be using "
        "QIIME's default reference sequences. In this case, QIIME will "
        "copy the corresponding reference tree to the output directory. "
        "This is the tree that should be used to perform phylogenetic "
        "diversity analyses (e.g., with core_diversity_analyses.py).")

script_info = {
    'brief_description':
        "Closed-reference OTU picking/Shotgun UniFrac workflow.",
}
script_info['script_description'] = """
This script picks OTUs using a closed reference and constructs an OTU table.
Exemplo n.º 10
0
from qiime_default_reference import get_template_alignment, get_reference_sequences

from skbio import SequenceCollection

# (id, sequence string) pairs read from the template alignment. The full list
# is built first and then sliced down to its first 500 entries.
gapped_sequences = [
    (s.id, str(s)) for s in SequenceCollection.read(get_template_alignment())
][:500]

# (id, sequence string) pairs read from the reference sequences, likewise
# limited to the first 500 entries.
sequences = [(s.id, str(s))
             for s in SequenceCollection.read(get_reference_sequences())][:500]

# NOTE(review): presumably a nucleotide motif to look for in the sequences
# above -- confirm against the code that uses it.
motif_1 = "GGTGCAAGCCGGTGGAAACA"


def pairwise(l):
    """Group consecutive items into pairs trimmed to a common length.

    Items are consumed two at a time (1st with 2nd, 3rd with 4th, ...); a
    trailing item without a partner is dropped. Both members of each pair
    are sliced to the length of the shorter one.

    Parameters
    ----------
    l : iterable
        Items supporting ``len`` and slicing.

    Returns
    -------
    list of tuple
        One ``(first, second)`` tuple per consumed pair.
    """
    def _trim(first, second):
        # Cut both members down to the length of the shorter one.
        width = min(len(first), len(second))
        return first[:width], second[:width]

    # Zipping one iterator with itself yields non-overlapping pairs.
    stream = iter(l)
    return [_trim(first, second) for first, second in zip(stream, stream)]
Exemplo n.º 11
0
from qiime_default_reference import get_template_alignment, get_reference_sequences

from skbio import SequenceCollection

# (id, sequence string) pairs read from the template alignment. The full list
# is built first and then sliced down to its first 500 entries.
gapped_sequences = [(s.id, str(s)) for s in SequenceCollection.read(get_template_alignment())][:500]

# (id, sequence string) pairs read from the reference sequences, likewise
# limited to the first 500 entries.
sequences = [(s.id, str(s)) for s in SequenceCollection.read(get_reference_sequences())][:500]

# NOTE(review): presumably a nucleotide motif to look for in the sequences
# above -- confirm against the code that uses it.
motif_1 = "GGTGCAAGCCGGTGGAAACA"