Пример #1
0
def parse_cli_arguments():
    """Create the HMP2 Proteomics workflow and declare its custom options.

    Despite the name, no parsing happens here: the function only builds the
    Workflow object and registers the command-line options it accepts.

    Args:
        None
    Returns:
        Workflow: The workflow object for this pipeline.
    """
    pipeline = Workflow(
        version='0.1',
        description='A workflow to handle HMP2 Proteomics data.',
        remove_options=['input', 'output'])

    # (option name, help text) pairs, registered in this order.
    # 'data_specific_metadata' is the only underscore-separated option name
    # in this function; it is kept verbatim.
    option_help = (
        ('manifest-file',
         'Manifest file containing files to process in this workflow run.'),
        ('config-file',
         'Configuration file containing parameters required by the '
         'workflow.'),
        ('checksums-file',
         'MD5 checksums for files found in the supplied input directory.'),
        ('data_specific_metadata',
         'A collection of dataset specific metadata that should be '
         'integrated with any analysis output (creating a PCL file).'),
    )
    for option_name, help_text in option_help:
        pipeline.add_argument(option_name, desc=help_text)

    return pipeline
Пример #2
0
def parse_cli_arguments():
    """Create the DCC upload workflow and declare its custom options.

    Nothing is parsed here; the function only builds the Workflow object and
    registers the command-line options it accepts.

    Args:
        None
    Returns:
        Workflow: The workflow object for this pipeline.
    """
    pipeline = Workflow(
        version='0.1',
        description=('A workflow to handle uploading metadata and data '
                     'files to the Data Coordination Center (DCC)'),
        remove_options=['input', 'output'])

    # Each entry is (option name, keyword arguments for add_argument).
    # Only 'metadata-file' is given an explicit default.
    option_specs = [
        ('manifest-file', {
            'desc': 'Manifest file containing files to process in this '
                    'workflow run.'}),
        ('metadata-file', {
            'desc': 'Accompanying metadata file for the provided data '
                    'files.',
            'default': None}),
        ('baseline-metadata-file', {
            'desc': 'Metadata file containing baseline visit metadata per '
                    'subject.'}),
        ('config-file', {
            'desc': 'Configuration file containing parameters required by '
                    'the workflow.'}),
    ]
    for option_name, option_kwargs in option_specs:
        pipeline.add_argument(option_name, **option_kwargs)

    return pipeline
Пример #3
0
def parse_cli_arguments():
    """Create the HMP2 host exome workflow and declare its custom options.

    Nothing is parsed here; the function only builds the Workflow object and
    registers the command-line options it accepts.

    Args:
        None
    Returns:
        Workflow: The workflow object for this pipeline.
    """
    pipeline = Workflow(
        version='0.1',
        description='A workflow to handle HMP2 host exome data.',
        remove_options=['input', 'output'])

    # File-based options carry no default.
    file_options = (
        ('manifest-file',
         'Manifest file containing files to process in this workflow run.'),
        ('config-file',
         'Configuration file containing parameters required by the '
         'workflow.'),
    )
    for option_name, help_text in file_options:
        pipeline.add_argument(option_name, desc=help_text)

    # Thread count falls back to a single thread.
    pipeline.add_argument(
        'threads',
        desc='Number of threads to use in workflow processing',
        default=1)

    return pipeline
Пример #4
0
def parse_cli_arguments():
    """Create the HMP2 16S visualization workflow and declare its options.

    Nothing is parsed here; the function only builds the Workflow object and
    registers the command-line options it accepts.

    Args:
        None
    Returns:
        Workflow: The workflow object for this pipeline.
    """
    pipeline = Workflow(
        version='1.0',
        description='A workflow to handle visualization of HMP2 16S data.')

    # Each entry is (option name, keyword arguments for add_argument).
    option_specs = [
        ('config-file', {
            'desc': 'Configuration file containing parameters required by '
                    'the workflow.'}),
        ('metadata-file', {
            'desc': 'Accompanying metadata file for the provided data '
                    'files.',
            'default': None}),
        # 'source' is restricted to the two listed producers.
        ('source', {
            'desc': 'The source of the output files generated. '
                    '[biobakery, CMMR]',
            'default': 'biobakery',
            'choices': ['biobakery', 'CMMR']}),
    ]
    for option_name, option_kwargs in option_specs:
        pipeline.add_argument(option_name, **option_kwargs)

    return pipeline
Пример #5
0
def parse_cli_arguments():
    """Create the assembly / gene-calling workflow and declare its options.

    Nothing is parsed here; the function only builds the Workflow object and
    registers the command-line options it accepts.

    Args:
        None
    Returns:
        Workflow: The workflow object for this pipeline.
    """
    pipeline = Workflow(
        version='0.1',
        description=('A workflow to assemble metagenomic data and run a '
                     'gene caller on the resulting contigs.'))

    pipeline.add_argument(
        'contaminant-db',
        desc='KneadData DNA contaminants database.')

    # Options that come with a default: (name, help text, default value).
    # NOTE(review): the 'memory' help says GB but the default is '10240',
    # which looks like a megabyte figure -- confirm the intended unit.
    defaulted_options = (
        ('file-extension',
         'Extension of input files to assemble and gene call on.',
         '.fastq.gz'),
        ('threads',
         'number of threads/cores for each task to use',
         1),
        ('memory',
         'The amount of memory to use for each assembly job. Provided in GB',
         '10240'),
    )
    for option_name, help_text, fallback in defaulted_options:
        pipeline.add_argument(option_name, desc=help_text, default=fallback)

    return pipeline
Пример #6
0
def parse_cli_arguments():
    """Create the HMP2 Metabolomics workflow and declare its custom options.

    Nothing is parsed here; the function only builds the Workflow object and
    registers the command-line options it accepts.

    Args:
        None
    Returns:
        Workflow: The workflow object for this pipeline.
    """
    pipeline = Workflow(
        version='0.1',
        description='A workflow to handle HMP2 Metabolomic data.',
        remove_options=['input', 'output'])

    # Each entry is (option name, keyword arguments for add_argument).
    # NOTE(review): 'ouptut' in the aux_metadata help text is a typo in the
    # user-facing string; it is reproduced verbatim here.
    option_specs = [
        ('manifest-file', {
            'desc': 'Manifest file containing files to process in this '
                    'workflow run.'}),
        ('config-file', {
            'desc': 'Configuration file containing parameters required by '
                    'the workflow.'}),
        ('metadata-file', {
            'desc': 'Accompanying metadata file for the provided data '
                    'files.',
            'default': None}),
        ('aux_metadata', {
            'desc': 'Any additional metadata files that can supply '
                    'metadata for our ouptut PCL files.'}),
    ]
    for option_name, option_kwargs in option_specs:
        pipeline.add_argument(option_name, **option_kwargs)

    return pipeline
Пример #7
0
def parse_cli_arguments():
    """Create the 16S amplicon analysis workflow and declare its options.

    Nothing is parsed here; the function only builds the Workflow object and
    registers the command-line options it accepts.

    Args:
        None
    Returns:
        Workflow: The workflow object for this pipeline.
    """
    pipeline = Workflow(
        version='0.1',
        description='A workflow to handle analysis of 16S amplicon data.',
        remove_options=['input', 'output'])

    # Options without a default.
    for option_name, help_text in (
            ('manifest-file',
             'Manifest file containing files to process in this workflow '
             'run.'),
            ('config-file',
             'Configuration file containing parameters required by the '
             'workflow.')):
        pipeline.add_argument(option_name, desc=help_text)

    # Options with a default.
    pipeline.add_argument(
        'metadata-file',
        desc='Accompanying metadata file for the provided data files.',
        default=None)
    pipeline.add_argument(
        'threads',
        desc='number of threads/cores for each task to use',
        default=1)

    return pipeline
Пример #8
0
def parse_cli_arguments():
    """Create the HMP2 Metaviromics visualization workflow and its options.

    Nothing is parsed here; the function only builds the Workflow object and
    registers the command-line options it accepts.

    Args:
        None
    Returns:
        Workflow: The workflow object for this pipeline.
    """
    pipeline = Workflow(
        version='1.0',
        description=('A workflow to handle visualization of HMP2 '
                     'Metaviromics data.'))

    config_help = ('Configuration file containing parameters required by '
                   'the workflow.')
    metadata_help = 'Accompanying metadata file for the provided data files.'

    pipeline.add_argument('config-file', desc=config_help)
    pipeline.add_argument('metadata-file', desc=metadata_help, default=None)

    return pipeline
Пример #9
0
THE SOFTWARE.
"""

# import the workflow class from anadama2
from anadama2 import Workflow

# import the document templates from biobakery_workflows
from biobakery_workflows import document_templates, utilities

# import the files for descriptions and paths
from biobakery_workflows import files
import os

# create a workflow instance, providing the version number and description
# remove the input folder option as it will be replaced with multiple input files
workflow = Workflow(
    version="0.1",
    description="A workflow for 16S visualization",
    remove_options=["input"],
)

# The stock "input" option was removed above; it is re-declared here with a
# longer help text that introduces the expected contents of the folder.
input_desc = (
    "A folder containing the final products from the 16s data workflow.\n\n"
    "The input folder must include the following:\n\n"
)
workflow.add_argument("input", desc=input_desc, required=True)

# Report-specific options.
workflow.add_argument(
    "project-name",
    desc="the name of the project",
    required=True,
)
workflow.add_argument(
    "input-metadata",
    desc="the metadata file (samples as columns or rows)",
)
workflow.add_argument(
    "input-picard",
    desc="the folder of picard quality score files",
)
workflow.add_argument(
    "input-picard-extension",
    desc="the extensions for the picard quality score files",
    default="quality_by_cycle_metrics",
)

# Repeatable metadata selectors: every occurrence on the command line is
# appended to the option's list.
workflow.add_argument(
    "metadata-categorical",
    desc="the categorical features",
    action="append",
    default=[],
)
workflow.add_argument(
    "metadata-continuous",
    desc="the continuous features",
    action="append",
    default=[],
)
workflow.add_argument(
    "metadata-exclude",
    desc="the features to exclude",
    action="append",
    default=[],
)
import os

# import the workflow class from anadama2
from anadama2 import Workflow

# import the document templates from biobakery_workflows
from biobakery_workflows import utilities

# import the files for descriptions and paths
from biobakery_workflows import files

# create a workflow instance, providing the version number and description
# remove the input folder option as it will be replaced with multiple input files
workflow = Workflow(
    version="0.1",
    description=("A workflow for whole metagenome and metatranscriptome "
                 "shotgun sequence visualization"),
    remove_options=["input"],
)

# Names of the files each data type contributes, split into the ones that
# must be present in the input folder and the ones that may be absent.
wmgx_input_files = dict(
    required=[
        "kneaddata_read_counts",
        "taxonomic_profile",
        "pathabundance_relab",
    ],
    optional=["humann2_read_counts", "feature_counts"],
)
wmtx_input_files = dict(
    required=["kneaddata_read_counts"],
    optional=["humann2_read_counts", "feature_counts"],
)
norm_input_files = {
Пример #11
0
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""

from anadama2 import Workflow
import os, sys, fnmatch

from biobakery_workflows.tasks import sixteen_s, dadatwo, general
from biobakery_workflows import utilities, config, files

# create a workflow instance, providing the version number and description
workflow = Workflow(
    version="0.1",
    description="A workflow for 16S sequencing data",
)

# Settings object for the 16S workflow, created before the options are
# declared.
workflow_config = config.SixteenS()

# Processing method; "vsearch" is used when none is given.
workflow.add_argument(
    "method",
    desc="method to process 16s workflow",
    choices=["usearch", "dada2", "vsearch", "its"],
    default="vsearch",
)

# Reference database for the dada2 method; "silva" is used when none is given.
workflow.add_argument(
    "dada-db",
    desc="reference database for dada2 workflow",
    choices=["gg", "rdp", "silva", "unite"],
    default="silva",
)
workflow.add_argument(
    "usearch-db",
    desc=
    "full paths for the reference databases (fna and taxonomy, comma delimited) for the usearch workflow",
Пример #12
0
# -*- coding: utf-8 -*-
from anadama2 import Workflow

# Example workflow: list the executables in two directories and join the
# two sorted listings. The standard "input"/"output" options are removed.
# NOTE(review): the [t:...] / [d:...] markers in the command strings
# presumably flag target and dependency files respectively -- confirm
# against the AnADAMA2 documentation.
workflow = Workflow(remove_options=["input", "output"])
# Sorted listing of /usr/bin.
workflow.do("ls /usr/bin/ | sort > [t:global_exe.txt]")
# Sorted listing of the user's local bin directory.
workflow.do("ls $HOME/.local/bin/ | sort > [t:local_exe.txt]")
# Names that appear in both listings.
workflow.do("join [d:global_exe.txt] [d:local_exe.txt] > [t:match_exe.txt]")
# Run the workflow.
workflow.go()
Пример #13
0
#!/usr/bin/env python

from anadama2 import Workflow
import os

workflow = Workflow(
    description="A workflow to run PanPhlAn",
    version="0.0.2",
)

# Custom options; everything except the thread count defaults to None.
workflow.add_argument(
    "threads",
    desc="number of threads for panphlan to use",
    default=1,
)
workflow.add_argument(
    "dbfolder",
    desc="folder containing database",
    default=None,
)
workflow.add_argument(
    "filesfile",
    desc="file with filepaths to run on (relative to input)",
    default=None,
)
workflow.add_argument(
    "ref",
    desc="name of reference db",
    default=None,
)
workflow.add_argument(
    "refs",
    desc="file with list of references (relative to dbfolder)",
    default=None,
)

args = workflow.parse_args()

# Input files are selected by the ".fastq.gz" extension; one output name is
# derived per input using the "panphlan_map" tag and "csv.bz2" extension.
in_files = workflow.get_input_files(".fastq.gz")
out_files = workflow.name_output_files(
    name=in_files,
    tag="panphlan_map",
    extension="csv.bz2",
)

if args.filesfile:
    with open(args.filesfile) as f:
# $ python pull_out_reads_by_species_metaphlan2_results.py --input input_sam --output output_fastq
# This will look for the metaphlan2 sam output files named *_bowtie2.sam in the input folder and write
# files *_metaphlan2_marker_aligned_subset.fasta to the output folder (one for each input sam file).
# The fasta reads will be any of the sample reads that map to a marker associated with one of the 
# species in the "--species-list" file. This file should have one species per line and be formatted 
# with the metaphlan2 species naming convention. More specifically, the species file should list
# one per line with metaphlan2 format (ie "s__Gemella_sanguinis") and for unknown species
# include the genus in this file (ie "s__Gemella_unclassified" should be included in the file as "g__Gemella").
# The metaphlan2 pkl database is also required for this script to run and can be provided 
# with the option "--pkl-database". 

# Positions of the fields of interest within a SAM alignment record
# (names suggest 0-based column indexes -- confirm against the usage below).
SAM_READ_NAME_INDEX = 0
SAM_REFERENCE_NAME_INDEX = 2
SAM_SEQ_INDEX = 9

workflow = Workflow()

# The input folder is expected to hold the sam alignment files from a
# metaphlan2 run; the options below name the database, the species list and
# the tag/extension used to recognise those files.
workflow.add_argument(
    "pkl-database",
    desc="MetaPhlAn2 pkl database",
    default="metaphlan2_db/mpa_v20_m200.pkl",
)
workflow.add_argument(
    "species-list",
    desc="the list of species to pull reads for",
    default="species_list.txt",
)
workflow.add_argument(
    "input-tag-extension",
    desc="the file name tag and extension",
    default="_bowtie2.sam",
)
args = workflow.parse_args()

def find_reads(task):
    # read in the species
    with open(args.species_list) as file_handle:
        species_list = [taxon.rstrip() for taxon in file_handle.readlines()]

    db = pickle.load(bz2.BZ2File(args.pkl_database, 'r'))

    marker_to_species={}
Пример #15
0
import os

from anadama2 import Workflow

# to run provide the new workflow run input and output folders
# $ python anadama2_add_files_to_database.py --input $NEW_INPUT_FOLDER --output $NEW_OUTPUT_FOLDER

workflow = Workflow()

# Extensions that qualify a file for being added, given as one
# comma-delimited string.
workflow.add_argument(
    "input-extensions",
    default="txt,tsv,fastq,fastq.gz,log,sam",
    desc="the comma-delimited list of extensions of the input files",
)
args = workflow.parse_args()


# get all of the files in the input folder with the extensions provided
def get_files_to_add(input_folder):
    """Return the paths under ``input_folder`` that end with a known extension.

    The accepted extensions come from the comma-delimited
    ``args.input_extensions`` value. Any directory whose path contains
    ".anadama" is ignored.

    Args:
        input_folder (str): Root folder to walk recursively.
    Returns:
        list: Matching file paths, in ``os.walk`` order.
    """
    # str.endswith accepts a tuple and returns True if any suffix matches.
    suffixes = tuple(args.input_extensions.split(","))
    return [
        os.path.join(current_dir, entry)
        for current_dir, _subdirs, entries in os.walk(input_folder)
        if ".anadama" not in current_dir
        for entry in entries
        if entry.endswith(suffixes)
    ]

Пример #16
0
    with open(task.depends[1].name) as file_handle:
        new_sum = file_handle.readline().strip().split(" ")[0]

    if new_sum.lower() == md5sum.lower():
        file_handle = open(task.targets[0].name, "w")
        file_handle.write("Match")
        file_handle.close()
    else:
        error_msg = "ERROR: Sums do not match for file {0}\nComputed Sum: {1}\nExpected Sum: {2}".format(
            task.depends[1].name, new_sum, md5sum)
        sys.stderr.write(error_msg)
        raise Exception(error_msg)


# create a workflow and get the arguments
workflow = Workflow()

# Both custom options are mandatory.
workflow.add_argument(
    "input-metadata",
    required=True,
    desc="the metadata file",
)
workflow.add_argument(
    "input-extension",
    required=True,
    desc="the input file extension",
)
args = workflow.parse_args()

# Locate the input files by extension (find_files is asked to exit when
# nothing matches) and derive the sample names from them.
input_files = utilities.find_files(
    args.input,
    extension=args.input_extension,
    exit_if_not_found=True,
)
sample_names = utilities.sample_names(input_files, args.input_extension)

# for each raw input file, generate an md5sum file
Пример #17
0
def parse_cli_arguments():
    """Create the HMP2 metadata refresh workflow and declare its options.

    Nothing is parsed here; the function only builds the Workflow object and
    registers the command-line options it accepts.

    Args:
        None
    Returns:
        Workflow: The workflow object for this pipeline.
    """
    pipeline = Workflow(
        version='0.1',
        description=('A workflow to handle refreshing and disseminating '
                     'HMP2 metadata.'),
        remove_options=['input', 'output'])

    # Plain options: (option name, help text), registered in this order.
    plain_options = (
        ('manifest-file',
         'Manifest file containing files to process in this workflow run.'),
        ('config-file',
         'Configuration file containing parameters required by the '
         'workflow.'),
        ('metadata-file',
         'If an existing metadata file exists it can be supplied here. '
         'This metadata file will be appended to instead of a whole new '
         'metadata file being generated.'),
        ('studytrax-metadata-file',
         'Accompanying StudyTrax data all corresponding samples in the '
         'HMP2 project.'),
        ('broad-sample-tracking-file',
         'Broad Institute sample tracking spreadsheet containing status of '
         'sequence products generated.'),
        ('proteomics-metadata',
         'PNNL-supplied metadata spreadsheet.'),
    )
    for option_name, help_text in plain_options:
        pipeline.add_argument(option_name, desc=help_text)

    # Repeatable option: each occurrence is appended to a list.
    # The spellings 'auxillary' / 'appeneded' are part of the existing CLI
    # name and help text and are reproduced verbatim.
    pipeline.add_argument(
        'auxillary-metadata',
        action='append',
        default=[],
        desc='Any auxillary metadata to be appeneded to the final metadata '
             'table.')

    return pipeline
Пример #18
0
# -*- coding: utf-8 -*-
from anadama2 import Workflow

# Example workflow: fetch the public IP address and look it up with whois.
# The standard "input"/"output" options are removed.
# NOTE(review): the [t:...] / [d:...] markers in the command strings
# presumably flag target and dependency files respectively -- confirm
# against the AnADAMA2 documentation.
ctx = Workflow(remove_options=["input", "output"])
# Download the page that reports the caller's address.
ctx.do("wget -qO- checkip.dyndns.com > [t:my_ip.txt]")
# Extract the text that follows "Address: " from the downloaded page.
ctx.do(r"sed 's|.*Address: \(.*[0-9]\)<.*|\1|' [d:my_ip.txt] > [t:ip.txt]")
# Run whois on the extracted address.
ctx.do("whois $(cat [d:ip.txt]) > [t:whois.txt]")
# Run the workflow.
ctx.go()
Пример #19
0
import os, fnmatch

# import the workflow class from anadama2
from anadama2 import Workflow

# import the library of biobakery_workflow tasks for shotgun sequences
from biobakery_workflows.tasks import shotgun, general

# import the utilities functions and config settings from biobakery_workflows
from biobakery_workflows import utilities, config

# create a workflow instance, providing the version number and description
# the version number will appear when running this script with the "--version" option
# the description will appear when running this script with the "--help" option
workflow = Workflow(
    description="A workflow for whole metagenome shotgun sequences",
    version="0.1",
)

# Settings object for the shotgun workflow, created before the options are
# declared.
workflow_config = config.ShotGun()

# Accepted sequence file extensions; "fastq.gz" is used when none is given.
workflow.add_argument(
    "input-extension",
    desc="the input file extension",
    choices=[
        "fastq.gz",
        "fastq",
        "fq.gz",
        "fq",
        "fasta",
        "fasta.gz",
        "fastq.bz2",
        "fq.bz2",
    ],
    default="fastq.gz",
)

# Barcode options default to the empty string.
workflow.add_argument(
    "barcode-file",
    desc="the barcode file",
    default="",
)
workflow.add_argument(
    "dual-barcode-file",
    desc="the string to identify the dual barcode file",
    default="",
)
Пример #20
0
import anadama2.tracked
from anadama2 import Workflow

workflow = Workflow(remove_options=["input", "output"])

# Container holding a single tracked value, "a".
container = anadama2.tracked.Container(a=20)

# First task: depends on the container's "a" value and writes echo.txt.
task1 = workflow.add_task(
    "echo [depends[0]] > [targets[0]]",
    name="task1",
    depends=container.a,
    targets="echo.txt",
)

# Second task: depends on the first target of task1 and writes echo2.txt.
task2 = workflow.add_task(
    "p=$(cat [depends[0]]); echo $p > [targets[0]]",
    name="task2",
    depends=task1.targets[0],
    targets="echo2.txt",
)

workflow.go()

Пример #21
0
import os
from glob import glob
from anadama2 import Workflow
from anadama2.tracked import TrackedExecutable

# Setting the version of the workflow and short description
# Workflow version and short description -- update both as needed.
workflow = Workflow(version="0.0.1", description="Analysis Template")

# Additional custom options for the workflow (run.py).
workflow.add_argument(
    name="lines",
    default="10",
    desc="Number of lines to trim [default: 10]",
)
workflow.add_argument(
    name="metadata",
    default="input/metadata.tsv",
    desc="Metadata for performing analysis [default: input/metadata.tsv]",
)

# Parse the command line.
args = workflow.parse_args()

# The config path is fixed here rather than taken from the command line.
args.config = 'etc/config.ini'

# AnADAMA2 example commands: sorted listings of two bin directories.
workflow.do("ls /usr/bin/ | sort > [t:output/global_exe.txt]")
workflow.do("ls $HOME/.local/bin/ | sort > [t:output/local_exe.txt]")
Пример #22
0
def parse_cli_arguments():
    """Create the HMP2 WGS workflow and declare its custom options.

    Nothing is parsed here; the function only builds the Workflow object and
    registers the command-line options it accepts.

    Args:
        None
    Returns:
        Workflow: The workflow object for this pipeline.
    """
    pipeline = Workflow(
        version='1.0',
        description='A workflow to handle HMP2 WGS data.',
        remove_options=['input', 'output'])

    # General options: (option name, keyword arguments for add_argument).
    general_options = [
        ('manifest-file', {
            'desc': 'Manifest file containing files to process in this '
                    'workflow run.'}),
        ('config-file', {
            'desc': 'Configuration file containing parameters required by '
                    'the workflow.'}),
        ('metadata-file', {
            'desc': 'Accompanying metadata file for the provided data '
                    'files.',
            'default': None}),
        ('threads', {
            'desc': 'number of threads/cores for each task to use',
            'default': 1}),
    ]
    for option_name, option_kwargs in general_options:
        pipeline.add_argument(option_name, **option_kwargs)

    # Per-tool thread overrides share one help-text template; each pair is
    # (option name suffix, tool name shown in the help text).
    per_tool_threads = (
        ('kneaddata', 'kneaddata'),
        ('metaphlan', 'metaphlan2'),
        ('humann', 'humann2'),
    )
    for option_suffix, tool_name in per_tool_threads:
        pipeline.add_argument(
            'threads-' + option_suffix,
            desc=('OPTIONAL. A specific number of threads/cores to use just '
                  'for the {0} task.'.format(tool_name)),
            default=None)

    return pipeline
Пример #23
0
import sys
import os, fnmatch

# import the workflow class from anadama2
from anadama2 import Workflow

# import the library of biobakery_workflow tasks for shotgun sequences
from biobakery_workflows.tasks import shotgun, general

# import the utilities functions and config settings from biobakery_workflows
from biobakery_workflows import utilities, config

# create a workflow instance, providing the version number and description
# the version number will appear when running this script with the "--version" option
# the description will appear when running this script with the "--help" option
workflow = Workflow(
    version="0.1",
    description="A workflow to run strainphlan",
)

# Settings object for the shotgun workflow, created before the options are
# declared.
workflow_config = config.ShotGun()

workflow.add_argument(
    "input-extension",
    desc="the input file extension",
    default="fastq.gz",
    choices=["fastq.gz", "fastq", "fq.gz", "fq", "fasta", "fasta.gz"],
)
workflow.add_argument(
    "threads",
    desc="number of threads/cores for each task to use",
    default=1,
)
# Flag option (store_true).
workflow.add_argument(
    "bypass-taxonomic-profiling",
    desc=("do not run the taxonomic profiling tasks (a tsv profile for each "
          "sequence file must be included in the input folder using the "
          "same sample name)"),
    action="store_true",
)
workflow.add_argument(
    "strain-profiling-options",
    desc="additional options when running the strain profiling step",
    default="",
)
# Converted to int by the parser.
workflow.add_argument(
    "max-strains",
    desc="the max number of strains to profile",
    default=20,
    type=int,
)

# Read the options from the command line.
args = workflow.parse_args()

# Collect the input files by extension; find_files is asked to exit when
# nothing matches.
input_files = utilities.find_files(
    args.input,
    extension=args.input_extension,
    exit_if_not_found=True,
)
Пример #24
0
# import the workflow class from anadama2
from anadama2 import Workflow

# import the document templates and utilities from biobakery_workflows
from biobakery_workflows import utilities

# import the task to convert from biom to tsv
from biobakery_workflows.tasks.sixteen_s import convert_from_biom_to_tsv_list

# import the files for descriptions and paths
from biobakery_workflows import files

# create a workflow instance, providing the version number and description
workflow = Workflow(
    version="0.1",
    description="A workflow for stats on wmgx and 16s data sets",
    remove_options=["input"],
)

# The stock "input" option was removed above and is re-declared here as a
# required option with its own help text.
workflow.add_argument(
    "input",
    required=True,
    desc="the folder containing taxonomy and functional data files",
)

# Further required options.
workflow.add_argument(
    "project-name",
    required=True,
    desc="the name of the project",
)
workflow.add_argument(
    "input-metadata",
    required=True,
    desc="the metadata file (samples as columns or rows)",
)
Пример #25
0
from anadama2 import Workflow

workflow = Workflow(remove_options=["input", "output"])

# Files to fetch.
downloads = [
    "ftp://public-ftp.hmpdacc.org/HM16STR/by_sample/SRS011275.fsa.gz",
    "ftp://public-ftp.hmpdacc.org/HM16STR/by_sample/SRS011273.fsa.gz",
    "ftp://public-ftp.hmpdacc.org/HM16STR/by_sample/SRS011180.fsa.gz",
]

# One wget task per link; the target is named after the last path component
# of the link.
for link in downloads:
    workflow.add_task(
        "wget -O [targets[0]] [args[0]]",
        args=link,
        targets=link.rsplit("/", 1)[-1],
    )

workflow.go()
import os, fnmatch

# import the workflow class from anadama2
from anadama2 import Workflow
from anadama2.tracked import TrackedExecutable

# import the library of biobakery_workflow tasks for shotgun sequences
from biobakery_workflows.tasks import shotgun, general

# import the utilities functions and config settings from biobakery_workflows
from biobakery_workflows import utilities, config

# create a workflow instance, providing the version number and description
# the version number will appear when running this script with the "--version" option
# the description will appear when running this script with the "--help" option
workflow = Workflow(
    version="0.1",
    description="A workflow for isolate assembly",
)

# Settings object for the shotgun workflow, created before the options are
# declared.
workflow_config = config.ShotGun()

# The species name is the only mandatory option in this group.
workflow.add_argument(
    "species-name",
    desc="the species name",
    required=True,
)
workflow.add_argument(
    "input-extension",
    desc="the input file extension",
    choices=["fastq.gz", "fastq", "fq.gz", "fq"],
    default="fastq.gz",
)
workflow.add_argument(
    "threads",
    desc="number of threads/cores for each task to use",
    default=1,
)
workflow.add_argument(
    "pair-identifier",
    desc="the string to identify the first file in a pair",
    default="_R1_001",
)
workflow.add_argument(
Пример #27
0
# -*- coding: utf-8 -*-
from anadama2 import Workflow

# create a workflow instance, providing the version number and description
# the version number will appear when running this script with the "--version" option
# the description will appear when running this script with the "--help" option
workflow = Workflow(
    version="0.1",
    description="A workflow to run KneadData",
)

# Custom options.
workflow.add_argument(
    "kneaddata-db",
    desc="the kneaddata database",
    default="/work/code/kneaddata/db/",
)
workflow.add_argument(
    "input-extension",
    desc="the input file extension",
    default="fastq",
)
workflow.add_argument(
    "threads",
    desc="number of threads for knead_data to use",
    default=1,
)

# Read the options from the command line.
args = workflow.parse_args()

# Inputs are selected by extension; one "kneaddata"-tagged output name is
# derived per input.
in_files = workflow.get_input_files(extension=args.input_extension)
out_files = workflow.name_output_files(name=in_files, tag="kneaddata")

# Register the kneaddata command over the input and output file lists; the
# extra keywords fill the bracketed placeholders in the command string.
workflow.add_task_group(
    "kneaddata --input [depends[0]] --output [output_folder] "
    "--reference-db [kneaddata_db] --threads [threads]",
    depends=in_files,
    targets=out_files,
    output_folder=args.output,
    kneaddata_db=args.kneaddata_db,
    threads=args.threads,
)
from anadama2 import Workflow

from biobakery_workflows.tasks import dadatwo

workflow = Workflow()

# Both primers are mandatory.
workflow.add_argument(
    "fwd-primer",
    desc="forward primer, required for its workflow",
    required=True,
)
workflow.add_argument(
    "rev-primer",
    desc="reverse primer, required for its workflow",
    required=True,
)
workflow.add_argument(
    "pair-identifier",
    desc="the string to identify the first file in a pair",
    default="_R1_001",
)
workflow.add_argument(
    "threads",
    desc="number of threads/cores for each task to use",
    default=1,
)
args = workflow.parse_args()

# Hand the parsed options to the dada2 primer-removal task builder.
dadatwo.remove_primers(
    workflow,
    args.fwd_primer,
    args.rev_primer,
    args.input,
    args.output,
    args.pair_identifier,
    args.threads,
)

workflow.go()
Пример #29
0
from anadama2 import Workflow

workflow = Workflow(remove_options=["input", "output"])

# Task 1: download the compressed OTU counts file.
workflow.add_task(
    "wget ftp://public-ftp.hmpdacc.org/HMMCP/finalData/"
    "hmp1.v35.hq.otu.counts.bz2 -O [targets[0]]",
    targets="hmp1.v35.hq.otu.counts.bz2",
)

# Task 2: decompress the download.
workflow.add_task(
    "bzip2 -d < [depends[0]] > [targets[0]]",
    targets="hmp1.v35.hq.otu.counts",
    depends="hmp1.v35.hq.otu.counts.bz2",
)


def remove_end_tabs_function(task):
    """Copy the task's first dependency to its first target, trimming line ends.

    Every line has its trailing whitespace removed (``str.rstrip()`` with no
    argument, so tabs, spaces and the line terminator alike) and is written
    back followed by a single newline.

    Args:
        task: Object exposing ``depends`` and ``targets`` sequences whose
            first items carry the file path in a ``name`` attribute.
    Returns:
        None
    """
    # The target is opened first, then the input; both are managed by the
    # same ``with`` statement so the input handle is closed as soon as the
    # copy finishes instead of being left to the garbage collector.
    with open(task.targets[0].name, 'w') as file_handle_out, \
            open(task.depends[0].name) as file_handle_in:
        for line in file_handle_in:
            file_handle_out.write(line.rstrip() + "\n")


# add a task with a function to remove the end tabs from the file
workflow.add_task(
    remove_end_tabs_function,
    name="remove_end_tabs",
    depends="hmp1.v35.hq.otu.counts",
    targets="hmp1.v35.hq.otu.counts.notabs",
)

# Run every task registered on the workflow.
workflow.go()
Пример #30
0
import os
import sys
import datetime

# constants
# Root folder of the archive; the two count files below live directly in it.
ARCHIVE_FOLDER = "/opt/archive_folder/"
# CSV file of data deposition counts.
COUNT_FILE = os.path.join(ARCHIVE_FOLDER, "data_deposition_counts.csv")
# CSV file of data deposition counts, "public" variant (per its file name).
PUBLIC_COUNT_FILE = os.path.join(ARCHIVE_FOLDER,
                                 "data_deposition_counts_public.csv")

# create a workflow to check the md5sums for each file
from anadama2 import Workflow
from biobakery_workflows import utilities

# create a workflow and get the arguments
workflow = Workflow(remove_options=["input"])

# The single stock "input" option is replaced by two explicit folders.
workflow.add_argument(
    "input-upload",
    required=True,
    desc="the folder of raw uploaded data",
)
workflow.add_argument(
    "input-processed",
    required=True,
    desc="the folder of processed data",
)

# Transfer settings; all three are mandatory.
workflow.add_argument(
    "key",
    required=True,
    desc="the key file to use for the transfer",
)
workflow.add_argument(
    "user",
    required=True,
    desc="the user id for the transfer",
)
workflow.add_argument(
    "remote",
    required=True,
    desc="the remote host name for the transfer",
)