def test_job_should_fail_if_too_little_memory_required(self):

    outfile = os.path.join(self.work_dir, "out")

    if P.get_parameters()['os'] == 'Linux':
        self.assertRaises(
            OSError,
            P.run,
            "python -c 'import numpy; "
            "a = numpy.array(numpy.arange(0, {memory}), numpy.int8); "
            "out = open(\"{outfile}\", \"w\"); "
            "out.write(str(len(a)) + \"\\n\"); "
            "out.close()'".format(memory=self.test_memory_size,
                                  outfile=outfile),
            to_cluster=self.to_cluster,
            job_memory="{}G".format(0.5 * self.test_memory_size / 10**9))
Example #2
def main(argv=None):
    if argv is None:
        argv = sys.argv

    options, args = P.parse_commandline(argv, config_file="template.yml")

    global PARAMS
    if options.config_file:
        PARAMS = P.get_parameters(options.config_file,
                                  defaults={
                                      "min_value": 0.0,
                                      "num_samples": 1000,
                                      "mu": 0.0,
                                      "sigma": 1.0
                                  })
    else:
        sys.exit(P.main(options, args))

    pipeline = ruffus.Pipeline("template_pipeline")

    task_create_files = pipeline.originate(
        task_func=create_files,
        output=["sample_{:02}.txt".format(x) for x in range(10)])

    task_compute_mean = pipeline.transform(task_func=compute_mean,
                                           input=task_create_files,
                                           filter=ruffus.suffix(".txt"),
                                           output=".mean")

    task_combine_means = pipeline.merge(task_func=combine_means,
                                        input=task_compute_mean,
                                        output="means.txt")

    # primary targets
    pipeline.merge(task_func=P.EmptyRunner("all"),
                   input=task_combine_means,
                   output="all")

    E.debug("starting workflow")
    return P.run_workflow(options, args)
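
The snippet stops before the module entry point; a conventional closing
stanza would look like the following (a sketch, assuming the usual
cgat-core invocation with a target task such as "make all"):

if __name__ == "__main__":
    # e.g. python template_pipeline.py make all
    sys.exit(main(sys.argv))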
Example #3

def test_job_should_fail_if_too_little_memory_required_in_second_statement(
        self):

    outfile = os.path.join(self.work_dir, "out")
    infile = "arv=by_id/glon1-4zz18-3cbje7tmr0nitut/study_list.txt"

    if P.get_parameters()['os'] == 'Linux':
        self.assertRaises(
            OSError,
            P.run,
            "hostname > {outfile}; "
            "python -c 'import numpy; "
            "a = numpy.array(numpy.arange(0, {memory}), numpy.int8); "
            "out = open(\"{outfile}\", \"w\"); "
            "out.write(str(len(a)) + \"\\n\"); "
            "out.close()'".format(memory=self.test_memory_size,
                                  infile=infile,
                                  outfile=outfile),
            to_cluster=self.to_cluster,
            job_memory="{}G".format(0.5 * self.test_memory_size / 10**9))
Example #4
Communicates with the obolibrary (OBO Foundry) API for hierarchical
ontology annotations.  Can be used to download and parse any
OWL-formatted ontology available on that site.

"""

from ruffus import *
from CGATCore import Pipeline as P
import os
import sys
import CGATPipelines.PipelineGeneInfo as PipelineGeneInfo
import CGATCore.IOTools as IOTools
import pandas as pd

PARAMS = P.get_parameters([
    "%s/pipeline.yml" % os.path.splitext(__file__)[0], "../pipeline.yml",
    "pipeline.yml"
])

# pick a pathway to use to check pathway annotation has run
example_pw = PARAMS['my_gene_info_pathway'].split(",")[0]
if example_pw == "all":
    example_pw = 'kegg'

# pick a homologene taxon for the same check; str.split always returns at
# least one element, so test the first element rather than the length
example_homolo = str(PARAMS['my_gene_info_homologene']).split(",")
if example_homolo[0] in ("", "all"):
    # default to mouse (NCBI taxonomy ID 10090)
    example_homolo = "10090"
else:
    example_homolo = example_homolo[0]

# get the list of annotations to be downloaded from my gene info
mgiannotations = PARAMS['my_gene_info_annotations']
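
The docstring above mentions downloading OWL-formatted ontologies from the
obolibrary; a minimal standalone sketch of that step (not part of the
original pipeline; the ontology name and output path are illustrative):

import urllib.request

def download_obo_ontology(name="go", outfile="go.owl"):
    # OBO Foundry ontologies resolve through stable PURLs of the form
    # http://purl.obolibrary.org/obo/<name>.owl
    url = "http://purl.obolibrary.org/obo/%s.owl" % name
    urllib.request.urlretrieve(url, outfile)
    return outfile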
Example #5
def setUp(self):
    BaseTest.setUp(self)
    P.get_parameters()
Example #6
import os

import CGATCore.Experiment as E
import CGATCore.IOTools as IOTools

import CGATPipelines.PipelineMotifs as PipelineMotifs
import CGATPipelines.PipelineTracks as PipelineTracks
from CGATPipelines.Report import run_report

###################################################
###################################################
###################################################
# Pipeline configuration
###################################################
from CGATCore import Pipeline as P
P.get_parameters(
    ["%s/pipeline.yml" % os.path.splitext(__file__)[0],
     "../pipeline.yml",
     "pipeline.yml"],
    defaults={'annotations_dir': ""})

PARAMS = P.PARAMS

PARAMS_ANNOTATIONS = P.peek_parameters(PARAMS["annotations_dir"], "genesets")

###################################################################
###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
# load all tracks - exclude input/control tracks
Sample = PipelineTracks.Sample
Example #7
                        'pipeline_docs', 'themes')
logopath = os.path.join(themedir, "cgat_logo.png")

################################################################
# Import pipeline configuration from pipeline.yml in the current
# directory and the common one.

# Path where code for pipelines is stored
pipelinesdir = os.path.dirname(CGATPipelines.__file__)

# The default configuration file - 'inifile' is read by
# sphinx-report.
inifile = os.path.join(os.path.dirname(CGATPipelines.__file__),
                       'configuration', 'pipeline.yml')

PARAMS = P.get_parameters([inifile, "pipeline.yml"])

# Definition now part of CGATReport
# def setup(app):
#     app.add_config_value('PARAMS', {}, True)

################################################################
################################################################
################################################################
# The pipeline assumes that sphinxreport is called within the
# working directory. If the report is in a separate build directory,
# change the paths below.
#
# directory with export directory from pipeline
# This should be a directory in the build directory - you can
# link from here to a directory outside the build tree, though.
Example #8

def connect():
    '''Connect to the database named in PARAMS["database_name"].

    Use this method to connect to additional databases.
    Returns a database connection.
    '''
    dbh = sqlite3.connect(PARAMS["database_name"])

    return dbh
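
A short usage sketch for connect() (the table name is hypothetical, for
illustration only):

def count_annotations(table="annotations"):
    # run a simple query through the shared connection helper
    dbh = connect()
    return dbh.execute("SELECT COUNT(*) FROM %s" % table).fetchone()[0]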


#########################################################################
P.get_parameters(
    ["%s/pipeline.yml" % os.path.splitext(__file__)[0],
     "../pipeline.yml",
     "pipeline.yml"],
    defaults={'paired_end': False},
    only_import=__name__ != "__main__")

PARAMS = P.PARAMS

PipelineMapping.PARAMS = PARAMS
PipelineMappingQC.PARAMS = PARAMS
PipelineExome.PARAMS = PARAMS
#########################################################################

#########################################################################
# Load manual annotations
#########################################################################

Example #9

import shutil
import os
import sqlite3
import CGATCore.IOTools as IOTools
import CGAT.IndexedGenome as IndexedGenome
import CGAT.Bed as Bed
from CGATCore import Pipeline as P

############################################################
############################################################
############################################################
# Pipeline configuration
P.get_parameters(["%s.yml" % __file__[:-len(".py")], "pipeline.yml"])
PARAMS = P.PARAMS

############################################################
############################################################
############################################################


def exportIntervalsAsBed(database, query, outfile):
    '''export intervals from an SQLite database as bed files.'''

    dbhandle = sqlite3.connect(database)
    cc = dbhandle.cursor()
    cc.execute(query)

    outs = IOTools.open_file(outfile, "w")
    # the original snippet is cut off here; a plausible completion writes
    # each result row as a tab-separated BED line (column order assumed)
    for row in cc:
        outs.write("\t".join(map(str, row)) + "\n")
    outs.close()
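
A usage sketch for exportIntervalsAsBed() (the table and column names are
hypothetical):

exportIntervalsAsBed(PARAMS["database_name"],
                     "SELECT contig, start, end FROM intervals",
                     "intervals.bed")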
Example #10
import os

from CGATCore import Pipeline as P
import CGATCore.IOTools as IOTools
import CGAT.Bed as Bed
import cgatpipelines.tasks.peakcalling as PipelinePeakcalling
import PipelineDeNovoMotifs_python3 as PipelineMotifs
import cgatpipelines.tasks.tracks as PipelineTracks


###################################################
###################################################
###################################################
# Pipeline configuration
###################################################
P.get_parameters(
    ["%s/pipeline.yml" % os.path.splitext(__file__)[0],
     "../pipeline.yml",
     "pipeline.yml"],
    defaults={
        'paired_end': False})

PARAMS = P.PARAMS


PipelinePeakcalling.PARAMS = PARAMS
PipelineMotifs.PARAMS = PARAMS


###################################################################
###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
Example #11
def main(argv=None):

    parser = get_option_parser()

    (options, args) = E.start(parser, add_cluster_options=True)

    if len(args) == 0:
        raise ValueError(
            "command line argument missing - see usage information")

    options.renumber_column = [x.split(":") for x in options.renumber_column]

    cmd = args[0]
    if len(args) > 1:
        cmd += " '" + "' '".join(args[1:]) + "'"

    if options.dry_run:

        cmd = re.sub("%DIR%", "", cmd)
        retcode = subprocess.call(cmd,
                                  shell=True,
                                  stdin=sys.stdin,
                                  stdout=sys.stdout,
                                  cwd=os.getcwd(),
                                  close_fds=True)
        E.stop()
        sys.exit(0)

    failed_requests = []
    started_requests = []
    niterations = 0

    P.get_parameters()
    P.start_session()

    if not options.collect:
        tmpdir = os.path.abspath(tempfile.mkdtemp(dir=options.tmpdir))

        E.info(" working in directory %s" % tmpdir)

        if options.split_at_lines:
            chunk_iterator = chunk_iterator_lines
            args = (options.split_at_lines, )
        elif options.split_at_column:
            chunk_iterator = chunk_iterator_column
            args = (options.split_at_column - 1, options.max_files)
        elif options.split_at_regex:
            chunk_iterator = chunk_iterator_regex_split
            args = (re.compile(options.split_at_regex), 0, options.chunksize,
                    options.max_lines)
        elif options.group_by_regex:
            chunk_iterator = chunk_iterator_regex_group
            args = (re.compile(options.group_by_regex), 0, options.chunksize)
        else:
            raise ValueError("please specify a way to chunk input data")

        data = [(x, cmd, options, None, options.subdirs)
                for x in chunk_iterator(options.stdin,
                                        args,
                                        prefix=tmpdir,
                                        use_header=options.input_header)]

        statements = [build_command(x) for x in data]
        started_requests = [(x[0], x[0] + ".out") for x in data]

        if len(data) == 0:
            E.warn("no data received")
            E.stop()
            sys.exit(0)

        P.run(statements)
    else:
        tmpdir = options.collect
        started_requests = [(x[:-4], x) for x in glob.glob(tmpdir + "/*.out")]

        E.info("collecting %i files from %s" % (len(started_requests), tmpdir))

    if failed_requests:
        for fn, cmd in failed_requests:
            E.error("failed request: filename= %s, cmd= %s" % (fn, cmd))
    else:
        E.info("building result from %i parts" % len(started_requests))

        if options.renumber:
            mapper = MapperLocal(pattern=options.renumber)
        else:
            mapper = MapperEmpty()

        # deal with stdout
        name = None
        index = None

        for pattern, column in options.renumber_column:

            if re.search(pattern, "stdout"):
                try:
                    index = int(column) - 1
                except ValueError:
                    name = column
                break

        if options.binary:
            ResultBuilderBinary()(started_requests, options.stdout, options)
        else:
            regex = None
            if options.output_regex_header:
                regex = re.compile(options.output_regex_header)
            ResultBuilder(mapper=mapper,
                          field_index=index,
                          field_name=name,
                          header_regex=regex)(started_requests, options.stdout,
                                              options)

        # deal with logfiles: combine them into a single file
        rr = (re.search(r"'--log=(\S+)'", cmd) or
              re.search(r"'--L\s+(\S+)'", cmd))
        if rr:
            E.info("logging output goes to %s" % rr.groups()[0])
            logfile = IOTools.open_file(rr.groups()[0], "a")
            ResultBuilderLog()([(x[0], "%s.log" % x[0])
                                for x in started_requests], logfile, options)
            logfile.close()

        # deal with other files
        if options.subdirs:

            files = glob.glob("%s/*.dir/*" % tmpdir)
            # remove directory
            filenames = set([os.path.basename(x) for x in files])
            xx = len(".out")

            for filename in filenames:

                _, filetype = os.path.splitext(filename)

                name = None
                index = None

                for pattern, column in options.renumber_column:
                    if re.search(pattern, filename):
                        try:
                            index = int(column) - 1
                        except ValueError:
                            name = column
                        break

                if options.binary:
                    builder = ResultBuilderBinary(mapper=mapper)
                elif filetype in (".fa", ".fasta"):
                    builder = ResultBuilderFasta(mapper=mapper)
                elif filetype in (".mali", ):
                    builder = ResultBuilderFasta(mapper=MapperEmpty())
                elif filetype in (".png"):
                    builder = ResultBuilderCopies(mapper=mapper)
                else:
                    builder = ResultBuilder(mapper=mapper,
                                            field_index=index,
                                            field_name=name)

                E.debug("chose the following builder for %s: %s: %s" %
                        (filename, filetype, str(builder)))

                E.info("collecting results for %s" % filename)

                input_filenames = []
                for fi, fn in started_requests:
                    fn = fn[:-xx] + ".dir/" + filename
                    if os.path.exists(fn):
                        input_filenames.append((fi, fn))

                E.info("output of %i files goes to %s" %
                       (len(filenames), filename))

                outfile = IOTools.open_file(options.output_pattern % filename,
                                            "w")
                builder(input_filenames, outfile, options)
                outfile.close()

    if not options.debug and (not options.resume or not options.collect):
        if len(failed_requests) == 0:
            E.info("removing directory %s" % tmpdir)
            shutil.rmtree(tmpdir)
        else:
            E.info("directory %s not removed due to %i failed jobs" %
                   (tmpdir, len(failed_requests)))

    E.info("job control: nstarted=%i, nfinished=%i, nerrors=%i, nrepeats=%i" %
           (len(started_requests), len(started_requests) -
            len(failed_requests), len(failed_requests), niterations))

    E.stop()
Example #12
def main(argv=None):

    # Parse the options
    parser = E.OptionParser(
        version="%prog version: $Id: cgat_script_template.py 2871 "
                "2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option(
        "-p",
        "--params",
        "--args",
        dest="params",
        type="string",
        help="comma separated list of addtional parameter strings")

    parser.add_option("-m",
                      "--module",
                      dest="module",
                      type="string",
                      help="the full path to the module file",
                      default=None)

    parser.add_option("-i",
                      "--input",
                      dest="input_filenames",
                      type="string",
                      action="append",
                      help="input filename")

    parser.add_option("-o",
                      "--output-section",
                      dest="output_filenames",
                      type="string",
                      action="append",
                      help="output filename")

    parser.add_option("-f",
                      "--function",
                      dest="function",
                      type="string",
                      help="the module function",
                      default=None)

    parser.set_defaults(input_filenames=[], output_filenames=[], params=None)

    (options, args) = E.start(parser)

    # Check a module and function have been specified
    if not options.module or not options.function:
        raise ValueError("Both a module and a function must be specified")

    # initialize defaults
    P.get_parameters()

    # If a full path was given, add this path to the system path
    location = os.path.dirname(options.module)
    if location != "":
        sys.path.append(location)

    # Establish the module name, accommodating cases where the
    # .py extension has been included in the module name
    module_name = os.path.basename(options.module)
    if module_name.endswith(".py"):
        module_base_name = module_name[:-3]
    else:
        module_base_name = module_name

    # Import the specified module and map the specified function
    E.info("importing module '%s' " % module_base_name)
    E.debug("sys.path is: %s" % sys.path)

    module = importlib.import_module(module_base_name)
    try:
        function = getattr(module, options.function)
    except AttributeError as msg:
        raise AttributeError(
            str(msg) + ": unknown function, available functions are: %s" %
            ",".join([x for x in dir(module) if not x.startswith("_")]))

    if options.input_filenames and not options.input_filenames == ["None"]:
        infiles = options.input_filenames
    else:
        infiles = False

    if options.output_filenames and not options.output_filenames == ["None"]:
        outfiles = options.output_filenames
    else:
        outfiles = False

    # Parse the parameters into an array
    if options.params:
        params = [param.strip() for param in options.params.split(",")]
    else:
        params = False

    # deal with single file case
    if infiles and len(infiles) == 1:
        infiles = infiles[0]
    if outfiles and len(outfiles) == 1:
        outfiles = outfiles[0]

    # Make the function call
    if infiles and outfiles and params:
        function(infiles, outfiles, params)
    elif infiles and outfiles and not params:
        function(infiles, outfiles)
    elif params:
        function(params)
    else:
        raise ValueError(
            "Expecting infile+outfile+params or infile+outfile or params")

    E.stop()