Example #1
 def _free_resources(self, job):
     for name, value in job.resources.items():
         if name in self.resources:
             value = self.calc_resource(name, value)
             self.resources[name] += value
             logger.debug("Releasing {} {} (now {}).".format(
                 value, name, self.resources[name]))
Example #2
    def update_dynamic(self, job):
        dynamic_wildcards = job.dynamic_wildcards
        if not dynamic_wildcards:
            # this happens e.g. in dryrun if output is not yet present
            return

        depending = list(filter(lambda job_: not self.finished(job_), self.bfs(self.depending, job)))
        newrule, non_dynamic_wildcards = job.rule.dynamic_branch(dynamic_wildcards, input=False)
        self.replace_rule(job.rule, newrule)

        # no targetfile needed for job
        newjob = Job(newrule, self, format_wildcards=non_dynamic_wildcards)
        self.replace_job(job, newjob)
        for job_ in depending:
            if job_.dynamic_input:
                newrule_ = job_.rule.dynamic_branch(dynamic_wildcards)
                if newrule_ is not None:
                    self.replace_rule(job_.rule, newrule_)
                    if not self.dynamic(job_):
                        logger.debug("Updating job {}.".format(job_))
                        newjob_ = Job(newrule_, self, targetfile=job_.targetfile)

                        unexpected_output = self.reason(job_).missing_output.intersection(newjob.existing_output)
                        if unexpected_output:
                            raise UnexpectedOutputException(newjob_.rule, unexpected_output)

                        self.replace_job(job_, newjob_)
        return newjob
Example #3
    def schedule(self):
        """ Schedule jobs that are ready, maximizing cpu usage. """
        while True:
            try:
                self._open_jobs.wait()
            except:
                # this will be caused by SIGTERM or SIGINT
                self._executor.shutdown()
                return False
            self._open_jobs.clear()
            if not self.keepgoing and self._errors:
                logger.warning("Will exit after finishing "
                    "currently running jobs.")
                self._executor.shutdown()
                return False
            if not any(self.open_jobs):
                self._executor.shutdown()
                return not self._errors

            needrun = list(self.open_jobs)
            assert needrun

            logger.debug("Ready jobs:\n\t" + "\n\t".join(map(str, needrun)))

            run = self.job_selector(needrun)
            logger.debug("Selected jobs:\n\t" + "\n\t".join(map(str, run)))
            self.running.update(run)
            for job in run:
                self.run(job)
Example #4
 def check_incomplete(self):
     if not self.ignore_incomplete:
         incomplete = self.incomplete_files
         if incomplete:
             if self.force_incomplete:
                 logger.debug("Forcing incomplete files:")
                 logger.debug("\t" + "\n\t".join(incomplete))
                 self.forcefiles.update(incomplete)
             else:
                 raise IncompleteFilesException(incomplete)
Example #5
    def schedule(self):
        """ Schedule jobs that are ready, maximizing cpu usage. """
        try:
            while True:
                # workaround so that the wait does not prevent keyboard interrupts
                while not self._open_jobs.wait(1):
                    pass

                # obtain needrun and running jobs in a thread-safe way
                with self._lock:
                    needrun = list(self.open_jobs)
                    running = list(self.running)
                # free the event
                self._open_jobs.clear()

                # handle errors
                if not self.keepgoing and self._errors:
                    logger.info("Will exit after finishing "
                                "currently running jobs.")
                    if not running:
                        self._executor.shutdown()
                        logger.error(_ERROR_MSG_FINAL)
                        return False
                    continue
                # normal shutdown because all jobs have been finished
                if not needrun and not running:
                    self._executor.shutdown()
                    if self._errors:
                        logger.error(_ERROR_MSG_FINAL)
                    return not self._errors

                # continue if no new job needs to be executed
                if not needrun:
                    continue

                logger.debug("Resources before job selection: {}".format(
                    self.resources))
                logger.debug("Ready jobs ({}):\n\t".format(len(needrun)) +
                             "\n\t".join(map(str, needrun)))

                # select jobs by solving knapsack problem
                run = self.job_selector(needrun)
                logger.debug("Selected jobs ({}):\n\t".format(len(run)) +
                             "\n\t".join(map(str, run)))
                # update running jobs
                with self._lock:
                    self.running.update(run)
                logger.debug(
                    "Resources after job selection: {}".format(self.resources))
                # actually run jobs
                for job in run:
                    self.run(job)
        except (KeyboardInterrupt, SystemExit):
            logger.info("Terminating processes on user request.")
            self._executor.cancel()
            with self._lock:
                running = list(self.running)
            for job in running:
                job.cleanup()
            return False
Example #6
def print_exception(ex, linemaps):
    """
    Print an error message for a given exception.

    Arguments
    ex -- the exception
    linemaps -- a dict of dicts that maps, for each snakefile,
        the compiled lines to source code lines in the snakefile.
    """
    tb = "Full " + "".join(traceback.format_exception(type(ex), ex, ex.__traceback__))
    logger.debug(tb)
    if isinstance(ex, SyntaxError) or isinstance(ex, IndentationError):
        logger.error(format_error(ex, ex.lineno,
                                  linemaps=linemaps,
                                  snakefile=ex.filename,
                                  show_traceback=True))
        return
    origin = get_exception_origin(ex, linemaps)
    if origin is not None:
        lineno, file = origin
        logger.error(format_error(ex, lineno,
                                  linemaps=linemaps,
                                  snakefile=file,
                                  show_traceback=True))
        return
    elif isinstance(ex, TokenError):
        logger.error(format_error(ex, None, show_traceback=False))
    elif isinstance(ex, MissingRuleException):
        logger.error(format_error(ex, None,
                                  linemaps=linemaps,
                                  snakefile=ex.filename,
                                  show_traceback=False))
    elif isinstance(ex, RuleException):
        for e in ex._include + [ex]:
            if not e.omit:
                logger.error(format_error(e, e.lineno,
                                          linemaps=linemaps,
                                          snakefile=e.filename,
                                          show_traceback=True))
    elif isinstance(ex, WorkflowError):
        logger.error(format_error(ex, ex.lineno,
                                  linemaps=linemaps,
                                  snakefile=ex.snakefile,
                                  show_traceback=True))
    elif isinstance(ex, KeyboardInterrupt):
        logger.info("Cancelling snakemake on user request.")
    else:
        traceback.print_exception(type(ex), ex, ex.__traceback__)
Example #7
def shellcmd(
    img_path,
    cmd,
    args="",
    quiet=False,
    envvars=None,
    shell_executable=None,
    container_workdir=None,
    is_python_script=False,
):
    """Execute shell command inside singularity container given optional args
    and environment variables to be passed."""

    if envvars:
        envvars = " ".join(
            "SINGULARITYENV_{}={}".format(k, v) for k, v in envvars.items()
        )
    else:
        envvars = ""

    if shell_executable is None:
        shell_executable = "sh"
    else:
        # Ensure to just use the name of the executable, not a path,
        # because we cannot be sure where it is located in the container.
        shell_executable = os.path.split(shell_executable)[-1]

    if is_python_script:
        # mount host snakemake module into container
        args += " --bind {}:{}".format(SNAKEMAKE_SEARCHPATH, SNAKEMAKE_MOUNTPOINT)

    if container_workdir:
        args += " --pwd {}".format(container_workdir)

    cmd = "{} singularity {} exec --home {} {} {} {} -c '{}'".format(
        envvars,
        "--quiet --silent" if quiet else "",
        os.getcwd(),
        args,
        img_path,
        shell_executable,
        cmd.replace("'", r"'\''"),
    )
    logger.debug(cmd)
    return cmd
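
For illustration, a minimal usage sketch of the shellcmd helper above; the image path, command, and environment variable are made-up values, and the command shown in the comment is only approximate (empty optional parts leave extra spaces in the real string):

# Hypothetical call; assumes shellcmd from the example above is importable.
cmd = shellcmd(
    "/images/env.simg",           # made-up container image path
    "echo hello",                 # command to run inside the container
    envvars={"FOO": "bar"},       # becomes SINGULARITYENV_FOO=bar in the prefix
    shell_executable="/bin/bash", # reduced to "bash" inside the command
)
# cmd is roughly:
# SINGULARITYENV_FOO=bar singularity  exec --home <cwd>  /images/env.simg bash -c 'echo hello'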
Example #8
 def pull(self, dryrun=False):
     if self.is_local:
         return
     if dryrun:
         logger.info("Singularity image {} will be pulled.".format(self.url))
         return
     logger.debug("Singularity image location: {}".format(self.path))
     if not os.path.exists(self.path):
         logger.info("Pulling singularity image {}.".format(self.url))
         try:
             p = subprocess.check_output(["singularity", "pull",
                 "--name", "{}.simg".format(self.hash), self.url],
                 cwd=self._img_dir,
                 stderr=subprocess.STDOUT)
         except subprocess.CalledProcessError as e:
             raise WorkflowError("Failed to pull singularity image "
                                 "from {}:\n{}".format(self.url,
                                                       e.stdout.decode()))
Example #9
    def cancel(self):
        """cancel execution, usually by way of control+c. Cleanup is done in
           shutdown (deleting cached workdirs in Google Cloud Storage
        """
        import googleapiclient

        # projects.locations.operations/cancel
        operations = self._api.projects().locations().operations()

        for job in self.active_jobs:
            request = operations.cancel(name=job.jobname)
            logger.debug("Cancelling operation {}".format(job.jobid))
            try:
                self._retry_request(request)
            except (Exception, BaseException, googleapiclient.errors.HttpError):
                continue

        self.shutdown()
Example #10
    def _add_gpu(self, gpu_count):
        """Add a number of NVIDIA gpus to the current executor. This works
        by way of adding nvidia_gpu to the job default resources, and also
        changing the default machine type prefix to be n1, which is
        the currently only supported instance type for using GPUs for LHS.
        """
        if not gpu_count or gpu_count == 0:
            return

        logger.debug(
            "found resource request for {} GPUs. This will limit to n1 "
            "instance types.".format(gpu_count))
        self.workflow.default_resources.parsed["nvidia_gpu"] = gpu_count
        self.workflow.default_resources.args.append("nvidia_gpu=%s" %
                                                    gpu_count)
        self._machine_type_prefix = self._machine_type_prefix or ""
        if not self._machine_type_prefix.startswith("n1"):
            self._machine_type_prefix = "n1"
Example #11
    def _wait_for_jobs(self):
        UNFINISHED_STATES = [
            "UNKNOWN",
            "INITIALIZING",
            "QUEUED",
            "RUNNING",
            "PAUSED",
        ]
        ERROR_STATES = [
            "EXECUTOR_ERROR",
            "SYSTEM_ERROR",
            "CANCELED",  # TODO: really call `error_callback` on this?
        ]

        while True:

            with self.lock:
                if not self.wait:
                    return
                active_jobs = self.active_jobs
                self.active_jobs = list()
                still_running = list()

            for j in active_jobs:
                with self.status_rate_limiter:  # TODO: this doesn't seem to do anything?
                    res = self.tes_client.get_task(j.jobid, view="MINIMAL")
                    logger.debug("[TES] State of task '{id}': {state}".format(
                        id=j.jobid,
                        state=res.state,
                    ))
                    if res.state in UNFINISHED_STATES:
                        still_running.append(j)
                    elif res.state in ERROR_STATES:
                        logger.info(
                            "[TES] Task errored: {id}".format(id=j.jobid))
                        j.error_callback(j.job)
                    elif res.state == "COMPLETE":
                        logger.info(
                            "[TES] Task completed: {id}".format(id=j.jobid))
                        j.callback(j.job)

            with self.lock:
                self.active_jobs.extend(still_running)
            time.sleep(1 / self.max_status_checks_per_second)
Example #12
    def run(self, job,
            callback=None,
            submit_callback=None,
            error_callback=None):
        super()._run(job)
        workdir = os.getcwd()
        jobid = self.dag.jobid(job)

        jobscript = self.get_jobscript(job)
        jobfinished = os.path.join(self.tmpdir, "{}.jobfinished".format(jobid))
        jobfailed = os.path.join(self.tmpdir, "{}.jobfailed".format(jobid))
        self.spawn_jobscript(job, jobscript,
                             jobfinished=jobfinished,
                             jobfailed=jobfailed)

        deps = " ".join(self.external_jobid[f] for f in job.input
                        if f in self.external_jobid)
        try:
            submitcmd = job.format_wildcards(
                self.submitcmd,
                dependencies=deps,
                cluster=self.cluster_wildcards(job))
        except AttributeError as e:
            raise WorkflowError(str(e), rule=job.rule)
        try:
            ext_jobid = subprocess.check_output(
                '{submitcmd} "{jobscript}"'.format(submitcmd=submitcmd,
                                                   jobscript=jobscript),
                shell=True).decode().split("\n")
        except subprocess.CalledProcessError as ex:
            raise WorkflowError(
                "Error executing jobscript (exit code {}):\n{}".format(
                    ex.returncode, ex.output.decode()),
                rule=job.rule)
        if ext_jobid and ext_jobid[0]:
            ext_jobid = ext_jobid[0]
            self.external_jobid.update((f, ext_jobid) for f in job.output)
            logger.debug("Submitted job {} with external jobid {}.".format(
                jobid, ext_jobid))

        submit_callback(job)
        with self.lock:
            self.active_jobs.append(GenericClusterJob(job, callback, error_callback, jobscript, jobfinished, jobfailed))
Example #13
def parseYAMLHeader(filepath):
    """

    :param filepath: path to the file
    :return: String representation of the YAML header in the file, including inter-document framing ("---")
    """
    yamlHeader = []
    for i, line in enumerate(open(filepath).readlines()):
        # strip the leading "#'" prefix and collect the line
        yamlHeader.append(line.strip()[2:])

        # terminate if that's already "#'---" (=end of YAML-designated area)
        if i != 0 and line.startswith("#'---"):
            break

    result = '\n'.join(yamlHeader)
    logger.debug("Got " + result + "as a result of parsing YAML header from " +
                 filepath + ".\n")
    return result
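
As a reference for the input format parseYAMLHeader expects, a minimal sketch follows; the file path and header content are hypothetical, inferred only from the two-character stripping and "#'---" termination logic above:

# Hypothetical script head: every header line starts with "#'" and the
# YAML block is framed by "#'---" lines, e.g.
#
#   #'---
#   #' wb:
#   #'  input: "data/counts.tsv"
#   #'---
#
# parseYAMLHeader strips the leading "#'" from each line and returns the
# joined block, including both "---" framing lines.
header = parseYAMLHeader("Scripts/Analysis/count_reads.R")  # path is made up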
Example #14
    def run(self, job,
            callback=None,
            submit_callback=None,
            error_callback=None):
        super()._run(job)
        workdir = os.getcwd()
        jobid = self.dag.jobid(job)

        jobscript = self.get_jobscript(job)
        jobfinished = os.path.join(self.tmpdir, "{}.jobfinished".format(jobid))
        jobfailed = os.path.join(self.tmpdir, "{}.jobfailed".format(jobid))
        self.spawn_jobscript(job, jobscript,
                             jobfinished=jobfinished,
                             jobfailed=jobfailed)

        deps = " ".join(self.external_jobid[f] for f in job.input
                        if f in self.external_jobid)
        try:
            submitcmd = job.format_wildcards(
                self.submitcmd,
                dependencies=deps,
                cluster=self.cluster_wildcards(job))
        except AttributeError as e:
            raise WorkflowError(str(e), rule=job.rule)
        try:
            ext_jobid = subprocess.check_output(
                '{submitcmd} "{jobscript}"'.format(submitcmd=submitcmd,
                                                   jobscript=jobscript),
                shell=True).decode().split("\n")
        except subprocess.CalledProcessError as ex:
            logger.error("Error submitting jobscript (exit code {}):\n{}".format(
                    ex.returncode, ex.output.decode()))
            error_callback(job)
            return
        if ext_jobid and ext_jobid[0]:
            ext_jobid = ext_jobid[0]
            self.external_jobid.update((f, ext_jobid) for f in job.output)
            logger.debug("Submitted job {} with external jobid {}.".format(
                jobid, ext_jobid))

        submit_callback(job)
        with self.lock:
            self.active_jobs.append(GenericClusterJob(job, callback, error_callback, jobscript, jobfinished, jobfailed))
Example #15
    def write_jobscript(self, job, jobscript, **kwargs):

        use_threads = "--force-use-threads" if not job.is_group() else ""
        envvars = "\\\n".join("export {}={};".format(var, os.environ[var])
                              for var in self.workflow.envvars)

        exec_job = self.format_job(self.exec_job,
                                   job,
                                   _quote_all=False,
                                   use_threads=use_threads,
                                   envvars=envvars,
                                   **kwargs)
        content = self.format_job(self.jobscript,
                                  job,
                                  exec_job=exec_job,
                                  **kwargs)
        logger.debug("Jobscript:\n{}".format(content))
        with open(jobscript, "w") as f:
            print(content, file=f)
        os.chmod(jobscript, os.stat(jobscript).st_mode | stat.S_IXUSR)
Example #16
    def __init__(self, *args, keep_local=False, provider=None, **kwargs):
        super(RemoteObject, self).__init__(*args,
                                           keep_local=keep_local,
                                           provider=provider,
                                           **kwargs)

        bucket_name = "test-static-remote-bucket"
        test_files = ("test.txt", "out1.txt", "out2.txt")

        s3 = boto3.resource("s3")
        s3.create_bucket(Bucket=bucket_name)

        # "Upload" files that should be in S3 before tests...
        s3c = S3Helper()
        for test_file in test_files:
            if not s3c.exists_in_bucket(bucket_name, test_file):
                logger.debug(
                    "Pre-populating remote bucket {} with file {}".format(
                        bucket_name, test_file))
                s3c.upload_to_s3(bucket_name, test_file)
Example #17
 def _globus(self, *args):
     retry = self.provider.retry
     cmd = ["globus-url-copy"] + list(args)
     for i in range(retry + 1):
         try:
             logger.debug(" ".join(cmd))
             return sp.run(
                 cmd, check=True, stderr=sp.PIPE, stdout=sp.PIPE
             ).stdout.decode()
         except sp.CalledProcessError as e:
             if i == retry:
                 raise WorkflowError(
                     "Error calling globus-url-copy:\n{}".format(
                         cmd, e.stderr.decode()
                     )
                 )
             else:
                 # try again after some seconds
                 time.sleep(1)
                 continue
Example #18
    def __init__(self, *args, keep_local=False, provider=None, **kwargs):
        super(RemoteObject, self).__init__(*args,
                                           keep_local=keep_local,
                                           provider=provider,
                                           **kwargs)

        bucket_name = 'test-static-remote-bucket'
        test_files = ('test.txt', 'out1.txt', 'out2.txt')

        conn = boto.connect_s3()
        if bucket_name not in [b.name for b in conn.get_all_buckets()]:
            conn.create_bucket(bucket_name)

        # "Upload" files that should be in S3 before tests...
        s3c = S3Helper()
        for test_file in test_files:
            if not s3c.exists_in_bucket(bucket_name, test_file):
                logger.debug(
                    "Pre-populating remote bucket {} with file {}".format(
                        bucket_name, test_file))
                s3c.upload_to_s3(bucket_name, test_file)
Example #19
 def _gfal(self, cmd, *args, retry=None, raise_workflow_error=True):
     if retry is None:
         retry = self.provider.retry
     _cmd = ["gfal-" + cmd] + list(args)
     for i in range(retry + 1):
         try:
             logger.debug(_cmd)
             return sp.run(_cmd, check=True, stderr=sp.PIPE,
                           stdout=sp.PIPE).stdout.decode()
         except sp.CalledProcessError as e:
             if i == retry:
                 if raise_workflow_error:
                     raise WorkflowError(
                         "Error calling gfal-{}:\n{}".format(
                             cmd, e.stderr.decode()))
                 else:
                     raise e
             else:
                 # try again after some seconds
                 time.sleep(1)
                 continue
Example #20
    def update_dynamic(self, job):
        """Update the DAG by evaluating the output of the given job that
        contains dynamic output files."""
        dynamic_wildcards = job.dynamic_wildcards
        if not dynamic_wildcards:
            # this happens e.g. in dryrun if output is not yet present
            return

        depending = list(
            filter(lambda job_: not self.finished(job_),
                   self.bfs(self.depending, job)))
        newrule, non_dynamic_wildcards = job.rule.dynamic_branch(
            dynamic_wildcards, input=False)
        self.specialize_rule(job.rule, newrule)

        # no targetfile needed for job
        newjob = Job(newrule, self, format_wildcards=non_dynamic_wildcards)
        self.replace_job(job, newjob)
        for job_ in depending:
            if job_.dynamic_input:
                newrule_ = job_.rule.dynamic_branch(dynamic_wildcards)
                if newrule_ is not None:
                    self.specialize_rule(job_.rule, newrule_)
                    if not self.dynamic(job_):
                        logger.debug("Updating job {}.".format(job_))
                        newjob_ = Job(newrule_,
                                      self,
                                      targetfile=job_.targetfile)

                        unexpected_output = self.reason(
                            job_).missing_output.intersection(
                                newjob.existing_output)
                        if unexpected_output:
                            logger.warning(
                                "Warning: the following output files of rule {} were not "
                                "present when the DAG was created:\n{}".format(
                                    newjob_.rule, unexpected_output))

                        self.replace_job(job_, newjob_)
        return newjob
Example #21
    def _get_bucket(self):
        """get a connection to the storage bucket (self.bucket) and exit
        if the name is taken or otherwise invalid.

        Parameters
        ==========
        workflow: the workflow object to derive the prefix from
        """
        import google

        # Hold path to requested subdirectory and main bucket
        bucket_name = self.workflow.default_remote_prefix.split("/")[0]
        self.gs_subdir = re.sub("^{}/".format(bucket_name), "",
                                self.workflow.default_remote_prefix)
        self.gs_logs = os.path.join(self.gs_subdir, "google-lifesciences-logs")

        # Case 1: The bucket already exists
        try:
            self.bucket = self._bucket_service.get_bucket(bucket_name)

        # Case 2: The bucket needs to be created
        except google.cloud.exceptions.NotFound:
            self.bucket = self._bucket_service.create_bucket(bucket_name)

        # Case 3: The bucket name is already taken or cannot be accessed
        except Exception as ex:
            logger.error("Cannot get or create {}:\n{}".format(bucket_name, ex))
            log_verbose_traceback(ex)
            raise ex

        logger.debug("bucket=%s" % self.bucket.name)
        logger.debug("subdir=%s" % self.gs_subdir)
        logger.debug("logs=%s" % self.gs_logs)
Example #22
    def _job_was_successful(self, status):
        """based on a status response (a [pipeline].projects.locations.operations.get
        debug print the list of events, return True if all return codes 0
        and False otherwise (indication of failure). In that a nonzero exit
        status is found, we also debug print it for the user.
        """
        success = True

        # https://cloud.google.com/life-sciences/docs/reference/rest/v2beta/Event
        for event in status["metadata"]["events"]:

            logger.debug(event["description"])

            # Does it always result in fail for other failure reasons?
            if "failed" in event:
                success = False
                action = event.get("failed")
                logger.debug("{}: {}".format(action["code"], action["cause"]))

            elif "unexpectedExitStatus" in event:
                action = event.get("unexpectedExitStatus")

                if action["exitStatus"] != 0:
                    success = False

                    # Provide reason for the failure (desc includes exit code)
                    msg = "%s" % event["description"]
                    if "stderr" in action:
                        msg += ": %s" % action["stderr"]
                        logger.debug(msg)

        return success
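
A minimal sketch of the kind of status payload this method walks; the event contents are made up for illustration (see the Event reference linked in the code for the real schema):

# Hypothetical operation status containing one action with a nonzero exit.
status = {
    "metadata": {
        "events": [
            {"description": "Worker released"},
            {
                "description": "Execution failed: exit status 1",
                "unexpectedExitStatus": {"exitStatus": 1, "stderr": "KeyError: 'sample'"},
            },
        ]
    }
}
# _job_was_successful(status) on such a payload returns False and
# debug-logs the description together with the stderr text.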
Example #23
def parseWBInfosFromRFiles(script_dir="Scripts", htmlPath="Output/html"):
    """

    :param script_dir: Relative path to the Scripts directory
    :param htmlPath: Relative path to the html output path
    :return: a list of dictionaries with fields:
      - file - the input R file
      - outputFile - where to put the output html file
      - param - parsed yaml params
    """
    parsedInfos = []
    #errorOccured = False
    for filename in findFilesRecursive(script_dir, ['*.r', '*.R']):
        if not hasYAMLHeader(filename):
            # Ignore files without YAML infos
            continue
        header = parseYAMLHeader(filename)
        # run all the syntax checks - will raise an error if it fails
        yamlParamsDict = parseYamlParams(header, filename)
        if yamlParamsDict is None:  # a parsing error occurred
            continue  # go on parsing the next file

        if isinstance(yamlParamsDict, str):
            # allow parsing a single tag without a colon as a string;
            # put it in a dict and check later on
            yamlParamsDict = {yamlParamsDict: None}

        if 'wb' in yamlParamsDict:  # the header contains wb information
            outFile = htmlPath + "/" + pathsepsToUnderscore(
                os.path.splitext(filename)[0]) + ".html"
            parsedInfos.append({
                'file': linuxify(filename),
                'outputFile': outFile,
                'param': yamlParamsDict
            })

    logger.debug("Parsed informations from R files: " + str(parsedInfos))
    #if errorOccured:
    #    raise ValueError("Errors occured in parsing the R files. Please fix them.") TODO really raise a ValueError?
    return parsedInfos
Example #24
def parseMDFiles(script_dir="Scripts", htmlPath="Output/html"):
    """

    :param script_dir: Relative path to the Scripts directory
    :param htmlPath: Relative path to the html output path
    :return: a list of dictionaries with fields:
      - file - the input .md file
      - outputFile - where to put the output html file
      - param - parsed yaml header - always an empty list
    """
    logger.debug("Finding .md files:\n")
    foundMDFiles = []
    for f in findFilesRecursive(script_dir, ['*.md']):
        outFile = htmlPath + "/" + pathsepsToUnderscore(
            os.path.splitext(f)[0]) + ".html"
        logger.debug("Found " + outFile + ".\n")
        foundMDFiles.append({
            'file': linuxify(f),
            'outputFile': outFile,
            'param': []
        })
    return foundMDFiles
Example #25
def findFilesRecursive(startingPath, patterns):
    """
    :param startingPath: root path of the search
    :param patterns: patterns to search file names for
    :return: paths to files matching the patterns
    """
    matchedFilepaths = []
    for root, dirnames, filenames in os.walk(startingPath):
        dirnames[:] = [d for d in dirnames if not d[0] == '_']
        dirnames[:] = [d for d in dirnames if not d[0] == '.']
        for file in reduce(operator.add,
                           (fnmatch.filter(filenames, p) for p in patterns)):
            checkFilename(file)
            absFilepath = os.path.join(root, file)
            if not absFilepath in matchedFilepaths:
                matchedFilepaths.append(absFilepath)
    sortedMatchedFilepaths = sorted(matchedFilepaths)
    conf = Config()
    regex = re.compile(conf.get("fileRegex"))
    reFiles = list(filter(regex.search, sortedMatchedFilepaths))
    logger.debug("Found files in scope of wBuild: " + str(reFiles) + ".\n")
    return reFiles
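
A minimal usage sketch for the helper above, assuming a wBuild Config with a fileRegex entry is available; the directory name and patterns are illustrative:

# Hypothetical call: collect R scripts under Scripts/, skipping directories
# whose names start with "_" or ".". Results are additionally filtered by
# the "fileRegex" pattern from the wBuild Config before being returned.
r_files = findFilesRecursive("Scripts", ["*.r", "*.R"])
for path in r_files:
    print(path)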
Example #26
    def run(
        self, job, callback=None, submit_callback=None, error_callback=None):
        super()._run(job)
        workdir = os.getcwd()
        jobid = self.dag.jobid(job)
        properties = job.json()

        jobscript = self.get_jobscript(job)
        jobfinished = os.path.join(self.tmpdir, "{}.jobfinished".format(jobid))
        jobfailed = os.path.join(self.tmpdir, "{}.jobfailed".format(jobid))
        with open(jobscript, "w") as f:
            print(format(self.jobscript, workflow=self.workflow, cores=self.cores), file=f)
        os.chmod(jobscript, os.stat(jobscript).st_mode | stat.S_IXUSR)

        deps = " ".join(self.external_jobid[f] for f in job.input if f in self.external_jobid)
        submitcmd = job.format_wildcards(self.submitcmd, dependencies=deps)
        try:
            ext_jobid = subprocess.check_output(
                '{submitcmd} "{jobscript}"'.format(
                    submitcmd=submitcmd,
                    jobscript=jobscript),
                shell=True).decode().split("\n")
        except subprocess.CalledProcessError as ex:
            raise WorkflowError("Error executing jobscript (exit code {}):\n{}".format(ex.returncode, ex.output.decode()), rule=job.rule)
        if ext_jobid and ext_jobid[0]:
            ext_jobid = ext_jobid[0]
            self.external_jobid.update((f, ext_jobid) for f in job.output)
            logger.debug("Submitted job {} with external jobid {}.".format(jobid, ext_jobid))

        thread = threading.Thread(
            target=self._wait_for_job,
            args=(job, callback, error_callback,
                jobscript, jobfinished, jobfailed))
        thread.daemon = True
        thread.start()
        self.threads.append(thread)

        submit_callback(job)
Example #27
    def createSampleFileMapping(self):
        """
        create a sample file mapping with unique entries of existing files
            columns: [ID | ASSAY | FILE_TYPE | FILE_PATH ]
        """

        assay_mapping = {'RNA_ID': ['RNA_BAM_FILE', 'GENE_COUNTS_FILE'], 'DNA_ID': ['DNA_VCF_FILE']}
        assay_subsets = []
        for id_, file_types in assay_mapping.items():
            for file_type in file_types:
                df = self.annotationTable[[id_, file_type]].dropna().drop_duplicates().copy()
                df.rename(columns={id_: 'ID', file_type: 'FILE_PATH'}, inplace=True)
                df['ASSAY'] = id_
                df['FILE_TYPE'] = file_type
                assay_subsets.append(df)
        file_mapping = pd.concat(assay_subsets)

        # cleaning SAMPLE_FILE_MAPPING
        file_mapping.dropna(inplace=True)
        file_mapping.drop_duplicates(inplace=True)

        # check for missing files
        existing = utils.checkFileExists(file_mapping["FILE_PATH"])
        if len(existing) == 0:
            message = "File mapping is empty. "
            message += "Please check that all files in your sample annotation exist."
            raise FileNotFoundError(message)
        elif len(existing) < file_mapping.shape[0]:
            missing = set(file_mapping["FILE_PATH"]) - set(existing)
            logger.info(f"WARNING: {len(missing)} files missing in samples annotation. Ignoring...")
            logger.debug(f"Missing files: {missing}")

            file_mapping = file_mapping[file_mapping["FILE_PATH"].isin(existing)]

        # write file mapping
        file_mapping.to_csv(self.root / "file_mapping.csv", index=False)
        return file_mapping
Example #28
def writeDependencyFile():
    """
    Entry point for writing .wBuild.depend.
    """
    #if not wbuildVersionIsCurrent():
    #    print(bcolors.WARNING + "Version of the project's static .wBuild lib is not the same as the dynamically loaded "
    #                            "wBuild"
    #                            "version. It is strongly recommended to update .wBuild lib using \'wbuild update\'; "
    #                            "otherwise, the consistency of the build can not be guaranteed." + bcolors.ENDC)
    logger.info("Structuring dependencies...")
    conf = Config()
    htmlOutputPath = conf.get("htmlOutputPath")
    logger.debug("Loaded config.\n html output path (key htmlOutputPath): " +
                 htmlOutputPath + "\n")
    scriptsPath = conf.get("scriptsPath")
    wbData = parseWBInfosFromRFiles(script_dir=scriptsPath,
                                    htmlPath=htmlOutputPath)
    mdData = parseMDFiles(script_dir=scriptsPath, htmlPath=htmlOutputPath)
    dependFile = tempfile.NamedTemporaryFile('w', delete=False)
    with dependFile as f:  #start off with the header
        f.write('######\n')
        f.write('#This is an autogenerated snakemake file by wBuild\n')
        f.write('#wBuild by Leonhard Wachutka\n')
        f.write('######\n')
        # write rules
        for r in wbData:
            writeRule(r, f)
        # write md rules
        for r in mdData:
            writeMdRule(r, f)

        # write build index rule
        writeIndexRule(wbData, mdData, f)
    logger.info("Dependencies file generated.\n")

    return (dependFile.name)
Example #29
    def _get_task(self, job, jobscript):
        import tes

        checkdir, _ = os.path.split(self.snakefile)

        task = {}
        task["name"] = job.format_wildcards(self.jobname)
        task["description"] = self._get_task_description(job)
        task["inputs"] = self._get_task_inputs(job, jobscript, checkdir)
        task["outputs"] = self._get_task_outputs(job, checkdir)
        task["executors"] = self._get_task_executors()
        task["resources"] = tes.models.Resources()

        # define resources
        if "_cores" in job.resources:
            task["resources"]["cpu_cores"] = job.resources["_cores"]
        if "mem_mb" in job.resources:
            task["resources"]["ram_gb"] = job.resources["mem_mb"] / 1000
        if "disk_mb" in job.resources:
            task["resources"]["disk_gb"] = job.resources["disk_mb"] / 1000

        tes_task = tes.Task(**task)
        logger.debug("[TES] Built task: {task}".format(task=tes_task))
        return tes_task
Example #30
    def update_dynamic(self, job):
        dynamic_wildcards = job.dynamic_wildcards
        if not dynamic_wildcards:
            # this happens e.g. in dryrun if output is not yet present
            return

        depending = list(
            filter(lambda job_: not self.finished(job_),
                   self.bfs(self.depending, job)))
        newrule, non_dynamic_wildcards = job.rule.dynamic_branch(
            dynamic_wildcards, input=False)
        self.replace_rule(job.rule, newrule)

        # no targetfile needed for job
        newjob = Job(newrule, self, format_wildcards=non_dynamic_wildcards)
        self.replace_job(job, newjob)
        for job_ in depending:
            if job_.dynamic_input:
                newrule_ = job_.rule.dynamic_branch(dynamic_wildcards)
                if newrule_ is not None:
                    self.replace_rule(job_.rule, newrule_)
                    if not self.dynamic(job_):
                        logger.debug("Updating job {}.".format(job_))
                        newjob_ = Job(newrule_,
                                      self,
                                      targetfile=job_.targetfile)

                        unexpected_output = self.reason(
                            job_).missing_output.intersection(
                                newjob.existing_output)
                        if unexpected_output:
                            raise UnexpectedOutputException(
                                newjob_.rule, unexpected_output)

                        self.replace_job(job_, newjob_)
        return newjob
Example #31
 def unlock(self, *args):
     logger.debug("unlocking")
     for lockfile in self._lockfile.values():
         try:
             logger.debug("removing lock")
             os.remove(lockfile)
         except OSError as e:
             if e.errno != 2:  # missing file
                 raise e
     logger.debug("removed all locks")
Example #33
 def mtime(self):
     if self.s3_key == "test.txt":
         logger.debug("test.txt is new")
         return float("inf")
     elif self.s3_key.startswith("out"):
         logger.debug("{} is old".format(self.s3_key))
         # For some reason this breaks if you return 0
         return 5
     else:
         logger.debug("Using real mtime for {}".format(self.s3_key))
         return super().mtime()
Example #34
def test_wrappers(args_dict):
    """"""
    # Cleanup data and leave
    if args_dict["clean_output"]:
        logger.info("Removing output data")
        for wrapper_name in args_dict["wrappers"]:
            wrapper_workdir = os.path.join(args_dict["workdir"], wrapper_name)
            shutil.rmtree(wrapper_workdir, ignore_errors=True)
        sys.exit()

    # Test wrappers
    for wrapper_name in args_dict["wrappers"]:
        logger.warning("Testing Wrapper {}".format(wrapper_name))
        try:
            snakefile = get_snakefile_fn(workflow_dir=WRAPPER_DIR,
                                         workflow=wrapper_name)
            wrapper_workdir = os.path.join(args_dict["workdir"], wrapper_name)
            logger.debug("Working in directory: {}".format(wrapper_workdir))

            # Run Snakemake through the API
            snakemake(snakefile=snakefile,
                      workdir=wrapper_workdir,
                      config={"data_dir": DATA_DIR},
                      wrapper_prefix=WRAPPER_PREFIX,
                      use_conda=True,
                      cores=args_dict["cores"],
                      verbose=args_dict["verbose"],
                      quiet=args_dict["quiet"])

        finally:
            logger.debug("List of file generated: {}".format(
                os.listdir(wrapper_workdir)))
            shutil.rmtree(os.path.join(wrapper_workdir, ".snakemake"),
                          ignore_errors=True)
            if not args_dict["keep_output"]:
                logger.debug("Removing temporary directory")
                shutil.rmtree(wrapper_workdir, ignore_errors=True)
Example #35
    def _wait_for_jobs(self):
        """wait for jobs to complete. This means requesting their status,
        and then marking them as finished when a "done" parameter
        shows up. Even for finished jobs, the status should still return
        """
        import googleapiclient

        while True:
            # always use self.lock to avoid race conditions
            with self.lock:
                if not self.wait:
                    return
                active_jobs = self.active_jobs
                self.active_jobs = list()
                still_running = list()

            # Loop through active jobs and act on status
            for j in active_jobs:

                # use self.status_rate_limiter to avoid too many API calls.
                with self.status_rate_limiter:

                    # https://cloud.google.com/life-sciences/docs/reference/rest/v2beta/projects.locations.operations/get
                    # Get status from projects.locations.operations/get
                    operations = self._api.projects().locations().operations()
                    request = operations.get(name=j.jobname)
                    logger.debug("Checking status for operation {}".format(
                        j.jobid))

                    try:
                        status = self._retry_request(request)
                    except googleapiclient.errors.HttpError as ex:

                        # Operation name not found, even finished should be found
                        if ex.status == 404:
                            j.error_callback(j.job)
                            continue

                        # Unpredictable server (500) error
                        elif ex.status == 500:
                            logger.error(ex["content"].decode("utf-8"))
                            j.error_callback(j.job)

                    except WorkflowError as ex:
                        print_exception(ex, self.workflow.linemaps)
                        j.error_callback(j.job)
                        continue

                    # The operation is done
                    if status.get("done", False) == True:

                        # Derive success/failure from status codes (prints too)
                        if self._job_was_successful(status):
                            j.callback(j.job)
                        else:
                            self.print_job_error(j.job, jobid=j.jobid)
                            j.error_callback(j.job)

                    # The operation is still running
                    else:
                        still_running.append(j)

            with self.lock:
                self.active_jobs.extend(still_running)
            sleep()
Example #36
def log_verbose_traceback(ex):
    tb = "Full " + "".join(traceback.format_exception(type(ex), ex, ex.__traceback__))
    logger.debug(tb)
Example #37
    def _generate_job_resources(self, job):
        """given a particular job, generate the resources that it needs,
        including default regions and the virtual machine configuration
        """
        # Right now, do a best effort mapping of resources to instance types
        cores = job.resources.get("_cores", 1)
        mem_mb = job.resources.get("mem_mb", 15360)

        # IOPS performance proportional to disk size
        disk_mb = job.resources.get("disk_mb", 512000)

        # Convert mb to gb
        disk_gb = math.ceil(disk_mb / 1024)

        # Look for if the user wants an nvidia gpu
        gpu_count = job.resources.get("nvidia_gpu") or job.resources.get("gpu")
        gpu_model = job.resources.get("gpu_model")

        # If a gpu model is specified without a count, we assume 1
        if gpu_model and not gpu_count:
            gpu_count = 1

        # Update default resources using decided memory and disk
        self.workflow.default_resources = self.default_resources
        self.workflow.default_resources.args = [
            "mem_mb=%s" % mem_mb,
            "disk_mb=%s" % disk_mb,
        ]
        self.workflow.default_resources.parsed["mem_mb"] = mem_mb
        self.workflow.default_resources.parsed["disk_mb"] = disk_mb

        # Job resource specification can be overridden by gpu preferences
        self.machine_type_prefix = job.resources.get("machine_type")

        # If gpu wanted, limit to N1 general family, and update arguments
        if gpu_count:
            self._add_gpu(gpu_count)

        machine_types = self.get_available_machine_types()

        # Alert the user of machine_types available before filtering
        # https://cloud.google.com/compute/docs/machine-types
        logger.debug(
            "found {} machine types across regions {} before filtering "
            "to increase selection, define fewer regions".format(
                len(machine_types), self.regions))

        # First pass - eliminate anything that is too low in cpu/memory
        keepers = dict()

        # Also keep track of max cpus and memory, in case none available
        max_cpu = 1
        max_mem = 15360

        for name, machine_type in machine_types.items():
            max_cpu = max(max_cpu, machine_type["guestCpus"])
            max_mem = max(max_mem, machine_type["memoryMb"])
            if machine_type["guestCpus"] < cores or machine_type[
                    "memoryMb"] < mem_mb:
                continue
            keepers[name] = machine_type

        # If a prefix is set, filter down to it
        if self.machine_type_prefix:
            machine_types = keepers
            keepers = dict()
            for name, machine_type in machine_types.items():
                if name.startswith(self.machine_type_prefix):
                    keepers[name] = machine_type

        # If we don't have any contenders, workflow error
        if not keepers:
            if self.machine_type_prefix:
                raise WorkflowError(
                    "Machine prefix {prefix} is too strict, or the resources cannot "
                    " be satisfied, so there are no options "
                    "available.".format(prefix=self.machine_type_prefix))
            else:
                raise WorkflowError(
                    "You requested {requestMemory} MB memory, {requestCpu} cores. "
                    "The maximum available are {availableMemory} MB memory and "
                    "{availableCpu} cores. These resources cannot be satisfied. "
                    "Please consider reducing the resource requirements of the "
                    "corresponding rule.".format(
                        requestMemory=mem_mb,
                        requestCpu=cores,
                        availableCpu=max_cpu,
                        availableMemory=max_mem,
                    ))

        # Now find (quasi) minimal to satisfy constraints
        machine_types = keepers

        # Select the first as the "smallest"
        smallest = list(machine_types.keys())[0]
        min_cores = machine_types[smallest]["guestCpus"]
        min_mem = machine_types[smallest]["memoryMb"]

        for name, machine_type in machine_types.items():
            if (machine_type["guestCpus"] < min_cores
                    and machine_type["memoryMb"] < min_mem):
                smallest = name
                min_cores = machine_type["guestCpus"]
                min_mem = machine_type["memoryMb"]

        selected = machine_types[smallest]
        logger.debug("Selected machine type {}:{}".format(
            smallest, selected["description"]))

        virtual_machine = {
            "machineType": smallest,
            "labels": {
                "app": "snakemake"
            },
            "bootDiskSizeGb": disk_gb,
            "preemptible": job.rule.name in self.preemptible_rules,
        }

        # If the user wants gpus, add accelerators here
        if gpu_count:
            accelerator = self._get_accelerator(gpu_count,
                                                zone=selected["zone"],
                                                gpu_model=gpu_model)
            virtual_machine["accelerators"] = [{
                "type": accelerator["name"],
                "count": gpu_count
            }]

        resources = {
            "regions": self.regions,
            "virtualMachine": virtual_machine
        }
        return resources
Example #38
    def __init__(
        self,
        workflow,
        dag,
        cores,
        jobname="snakejob.{name}.{jobid}.sh",
        printreason=False,
        quiet=False,
        printshellcmds=False,
        container_image=None,
        regions=None,
        location=None,
        cache=False,
        latency_wait=3,
        local_input=None,
        restart_times=None,
        exec_job=None,
        max_status_checks_per_second=1,
        preemption_default=None,
        preemptible_rules=None,
    ):

        # Attach variables for easy access
        self.workflow = workflow
        self.quiet = quiet
        self.workdir = os.path.dirname(self.workflow.persistence.path)
        self._save_storage_cache = cache

        # Relative path for running on instance
        self._set_snakefile()

        # Prepare workflow sources for build package
        self._set_workflow_sources()

        exec_job = (exec_job or
                    ("snakemake {target} --snakefile %s "
                     "--force -j{cores} --keep-target-files --keep-remote "
                     "--latency-wait 0 --scheduler {workflow.scheduler_type} "
                     "--attempt 1 {use_threads} --max-inventory-time 0 "
                     "{overwrite_config} {rules} --nocolor "
                     "--notemp --no-hooks --nolock " % self.snakefile) +
                    self.get_set_threads_args() + self.get_set_scatter_args())

        # Set preemptible instances
        self._set_preemptible_rules(preemption_default, preemptible_rules)

        # IMPORTANT: using the Compute Engine API and not k8s == no support for secrets
        self.envvars = list(self.workflow.envvars) or []

        # Quit early if we can't authenticate
        self._get_services()
        self._get_bucket()

        # Akin to Kubernetes, create a run namespace, default container image
        self.run_namespace = str(uuid.uuid4())
        self.container_image = container_image or get_container_image()
        self.regions = regions or ["us-east1", "us-west1", "us-central1"]

        # The project name is required, either from client or environment
        self.project = (os.environ.get("GOOGLE_CLOUD_PROJECT")
                        or self._bucket_service.project)

        # Determine API location based on user preference, and then regions
        self._set_location(location)

        # Tell the user right away the regions, location, and container
        logger.debug("regions=%s" % self.regions)
        logger.debug("location=%s" % self.location)
        logger.debug("container=%s" % self.container_image)

        # Keep track of build packages to clean up at shutdown, and generate one
        self._build_packages = set()
        targz = self._generate_build_source_package()
        self._upload_build_source_package(targz)

        # Save default resources to add later, since we need to add custom
        # default resources depending on the instance requested
        self.default_resources = self.workflow.default_resources
        self.workflow.default_resources.args = None

        super().__init__(
            workflow,
            dag,
            None,
            jobname=jobname,
            printreason=printreason,
            quiet=quiet,
            printshellcmds=printshellcmds,
            latency_wait=latency_wait,
            restart_times=restart_times,
            exec_job=exec_job,
            assume_shared_fs=False,
            max_status_checks_per_second=10,
        )
Example #39
    def _set_location(self, location=None):
        """The location is where the Google Life Sciences API is located.
        This can be meaningful if the requester has data residency
        requirements or multi-zone needs. To determine this value,
        we first use the locations API to determine locations available,
        and then compare them against:

        1. user specified location or prefix
        2. regions having the same prefix
        3. if neither can be satisfied, we throw an error.
        """
        # Derive available locations
        # See https://cloud.google.com/life-sciences/docs/concepts/locations
        locations = (self._api.projects().locations().list(
            name="projects/{}".format(self.project)).execute())

        locations = {
            x["locationId"]: x["name"]
            for x in locations.get("locations", [])
        }

        # Alert the user about locations available
        logger.debug("locations-available:\n%s" % "\n".join(locations))

        # If no locations, there is something wrong
        if not locations:
            raise WorkflowError(
                "No locations found for Google Life Sciences API.")

        # First pass, attempt to match the user-specified location (or prefix)
        if location:
            if location in locations:
                self.location = locations[location]
                return

            # It could be that a prefix was provided
            for contender in locations:
                if contender.startswith(location):
                    self.location = locations[contender]
                    return

            # If we get here and no match, alert user.
            raise WorkflowError(
                "Location or prefix requested %s is not available." % location)

        # If we get here, we need to select location from regions
        for region in self.regions:
            if region in locations:
                self.location = locations[region]
                return

        # If we get here, choose based on prefix
        prefixes = set([r.split("-")[0] for r in self.regions])
        regexp = "^(%s)" % "|".join(prefixes)
        for location in locations:
            if re.search(regexp, location):
                self.location = locations[location]
                return

        # If we get here, total failure of finding location
        raise WorkflowError(
            " No locations available for regions!"
            " Please specify a location with --google-lifesciences-location "
            " or extend --google-lifesciences-regions to find a Life Sciences location."
        )
Example #40
def set_temporary_output(*rules):
    """Set the output of rules to temporary"""
    for rule in rules:
        logger.debug(
            "setting output of rule '{rule}' to temporary".format(rule=rule))
        rule.temp_output = set(rule.output)
Example #41
def set_protected_output(*rules):
    """Set the output of rules to protected"""
    for rule in rules:
        logger.debug(
            "setting output of rule '{rule}' to protected".format(rule=rule))
        rule.protected_output = set(rule.output)
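
A short usage sketch for the two helpers above; the rule names and the get_rule accessor on a loaded workflow object are assumptions for illustration:

# Hypothetical usage: flag the outputs of two rules as temporary/protected.
set_temporary_output(workflow.get_rule("sort_bam"))       # made-up rule name
set_protected_output(workflow.get_rule("call_variants"))  # made-up rule name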