Example 1
    def test_update_job_status(self):
        """
        Test whether the job status is updated in the job database
        """
        mgmt = self.config.get("management", {})
        uri = mgmt.get("database_uri", None)
        job_name = mgmt.get("job_name", None)
        update_job_status(self.config, status=EStatus.RUN)

        # read the job row back directly from the job database
        engine = create_engine(uri)
        Session = sessionmaker(bind=engine)
        session = Session()
        y1 = [
            x.__dict__
            for x in session.query(ComputeJob).filter_by(name=job_name).all()
        ][0]
        self.assertEqual(mgmt["job_name"], y1["name"])
        self.assertEqual(EStatus.RUN, y1["status"])

        update_job_status(self.config, status=EStatus.DONE)
        y2 = [
            x.__dict__
            for x in session.query(ComputeJob).filter_by(name=job_name).all()
        ][0]
        self.assertEqual(EStatus.DONE, y2["status"])
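For reference, the test above assumes a job-tracking database with a ComputeJob table, an EStatus set of status constants, and an update_job_status helper. The sketch below is an assumption based only on how these names are used in the snippet, not the project's actual implementation; field names and status values are illustrative.

from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()


class EStatus:
    # plain string constants; the real project may model this differently
    RUN = "running"
    DONE = "done"
    FAIL = "failed"
    TERM = "terminated"


class ComputeJob(Base):
    # one row per pipeline job, looked up by name
    __tablename__ = "compute_jobs"
    id = Column(Integer, primary_key=True)
    name = Column(String, unique=True)
    stage = Column(String, nullable=True)
    status = Column(String, nullable=True)


def update_job_status(config, status=None, stage=None):
    # connection info and job name live in the "management" section
    mgmt = config.get("management", {})
    uri = mgmt.get("database_uri")
    job_name = mgmt.get("job_name")
    if uri is None or job_name is None:
        return  # no job database configured, nothing to track

    engine = create_engine(uri)
    Base.metadata.create_all(engine)
    session = sessionmaker(bind=engine)()

    # upsert the job row and update whichever fields were passed
    job = session.query(ComputeJob).filter_by(name=job_name).one_or_none()
    if job is None:
        job = ComputeJob(name=job_name)
        session.add(job)
    if status is not None:
        job.status = status
    if stage is not None:
        job.stage = stage
    session.commit()
    session.close()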
Example 2
    def _handler(signal_, frame):
        # set job status to terminated in database
        update_job_status(config, status=EStatus.TERM)

        # create file flag that job was terminated
        with open(prefix + ".terminated", "w") as f:
            f.write("SIGNAL: {}\n".format(signal_))

        # terminate program
        sys.exit(1)
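A handler like this only takes effect once it is registered for the signals of interest; Example 4 below shows the registration it is paired with. As a minimal sketch (the signal list here is a reduced, assumed subset):

import signal

# register the handler for common termination signals
for sig in (signal.SIGINT, signal.SIGTERM):
    signal.signal(sig, _handler)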
Example 3
def execute(**config):
    """
    Execute a pipeline configuration

    Parameters
    ----------
    **config
        Input configuration for pipeline
        (see pipeline config files for
        an example of what this should look like)

    Returns
    -------
    global_state : dict
        Global output state of pipeline
    """
    check_required(config, ["pipeline", "stages", "global"])

    # check if valid pipeline was selected
    if config["pipeline"] not in PIPELINES:
        raise InvalidParameterError("Not a valid pipeline selection. "
                                    "Valid choices are:\n{}".format(", ".join(
                                        PIPELINES.keys())))

    stages = config["stages"]
    if stages is None:
        raise InvalidParameterError("No stages defined, need at least one.")

    # get definition of selected pipeline
    pipeline = PIPELINES[config["pipeline"]]
    prefix = config["global"]["prefix"]

    # make sure output directory exists
    create_prefix_folders(prefix)

    # this is the global state of results as
    # we move through different stages of
    # the pipeline
    global_state = config["global"]

    # keep track of how many stages are still
    # to be run, so we can leave out stages at
    # the end of workflow below
    num_stages_to_run = len(stages)

    # set job status to running
    update_job_status(config, status=EStatus.RUN)

    # iterate through individual stages
    for (stage, runner, key_prefix) in pipeline:
        # check if anything is left to
        # run, otherwise stop the loop
        if num_stages_to_run == 0:
            break

        # check if config for stage is there
        check_required(config, [stage])

        # write output files for this stage into an individual folder
        stage_prefix = insert_dir(prefix, stage)
        create_prefix_folders(stage_prefix)

        # config files for input and output of stage
        stage_incfg = "{}_{}.incfg".format(stage_prefix, stage)
        stage_outcfg = "{}_{}.outcfg".format(stage_prefix, stage)

        # update current stage of job
        update_job_status(config, stage=stage)

        # check if stage should be executed
        if stage in stages:
            # global state inserted at end, overrides any
            # stage-specific settings (except for custom prefix)
            incfg = {
                **config["tools"],
                **config["databases"],
                **config[stage],
                **global_state, "prefix": stage_prefix
            }
            # save input of stage in config file
            write_config_file(stage_incfg, incfg)

            # run stage
            outcfg = runner(**incfg)

            # prefix output keys if this parameter is
            # given in stage configuration, to avoid
            # name clashes if same protocol run multiple times
            if key_prefix is not None:
                outcfg = {key_prefix + k: v for k, v in outcfg.items()}

            # save output of stage in config file
            write_config_file(stage_outcfg, outcfg)

            # one fewer stage left to run after this one
            num_stages_to_run -= 1
        else:
            # skip stage by injecting state from previous run
            verify_resources(
                "Trying to skip, but output configuration "
                "for stage '{}' does not exist. Has it already "
                "been run?".format(stage), stage_outcfg)

            # read output configuration
            outcfg = read_config_file(stage_outcfg)

            # verify all the output files are there
            outfiles = [
                filepath for f, filepath in outcfg.items()
                if f.endswith("_file") and filepath is not None
            ]

            verify_resources(
                "Output files from stage '{}' "
                "missing".format(stage), *outfiles)

        # update global state with outputs of stage
        global_state = {**global_state, **outcfg}

    # create results archive
    archive_file = prefix + ".tar.gz"
    create_archive(config, global_state, archive_file)
    global_state["archive_file"] = archive_file

    # delete selected output files if requested
    global_state = delete_outputs(config, global_state)

    # write final global state of pipeline
    write_config_file(prefix + FINAL_CONFIG_SUFFIX, global_state)

    # set job status to done
    update_job_status(config, status=EStatus.DONE)

    return global_state
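The docstring points to the pipeline config files for the expected input shape. Purely as an illustrative sketch based on the keys execute() reads above, a minimal configuration might look roughly like the following; the pipeline and stage names are placeholders, and the actual required sections are whatever the selected PIPELINES entry defines.

config = {
    "pipeline": "my_pipeline",              # must be a key of PIPELINES
    "stages": ["stage_a", "stage_b"],       # stages to actually run
    "global": {"prefix": "output/run_01"},  # output location prefix
    "tools": {},                            # paths to external tools
    "databases": {},                        # paths to databases
    "management": {                         # optional job-tracking database
        "database_uri": "sqlite:///jobs.db",
        "job_name": "run_01",
    },
    # one section per stage of the selected pipeline definition,
    # needed even for stages that are skipped
    "stage_a": {},
    "stage_b": {},
}

global_state = execute(**config)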
Example 4
def execute_wrapped(**config):
    """
    Execute a pipeline configuration in "wrapped"
    mode that handles external interruptions and
    exceptions and documents these using files
    (.finished, .terminated, .failed), as well
    as documenting failure in a job database

    Parameters
    ----------
    **config
        Input configuration for pipeline
        (see pipeline config files for
        an example of what this should look like)

    Returns
    -------
    outcfg : dict
        Global output state of pipeline
    """
    # make sure the prefix in configuration is valid
    try:
        prefix = verify_prefix(**config)
    except Exception:
        update_job_status(config, status=EStatus.TERM)
        raise

    # delete finished/terminated/failed flag files from
    # previous executions of pipeline
    for ext in [
            EXTENSION_FAILED,
            EXTENSION_TERMINATED,
            EXTENSION_DONE,
    ]:
        try:
            os.remove(prefix + ext)
        except OSError:
            pass

    # handler for external interruptions
    # (needs config for database access)
    def _handler(signal_, frame):
        # set job status to terminated in database
        update_job_status(config, status=EStatus.TERM)

        # create file flag that job was terminated
        with open(prefix + ".terminated", "w") as f:
            f.write("SIGNAL: {}\n".format(signal_))

        # terminate program
        sys.exit(1)

    # set up handlers for job termination
    # (note that this list may not be complete and may
    # need extension for other computing environments)
    for sig in [signal.SIGINT, signal.SIGTERM, signal.SIGUSR1, signal.SIGUSR2]:
        signal.signal(sig, _handler)

    try:
        # execute configuration
        outcfg = execute(**config)

        # if we made it here, the job ran successfully to completion;
        # create file flag that the job finished
        with open(prefix + ".finished", "w") as f:
            f.write(repr(outcfg))

        return outcfg

    except Exception:
        # set status in database to failed
        update_job_status(config, status=EStatus.FAIL)

        # create failed file flag
        with open(prefix + ".failed", "w") as f:
            f.write(traceback.format_exc())

        # raise exception again after we updated status
        raise
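A hedged usage sketch: running a configuration in wrapped mode so that interruptions and failures leave flag files next to the prefix. Loading the configuration with read_config_file as a plain "file path to dict" reader is an assumption here, and the file name is a placeholder.

if __name__ == "__main__":
    # load a pipeline configuration and run it with failure/termination handling
    config = read_config_file("my_job_config.yml")
    outcfg = execute_wrapped(**config)
    print("pipeline finished, archive at:", outcfg.get("archive_file"))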