Пример #1
0
def api_runCancel():
    """Cancel the job identified by the posted request, on a best-effort basis.

    The POST is parsed with ``http_request.parse_post`` and the job id is
    derived from it. When the runner reports that job as processing, a
    ``canceled`` result is written to the run directory, the job is killed,
    and the template's ``remove_last_frame`` hook is called if it exists.

    Every exception raised along the way is logged and then ignored.

    Returns:
        object: reply generated from ``{'state': 'canceled'}``
    """
    # Bound up front so the log statement below can always reference it.
    jid = None
    try:
        parsed = http_request.parse_post(
            id=True,
            model=True,
            check_sim_exists=True,
        )
        jid = parsed.sim_data.parse_jid(parsed.req_data)
        # TODO(robnagler) need to have a way of listing jobs
        # No cache_hit check: there is no way to cancel when the parameters
        # don't match, so for now the job is always killed.
        # TODO(robnagler) mutex required
        if runner.job_is_processing(jid):
            job_dir = simulation_db.simulation_run_dir(parsed.req_data)
            # Results are write once, so record the cancel before killing;
            # otherwise the termination error caused by the kill would be
            # what gets recorded.
            try:
                simulation_db.write_result(
                    {'state': 'canceled'},
                    run_dir=job_dir,
                )
            except Exception as write_err:
                if pykern.pkio.exception_is_not_found(write_err):
                    # run_dir may have been deleted
                    pass
                else:
                    raise
            runner.job_kill(jid)
            # TODO(robnagler) should really be inside the template
            # (t.cancel_simulation()?)
            # The last frame file may not be finished, so remove it.
            template = sirepo.template.import_module(parsed.req_data)
            if hasattr(template, 'remove_last_frame'):
                template.remove_last_frame(job_dir)
    except Exception as err:
        pkdlog('ignoring exception={} jid={} stack={}', err, jid, pkdexc())
    # Always true from the client's perspective
    return http_reply.gen_json({'state': 'canceled'})
Пример #2
0
def api_runCancel():
    """Cancel the job described by the parsed request input.

    When ``feature_config.cfg.runner_daemon`` is set, the cancel is handed
    to ``runner_client.cancel_report_job``. Otherwise, if the runner reports
    the job as processing, a ``canceled`` result is written, the job is
    killed, and the template's ``remove_last_frame`` hook is called if it
    exists.

    Returns:
        object: reply generated from ``{'state': 'canceled'}``
    """
    data = _parse_data_input()
    jid = simulation_db.job_id(data)
    if feature_config.cfg.runner_daemon:
        params_hash = template_common.report_parameters_hash(data)
        daemon_run_dir = simulation_db.simulation_run_dir(data)
        runner_client.cancel_report_job(daemon_run_dir, params_hash)
    # TODO(robnagler) need to have a way of listing jobs
    # No cache_hit check: there is no way to cancel when the parameters
    # don't match, so for now the job is always killed.
    # TODO(robnagler) mutex required
    elif runner.job_is_processing(jid):
        job_dir = simulation_db.simulation_run_dir(data)
        # Results are write once, so record the cancel before killing;
        # otherwise the termination error caused by the kill would be what
        # gets recorded.
        simulation_db.write_result(
            {'state': 'canceled'},
            run_dir=job_dir,
        )
        runner.job_kill(jid)
        # TODO(robnagler) should really be inside the template
        # (t.cancel_simulation()?)
        # The last frame file may not be finished, so remove it.
        template = sirepo.template.import_module(data)
        if hasattr(template, 'remove_last_frame'):
            template.remove_last_frame(job_dir)
    # Always true from the client's perspective
    return http_reply.gen_json({'state': 'canceled'})
Пример #3
0
def _simulation_run_status(data, quiet=False):
    """Build the status response for a simulation report.

    Combines what the runner says about the job, the stored result in the
    run directory (when the job is not processing), and, for parallel
    simulations, the template's ``background_percent_complete`` output.

    Args:
        data (dict): request
        quiet (bool): passed to ``_simulation_error`` when an exception
            is caught

    Returns:
        dict: status response, or the value of ``_simulation_error`` /
        the template's ``parse_error_log`` on failure
    """
    try:
        #TODO(robnagler): Lock
        info = simulation_db.report_info(data)
        processing = runner.job_is_processing(info.job_id)
        running = info.job_status in _RUN_STATES
        status = {'state': info.job_status}
        pkdc(
            '{}: is_processing={} is_running={} state={} cached_data={}',
            info.job_id,
            processing,
            running,
            info.job_status,
            bool(info.cached_data),
        )
        if processing and not running:
            # The runner reports the job as processing but the recorded
            # status is not one of the run states: reap it and carry on as
            # if it were not processing.
            runner.job_race_condition_reap(info.job_id)
            pkdc('{}: is_processing and not is_running', info.job_id)
            processing = False
        template = sirepo.template.import_module(data)
        if not processing:
            running = False
            if info.run_dir.exists():
                if (hasattr(template, 'prepare_output_file')
                        and 'models' in data):
                    template.prepare_output_file(info.run_dir, data)
                stored, err = simulation_db.read_result(info.run_dir)
                if not err:
                    status = stored
                elif simulation_db.is_parallel(data):
                    # allow parallel jobs to use template to parse errors below
                    status['state'] = 'error'
                else:
                    if hasattr(template, 'parse_error_log'):
                        parsed = template.parse_error_log(info.run_dir)
                        if parsed:
                            return parsed
                    return _simulation_error(
                        err,
                        'error in read_result',
                        info.run_dir,
                    )
        elif not info.cached_data:
            return _simulation_error(
                'input file not found, but job is running',
                info.input_file,
            )
        if simulation_db.is_parallel(data):
            progress = template.background_percent_complete(
                info.model_name,
                info.run_dir,
                running,
            )
            progress.setdefault('percentComplete', 0.0)
            progress.setdefault('frameCount', 0)
            status.update(progress)
        changed = info.parameters_changed
        status['parametersChanged'] = changed
        if changed:
            pkdlog(
                '{}: parametersChanged=True req_hash={} cached_hash={}',
                info.job_id,
                info.req_hash,
                info.cached_hash,
            )
        #TODO(robnagler) verify serial number to see what's newer
        # Values already present in the status win over these defaults.
        status.setdefault('startTime', _mtime_or_now(info.input_file))
        status.setdefault('lastUpdateTime', _mtime_or_now(info.run_dir))
        status.setdefault(
            'elapsedTime',
            status['lastUpdateTime'] - status['startTime'],
        )
        if processing:
            # Tell the client when and with what to poll again.
            status['nextRequestSeconds'] = simulation_db.poll_seconds(
                info.cached_data,
            )
            status['nextRequest'] = dict(
                report=info.model_name,
                reportParametersHash=info.cached_hash,
                simulationId=info.cached_data['simulationId'],
                simulationType=info.cached_data['simulationType'],
            )
        pkdc(
            '{}: processing={} state={} cache_hit={} cached_hash={} data_hash={}',
            info.job_id,
            processing,
            status['state'],
            info.cache_hit,
            info.cached_hash,
            info.req_hash,
        )
    except Exception:
        return _simulation_error(pkdexc(), quiet=quiet)
    return status
Пример #4
0
def _simulation_run_status(req, quiet=False):
    """Build the status response for a simulation report.

    Combines what the runner says about the job, the stored result in the
    run directory (when the job is not processing), and, for parallel
    simulations, the template's ``background_percent_complete`` output.

    Args:
        req: parsed request; ``req_data`` and ``type`` are read from it
        quiet (bool): accepted for callers, not referenced in this body

    Returns:
        PKDict: status response, or the value of ``_subprocess_error``
        on failure
    """
    job = _reqd(req)
    has_models = 'models' in req.req_data
    if has_models:
        # Stamp the request's hash onto the incoming models.
        req.req_data.models.computeJobCacheKey = PKDict(
            computeJobHash=job.req_hash,
        )
    processing = runner.job_is_processing(job.jid)
    running = job.job_status in _RUN_STATES
    status = PKDict(state=job.job_status)
    pkdc(
        '{}: is_processing={} is_running={} state={} cached_data={}',
        job.jid,
        processing,
        running,
        job.job_status,
        bool(job.cached_data),
    )
    if processing and not running:
        # The runner reports the job as processing but the recorded status
        # is not one of the run states: reap it and carry on as if it were
        # not processing.
        runner.job_race_condition_reap(job.jid)
        pkdc('{}: is_processing and not is_running', job.jid)
        processing = False
    template = sirepo.template.import_module(req.type)
    if not processing:
        running = False
        if job.run_dir.exists():
            status = simulation_db.read_result(job.run_dir)
            if status.state == sirepo.job.ERROR:
                detail = status.get('error', '<no error in read_result>')
                return _subprocess_error(
                    error='read_result error: ' + detail,
                    run_dir=job.run_dir,
                )
            if (
                has_models
                and status.state == sirepo.job.COMPLETED
                and hasattr(template, 'prepare_output_file')
            ):
                # Re-read the result after the template prepared the output.
                template.prepare_output_file(job.run_dir, req.req_data)
                status = simulation_db.read_result(job.run_dir)
    elif not job.cached_data:
        return _subprocess_error(
            error='input file not found, but job is running',
            input_file=job.input_file,
        )
    if job.is_parallel:
        progress = template.background_percent_complete(
            job.model_name,
            job.run_dir,
            running,
        )
        progress.setdefault('percentComplete', 0.0)
        progress.setdefault('frameCount', 0)
        status.update(progress)
    status['parametersChanged'] = job.parameters_changed
    if status['parametersChanged']:
        pkdlog(
            '{}: parametersChanged=True req_hash={} cached_hash={}',
            job.jid,
            job.req_hash,
            job.cached_hash,
        )
    if job.is_parallel and job.cached_data:
        cache_key = job.cached_data.models.computeJobCacheKey
        serial = cache_key.get('computeJobSerial', 0)

        def _elapsed():
            # Zero when there is no serial; otherwise the last update time
            # (or the run directory's mtime/now) minus the serial.
            if not serial:
                return 0
            last = status.get('lastUpdateTime') or _mtime_or_now(job.run_dir)
            return int(last - serial)

        # Passed as a callable so it is only evaluated on demand.
        status.pksetdefault(
            computeJobHash=cache_key.computeJobHash,
            computeJobSerial=serial,
            elapsedTime=_elapsed,
        )
    if processing:
        # Tell the client when and with what to poll again.
        status.nextRequestSeconds = job.sim_data.poll_seconds(job.cached_data)
        status.nextRequest = PKDict(
            report=job.model_name,
            simulationId=job.cached_data.simulationId,
            simulationType=job.cached_data.simulationType,
            **job.cached_data.models.computeJobCacheKey
        )
    pkdc(
        '{}: processing={} state={} cache_hit={} cached_hash={} data_hash={}',
        job.jid,
        processing,
        status['state'],
        job.cache_hit,
        job.cached_hash,
        job.req_hash,
    )
    return status