Example #1
def parmake_job2_new_process(args):
    """ Starts the job in a new compmake process. """
    (job_id, context) = args
    compmake_bin = which("compmake")

    db = context.get_compmake_db()
    storage = db.basepath  # XXX:
    where = os.path.join(storage, "parmake_job2_new_process")
    if not os.path.exists(where):
        try:
            os.makedirs(where)
        except OSError:
            # The directory may have been created concurrently by another worker.
            pass

    out_result = os.path.join(where, "%s.results.pickle" % job_id)
    out_result = os.path.abspath(out_result)
    cmd = [compmake_bin, storage]

    if not all_disabled():
        cmd += ["--contracts"]

    cmd += [
        "--status_line_enabled",
        "0",
        "--colorize",
        "0",
        "-c",
        "make_single out_result=%s %s" % (out_result, job_id),
    ]

    cwd = os.getcwd()
    cmd_res = system_cmd_result(
        cwd, cmd, display_stdout=False, display_stderr=False, raise_on_error=False, capture_keyboard_interrupt=False
    )
    ret = cmd_res.ret

    if ret == CompmakeConstants.RET_CODE_JOB_FAILED:  # XXX:
        msg = "Job %r failed in external process" % job_id
        msg += indent(cmd_res.stdout, "stdout| ")
        msg += indent(cmd_res.stderr, "stderr| ")

        res = safe_pickle_load(out_result)
        os.unlink(out_result)
        result_dict_check(res)

        raise JobFailed.from_dict(res)

    elif ret != 0:
        msg = "Host failed while doing %r" % job_id
        msg += "\n cmd: %s" % " ".join(cmd)
        msg += "\n" + indent(cmd_res.stdout, "stdout| ")
        msg += "\n" + indent(cmd_res.stderr, "stderr| ")
        raise CompmakeBug(msg)  # XXX:

    res = safe_pickle_load(out_result)
    os.unlink(out_result)
    result_dict_check(res)
    return res
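
For context, here is one way a caller might fan jobs out to parmake_job2_new_process with a standard multiprocessing pool. This driver is a sketch under assumptions: compmake's real parmake manager adds event queues and scheduling on top, and the context object must be picklable for this to work.

from multiprocessing import Pool

def run_jobs_in_new_processes(job_ids, context, nworkers=4):
    # Hypothetical driver, not compmake's scheduler: it only illustrates
    # the (job_id, context) calling convention of parmake_job2_new_process.
    args = [(job_id, context) for job_id in job_ids]
    with Pool(processes=nworkers) as pool:
        # Each call returns the result dict loaded from the pickle file;
        # a JobFailed raised in a worker propagates out of map().
        return pool.map(parmake_job2_new_process, args)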
Example #2
def result_dict_raise_if_error(res):
    from compmake.exceptions import JobFailed
    from compmake.exceptions import HostFailed
    from compmake.exceptions import CompmakeBug
    from compmake.exceptions import JobInterrupted

    result_dict_check(res)

    if 'fail' in res:
        raise JobFailed.from_dict(res)

    if 'abort' in res:
        raise HostFailed.from_dict(res)

    if 'bug' in res:
        raise CompmakeBug.from_dict(res)

    if 'interrupted' in res:
        raise JobInterrupted.from_dict(res)
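
A short usage sketch: the four keys act as mutually exclusive failure markers, so a caller can funnel any worker result through a single check. The worker_result and handle_failure names below are hypothetical, and the exact keys each from_dict() expects are internal to compmake.

from compmake.exceptions import (CompmakeBug, HostFailed, JobFailed,
                                 JobInterrupted)

def check_worker_result(worker_result):
    # worker_result: a result dict received from a worker (hypothetical).
    try:
        result_dict_raise_if_error(worker_result)
    except (JobFailed, HostFailed, CompmakeBug, JobInterrupted) as e:
        handle_failure(e)  # hypothetical application-level handler
        raise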
Example #3
def mvac_job(args):
    """
    args = tuple job_id, context,  queue_name, show_events
        
    Returns a dictionary with fields "user_object", "new_jobs", 'delete_jobs'.
    "user_object" is set to None because we do not want to 
    load in our thread if not necessary. Sometimes it is necessary
    because it might contain a Promise. 
   
    """
    job_id, context, event_queue_name, show_output, volumes, cwd = args  # @UnusedVariable
    check_isinstance(job_id, str)
    check_isinstance(event_queue_name, str)
    
    # Disable multyvac logging
    disable_logging_if_config(context)
    
    db = context.get_compmake_db()
    job = get_job(job_id=job_id, db=db)

    if job.needs_context:
        msg = 'Cannot use multyvac for dynamic job.'
        raise CompmakeException(msg)

    time_start = time.time()

    multyvac_job = mvac_instance(db, job_id, volumes, cwd)
    multyvac_job.wait()
    
    errors = [multyvac_job.status_error, multyvac_job.status_killed]
    if multyvac_job.status in errors:
        e = 'Multyvac error (status: %r)' % multyvac_job.status 
        bt = str(multyvac_job.stderr)

        cache = Cache(Cache.FAILED)
        cache.exception = e
        cache.backtrace = bt
        cache.timestamp = time.time()
        cache.captured_stderr = str(multyvac_job.stderr)
        cache.captured_stdout = str(multyvac_job.stdout)
        set_job_cache(job_id, cache, db=db)

        raise JobFailed(job_id=job_id, reason=str(e), bt=bt)
        
    user_object = multyvac_job.result

    user_object_deps = collect_dependencies(user_object)
    set_job_userobject(job_id, user_object, db=db)
    
    cache = get_job_cache(job_id, db=db)
    cache.captured_stderr = str(multyvac_job.stderr)
    cache.captured_stdout = str(multyvac_job.stdout)

    cache.state = Cache.DONE
    cache.timestamp = time.time()
    walltime = cache.timestamp - time_start
    cache.walltime_used = walltime
    cache.cputime_used = multyvac_job.cputime_system
    cache.host = 'multyvac'
    cache.jobs_defined = set()
    set_job_cache(job_id, cache, db=db)
    
    result_dict = dict(user_object=user_object,
                user_object_deps=user_object_deps, 
                new_jobs=[], deleted_jobs=[])
    result_dict_check(result_dict)
    return result_dict
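
result_dict_check itself is not shown in these examples. Judging from how the result dicts are constructed above, a plausible reading is that it validates the dict's shape before it crosses process boundaries. The following is a guess at that invariant, not compmake's actual implementation:

def result_dict_check_sketch(res):
    # Hypothetical re-implementation for illustration only.
    success_keys = {'user_object', 'user_object_deps', 'new_jobs',
                    'deleted_jobs'}
    failure_markers = {'fail', 'abort', 'bug', 'interrupted'}
    if success_keys.issubset(res):
        return
    if any(marker in res for marker in failure_markers):
        return
    raise ValueError('Malformed result dict: %s' % sorted(res))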
Example #4
def make(job_id, context, echo=False):  # @UnusedVariable
    """
        Makes a single job.

        Returns a dictionary with fields:
             "user_object"
             "user_object_deps" = set of Promises
             "new_jobs" -> new jobs defined
             "deleted_jobs" -> jobs that were defined but not anymore

        Raises JobFailed
        or JobInterrupted. Also SystemExit, KeyboardInterrupt, MemoryError are
        captured.
    """
    db = context.get_compmake_db()

    int_make = IntervalTimer()

    host = 'hostname'  # XXX

    if get_compmake_config('set_proc_title'):
        setproctitle('cm-%s' % job_id)

    # TODO: should we make sure we are up to date???
    #     up, reason = up_to_date(job_id, db=db)  # @UnusedVariable
    #     if up:
    #         msg = 'Job %r appears already done.' % job_id
    #         msg += 'This can only happen if another compmake process uses the ' \
    #                'same DB.'
    # logger.error(msg)
    #         user_object = get_job_userobject(job_id, db=db)
    #         # XXX: this is not right anyway
    #         return dict(user_object=user_object,
    #                     user_object_deps=collect_dependencies(user_object),
    #                     deleted_jobs=[],
    #                     new_jobs=[])

    job = get_job(job_id, db=db)
    cache = get_job_cache(job_id, db=db)

    if cache.state == Cache.DONE:
        prev_defined_jobs = set(cache.jobs_defined)
        # print('%s had previously defined %s' % (job_id, prev_defined_jobs))
    else:
        # print('%s was not DONE' % job_id)
        prev_defined_jobs = None

    # Note that at this point we save important information in the Cache
    # so if we set this then it's going to destroy it
    # cache.state = Cache.IN _ PROGRESS
    # set_job_cache(job_id, cache, db=db)

    # TODO: delete previous user object

    def progress_callback(stack):
        publish(context,
                'job-progress-plus',
                job_id=job_id,
                host=host,
                stack=stack)

    init_progress_tracking(progress_callback)

    disable_capture = False
    if disable_capture:
        capture = None
    else:
        echo = False
        capture = OutputCapture(
            context=context,
            prefix=job_id,
            # This is instantaneous echo and should be False
            # They will generate events anyway.
            echo_stdout=echo,
            echo_stderr=echo)

    # TODO: add whether we should just capture and not echo
    old_emit = logging.StreamHandler.emit

    from compmake.ui.coloredlog import colorize_loglevel

    FORMAT = "%(name)10s|%(filename)15s:%(lineno)-4s - %(funcName)-15s| %(message)s"

    formatter = Formatter(FORMAT)

    class Store(object):
        nhidden = 0

    def my_emit(_, log_record):
        # note that log_record.msg might be an exception
        try:
            try:
                s = str(log_record.msg)
            except UnicodeEncodeError:
                # six.text_type is unicode on Python 2, str on Python 3
                s = six.text_type(log_record.msg)
            except Exception:
                s = 'Could not print log_record %s' % id(log_record)
            log_record.msg = colorize_loglevel(log_record.levelno, s)
            res = formatter.format(log_record)
            print(res)
            # this will be captured by OutputCapture anyway
        except Exception:
            Store.nhidden += 1
    logging.StreamHandler.emit = my_emit

    already = set(context.get_jobs_defined_in_this_session())

    def get_deleted_jobs():
        generated = set(context.get_jobs_defined_in_this_session()) - already
        # print('failure: rolling back %s' % generated)

        from compmake.ui.ui import delete_jobs_recurse_definition

        todelete_ = set()
        # delete the jobs that were previously defined
        if prev_defined_jobs:
            todelete_.update(prev_defined_jobs)
        # and also the ones that were generated
        todelete_.update(generated)

        deleted_jobs_ = delete_jobs_recurse_definition(jobs=todelete_, db=db)
        # now we failed, so we need to roll back other changes
        # to the db
        return deleted_jobs_

    try:
        result = job_compute(job=job, context=context)

        assert isinstance(result, dict) and len(result) == 5
        user_object = result['user_object']
        new_jobs = result['new_jobs']
        int_load_results = result['int_load_results']
        int_compute = result['int_compute']
        int_gc = result['int_gc']
        int_gc.stop()

    except KeyboardInterrupt as e:
        bt = traceback.format_exc()
        deleted_jobs = get_deleted_jobs()
        mark_as_failed(job_id,
                       'KeyboardInterrupt: ' + str(e),
                       backtrace=bt,
                       db=db)

        cache = get_job_cache(job_id, db=db)
        if capture is not None:
            cache.captured_stderr = capture.get_logged_stderr()
            cache.captured_stdout = capture.get_logged_stdout()
        else:
            msg = '(Capture turned off.)'
            cache.captured_stderr = msg
            cache.captured_stdout = msg
        set_job_cache(job_id, cache, db=db)

        raise JobInterrupted(job_id=job_id, deleted_jobs=deleted_jobs)

    except BaseException as e:
        # BaseException already subsumes ArithmeticError, BufferError,
        # LookupError, SystemExit and MemoryError; KeyboardInterrupt
        # was handled separately above.
        bt = traceback.format_exc()
        s = '%s: %s' % (type(e).__name__, e)
        mark_as_failed(job_id, s, backtrace=bt, db=db)
        deleted_jobs = get_deleted_jobs()

        cache = get_job_cache(job_id, db=db)
        if capture is not None:
            cache.captured_stderr = capture.get_logged_stderr()
            cache.captured_stdout = capture.get_logged_stdout()
        else:
            msg = '(Capture turned off.)'
            cache.captured_stderr = msg
            cache.captured_stdout = msg

        set_job_cache(job_id, cache, db=db)

        raise JobFailed(job_id=job_id,
                        reason=s,
                        bt=bt,
                        deleted_jobs=deleted_jobs)
    finally:
        int_finally = IntervalTimer()
        if capture is not None:
            capture.deactivate()
        # even if we send an error, let's save the output of the process
        logging.StreamHandler.emit = old_emit
        if Store.nhidden > 0:
            msg = 'compmake: There were %d messages hidden due to bugs in logging.' % Store.nhidden
            print(msg)
        int_finally.stop()
    #        print('finally: %s' % int_finally)

    int_save_results = IntervalTimer()

    # print('Now %s has defined %s' % (job_id, new_jobs))
    if prev_defined_jobs is not None:
        # did we defined fewer jobs this time around?
        # then we need to delete them
        todelete = set()
        for x in prev_defined_jobs:
            if x not in new_jobs:
                todelete.add(x)
        from compmake.ui.ui import delete_jobs_recurse_definition
        deleted_jobs = delete_jobs_recurse_definition(jobs=todelete, db=db)
    else:
        deleted_jobs = set()

    # print('Now %s has deleted %s' % (job_id, deleted_jobs))

    set_job_userobject(job_id, user_object, db=db)
    int_save_results.stop()

    #    logger.debug('Save time for %s: %s s' % (job_id, walltime_save_result))

    int_make.stop()
    end_time = time()

    cache = Cache(Cache.DONE)

    #    print('int_make: %s' % int_make)
    #    print('int_load_results: %s' % int_load_results)
    #    print('int_compute: %s' % int_compute)
    if int_gc.get_walltime_used() > 1.0:
        logger.warning(
            'Expensive garbage collection detected at the end of %s: %s' %
            (job_id, int_gc))
    #    print('int_save_results: %s' % int_save_results)

    cache.int_make = int_make
    cache.int_load_results = int_load_results
    cache.int_compute = int_compute
    cache.int_gc = int_gc
    cache.int_save_results = int_save_results

    cache.timestamp = end_time

    cache.walltime_used = int_make.get_walltime_used()
    cache.cputime_used = int_make.get_cputime_used()
    cache.host = host
    cache.jobs_defined = new_jobs
    set_job_cache(job_id, cache, db=db)

    return dict(user_object=user_object,
                user_object_deps=collect_dependencies(user_object),
                new_jobs=new_jobs,
                deleted_jobs=deleted_jobs)
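
To close the loop, here is a minimal sketch of a caller driving make() directly and translating its exceptions, assuming only the contract stated in the docstring; compmake's real schedulers layer event handling and dependency ordering on top.

from compmake.exceptions import JobFailed, JobInterrupted

def run_one(job_id, context):
    # Hypothetical single-job runner built on make().
    try:
        res = make(job_id, context)
    except JobFailed as e:
        print('job failed: %s' % e)
        return None
    except JobInterrupted:
        print('job interrupted: %r' % job_id)
        return None
    return res['user_object']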