Example #1
def execute_with_context(db, context, job_id, command, args, kwargs):
    """ Returns a dictionary with fields "user_object" and "new_jobs" """
    from compmake.context import Context

    assert isinstance(context, Context)
    from compmake.jobs.storage import get_job

    cur_job = get_job(job_id=job_id, db=db)
    context.currently_executing = cur_job.defined_by + [job_id]

    already = set(context.get_jobs_defined_in_this_session())
    context.reset_jobs_defined_in_this_session([])

    if args:
        # the first argument may be a Context, but it must be *this* context
        if isinstance(args[0], Context) and args[0] != context:
            msg = ('Got a different Context as first argument: %s(%s, %s)'
                   % (command, args, kwargs))
            raise ValueError(msg)

    # context is one of the arguments
    assert context in args

    res = command(*args, **kwargs)

    generated = set(context.get_jobs_defined_in_this_session())
    context.reset_jobs_defined_in_this_session(already)
    return dict(user_object=res, new_jobs=generated)
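
The heart of this example is the save/reset/restore dance around get_jobs_defined_in_this_session: the set of already-defined jobs is saved, cleared so that only jobs created by command are observed, and then restored before returning. Below is a minimal, runnable sketch of the same pattern; FakeContext and its define() method are hypothetical stand-ins, not compmake's actual Context API.

# FakeContext mimics only the session-tracking methods used above.
class FakeContext:
    def __init__(self):
        self._defined = set()

    def get_jobs_defined_in_this_session(self):
        return set(self._defined)

    def reset_jobs_defined_in_this_session(self, jobs):
        self._defined = set(jobs)

    def define(self, job_id):
        self._defined.add(job_id)

context = FakeContext()
context.define('outer')  # defined before the command runs

already = set(context.get_jobs_defined_in_this_session())
context.reset_jobs_defined_in_this_session([])  # observe only new jobs

context.define('child-1')  # what the command would define
context.define('child-2')

generated = set(context.get_jobs_defined_in_this_session())
context.reset_jobs_defined_in_this_session(already)  # restore prior state

assert generated == {'child-1', 'child-2'}
assert context.get_jobs_defined_in_this_session() == {'outer'}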
Example #2
def display_stats(job_list):
    
    states_order = [Cache.NOT_STARTED, Cache.IN_PROGRESS,
                    Cache.MORE_REQUESTED, Cache.FAILED, Cache.DONE]
    # initialize counters to 0
    states2count = dict(map(lambda x: (x, 0), states_order))

    function2state2count = {}
    total = 0
    
    for job_id in job_list:
        
        cache = get_job_cache(job_id)   
        states2count[cache.state] += 1
        total += 1
        
        function_id = get_job(job_id).command_desc
        # initialize the record for this function if not present
        if function_id not in function2state2count:
            counters = dict(map(lambda x: (x, 0), states_order))
            counters['all'] = 0
            function2state2count[function_id] = counters
        # update
        function2state2count[function_id][cache.state] += 1
        function2state2count[function_id]['all'] += 1
        
        if total == 100: # XXX: use standard method
            info("Loading a large number of jobs...")
    
    print("Found %s jobs in total. Summary by state:" % total)
        
    for state in states_order:
        desc = "%30s" % Cache.state2desc[state]
        # colorize output
        desc = colored(desc, **state2color[state])

        num = states2count[state]
        if num > 0:
            print("%s: %5d" % (desc, num))
          
    print("Summary by function:")

    for function_id, function_stats in function2state2count.items():
        ndone = function_stats[Cache.DONE]
        nfailed = function_stats[Cache.FAILED]
        nrest = function_stats['all'] - ndone - nfailed
        failed_s = "%5d failed" % nfailed
        if nfailed > 0:
            failed_s = colored(failed_s, color='red')
        s = "%5d done, %s, %5d to do." % (ndone, failed_s, nrest)
        
        print(" %30s(): %s" % (function_id, s)) 
Example #3
def direct_uptodate_deps(job_id, db):
    """ Returns all direct 'dependencies' of this job:
        the jobs that are children (arguemnts)
        plus the job that defined it (if not root).
    """
    from compmake.jobs.queries import direct_children
    dependencies = direct_children(job_id, db)
    
    # plus jobs that defined it
    from compmake.jobs.storage import get_job
    defined_by = get_job(job_id, db).defined_by
    last = defined_by[-1]
    if last != 'root':
        dependencies.add(last)
    return dependencies
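
The defined_by field read here is a chain: every job records the list of jobs that (recursively) defined it, starting at 'root', so the last element is its immediate definer. A toy illustration of that convention; FakeJob is a hypothetical record, not compmake's Job class.

# FakeJob carries just the defined_by chain used above.
class FakeJob:
    def __init__(self, defined_by):
        self.defined_by = defined_by

jobs = {
    'top':   FakeJob(defined_by=['root']),
    'inner': FakeJob(defined_by=['root', 'top']),  # created while 'top' ran
}

def definer_of(job_id):
    # mirrors the logic above: the last element of the chain, unless root
    last = jobs[job_id].defined_by[-1]
    return None if last == 'root' else last

assert definer_of('top') is None
assert definer_of('inner') == 'top'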
Example #4
def list_matching_functions(token):
    assert token.endswith('()')
    if len(token) < 3:
        raise UserError('Malformed token "%s".' % token)
    
    function_id = token[:-2] 

    num_matches = 0
    for job_id in all_jobs():
        if function_id.lower() == get_job(job_id).command.__name__.lower():
            yield job_id
            num_matches += 1 

    if num_matches == 0:
        raise UserError('Could not find matches for function "%s()".' % 
                        function_id)
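
One subtlety worth remembering: because list_matching_functions is a generator, the UserError for zero matches is raised only when the generator is exhausted, not when the function is called. A self-contained illustration of the same pattern, simplified, with ValueError standing in for UserError:

def matching(token, names):
    # simplified version of the generator above
    function_id = token[:-2]
    num_matches = 0
    for name in names:
        if name.lower() == function_id.lower():
            yield name
            num_matches += 1
    if num_matches == 0:
        raise ValueError('Could not find matches for "%s()".' % function_id)

gen = matching('foo()', ['bar', 'baz'])  # no error yet: nothing has run
try:
    list(gen)  # consuming the generator is what triggers the check
except ValueError as e:
    print(e)   # Could not find matches for "foo()".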
Example #5
    def instance_job(self, job_id):
        publish(self.context,
                'worker-status',
                job_id=job_id,
                status='apply_async')
        assert len(self.sub_available) > 0
        name = sorted(self.sub_available)[0]
        self.sub_available.remove(name)
        assert name not in self.sub_processing
        self.sub_processing.add(name)
        sub = self.subs[name]

        self.job2subname[job_id] = name
        self.subname2job[name] = job_id

        job = get_job(job_id, self.db)

        if self.rdb:
            f = mvac_job_rdb
            args = (job_id, self.context, self.event_queue_name,
                    self.show_output, self.volumes, self.rdb_vol.name,
                    self.rdb_db, os.getcwd())
        else:
            if job.needs_context:
                # if self.new_process:
                #     f = parmake_job2_new_process
                #     args = (job_id, self.context)
                #
                # else:
                f = parmake_job2
                args = (job_id, self.context, self.event_queue_name,
                        self.show_output)
            else:
                f = mvac_job
                args = (job_id, self.context, self.event_queue_name,
                        self.show_output, self.volumes, os.getcwd())

        if True:
            async_result = sub.apply_async(f, args)
        else:
            warnings.warn('Debugging synchronously')
            async_result = f(args)

        return async_result
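
The pattern here is: pick a worker function f and an argument tuple args depending on the job type, then hand both to apply_async and keep the async result. The call matches the standard multiprocessing Pool API, so a minimal self-contained analogue looks like this; work and its argument tuple are illustrative, not compmake functions.

from multiprocessing import Pool

def work(job_id, show_output):
    # stand-in for parmake_job2 / mvac_job; must be picklable (module-level)
    return 'done: %s' % job_id

if __name__ == '__main__':
    pool = Pool(processes=2)
    f, args = work, ('job-1', True)            # select (f, args), as above
    async_result = pool.apply_async(f, args)   # returns immediately
    print(async_result.get(timeout=10))        # block for the result
    pool.close()
    pool.join()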
Example #6
def mvac_job(args):
    """
    args = tuple job_id, context,  queue_name, show_events
        
    Returns a dictionary with fields "user_object", "new_jobs", 'delete_jobs'.
    "user_object" is set to None because we do not want to 
    load in our thread if not necessary. Sometimes it is necessary
    because it might contain a Promise. 
   
    """
    job_id, context, event_queue_name, show_output, volumes, cwd = args  # @UnusedVariable
    check_isinstance(job_id, str)
    check_isinstance(event_queue_name, str)
    
    # Disable multyvac logging
    disable_logging_if_config(context)
    
    db = context.get_compmake_db()
    job = get_job(job_id=job_id, db=db)

    if job.needs_context:
        msg = 'Cannot use multyvac for dynamic job.'
        raise CompmakeException(msg)

    time_start = time.time()

    multyvac_job = mvac_instance(db, job_id, volumes, cwd)
    multyvac_job.wait()
    
    errors = [multyvac_job.status_error, multyvac_job.status_killed]
    if multyvac_job.status in errors:
        e = 'Multyvac error (status: %r)' % multyvac_job.status 
        bt = str(multyvac_job.stderr)

        cache = Cache(Cache.FAILED)
        cache.exception = e
        cache.backtrace = bt
        cache.timestamp = time.time()
        cache.captured_stderr = str(multyvac_job.stderr)
        cache.captured_stdout = str(multyvac_job.stdout)
        set_job_cache(job_id, cache, db=db)

        raise JobFailed(job_id=job_id, reason=str(e), bt=bt)
        
    user_object = multyvac_job.result

    user_object_deps = collect_dependencies(user_object)
    set_job_userobject(job_id, user_object, db=db)
    
    cache = get_job_cache(job_id, db=db)
    cache.captured_stderr = str(multyvac_job.stderr)
    cache.captured_stdout = str(multyvac_job.stdout)

    cache.state = Cache.DONE
    cache.timestamp = time.time()
    walltime = cache.timestamp - time_start
    cache.walltime_used = walltime
    cache.cputime_used = multyvac_job.cputime_system
    cache.host = 'multyvac'
    cache.jobs_defined = set()
    set_job_cache(job_id, cache, db=db)
    
    result_dict = dict(user_object=user_object,
                user_object_deps=user_object_deps, 
                new_jobs=[], deleted_jobs=[])
    result_dict_check(result_dict)
    return result_dict
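
Note the bookkeeping discipline: both the failure branch and the success branch write a complete cache record (state, timestamps, captured output) before raising or returning. A compact sketch of that pattern; FakeCache is a stand-in with only the fields used here, not compmake's Cache.

import time

class FakeCache:
    # stand-in for compmake's Cache with only the fields used here
    DONE, FAILED = 'done', 'failed'

    def __init__(self, state):
        self.state = state
        self.timestamp = None
        self.walltime_used = None

def run_and_record(f):
    time_start = time.time()
    cache = FakeCache(FakeCache.DONE)
    try:
        result = f()
    except Exception:
        # failure path: record state before re-raising, as above
        cache.state = FakeCache.FAILED
        cache.timestamp = time.time()
        raise
    # success path: record state and timing before returning
    cache.timestamp = time.time()
    cache.walltime_used = cache.timestamp - time_start
    return result, cache

result, cache = run_and_record(lambda: 42)
assert result == 42 and cache.state == FakeCache.DONE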
Example #7
def direct_children(job_id):
    ''' Returns the direct children (dependencies) of the specified job '''
    assert(isinstance(job_id, str))
    computation = get_job(job_id)
    return computation.children
Example #8
    def get_job(self, job_id):
        from .storage import get_job

        return get_job(job_id, db=self.db)
Example #9
def execute_with_context(db, context, job_id, command, args, kwargs):
    """ Returns a dictionary with fields "user_object" and "new_jobs" """
    from compmake.context import Context

    assert isinstance(context, Context)
    from compmake.jobs.storage import get_job

    cur_job = get_job(job_id=job_id, db=db)
    context.currently_executing = cur_job.defined_by + [job_id]

    already = set(context.get_jobs_defined_in_this_session())
    context.reset_jobs_defined_in_this_session([])

    if args:
        # the first argument may be a Context, but it must be *this* context
        if isinstance(args[0], Context) and args[0] != context:
            msg = ('Got a different Context as first argument: %s(%s, %s)'
                   % (command, args, kwargs))
            raise ValueError(msg)

    # context is one of the arguments 
    assert context in args
    
    res = command(*args, **kwargs)

    generated = set(context.get_jobs_defined_in_this_session())
    context.reset_jobs_defined_in_this_session(already)

    if generated:
        if len(generated) < 4:
            # info('Job %r generated %s.' % (job_id, generated))
            pass
        else:
            # info('Job %r generated %d jobs such as %s.' % 
            # (job_id, len(generated), sorted(generated)[:M]))
            pass
            # # now remove the extra jobs that are not needed anymore

#     extra = []

    # FIXME this is a RACE CONDITION -- needs to be done in the main thread
    # from compmake.ui.visualization import info

    # info('now cleaning up; generated = %s' % generated)
# 
#     if False:
#         for g in all_jobs(db=db):
#             try:
#                 job = get_job(g, db=db)
#             except:
#                 continue
#             if job.defined_by[-1] == job_id:
#                 if not g in generated:
#                     extra.append(g)
# 
#         for g in extra:
#             #info('Previously generated job %r (%s) removed.' % (g,
#             # job.defined_by))
#             delete_all_job_data(g, db=db)
# 
#             #     from compmake.jobs.manager import
#             # clean_other_jobs_distributed
#             #     clean_other_jobs_distributed(db=db, job_id=job_id,
#             # new_jobs=generated)

    return dict(user_object=res, new_jobs=generated)
Example #10
def check_job(job_id, context):
    db = context.get_compmake_db()

    job = get_job(job_id, db)
    defined_by = job.defined_by
    assert 'root' in defined_by

    dparents = direct_parents(job_id, db=db)
    all_parents = parents(job_id, db=db)
    dchildren = direct_children(job_id, db=db)
    all_children = children(job_id, db=db)

    #print(job_id)
    #print('d children: %s' % dchildren)
    #print('all children: %s' % all_children)

    errors = []

    def e(msg):
        errors.append(msg)

    for defb in defined_by:
        if defb == 'root':
            continue
        if not job_exists(defb, db=db):
            s = ('%r is defined by %r, but %r does not exist.' %
                 (job_id, defined_by, defb))
            e(s)

    for dp in dparents:

        if not job_exists(dp, db=db):
            s = 'Direct parent %r of %r does not exist.' % (dp, job_id)
            e(s)
        else:
            if job_id not in direct_children(dp, db=db):
                s = '%s thinks %s is its direct parent, ' % (job_id, dp)
                s += 'but %s does not think %s is its direct child.' % (
                    dp, job_id)
                e(s)

    for ap in all_parents:
        if not job_exists(ap, db=db):
            s = 'Parent %r of %r does not exist.' % (ap, job_id)
            e(s)
        else:
            if job_id not in children(ap, db=db):
                e('%s is a parent, but the child relation is missing' % ap)

    for dc in dchildren:
        if not job_exists(dc, db=db):
            s = 'Direct child %r of %r does not exist.' % (dc, job_id)
            e(s)
        else:
            if job_id not in direct_parents(dc, db=db):
                e('%s is a direct child, but the direct_parent relation '
                  'is missing' % dc)

    for ac in all_children:
        if not job_exists(ac, db=db):
            s = 'A child %r of %r does not exist.' % (ac, job_id)
            e(s)
        else:
            if job_id not in parents(ac, db=db):
                e('%s is a child, but the parent relation is missing' % ac)

    if errors:
        s = ('Inconsistencies for %s:\n' % job_id)
        s += '\n'.join('- %s' % msg for msg in errors)
        error(s)
        return False, errors
    else:
        return True, []
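
The invariant check_job enforces is symmetry: every parent/child edge must be recorded on both endpoints. The same check, reduced to plain dictionaries standing in for the job database:

# Toy job graph: 'a' has children 'b' and 'c' (dicts stand in for the db).
parents_of = {'a': [], 'b': ['a'], 'c': ['a']}
children_of = {'a': ['b', 'c'], 'b': [], 'c': []}

def find_inconsistencies():
    errors = []
    for job, kids in children_of.items():
        for c in kids:
            if job not in parents_of.get(c, []):
                errors.append('%s lists %s as child, but not vice versa'
                              % (job, c))
    for job, folks in parents_of.items():
        for p in folks:
            if job not in children_of.get(p, []):
                errors.append('%s lists %s as parent, but not vice versa'
                              % (job, p))
    return errors

assert find_inconsistencies() == []
children_of['a'].append('d')  # break symmetry: 'd' has no parent record
assert find_inconsistencies() == ['a lists d as child, but not vice versa']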
Example #11
def comp(command, *args, **kwargs):
    ''' Main method to define a computation.

        Extra arguments:

        :arg:job_id:    sets the job id (respects job_prefix)
        :arg:extra_dep: extra dependencies (not passed as arguments)
    '''
    if compmake.compmake_status == compmake_status_slave:
        return None
    
    # Check that the command is a picklable function
    try:
        pickle.dumps(command)
    except Exception:
        msg = ('Cannot pickle %r. Make sure it is not a lambda function or a '
               'nested function. (This is a limitation of Python.)' % command)
        raise SerializationError(msg)

    args = list(args)  # args is an immutable tuple; convert to a mutable list
    # Get job id from arguments
    job_id_key = 'job_id'
    if job_id_key in kwargs:
        # make sure the command does not itself have a 'job_id' parameter
        argspec = inspect.getargspec(command)

        if job_id_key in argspec.args:
            msg = ("You cannot set the job id this way because 'job_id' "
                   "is already a parameter of this function.")
            raise UserError(msg)
        
        job_id = kwargs[job_id_key]
        if job_prefix:
            job_id = '%s-%s' % (job_prefix, job_id)
        del kwargs[job_id_key]
        
        if job_id in jobs_defined_in_this_session:
            raise UserError('Job %r already defined.' % job_id)
    else:
        job_id = generate_job_id(command)
    
    jobs_defined_in_this_session.add(job_id)
     
    if 'extra_dep' in kwargs:
        extra_dep = collect_dependencies(kwargs['extra_dep'])
        del kwargs['extra_dep']
    else:
        extra_dep = set()
        
    children = collect_dependencies([args, kwargs])
    children.update(extra_dep)
    
    all_args = (command, args, kwargs) 
    
    command_desc = command.__name__
    
    c = Job(job_id=job_id, children=list(children), command_desc=command_desc)
    # TODO: check for loops     
            
    for child in children:
        child_comp = get_job(child)
        if not job_id in child_comp.parents:
            child_comp.parents.append(job_id)
            set_job(child, child_comp)
    
    if job_exists(job_id):
        # This is going to be black magic: we want to load the previous
        # job definition, but unpickling it will start __import__()ing
        # modules, perhaps even the one that is calling us, which would
        # then try to add this same computation again, recursively.
        # To prevent that, we temporarily switch to slave mode, so that
        # recursive calls to comp() are disabled.
        
        if compmake_config.check_params: #@UndefinedVariable
            old_status = compmake_status
            set_compmake_status(compmake_status_slave) 
            old_computation = get_job(job_id)
            set_compmake_status(old_status)
            
            assert False, 'update for job_args'
            same, reason = old_computation.same_computation(c)
            
            if not same:
                set_job(job_id, c)
                set_job_args(job_id, all_args)
                publish('job-redefined', job_id=job_id, reason=reason)
                # XXX TODO clean the cache
            else:
                publish('job-already-defined', job_id=job_id)
        else:
            # We assume everything's ok
            set_job(job_id, c)
            set_job_args(job_id, all_args)
            publish('job-defined', job_id=job_id)
    
    else:    
        set_job(job_id, c)
        set_job_args(job_id, all_args)
        publish('job-defined', job_id=job_id)
        
    assert job_exists(job_id)
    assert job_args_exists(job_id)

    return Promise(job_id)
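
The picklability check at the top of comp() exists because functions pickle by importable name: lambdas and nested functions have no such name, so they could never be stored or shipped to a worker process. A quick self-contained demonstration:

import pickle

def top_level(x):
    return x + 1

pickle.dumps(top_level)  # fine: module-level functions pickle by name

try:
    pickle.dumps(lambda x: x)  # lambdas have no importable name
except (pickle.PicklingError, AttributeError, TypeError) as e:
    print('cannot pickle a lambda:', e)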
Example #12
def direct_parents(job_id):
    ''' Returns the direct parents of the specified job.
        (Jobs that depend directly on this one) '''
    assert(isinstance(job_id, str))
    computation = get_job(job_id)
    return computation.parents