Example #1
def call_dag(call, requestor_cache, pusher_cache, dags, func_locations,
        key_ip_map):
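    """Schedule a single DAG call: pick an executor for every function via
    _pick_node (using the key-to-IP map and any FluentReference arguments),
    build a DagSchedule, send it to each chosen executor over a request
    socket, and fire an initial DagTrigger at every source function."""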
    logging.info('Calling DAG %s.' % (call.name))

    dag, sources = dags[call.name]
    chosen_locations = {}
    for f in dag.functions:
        locations = func_locations[f]
        args = call.function_args[f].args

        # Deserialize each argument and keep only the FluentReference objects
        # (references to keys in the KVS), so _pick_node can take their
        # locations into account.
        refs = list(filter(lambda arg: type(arg) == FluentReference,
            map(lambda arg: get_serializer(arg.type).load(arg.body),
                args)))
        loc = _pick_node(locations, key_ip_map, refs)
        chosen_locations[f] = (loc[0], loc[1])

    schedule = DagSchedule()
    schedule.id = generate_timestamp(0)
    schedule.dag.CopyFrom(dag)

    # copy over arguments into the dag schedule
    for fname in call.function_args:
        arg_list = schedule.arguments[fname]
        arg_list.args.extend(call.function_args[fname].args)

    resp_id = str(uuid.uuid4())
    schedule.response_id = resp_id

    for func in chosen_locations:
        loc = chosen_locations[func]
        schedule.locations[func] = loc[0] + ':' + str(loc[1])

    # Send the schedule to every chosen executor and wait for an
    # acknowledgement; abort the call if any executor reports failure.
    for func in chosen_locations:
        loc = chosen_locations[func]
        ip = utils._get_queue_address(loc[0], loc[1])
        schedule.target_function = func

        sckt = requestor_cache.get(ip)
        sckt.send(schedule.SerializeToString())

        response = GenericResponse()
        response.ParseFromString(sckt.recv())

        if not response.success:
            logging.info('Pin operation for %s at %s failed.' % (func, ip))
            return response.success, response.error, None

    # Kick off execution by sending an initial DagTrigger to each source
    # function of the DAG.
    for source in sources:
        trigger = DagTrigger()
        trigger.id = schedule.id
        trigger.target_function = source

        ip = sutils._get_dag_trigger_address(schedule.locations[source])
        sckt = pusher_cache.get(ip)
        sckt.send(trigger.SerializeToString())

    return True, None, resp_id
Example #2
def call_dag(call, pusher_cache, dags, func_locations, key_ip_map,
        running_counts, backoff):
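    """Schedule a single DAG call: assign every function to an executor via
    _pick_node, copy the call's arguments into a DagSchedule, push the
    schedule (with per-function target and trigger list) to each executor,
    and send a 'BEGIN' DagTrigger to every source function. Returns the
    schedule id."""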
    dag, sources = dags[call.name]

    schedule = DagSchedule()
    schedule.id = str(uuid.uuid4())
    schedule.dag.CopyFrom(dag)
    schedule.consistency = NORMAL
    if call.HasField('response_address'):
        schedule.response_address = call.response_address

    logging.info('Calling DAG %s (%s).' % (call.name, schedule.id))

    for fname in dag.functions:
        locations = func_locations[fname]
        args = call.function_args[fname].args

        refs = list(filter(lambda arg: type(arg) == FluentReference,
            map(lambda arg: get_serializer(arg.type).load(arg.body),
                args)))
        loc = _pick_node(locations, key_ip_map, refs, running_counts, backoff)
        schedule.locations[fname] = loc[0] + ':' + str(loc[1])

        # copy over arguments into the dag schedule
        arg_list = schedule.arguments[fname]
        arg_list.args.extend(args)

    logging.info('Assigned executor locations: %s.' %
                 (str(schedule.locations)))

    for func in schedule.locations:
        loc = schedule.locations[func].split(':')
        ip = utils._get_queue_address(loc[0], loc[1])
        schedule.target_function = func

        # A function's triggers are the names of its DAG predecessors; source
        # functions have no predecessors, so they wait on a synthetic 'BEGIN'
        # trigger instead.
        triggers = sutils._get_dag_predecessors(dag, func)
        if len(triggers) == 0:
            triggers.append('BEGIN')

        schedule.ClearField('triggers')
        schedule.triggers.extend(triggers)

        sckt = pusher_cache.get(ip)
        sckt.send(schedule.SerializeToString())

    for source in sources:
        trigger = DagTrigger()
        trigger.id = schedule.id
        trigger.source = 'BEGIN'
        trigger.target_function = source

        ip = sutils._get_dag_trigger_address(schedule.locations[source])
        sckt = pusher_cache.get(ip)
        sckt.send(trigger.SerializeToString())

    return schedule.id
Example #3
def _exec_dag_function_normal(pusher_cache, kvs, triggers, function, schedule,
                              user_lib):
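    """Execute one function of a DAG under normal (non-causal) consistency:
    gather arguments from the schedule and the accumulated triggers, run the
    function, forward the result downstream as new DagTriggers, and, if this
    function is a sink, send the final result to the caller's response
    address or write it to the KVS under the schedule id."""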
    fname = schedule.target_function
    fargs = list(schedule.arguments[fname].args)

    for trname in schedule.triggers:
        trigger = triggers[trname]
        fargs += list(trigger.arguments.args)

    logging.info('Executing function %s for DAG %s (ID %s): started at %.6f.' %
                 (fname, schedule.dag.name, trigger.id, time.time()))

    fargs = _process_args(fargs)

    result = _exec_func_normal(kvs, function, fargs, user_lib)

    # Forward the result to every downstream function; a function with no
    # outgoing connections is a sink, and its result is the DAG's final
    # output.
    is_sink = True
    for conn in schedule.dag.connections:
        if conn.source == fname:
            is_sink = False
            new_trigger = DagTrigger()
            new_trigger.id = trigger.id
            new_trigger.target_function = conn.sink
            new_trigger.source = fname

            if type(result) != tuple:
                result = (result, )

            al = new_trigger.arguments
            al.args.extend(
                list(map(lambda v: serialize_val(v, None, False), result)))

            dest_ip = schedule.locations[conn.sink]
            sckt = pusher_cache.get(sutils._get_dag_trigger_address(dest_ip))
            sckt.send(new_trigger.SerializeToString())

    logging.info(
        'Finished executing function %s for DAG %s (ID %s): ended at %.6f.' %
        (fname, schedule.dag.name, trigger.id, time.time()))
    if is_sink:
        logging.info('DAG %s (ID %s) completed; result at %s.' %
                     (schedule.dag.name, trigger.id, schedule.id))
        result = serialize_val(result)
        if schedule.HasField('response_address'):
            sckt = pusher_cache.get(schedule.response_address)
            sckt.send(result)
        else:
            lattice = LWWPairLattice(generate_timestamp(0), result)
            kvs.put(schedule.id, lattice)
Example #4
def exec_dag_function(pusher_cache, kvs, trigger, function, schedule):
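    """Execute one function of a DAG: combine the arguments from the schedule
    and the incoming trigger, run the function, forward the result downstream
    as new DagTriggers, and, if this function is a sink, store the final
    result in the KVS under the schedule's response_id."""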
    fname = trigger.target_function
    logging.info('Executing function %s for DAG %s (ID %d).' %
                 (fname, schedule.dag.name, trigger.id))

    fargs = list(schedule.arguments[fname].args) + list(trigger.arguments.args)
    fargs = _process_args(fargs)

    result = _exec_func(kvs, function, fargs)

    is_sink = True
    for conn in schedule.dag.connections:
        if conn.source == fname:
            is_sink = False
            new_trigger = DagTrigger()
            new_trigger.id = trigger.id
            new_trigger.target_function = conn.sink

            if type(result) != tuple:
                result = (result, )

            al = new_trigger.arguments
            al.args.extend(
                list(map(lambda v: serialize_val(v, None, False), result)))

            dest_ip = schedule.locations[conn.sink]
            sckt = pusher_cache.get(sutils._get_dag_trigger_address(dest_ip))
            sckt.send(new_trigger.SerializeToString())

    if is_sink:
        logging.info('DAG %s (ID %d) completed; result at %s.' %
                     (schedule.dag.name, trigger.id, schedule.response_id))
        lattice = LWWPairLattice(generate_timestamp(0), serialize_val(result))
        kvs.put(schedule.response_id, lattice)
Example #5
def _exec_dag_function_causal(pusher_cache, kvs, triggers, function, schedule):
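    """Execute one function of a DAG under causal consistency: merge the
    versioned-key locations and dependency vector clocks carried by the
    incoming triggers, run the function with causal reads, propagate the
    merged metadata downstream with the results, and, for a sink, causally
    write the result to the schedule's output_key and ask each cache to GC
    its version store."""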
    fname = schedule.target_function
    fargs = list(schedule.arguments[fname].args)

    versioned_key_locations = None
    dependencies = {}

    for trname in schedule.triggers:
        trigger = triggers[trname]
        fargs += list(trigger.arguments.args)
        # combine versioned_key_locations
        if versioned_key_locations is None:
            versioned_key_locations = trigger.versioned_key_locations
        else:
            for addr in trigger.versioned_key_locations:
                versioned_key_locations[addr].versioned_keys.extend(
                    trigger.versioned_key_locations[addr].versioned_keys)
        # combine dependencies from previous func
        for dep in trigger.dependencies:
            if dep.key in dependencies:
                dependencies[dep.key] = _merge_vector_clock(
                    dependencies[dep.key], dep.vector_clock)
            else:
                dependencies[dep.key] = dep.vector_clock

    fargs = _process_args(fargs)

    kv_pairs = {}
    result = _exec_func_causal(kvs, function, fargs, kv_pairs, schedule,
                               versioned_key_locations, dependencies)

    for key in kv_pairs:
        if key in dependencies:
            dependencies[key] = _merge_vector_clock(dependencies[key],
                                                    kv_pairs[key][0])
        else:
            dependencies[key] = kv_pairs[key][0]

    is_sink = True
    for conn in schedule.dag.connections:
        if conn.source == fname:
            is_sink = False
            new_trigger = DagTrigger()
            new_trigger.id = trigger.id
            new_trigger.target_function = conn.sink
            new_trigger.source = fname

            if type(result) != tuple:
                result = (result, )

            al = new_trigger.arguments
            al.args.extend(
                list(map(lambda v: serialize_val(v, None, False), result)))

            for addr in versioned_key_locations:
                new_trigger.versioned_key_locations[
                    addr].versioned_keys.extend(
                        versioned_key_locations[addr].versioned_keys)

            for key in dependencies:
                dep = new_trigger.dependencies.add()
                dep.key = key
                dep.vector_clock.update(dependencies[key])

            dest_ip = schedule.locations[conn.sink]
            sckt = pusher_cache.get(sutils._get_dag_trigger_address(dest_ip))
            sckt.send(new_trigger.SerializeToString())

    if is_sink:
        logging.info('DAG %s (ID %s) completed in causal mode; result at %s.' %
                     (schedule.dag.name, schedule.id, schedule.output_key))

        # Construct the output's vector clock: bump this client's entry in the
        # output key's dependency clock (or start a fresh clock at 1) so the
        # new result supersedes any version this call depended on.
        vector_clock = {}
        if schedule.output_key in dependencies:
            if schedule.client_id in dependencies[schedule.output_key]:
                dependencies[schedule.output_key][schedule.client_id] += 1
            else:
                dependencies[schedule.output_key][schedule.client_id] = 1
            vector_clock.update(dependencies[schedule.output_key])
            del dependencies[schedule.output_key]
        else:
            vector_clock = {schedule.client_id: 1}

        succeed = kvs.causal_put(schedule.output_key,
                                 vector_clock, dependencies,
                                 serialize_val(result), schedule.client_id)
        # retry until the causal put is acknowledged
        while not succeed:
            succeed = kvs.causal_put(schedule.output_key, vector_clock,
                                     dependencies, serialize_val(result),
                                     schedule.client_id)

        # issue requests to GC the version store; the GC address is derived by
        # subtracting 50 from the cache address's (four-digit) port
        for cache_addr in versioned_key_locations:
            gc_addr = cache_addr[:-4] + str(int(cache_addr[-4:]) - 50)
            sckt = pusher_cache.get(gc_addr)
            sckt.send_string(schedule.client_id)
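
A note on a helper used above: _merge_vector_clock is referenced in Example #5 but not shown. Presumably it combines two vector clocks (maps from client id to counter) by taking the entry-wise maximum; a minimal sketch under that assumption:

def _merge_vector_clock(lhs, rhs):
    # Entry-wise maximum of two vector clocks; entries missing from one side
    # count as 0. Both inputs are assumed to be dict-like (plain dicts or
    # protobuf map fields).
    merged = dict(lhs)
    for cid, count in rhs.items():
        merged[cid] = max(merged.get(cid, 0), count)
    return merged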