Example #1
import unittest

from cloudburst.shared.serializer import Serializer


class TestSerializer(unittest.TestCase):  # wrapper class assumed; not shown in the original
    def setUp(self):
        self.serializer = Serializer()
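
Serializer (from cloudburst.shared.serializer) is the same object the executor (Example #2) and the benchmark (Example #3) use to encode and decode Python values. A minimal round-trip sketch, assuming dump returns bytes (as the file write in Example #3 suggests) and load accepts those bytes back:

from cloudburst.shared.serializer import Serializer

serializer = Serializer()
payload = serializer.dump([1.5, 2.5])   # serialize a Python list to bytes
restored = serializer.load(payload)     # decode the bytes back into a list
assert restored == [1.5, 2.5]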
Example #2
from cloudburst.server.executor import utils
import cloudburst.server.utils as sutils
from cloudburst.shared.proto.cloudburst_pb2 import (
    Continuation,
    DagTrigger,
    FunctionCall,
    NORMAL,
    MULTI,  # Cloudburst's consistency modes
    EXECUTION_ERROR,
    FUNC_NOT_FOUND,  # Cloudburst's error types
    MULTIEXEC  # Cloudburst's execution types
)
from cloudburst.shared.reference import CloudburstReference
from cloudburst.shared.serializer import Serializer

serializer = Serializer()


def exec_function(exec_socket, kvs, user_library, cache, function_cache):
    # Read the next FunctionCall request off the socket and decode its
    # serialized arguments.
    call = FunctionCall()
    call.ParseFromString(exec_socket.recv())

    fargs = [serializer.load(arg) for arg in call.arguments.values]

    # Use the locally cached function if we have it; otherwise fetch it
    # from the KVS at the requested consistency level.
    if call.name in function_cache:
        f = function_cache[call.name]
    else:
        f = utils.retrieve_function(call.name, kvs, user_library,
                                    call.consistency)

    if not f:
        # The original snippet is truncated here; presumably the executor
        # reports a FUNC_NOT_FOUND error back over exec_socket and returns.
        return
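
A hedged sketch of the request side: how a caller might build the FunctionCall message that exec_function parses. The field names mirror the snippet above (call.name, call.arguments.values); the in-place serializer.dump(arg, valobj) form is an assumption about the Serializer API.

from cloudburst.shared.proto.cloudburst_pb2 import FunctionCall
from cloudburst.shared.serializer import Serializer

serializer = Serializer()

call = FunctionCall()
call.name = 'square'                    # hypothetical registered function
for arg in (3, 4):
    argobj = call.arguments.values.add()
    serializer.dump(arg, argobj)        # encode each argument in place

payload = call.SerializeToString()      # the bytes exec_socket.recv() yields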
Example #3
import time

import numpy as np

from cloudburst.client.client import CloudburstConnection

# DATA_SIZES, NUM_DATA_POINTS, optimize_rules, optimize, Flow, FlowType,
# Table, IntType, Row, and print_latency_stats come from the surrounding
# benchmark module and are not shown in this snippet.


def run(cloudburst: CloudburstConnection,
        num_requests: int,
        data_size: str,
        breakpoint: bool,
        do_optimize: bool):

    print('Creating data...')
    size = DATA_SIZES[data_size]
    for i in range(1, NUM_DATA_POINTS+1):
        arr = np.random.rand(size)
        cloudburst.put_object('data-' + str(i), arr)

    # Map each request number to one of the pre-loaded data keys
    # (ten requests share each key).
    def stage1(self, row: Row) -> (int, str):
        idx = int(row['req_num'] / 10) + 1
        key = 'data-%d' % (idx)

        return idx, key

    # Sum the array that the lookup stage attached to the row.
    def stage2(self, row: Row) -> float:
        import numpy as np
        arr = row[row['key']]

        return float(np.sum(arr))

    print(f'Creating flow with {data_size} ({DATA_SIZES[data_size]}) inputs.')

    # Three-stage dataflow: compute the key, fetch it from the KVS,
    # then sum the fetched array.
    flow = Flow('locality-benchmark', FlowType.PUSH, cloudburst)
    flow.map(stage1, names=['index', 'key']) \
        .lookup('key', dynamic=True) \
        .map(stage2, names=['sum'])

    optimize_rules['breakpoint'] = breakpoint
    if do_optimize:
        flow = optimize(flow, rules=optimize_rules)
        print('Flow has been optimized...')

    flow.deploy()
    print('Flow successfully deployed!')

    latencies = []
    inp = Table([('req_num', IntType)])

    if breakpoint:
        print('Starting warmup...')
        for i in range(NUM_DATA_POINTS):
            inp = Table([('req_num', IntType)])
            inp.insert([i * 10])

            res = flow.run(inp).get()

        print('Pausing to let cache metadata propagate...')
        time.sleep(15)

    print('Starting benchmark...')
    for i in range(num_requests):
        if i % 100 == 0 and i > 0:
            print(f'On request {i}...')

        inp = Table([('req_num', IntType)])
        inp.insert([i])

        start = time.time()
        res = flow.run(inp).get()
        end = time.time()

        latencies.append(end - start)

    with open('data.bts', 'wb') as f:
        from cloudburst.shared.serializer import Serializer
        ser = Serializer()
        bts = ser.dump(latencies)
        f.write(bts)

    print_latency_stats(latencies, 'E2E')
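
A hedged invocation sketch: the addresses and parameter values below are placeholders, and CloudburstConnection's (function service address, client IP) constructor form with the local flag is an assumption about the client API.

from cloudburst.client.client import CloudburstConnection

cloudburst = CloudburstConnection('127.0.0.1', '127.0.0.1', local=True)
run(cloudburst,
    num_requests=1000,
    data_size='small',   # hypothetical key; must exist in DATA_SIZES
    breakpoint=False,
    do_optimize=True)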