Example #1
def run(cloudburst: CloudburstConnection,
        num_requests: int,
        data_size: str,
        do_optimize: bool):

    def stage1(self, row: Row) -> bytes:
        import numpy as np

        # Produce a random array of row['size'] elements; the bytes
        # annotation presumably reflects how the payload is serialized
        # between stages.
        return np.random.rand(row['size'])

    def stage2(self, row: Row) -> int:
        return 3

    print(f'Creating flow with {data_size} ({DATA_SIZES[data_size]}) inputs.')

    flow = Flow('colocate-benchmark', FlowType.PUSH, cloudburst)
    f1 = flow.map(stage1)

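    # Fan stage1's output out to five parallel branches so the optimizer's
    # co-location pass has sibling stages to place on the same executor.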
    p1 = f1.map(stage2, names=['val1'])
    p2 = f1.map(stage2, names=['val2'])
    p3 = f1.map(stage2, names=['val3'])
    p4 = f1.map(stage2, names=['val4'])
    p5 = f1.map(stage2, names=['val5'])
    # p6 = f1.map(stage2, names=['val6'])
    # p7 = f1.map(stage2, names=['val7'])
    # p8 = f1.map(stage2, names=['val8'])

    p1.join(p2).join(p3).join(p4).join(p5) # .join(p6).join(p7).join(p8)

    if do_optimize:
        flow = optimize(flow, rules=optimize_rules)
        print('Flow has been optimized...')

    flow.deploy()
    print('Flow successfully deployed!')

    latencies = []
    inp = Table([('size', IntType)])
    inp.insert([DATA_SIZES[data_size]])

    print('Starting benchmark...')
    for i in range(num_requests):
        if i % 100 == 0 and i > 0:
            print(f'On request {i}...')

        start = time.time()
        res = flow.run(inp).get()
        end = time.time()

        latencies.append(end - start)

    print_latency_stats(latencies, 'E2E')
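Each run function in these excerpts expects a live CloudburstConnection plus benchmark parameters, but no driver is shown. A minimal sketch of one, assuming hypothetical DATA_SIZES values and the standard Cloudburst client constructor (service address plus local client IP); treat the module path and constructor arguments as assumptions:

import argparse

from cloudburst.client.client import CloudburstConnection

# Hypothetical size table; the real DATA_SIZES mapping is defined elsewhere
# in the benchmark module.
DATA_SIZES = {'small': 1_000, 'medium': 100_000, 'large': 10_000_000}

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='benchmark driver (sketch)')
    parser.add_argument('--ip', required=True,
                        help='Cloudburst function service address')
    parser.add_argument('--requests', type=int, default=1000)
    parser.add_argument('--size', choices=DATA_SIZES, default='small')
    parser.add_argument('--optimize', action='store_true')
    args = parser.parse_args()

    # Constructor arguments assumed: (service address, this client's IP).
    cloudburst = CloudburstConnection(args.ip, '127.0.0.1')
    run(cloudburst, args.requests, args.size, args.optimize)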
Example #2
def run(cloudburst: CloudburstConnection, num_requests: int, gamma: int,
        num_replicas: int):
    def stage1(self, val: int) -> int:
        return val + 1

    def stage2(self, row: Row) -> float:
        import time
        from scipy.stats import gamma

        delay = gamma.rvs(3.0, scale=row['scale']) * 10 / 1000  # delay in ms, converted to seconds
        time.sleep(delay)

        return delay

    def stage3(self, row: Row) -> float:
        return row['val']

    print(f'Creating flow with {num_replicas} replicas and' +
          f' gamma={GAMMA_VALS[gamma]}')

    flow = Flow('fusion-benchmark', FlowType.PUSH, cloudburst)
    flow.map(stage1, col='val') \
        .map(stage2, names=['val'], high_variance=True) \
        .map(stage3, names=['val'])

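    # Competitive replicas: the optimizer launches num_replicas copies of the
    # high-variance stage and keeps whichever result arrives first.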
    optimize_rules['compete_replicas'] = num_replicas
    flow = optimize(flow, rules=optimize_rules)
    print('Flow has been optimized...')

    flow.deploy()
    print('Flow successfully deployed!')

    latencies = []
    inp = Table([('val', IntType), ('scale', FloatType)])
    inp.insert([1, GAMMA_VALS[gamma]])

    print('Starting benchmark...')
    for i in range(num_requests):
        if i % 100 == 0 and i > 0:
            print(f'On request {i}...')

        time.sleep(0.3)  # Sleep to let the queue drain.
        start = time.time()
        res = flow.run(inp).get()
        end = time.time()

        latencies.append(end - start)

    print_latency_stats(latencies, 'E2E')
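The delay model in stage2 is worth pinning down: a Gamma(k=3, scale=s) draw has mean k*s, so the injected delay averages 3 * s * 10 ms before the division converts it to seconds for time.sleep. A standalone check of that arithmetic:

# Standalone sanity check of stage2's delay model (not part of the flow).
from scipy.stats import gamma

scale = 1.0  # stand-in for a GAMMA_VALS entry
samples = gamma.rvs(3.0, scale=scale, size=100_000)
mean_ms = samples.mean() * 10
print(f'empirical mean: {mean_ms:.2f} ms, expected: {3 * scale * 10:.2f} ms')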
Example #3
def run(cloudburst: CloudburstConnection, num_requests: int, num_fns: int,
        data_size: str, do_optimize: bool):
    def fusion_op(self, row: Row) -> bytes:
        return row['data']

    print(f'Creating flow with {num_fns} operators and {data_size}' +
          f' ({DATA_SIZES[data_size]}) inputs.')

    flow = Flow('fusion-benchmark', FlowType.PUSH, cloudburst)

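    # Build a linear chain of num_fns identity operators; with optimization
    # on, fusion should collapse the chain into a single stage.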
    marker = flow
    for _ in range(num_fns):
        marker = marker.map(fusion_op, names=['data'])

    if do_optimize:
        flow = optimize(flow, rules=optimize_rules)
        print('Flow has been optimized...')

    flow.deploy()
    print('Flow successfully deployed!')

    latencies = []
    inp = Table([('data', BtsType)])
    inp.insert([os.urandom(DATA_SIZES[data_size])])

    print('Starting benchmark...')
    for i in range(num_requests):
        if i % 100 == 0 and i > 0:
            print(f'On request {i}...')

        start = time.time()
        res = flow.run(inp).get()
        end = time.time()

        latencies.append(end - start)

    print_latency_stats(latencies, 'E2E')
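This chain is a worst case for per-stage dispatch: every hop pays scheduling and data-movement cost to run an identity function. Fusion collapses such a chain into one invocation; a library-independent sketch of the idea (not the optimizer's actual rewrite):

# Minimal illustration of operator fusion, independent of the flow library:
# instead of dispatching n functions as separate stages, compose them into
# one callable so the chain executes in a single invocation.
from functools import reduce

def fuse(*fns):
    return lambda x: reduce(lambda acc, f: f(acc), fns, x)

identity = lambda data: data
fused = fuse(*[identity] * 10)   # stands in for the num_fns map chain
assert fused(b'payload') == b'payload'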
Example #4
             names=['img', 'resnet_index', 'resnet_max_prob'],
             gpu=gpu,
             batching=gpu)

    # `incept` is rebound here from the model function to the resulting flow
    # operator; the right-hand side evaluates first, so the original function
    # is still what gets passed to map().
    incept = rnet.filter(low_prob) \
        .map(incept,
             init=incept_cons,
             names=['incept_index', 'incept_max_prob'],
             gpu=gpu,
             batching=gpu)
    rnet.join(incept, how='left') \
        .map(cascade_predict, init=cascade_init, names=['class'],
             batching=False)

    print('Optimizing flow...')
    flow = optimize(flow, rules=optimize_rules)

    print('Deploying flow...')
    flow.deploy()

    local = args.local[0].lower() == 'true'
    if local:
        run(flow, cloudburst, args.requests[0], local)
    else:
        flow.cloudburst = None  # Hack to serialize and send flow.
        # Walk the operator DAG breadth-first, dropping each operator's
        # compiled Cloudburst function handle, which presumably cannot be
        # pickled along with the flow.
        queue = [flow]
        while queue:
            op = queue.pop(0)
            op.cb_fn = None

            queue.extend(op.downstreams)
Example #5
def run(cloudburst: CloudburstConnection,
        num_requests: int,
        data_size: str,
        breakpoint: bool,
        do_optimize: bool):

    print('Creating data...')
    size = DATA_SIZES[data_size]
    for i in range(1, NUM_DATA_POINTS + 1):
        arr = np.random.rand(size)
        cloudburst.put_object(f'data-{i}', arr)

    def stage1(self, row: Row) -> (int, str):
        # Map each block of 10 requests onto one of the pre-loaded objects.
        idx = row['req_num'] // 10 + 1
        key = f'data-{idx}'

        return idx, key

    def stage2(self, row: Row) -> float:
        import numpy as np

        # The dynamic lookup stage stores the fetched array in the row under
        # the key's own name.
        arr = row[row['key']]

        return float(np.sum(arr))

    print(f'Creating flow with {data_size} ({DATA_SIZES[data_size]}) inputs.')

    flow = Flow('locality-benchmark', FlowType.PUSH, cloudburst)
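    # lookup('key', dynamic=True) resolves the 'key' column at runtime and
    # fetches that object from the KVS, adding it to the row under the key's
    # name (see stage2 above).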
    flow.map(stage1, names=['index', 'key']) \
        .lookup('key', dynamic=True) \
        .map(stage2, names=['sum'])

    optimize_rules['breakpoint'] = breakpoint
    if do_optimize:
        flow = optimize(flow, rules=optimize_rules)
        print('Flow has been optimized...')

    flow.deploy()
    print('Flow successfully deployed!')

    latencies = []

    if breakpoint:
        print('Starting warmup...')
        for i in range(NUM_DATA_POINTS):
            inp = Table([('req_num', IntType)])
            inp.insert([i * 10])

            res = flow.run(inp).get()

        print('Pausing to let cache metadata propagate...')
        time.sleep(15)

    print('Starting benchmark...')
    for i in range(num_requests):
        if i % 100 == 0 and i > 0:
            print(f'On request {i}...')

        inp = Table([('req_num', IntType)])
        inp.insert([i])

        start = time.time()
        res = flow.run(inp).get()
        end = time.time()

        latencies.append(end - start)

    # Persist the raw latencies with Cloudburst's serializer for offline
    # analysis.
    with open('data.bts', 'wb') as f:
        from cloudburst.shared.serializer import Serializer
        ser = Serializer()
        bts = ser.dump(latencies)
        f.write(bts)

    print_latency_stats(latencies, 'E2E')
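Every example reports through print_latency_stats, which these excerpts never define. A hypothetical stand-in that reports the usual tail-latency summary over per-request timings (the real helper may differ in format and signature):

# Hypothetical stand-in for the print_latency_stats helper the excerpts call.
import numpy as np

def print_latency_stats(latencies, ident):
    arr = np.array(latencies)
    print(f'{ident}: mean={arr.mean():.4f}s '
          f'median={np.percentile(arr, 50):.4f}s '
          f'p95={np.percentile(arr, 95):.4f}s '
          f'p99={np.percentile(arr, 99):.4f}s '
          f'min={arr.min():.4f}s max={arr.max():.4f}s')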