Пример #1
0
def test_enable_parallel_lz4():
    """Check that ``enable_parallel_lz4`` toggles ``ENABLE_PARALLEL``.

    The flag is switched on and then off; after each call the value exposed
    by ``arctic._compression`` must be exactly the requested boolean.
    """
    enable_parallel_lz4(True)
    # Import after the call: ``from ... import`` binds whatever value the
    # module attribute holds at that moment, so the import is repeated after
    # each toggle to observe the current state.
    from arctic._compression import ENABLE_PARALLEL
    assert ENABLE_PARALLEL is True

    enable_parallel_lz4(False)
    from arctic._compression import ENABLE_PARALLEL
    assert ENABLE_PARALLEL is False
Пример #2
0
def test_enable_parallel_lz4():
    """Toggle parallel LZ4 on, then off, and verify the module-level flag.

    For each requested state the flag is re-imported from
    ``arctic._compression`` and must be identical to the requested boolean.
    """
    for expected in (True, False):
        enable_parallel_lz4(expected)
        # Re-import on every pass so the local name reflects the value the
        # module attribute holds after the call above.
        from arctic._compression import ENABLE_PARALLEL
        assert ENABLE_PARALLEL is expected
Пример #3
0
def run_scenario(result_text,
                 rounds,
                 num_requests,
                 num_chunks,
                 parallel_lz4,
                 use_async,
                 async_arctic_pool_workers=None):
    """Time one benchmark configuration and print a one-line summary.

    :param result_text: label placed at the start of the printed line
    :param rounds: number of timed rounds to execute
    :param num_requests: forwarded to the benchmark function
    :param num_chunks: forwarded to the benchmark function
    :param parallel_lz4: value passed to ``aclz4.enable_parallel_lz4``
    :param use_async: truthy selects ``async_bench``, otherwise
        ``serial_bench``
    :param async_arctic_pool_workers: when not None, ``ASYNC_ARCTIC`` is reset
        with this pool size (converted to int) and a timeout of 10
    """
    aclz4.enable_parallel_lz4(parallel_lz4)
    if async_arctic_pool_workers is not None:
        ASYNC_ARCTIC.reset(pool_size=int(async_arctic_pool_workers),
                           timeout=10)

    bench = async_bench if use_async else serial_bench
    timings = []
    for _ in range(rounds):
        # The library is cleaned before the clock starts, so cleanup time is
        # not part of the measurement.
        clean_lib()
        started = time.time()
        bench(num_requests, num_chunks)
        timings.append(time.time() - started)

    # With more than one round the first measurement is left out of the
    # statistics (presumably as a warm-up round - confirm with the author).
    sample = timings[1:] if len(timings) > 1 else timings
    stats_text = ["{:.3f}".format(value) for value in get_stats(sample)]
    summary = ("{}: async={}, chunks/write={}, writes/round={}, rounds={}, "
               "parallel_lz4={}, async_arctic_pool_workers={}: {}").format(
                   result_text, use_async, num_chunks, num_requests, rounds,
                   parallel_lz4, async_arctic_pool_workers, stats_text)
    print(summary)
Пример #4
0
def run_scenario(result_text, rounds, num_requests, num_chunks, parallel_lz4,
                 use_async, async_arctic_pool_workers=None):
    """Run one benchmark scenario for several rounds and print its timings.

    :param result_text: label placed at the start of the printed line
    :param rounds: number of timed rounds to execute
    :param num_requests: forwarded to ``async_bench`` / ``serial_bench``
    :param num_chunks: forwarded to ``async_bench`` / ``serial_bench``
    :param parallel_lz4: value passed to ``aclz4.enable_parallel_lz4``
    :param use_async: truthy runs ``async_bench``, otherwise ``serial_bench``
    :param async_arctic_pool_workers: when not None, ``ASYNC_ARCTIC`` is reset
        with this pool size (converted to int) and a timeout of 10
    """
    aclz4.enable_parallel_lz4(parallel_lz4)
    if async_arctic_pool_workers is not None:
        ASYNC_ARCTIC.reset(pool_size=int(async_arctic_pool_workers), timeout=10)

    measurements = []
    # ``range`` instead of ``xrange``: the latter does not exist on Python 3,
    # while ``range`` works on both major versions.
    for _ in range(rounds):
        # Clean up before starting the clock so only the benchmark is timed.
        clean_lib()
        start = time.time()
        if use_async:
            async_bench(num_requests, num_chunks)
        else:
            serial_bench(num_requests, num_chunks)
        measurements.append(time.time() - start)

    # When more than one round ran, the first measurement is excluded from
    # the statistics.
    considered = measurements[1:] if len(measurements) > 1 else measurements
    formatted_stats = ["{:.3f}".format(x) for x in get_stats(considered)]
    print("{}: async={}, chunks/write={}, writes/round={}, rounds={}, "
          "parallel_lz4={}, async_arctic_pool_workers={}: {}".format(
              result_text, use_async, num_chunks, num_requests, rounds,
              parallel_lz4, async_arctic_pool_workers, formatted_stats))
Пример #5
0
from __future__ import print_function

import random
from datetime import datetime as dt
from multiprocessing.pool import ThreadPool

import numpy as np
import pandas as pd

import arctic._compression as c
from arctic.serialization.numpy_records import DataFrameSerializer

# Module-level benchmark setup on arctic._compression (imported as ``c``):
# request parallel LZ4 ...
c.enable_parallel_lz4(True)
# ... and set the module's BENCHMARK_MODE attribute.
c.BENCHMARK_MODE = True


def get_random_df(nrows, ncols):
    """Build a DataFrame of random floats with a UTC, per-second index.

    :param nrows: number of rows; the index starts at 2017-01-01 and advances
        one second per row
    :param ncols: number of columns; each is named with 8 random letters A-Z
    :return: DataFrame filled from ``np.random.randn`` whose index is named
        'index' and localized to UTC
    """
    first_letter, last_letter = ord('A'), ord('Z')
    column_names = [
        "".join(chr(random.randint(first_letter, last_letter))
                for _ in range(8))
        for _ in range(ncols)
    ]
    # Lower-case 's' is the seconds alias; the upper-case 'S' spelling is
    # deprecated in recent pandas releases and triggers a FutureWarning.
    timestamps = pd.date_range('20170101', periods=nrows, freq='s')
    ret_df = pd.DataFrame(np.random.randn(nrows, ncols),
                          index=timestamps,
                          columns=column_names)
    ret_df.index.name = 'index'
    ret_df.index = ret_df.index.tz_localize('UTC')
    return ret_df


def construct_test_data(df_length, append_mul):
    # Serializes a random 10-column DataFrame of ``df_length`` rows and keeps
    # the first element of the serializer's result in ``recs``.
    # NOTE(review): the visible body stops here - ``append_mul`` is not used
    # and nothing is returned in this excerpt; the rest of the function
    # appears to be cut off.
    serializer = DataFrameSerializer()
    tmp_df = get_random_df(df_length, 10)
    recs = serializer.serialize(tmp_df)[0]
Пример #6
0
from __future__ import print_function

import random
from datetime import datetime as dt
from multiprocessing.pool import ThreadPool

import numpy as np
import pandas as pd

import arctic._compression as c
from arctic.serialization.numpy_records import DataFrameSerializer

# Module-level benchmark setup on arctic._compression (imported as ``c``):
# request parallel LZ4 ...
c.enable_parallel_lz4(True)
# ... and set the module's BENCHMARK_MODE attribute.
c.BENCHMARK_MODE = True


def get_random_df(nrows, ncols):
    """Create a random-valued DataFrame indexed by UTC timestamps.

    :param nrows: number of rows; timestamps start at 2017-01-01 and are
        spaced one second apart
    :param ncols: number of columns; every column gets a random 8-letter
        upper-case name
    :return: DataFrame of ``np.random.randn`` values with a UTC-localized
        index named 'index'
    """
    def _random_name(length=8):
        # One random upper-case ASCII letter per position.
        letters = [
            chr(random.randint(ord('A'), ord('Z'))) for _ in range(length)
        ]
        return "".join(letters)

    values = np.random.randn(nrows, ncols)
    # 's' (lower-case) is used for the one-second frequency because the
    # upper-case 'S' alias is deprecated in recent pandas releases.
    index = pd.date_range('20170101', periods=nrows, freq='s')
    names = [_random_name() for _ in range(ncols)]

    ret_df = pd.DataFrame(values, index=index, columns=names)
    ret_df.index.name = 'index'
    ret_df.index = ret_df.index.tz_localize('UTC')
    return ret_df