Example #1
import numpy as np           # Random vector generation
from ShareDB import ShareDB  # Persistent key-value store

def stream_tasks(inDB_path, num_task, num_proc):
    '''
    This procedure generates all vector pairs and stores them
    in a ShareDB instance located at inDB_path. Once all the
    work is queued up, the instance is closed.
    '''
    # Open inDB to write vector pairs
    inDB = ShareDB(
        path=inDB_path,     # Store output where specified
        reset=True,         # Recreate inDB if previously existing
        serial='msgpack',   # msgpack offers optimal serialization for lists
        readers=num_proc+1, # All num_proc workers plus this process may read in parallel
        compress=True,      # Serialized msgpack-ed lists are further compressed
        map_size=5*10**8)   # We estimate ~500MB will be needed for the results

    # Queue all work
    current_token = 0
    while current_token < num_task:
        # Choose vector size
        base_size = np.random.randint(10**4, 10**5)

        # Generate vectors
        X = tuple(int(v) for v in np.random.randint(100, size=base_size))
        Y = tuple(int(v) for v in np.random.randint(100, size=base_size))

        # Write to inDB
        inDB[current_token] = (X, Y)

        print('QUEUED WORK # {}'.format(current_token))

        current_token += 1 # Jump to next token

    # Close inDB
    inDB.close()
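
A minimal driver for this producer might look like the following; the path and counts are hypothetical values, not part of the original example.

# Hypothetical invocation: queue 100 vector pairs for 4 workers
stream_tasks(inDB_path='./inDB', num_task=100, num_proc=4)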
Example #2
from itertools import chain  # Used by the lazy one-shot variant below
from ShareDB import ShareDB

def merge_results(mergeDB_path, outDB_paths):
    '''
    This procedure simply merges all individual outDBs into a single
    ShareDB instance stored at mergeDB_path.
    '''
    # Open mergeDB to store merged convolution results
    mergeDB = ShareDB(
        path=mergeDB_path,  # Store output where specified
        reset=True,         # Recreate mergeDB if previously existing
        serial='msgpack',   # msgpack offers optimal serialization for lists
        readers=2,          # At most 2 processes would read mergeDB in parallel
        compress=True,      # Serialized msgpack-ed lists are further compressed
        map_size=5*10**8)   # We estimate ~500MB will be needed for the results

    # Alternative one-shot merge: lazily chain every key-value pair
    # across all outDBs and insert them with a single multiset call.
    # outDB_list = (ShareDB(path) for path in outDB_paths)
    # results    = chain.from_iterable(
    #     outDB.items() for outDB in outDB_list)
    # mergeDB.multiset(results)
    # mergeDB.close()

    # Merge all individual results
    for outDB_path in outDB_paths:
        outDB = ShareDB(outDB_path)
        mergeDB.multiset(outDB.items())
        print('Merged results = {}'.format(len(mergeDB)))

    # All results merged ... we're done!
    mergeDB.close()
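
Assuming two workers wrote their results to separate stores, the merge might be invoked as follows (paths are hypothetical):

merge_results(
    mergeDB_path='./mergeDB',
    outDB_paths=['./outDB_0', './outDB_1'])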
Example #3
import random                # Seeding
from ShareDB import ShareDB

# gri is assumed here to be a project-local generator of random
# integers, importable from the surrounding test suite.

def get_myDB_resources(total):
    '''
    Initialize a populated ShareDB instance and associated resources.
    '''
    # Setup random
    seed = random.random()
    gri_stream = gri(seed=seed)

    # Initialize ShareDB instance
    myDB = ShareDB(path='./myDB',
                   reset=True,
                   serial='msgpack',
                   compress=random.choice([True, False]),
                   readers=40,
                   buffer_size=100,
                   map_size=10**7)

    # Populate myDB with random items and record keys
    key_val_dict = {}
    while len(key_val_dict) < total:
        key = next(gri_stream)
        val = next(gri_stream)
        if key not in key_val_dict:
            myDB[key] = val
            key_val_dict[key] = val

    # Generate some keys not seen before
    non_key_set = set()
    while len(non_key_set) < total:
        non_key = next(gri_stream)
        if non_key not in key_val_dict:
            non_key_set.add(non_key)

    # Return resources
    return myDB, key_val_dict, non_key_set
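
A hypothetical consumer of these resources; it relies only on len(), item access, and drop(), all of which appear in the other examples.

myDB, key_val_dict, non_key_set = get_myDB_resources(total=50)
assert len(myDB) == len(key_val_dict) == len(non_key_set) == 50
assert all(myDB[key] == val for key, val in key_val_dict.items())
myDB.drop()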
Example #4
from ShareDB import ShareDB

def main():
    store_path = './kvStore'
    num_items  = 1000000
    length     = 25
    write_thp(store_path, num_items, length)
    print('\n')
    read_thp(store_path)
    ShareDB(store_path).drop()
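
This driver assumes write_thp and read_thp as defined in Examples #7 and #6; a standard entry-point guard would run it as a script:

if __name__ == '__main__':
    main()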
Example #5
from ShareDB import ShareDB

def get_tinyDB(map_size):
    '''
    Initialize a ShareDB with msgpack serialization.
    '''
    myDB = ShareDB(path='./tinyDB',
                   reset=True,
                   serial='msgpack',
                   readers=40,
                   buffer_size=100,
                   map_size=map_size)
    return myDB
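
The caller-supplied map_size makes this handy for exercising capacity limits; a hypothetical round-trip:

tinyDB = get_tinyDB(map_size=10**5)  # ~100KB store
tinyDB[1] = [1, 2, 3]
assert tinyDB[1] == [1, 2, 3]
tinyDB.drop()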
Example #6
import time
from ShareDB import ShareDB

def read_thp(store_path):
    '''
    Go through store and report reading throughput.
    '''
    kvStore = ShareDB(store_path)
    i  = 1.  # Float keys match those written by write_thp
    tt = 0.0
    while i <= len(kvStore):
        t0  = time.time()
        val = kvStore[i]
        tt += time.time() - t0
        print('READER thp @ {:.2f} rd/sec | SCAN {:.2f}%'.format(
            i / max(tt, 1e-9), (100. * i) / len(kvStore))) # Guard against zero elapsed time
        i += 1
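
Per-key access measures random-read latency; for a bulk scan, iterating items() (as Example #2 does) avoids repeated key lookups. A hypothetical variant:

def scan_thp(store_path):
    '''
    Scan the whole store via items() and report throughput.
    '''
    kvStore = ShareDB(store_path)
    t0 = time.time()
    count = sum(1 for _ in kvStore.items())
    tt = max(time.time() - t0, 1e-9)  # Guard against zero elapsed time
    print('SCAN thp @ {:.2f} rd/sec'.format(count / tt))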
Example #7
import random                # Random DNA string generation
import time
from ShareDB import ShareDB

def write_thp(store_path, num_items, length):
    '''
    Fill store with random DNA strings and report throughput.
    '''
    kvStore = ShareDB(
        path=store_path,    # Store output where specified
        reset=True,         # Recreate the store if previously existing
        serial='msgpack',   # msgpack serialization
        map_size=num_items*100)
    i  = 1.
    tt = 0.0
    while i <= num_items:
        # Build the random DNA string outside the timed region
        seq = ''.join(random.choice('ATGC') for _ in range(length))
        t0  = time.time()
        kvStore[i] = seq # Store the DNA string the docstring promises
        tt += time.time() - t0
        print('WRITER thp @ {:.2f} wt/sec | FILL {:.2f}%'.format(
            i / max(tt, 1e-9), (100. * i) / num_items))
        i += 1
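
Single-key assignment pays per-write overhead; ShareDB's multiset (used in Example #2) accepts an iterable of key-value pairs, so a batched variant might look like this hypothetical helper:

def write_thp_batched(store_path, num_items, length):
    '''
    Fill store in one multiset call and report overall throughput.
    '''
    kvStore = ShareDB(path=store_path, reset=True,
                      serial='msgpack', map_size=num_items*100)
    # Float keys keep compatibility with read_thp above
    pairs = ((float(i), ''.join(random.choice('ATGC') for _ in range(length)))
             for i in range(1, num_items+1))
    t0 = time.time()
    kvStore.multiset(pairs)
    tt = max(time.time() - t0, 1e-9)  # Guard against zero elapsed time
    print('WRITER thp @ {:.2f} wt/sec'.format(num_items / tt))
    kvStore.close()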
Example #8
import pytest
from ShareDB import ShareDB

@pytest.fixture
def msgpack_myDB():
    '''
    Initialize a ShareDB with msgpack serialization.
    '''
    msgpack_myDB = ShareDB(path='./myDB.msgpack',
                           reset=True,
                           serial='msgpack',
                           compress=True,
                           readers=40,
                           buffer_size=100,
                           map_size=10**7)
    msgpack_myDB = None  # Discard the handle to the fresh instance ...
    msgpack_myDB = ShareDB(path='./myDB.msgpack', reset=False)  # ... and reopen it to exercise persistence
    yield msgpack_myDB
    msgpack_myDB.drop()
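
A hypothetical test consuming the fixture (pytest injects it by name):

def test_msgpack_roundtrip(msgpack_myDB):
    msgpack_myDB['key'] = {'values': [1, 2, 3]}
    assert msgpack_myDB['key'] == {'values': [1, 2, 3]}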
Example #9
import pytest
from ShareDB import ShareDB

@pytest.fixture
def pickle_myDB():
    '''
    Initialize a ShareDB with pickle serialization.
    '''
    pickle_myDB = ShareDB(path='./myDB.pickle',
                          reset=True,
                          serial='pickle',
                          compress=True,
                          readers=40,
                          buffer_size=100,
                          map_size=10**7)
    pickle_myDB = None  # Discard the handle to the fresh instance ...
    pickle_myDB = ShareDB(path='./myDB.pickle', reset=False)  # ... and reopen it to exercise persistence
    yield pickle_myDB
    pickle_myDB.drop()
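
And a hypothetical counterpart; unlike msgpack, pickle preserves Python-specific types such as tuples on round-trip:

def test_pickle_roundtrip(pickle_myDB):
    pickle_myDB['pair'] = ('a', 'b')
    assert pickle_myDB['pair'] == ('a', 'b')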
Example #10
import pytest
from ShareDB import ShareDB

def test_ShareDB_path():
    '''
    Test Exceptions and success when path is occupied by file.
    '''
    # Set up a ShareDB init failure by occupying the path with a file
    path = './test_init.ShareDB'
    with open(path, 'w') as outfile:
        pass # Create an empty file at the target path

    # Raises TypeError because path points to a file
    with pytest.raises(TypeError) as error:
        myDB = ShareDB(path=path)

    # Automatically remove file when reset is True
    myDB = ShareDB(path=path, reset=True)
    myDB.drop()
Example #11
import numpy as np           # Convolution
from ShareDB import ShareDB

def para_conv(inDB_path, outDB_path, exec_id, num_task, num_proc):
    '''
    This procedure computes the convolution of vector pairs stored in a
    ShareDB instance located at inDB_path, and writes the results to a
    ShareDB instance located at outDB_path. The procedure ends once
    this executor's stream of task tokens is exhausted.
    '''
    # Open inDB to read vector pairs
    inDB = ShareDB(path=inDB_path)

    # Open outDB to write convolution results
    outDB = ShareDB(
        path=outDB_path,    # Store output where specified
        reset=True,         # Recreate outDB if previously existing
        serial='msgpack',   # msgpack offers optimal serialization for lists
        readers=num_proc+1, # All num_proc workers plus the merger may read in parallel
        compress=True,      # Serialized msgpack-ed lists are further compressed
        map_size=5*10**8 // num_proc) # And we split total allocation uniformly

    # Actual computation loop
    key_iter = stream_task_token(exec_id, num_task, num_proc)
    current_token = exec_id
    # Get vector pairs
    for X, Y in inDB.multiget(key_iter):
        # Log execution initiation
        print('EXECUTING WORK # {}'.format(current_token))

        # Actual execution
        result = tuple(int(v) for v in np.convolve(X, Y)) # Compute convolution as a tuple of ints
        outDB[current_token] = result    # Insert the (compressed) result in outDB

        # Log execution computation
        print('COMPLETED WORK # {}'.format(current_token))

        # Update token for logging
        current_token += num_proc

    # Log executor completion
    print('EXECUTOR # {} COMPLETED'.format(exec_id))

    # Time to close outDB ... we're done!
    outDB.close()
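
The helper stream_task_token is not shown in these examples; based on how current_token starts at exec_id and advances by num_proc, a plausible (purely hypothetical) reconstruction is:

def stream_task_token(exec_id, num_task, num_proc):
    # Hypothetical reconstruction: yield every num_proc-th task key,
    # offset by this executor's id, until the task count is exhausted
    token = exec_id
    while token < num_task:
        yield token
        token += num_proc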
Example #12
import pytest
from ShareDB import ShareDB

def test_close_drop():
    '''
    Test close and drop.
    '''
    # Successful close
    myDB = ShareDB(path='./test_close_drop')
    assert myDB.close() == True
    assert myDB.close() == False

    # Once closed transaction raises RuntimeError
    with pytest.raises(RuntimeError) as error:
        myDB[1] = 2

    # Successful drop
    myDB = ShareDB(path='./test_close_drop', reset=True)
    assert myDB.drop() == True
    assert myDB.drop() == False

    # Once dropped transaction raises RuntimeError
    with pytest.raises(RuntimeError) as error:
        myDB[1] = 2
Example #13
import pytest
from ShareDB import ShareDB

def test_ShareDB_init_param_fails():
    '''
    Test Exceptions to be raised on bad instantiation.
    '''
    with pytest.raises(TypeError) as error:
        myDB = ShareDB()          # No path given
        myDB.drop()
    with pytest.raises(TypeError) as error:
        myDB = ShareDB(path=True) # Path is not a string
        myDB.drop()
    with pytest.raises(TypeError) as error:
        myDB = ShareDB(path=123)  # Path is not a string
        myDB.drop()
    with pytest.raises(TypeError) as error:
        myDB = ShareDB(path='./test_init.ShareDB',
                       reset=True,
                       serial='something_fancy') # Unsupported serializer
    with pytest.raises(TypeError) as error:
        myDB = ShareDB(path='./test_init.ShareDB',
                       reset=True,
                       serial='pickle',
                       compress='AbsoluteTruth') # compress must be boolean
    with pytest.raises(TypeError) as error:
        myDB = ShareDB(path='./test_init.ShareDB',
                       reset=True,
                       readers='XYZ',            # readers must be an integer
                       buffer_size=100,
                       map_size=10**3)
    with pytest.raises(TypeError) as error:
        myDB = ShareDB(path='./test_init.ShareDB',
                       reset=True,
                       readers='XYZ',
                       buffer_size=100,
                       map_size=0)               # map_size must be positive
    myDB = ShareDB(path='./test_init.ShareDB', reset=True)
    myDB.drop()
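
Examples #1, #11 and #2 form a producer/worker/merger pipeline; a hypothetical driver wiring them together with multiprocessing (names and paths are illustrative, not from the original examples) might look like:

from multiprocessing import Process

def run_pipeline(num_task=100, num_proc=4):
    # Queue all vector pairs (Example #1)
    stream_tasks('./inDB', num_task, num_proc)

    # Launch one convolution worker per process (Example #11)
    workers = []
    for exec_id in range(num_proc):
        p = Process(
            target=para_conv,
            args=('./inDB', './outDB_{}'.format(exec_id),
                  exec_id, num_task, num_proc))
        p.start()
        workers.append(p)
    for p in workers:
        p.join()

    # Merge all per-worker results (Example #2)
    merge_results(
        './mergeDB',
        ['./outDB_{}'.format(exec_id) for exec_id in range(num_proc)])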