Exemplo n.º 1
0
def test_multiprocessing_safety_parent_children_race(mongo_host, library_name):
    """Start children immediately, then initialize the library in the parent.

    For each of 12 iterations a fresh global Arctic instance is created, 6
    child processes running ``f`` are started one by one (without waiting
    for the others to be created), and only afterwards the parent
    initializes the library.  Every child must exit with code 0.
    """
    global MY_ARCTIC

    iteration_count = 12
    children_per_iteration = 6
    writes_per_child = 20

    for _iteration in range(iteration_count):
        MY_ARCTIC = Arctic(mongo_host=mongo_host)

        children = []
        for _slot in range(children_per_iteration):
            child = Process(target=f, args=(library_name, writes_per_child, False))
            # Start right away instead of first creating all the children.
            child.start()
            children.append(child)

        # Per the original author's note: this unblocks the spinning children.
        MY_ARCTIC.initialize_library(library_name, VERSION_STORE)

        # Wait for every child before inspecting any exit code.
        for child in children:
            child.join()

        for child in children:
            assert child.exitcode == 0

        MY_ARCTIC.reset()

    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)
Exemplo n.º 2
0
def test_multiprocessing_safety(mongo_host, library_name):
    """Initialize the library in the parent, then run many children against it.

    The auth hook is registered and the library is created before any child
    process exists.  All 64 ``Process`` objects are built first and started
    afterwards, so the children begin close together in time.  Every child
    must exit with code 0 and the library must still be a ``VersionStore``.
    """
    global MY_ARCTIC

    child_count = 64
    writes_per_child = 100

    register_get_auth_hook(my_auth_hook)

    MY_ARCTIC = Arctic(mongo_host=mongo_host)
    MY_ARCTIC.initialize_library(library_name, VERSION_STORE)
    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)

    children = []
    for _slot in range(child_count):
        children.append(
            Process(target=f, args=(library_name, writes_per_child, True)))

    for child in children:
        child.start()

    # Wait for every child before inspecting any exit code.
    for child in children:
        child.join()

    for child in children:
        assert child.exitcode == 0

    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)
Exemplo n.º 3
0
class ArcticSaver(AbstractBaseSaver):
    """
    Saver that keeps dataframes in one Arctic library.
    """
    def __init__(self,
                 host: str = 'localhost',
                 library: str = 'test_log',
                 note: str = '') -> None:
        """
        Connect to ``host`` and open ``library``; the symbol written to is
        derived from ``self.name_str`` at save time.
        """
        self.host, self.library = host, library
        connection = Arctic(host)
        connection.initialize_library(library)
        self.db = connection
        self.store = connection[library]
        super().__init__(note)

    def save(self,
             df: pd.DataFrame,
             what: str,
             contract_str: str,
             note: str = '') -> None:
        """Write ``df`` under the symbol built from ``what`` and ``contract_str``.

        ``note`` is accepted but not used by this method.
        """
        symbol = self.name_str(what, contract_str)
        self.store.write(symbol, df)

    def keys(self) -> List[str]:
        """Return the symbols listed by the underlying library."""
        symbols = self.store.list_symbols()
        return symbols

    def read(self, key: str) -> pd.DataFrame:
        """Return whatever the underlying library's ``read`` gives for ``key``."""
        stored = self.store.read(key)
        return stored

    def __str__(self):
        description = (f'ArcticSaver(host={self.host}, library={self.library}, '
                       f'note={self.note})')
        return description
Exemplo n.º 4
0
class Database(object):
    """Buffer incoming ticks and write them to Arctic in batches."""

    def __init__(self, sym, exchange):
        """Set up the tick buffer; connect to Arctic only when recording is on.

        :param sym: symbol the ticks are written under
        :param exchange: exchange name, used only in the start-up message
        """
        self.counter = 0
        self.data = list()
        self.sym = sym
        self.tz = tz.utc
        self.exchange = exchange
        # Recording disabled is the default state; new_tick() checks self.db.
        self.db = None
        self.collection = None
        if not configs.RECORD_DATA:
            return

        self.db = Arctic(configs.MONGO_ENDPOINT)
        self.db.initialize_library(configs.ARCTIC_NAME,
                                   lib_type=TICK_STORE)
        self.collection = self.db[configs.ARCTIC_NAME]
        print('%s is recording %s\n' % (self.exchange, self.sym))

    def new_tick(self, msg):
        """Timestamp ``msg``, buffer it, and flush once a full batch is held.

        Does nothing when recording is disabled.

        :param msg: incoming tick; gains 'index' and 'system_time' entries
        """
        if self.db is None:
            return

        self.counter += 1
        msg['index'] = dt.now(tz=self.tz)
        msg['system_time'] = str(msg['index'])
        self.data.append(msg)

        batch_is_full = self.counter % configs.BATCH_SIZE == 0
        if not batch_is_full:
            return

        print('%s added %i msgs to Arctic' % (self.sym, self.counter))
        self.collection.write(self.sym, self.data)
        # Start a fresh batch.
        self.counter = 0
        self.data.clear()
Exemplo n.º 5
0
def show():
    """Read 'XBTUSD' from the local 'Bitmex' library and plot rolling volatility.

    Prints the stored data and metadata, adds 'ret', 'vol10' and 'vol30'
    columns to the frame, and shows the two volatility series in a plot.
    """
    from math import sqrt

    # Connect to the local MongoDB; the library defaults to VersionStore.
    store = Arctic('localhost')
    store.initialize_library('Bitmex')
    library = store['Bitmex']

    item = library.read('XBTUSD')
    xbtusd = item.data
    metadata = item.metadata
    print (xbtusd)
    print (metadata)

    # Simple return of each close against the previous close.
    close = xbtusd['close']
    xbtusd['ret'] = -1 + close / close.shift(1)

    # Rolling population standard deviation scaled by sqrt(260).
    scale = sqrt(260)
    windows = (('vol10', 10), ('vol30', 30))
    for column, length in windows:
        xbtusd[column] = scale * xbtusd['ret'].rolling(length).std(ddof=0)

    for column, _length in windows:
        plt.plot(xbtusd.index, xbtusd[column], label=column)

    plt.ylabel('vol')
    plt.xlabel('Date')
    plt.legend(loc=0)

    plt.show()
def test_multiprocessing_safety(mongo_host, library_name):
    """Initialize the library in the parent, then run many children against it.

    The auth hook is registered and the library is created before any child
    process exists.  All 64 ``Process`` objects are built first and started
    afterwards, so the children begin close together in time.  Every child
    must exit with code 0 and the library must still be a ``VersionStore``.
    """
    global MY_ARCTIC

    child_count = 64
    writes_per_child = 100

    register_get_auth_hook(my_auth_hook)

    MY_ARCTIC = Arctic(mongo_host=mongo_host)
    MY_ARCTIC.initialize_library(library_name, VERSION_STORE)
    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)

    child_args = (library_name, writes_per_child, True)
    children = []
    for _slot in range(child_count):
        children.append(Process(target=f, args=child_args))

    for child in children:
        child.start()

    # Wait for every child before inspecting any exit code.
    for child in children:
        child.join()

    for child in children:
        assert child.exitcode == 0

    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)
Exemplo n.º 7
0
def sync():
    """Fetch candles and write them to 'XBTUSD' in the local 'Bitmex' library."""
    arctic_store = Arctic('localhost')
    arctic_store.initialize_library('Bitmex')
    bitmex_lib = arctic_store['Bitmex']

    candles = get_candle_pandas()
    print (candles)

    source_info = {'source': 'Bitmex'}
    bitmex_lib.write('XBTUSD', candles, metadata=source_info)
Exemplo n.º 8
0
class ArcticBinary:
    """Read and write binary objects as symbols of a single Arctic library."""

    def __init__(self,
                 lib_name: str = _ARCTIC_BINARY_LIBRARY,
                 mongo_db: str = "auto"):
        """Each instance is assumed to operate on exactly one library.

            mongo_db :
                "auto" choose between google and local automatically,
                       depending on whether the environment is colab
                "google" use the Google-hosted mongo
                "intranet" use the Mongo in the machine room (intranet)

            NOTE(review): in this constructor every value other than "google"
            and "intranet" (including "auto") uses get_mongo_admin_conn_str();
            whether that helper does the colab detection is not visible here.
        """
        # For now the mongo address used by arctic is hard-coded here
        mongo_db_conn_str = get_mongo_admin_conn_str()
        if mongo_db == "google":
            mongo_db_conn_str = get_google_mongo_conn_str()
        elif mongo_db == "intranet":
            mongo_db_conn_str = get_intranet_mongo_conn_str()
        self._store = Arctic(mongo_db_conn_str)
        # Only create the library when it does not exist yet.
        if not self._store.library_exists(lib_name):
            self._store.initialize_library(lib_name, VERSION_STORE)
        self._lib = self._store[lib_name]

    def write_bin_object(self, bin_data: bytes, symbol: str) -> None:
        """Write ``bin_data`` to the library under ``symbol``."""
        self._lib.write(symbol, bin_data)

    def read_bin_object(self, symbol: str) -> bytes:
        """Return the ``data`` attribute of the item read for ``symbol``."""
        return self._lib.read(symbol).data

    def has_symbol(self, symbol: str) -> bool:
        """Return the library's ``has_symbol`` answer for ``symbol``."""
        return self._lib.has_symbol(symbol)
def test_multiprocessing_safety_parent_children_race(mongo_host, library_name):
    """Start children immediately, then initialize the library in the parent.

    For each of 12 iterations a fresh global Arctic instance is created, 6
    child processes running ``f`` are started one by one (without waiting
    for the others to be created), and only afterwards the parent
    initializes the library.  Every child must exit with code 0.
    """
    global MY_ARCTIC

    iteration_count = 12
    children_per_iteration = 6
    writes_per_child = 20
    child_args = (library_name, writes_per_child, False)

    for _iteration in range(iteration_count):
        MY_ARCTIC = Arctic(mongo_host=mongo_host)

        children = []
        for _slot in range(children_per_iteration):
            child = Process(target=f, args=child_args)
            # Start right away instead of first creating all the children.
            child.start()
            children.append(child)

        # Per the original author's note: this unblocks the spinning children.
        MY_ARCTIC.initialize_library(library_name, VERSION_STORE)

        # Wait for every child before inspecting any exit code.
        for child in children:
            child.join()

        for child in children:
            assert child.exitcode == 0

        MY_ARCTIC.reset()

    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)
Exemplo n.º 10
0
    def __init__(self, TYPE):
        """
        Copy settings from GetConfig() and, when MONGO is enabled, open the
        'ashare_<TYPE>' Arctic library.

        :param TYPE: 'BS', 'IS', 'CF'
        """
        settings = GetConfig()

        # ---- settings taken from the configuration object ----
        self.config = settings
        self.TYPE = TYPE  # 'BS', 'IS', 'CF'
        self.MONGO = settings.MONGO
        self.CSV = settings.CSV
        self.RAW = False
        self.outdir = settings.fund_dir
        self.encode = settings.encode
        self.proxypool = settings.proxypool

        if self.MONGO:
            # Imported lazily so arctic is only needed when MONGO is enabled.
            from arctic import Arctic
            # mongod --dbpath D:/idwzx/project/arctic
            library_name = 'ashare_{}'.format(self.TYPE)
            connection = Arctic(settings.ahost)
            connection.initialize_library(library_name)
            self.lib = connection[library_name]

        self.result_dict = {}
Exemplo n.º 11
0
    def save_to_chunkstore_per_symbol(self):
        """Rebuild the 'jy_equity_mkt_data' chunk store, one symbol per column.

        The library is deleted and re-created, the close prices are pivoted
        to a frame indexed by "date" with one column per "o" value, and every
        column is written (NaNs dropped) with a daily chunk size.  Progress
        is printed after every second column.
        """
        lib_name = "jy_equity_mkt_data"
        arctic_store = Arctic(get_mongo_admin_conn_str())
        arctic_store.delete_library(lib_name)
        arctic_store.initialize_library(lib_name, lib_type=CHUNK_STORE)
        chunk_lib = arctic_store[lib_name]

        prices = self.load_all_close_price()

        pivoted = prices.pivot_table(values="close_price",
                                     index="t",
                                     columns="o",
                                     aggfunc=np.mean)
        pivoted.index.rename("date", inplace=True)

        for written, col in enumerate(pivoted.columns, start=1):
            series = pivoted.loc[:, col].dropna(axis=0)
            chunk_lib.write(col,
                            series,
                            chunker=DateChunker(),
                            chunk_size="D")
            if written % 2 == 0:
                print(f"{written}:{col}")
Exemplo n.º 12
0
    def __init__(self,
                 collection_name,
                 database_name=None,
                 host=None,
                 port=None):
        """Open the Arctic library named '<database_name>.<collection_name>'.

        Missing database/host values are filled in by ``mongo_defaults``; the
        port it returns is ignored because Arctic is given the host only.
        """
        resolved = mongo_defaults(db=database_name, host=host, port=port)
        database_name, host, _ignored_port = resolved

        self.database_name = database_name
        self.collection_name = collection_name
        self.host = host

        # Arctic doesn't accept a port
        self.store = Arctic(host)
        self.library_name = database_name + "." + collection_name
        # Original author's open question: will this fail if already exists??
        self.store.initialize_library(self.library_name)
        self.library = self.store[self.library_name]
Exemplo n.º 13
0
class Database(object):
    """Buffer incoming ticks and write them to the Arctic Tick Store in batches."""

    def __init__(self, sym, exchange):
        """Set up the tick buffer; connect to Arctic only if RECORD_DATA is set.

        :param sym: symbol the ticks are written under
        :param exchange: exchange name, used only in the start-up message
        """
        self.counter = 0
        self.data = list()
        self.sym = sym
        self.tz = TIMEZONE
        self.exchange = exchange
        # Recording disabled is the default state; new_tick() checks self.db.
        self.db = None
        self.collection = None
        if not RECORD_DATA:
            return

        self.db = Arctic(MONGO_ENDPOINT)
        self.db.initialize_library(ARCTIC_NAME, lib_type=TICK_STORE)
        self.collection = self.db[ARCTIC_NAME]
        print('\n%s is recording %s\n' % (self.exchange, self.sym))

    def new_tick(self, msg):
        """
        When recording is enabled, timestamp the tick and add it to the
        buffer; once BATCH_SIZE ticks have accumulated, write the batch to
        the Arctic Tick Store and start a new one.
        :param msg: incoming tick
        :return: void
        """
        if self.db is None:
            return

        self.counter += 1
        msg['index'] = dt.now(tz=self.tz)
        msg['system_time'] = str(msg['index'])
        self.data.append(msg)

        batch_is_full = self.counter % BATCH_SIZE == 0
        if not batch_is_full:
            return

        print('%s added %i msgs to Arctic' % (self.sym, self.counter))
        self.collection.write(self.sym, self.data)
        self.counter = 0
        self.data.clear()
Exemplo n.º 14
0
class TickBlotter(AbstractBaseBlotter):
    """Blotter that writes its records to a symbol of an Arctic tick store."""

    def __init__(self, save_to_file: bool = True, host: str = 'localhost',
                 library: str = 'tick_blotter',
                 collection: str = 'test_blotter') -> None:
        # NOTE(review): save_to_file is not used here and the base class
        # __init__ is not called -- confirm this is intended.
        connection = Arctic(host)
        connection.initialize_library(library, lib_type=TICK_STORE)
        self.db = connection
        self.store = connection[library]
        self.collection = collection

    def write_to_file(self, data: Dict[str, Any]) -> None:
        """Add a UTC 'index' derived from data['time'] and write the record."""
        data['index'] = pd.to_datetime(data['time'], utc=True)
        batch = [data]
        self.store.write(self.collection, batch)

    def save(self) -> None:
        """Stamp every record in self.blotter with an 'index' and write them all."""
        stamped = []
        for entry in self.blotter:
            entry.update({'index': pd.to_datetime(entry['time'], utc=True)})
            stamped.append(entry)
        self.store.write(self.collection, stamped)

    def delete(self, querry: Dict) -> str:
        """Not supported by this blotter."""
        raise NotImplementedError

    def clear(self) -> str:
        """Not supported by this blotter."""
        raise NotImplementedError
Exemplo n.º 15
0
class Store:
    """
    Per-frequency storage on top of Arctic.

    freq in ['min', 'hour', 'day']
    """
    # Chunk size handed to the library's write() for each supported frequency.
    chunk_size = {'min': 'D', 'hour': 'D', 'day': 'M'}

    def __init__(self, library=default_library, db='localhost', **kwargs):
        """Connect to the Arctic host ``db``.

        ``library`` and ``**kwargs`` are accepted but not used here.
        """
        self.conn = Arctic(db)

    def write(self, symbol, data, freq='min', what='TRADES'):
        """Store ``data`` for ``symbol`` in the library named '<what>_<freq>'.

        The library is created as a chunk store when it does not exist yet.
        Data is appended when the symbol is already present and written
        with the chunk size configured for ``freq`` otherwise.

        Raises:
            KeyError: if ``freq`` is not a key of ``chunk_size``.
        """
        lib_name = f'{what}_{freq}'
        # Look the chunk size up by the *value* of freq (the original indexed
        # with the literal string 'freq', which always raised KeyError), and
        # do it first so a bad freq never leaves an empty library behind.
        chunk_size = self.chunk_size[freq]

        if lib_name not in self.conn.list_libraries():
            # The library type belongs to initialize_library, not to write().
            self.conn.initialize_library(lib_name, lib_type=CHUNK_STORE)
        lib = self.conn[lib_name]

        if lib.has_symbol(symbol):
            lib.append(symbol, data)
        else:
            # First write for this symbol, also in an already existing library.
            lib.write(symbol, data, chunk_size=chunk_size)

    def read(self, symbol, freq='min', what='TRADES', **kwargs):
        """Read ``symbol`` from the library named '<what>_<freq>'.

        Extra keyword arguments are forwarded to the library's ``read``
        (the original referenced an undefined ``kwargs`` name here).
        """
        lib_name = f'{what}_{freq}'
        lib = self.conn[lib_name]
        return lib.read(symbol, **kwargs)

    def check_data_availability(
        self,
        symbol,
    ):
        """Placeholder; not implemented yet."""
        pass
Exemplo n.º 16
0
class TimeSuiteWrite(object):
    """Write benchmarks, parameterized over the indices of TEST_SIZES."""
    params = list(range(len(TEST_SIZES)))
    param_names = ['5K * 10^']

    def setup(self, arg):
        """Connect and start from a freshly created 'test.lib' library."""
        self.store = Arctic("127.0.0.1")
        store = self.store
        store.delete_library('test.lib')
        store.initialize_library('test.lib')
        self.lib = store['test.lib']

    def teardown(self, arg):
        """Drop the benchmark library and forget the handle to it."""
        self.store.delete_library('test.lib')
        self.lib = None

    def time_write_dataframe_random(self, idx):
        """Write the random dataframe selected by ``idx``."""
        frame = df_random[idx]
        self.lib.write('df_bench_random', frame)

    def time_write_series_random(self, idx):
        """Write the random series selected by ``idx``."""
        series = s_random[idx]
        self.lib.write('series_bench_random', series)

    def time_write_dataframe_compressible(self, idx):
        """Write the compressible dataframe selected by ``idx``."""
        frame = df_compress[idx]
        self.lib.write('df_bench_compressible', frame)

    def time_write_series_compressible(self, idx):
        """Write the compressible series selected by ``idx``."""
        series = s_compress[idx]
        self.lib.write('series_bench_compressible', series)
Exemplo n.º 17
0
class TimeSuiteWrite(object):
    """Write benchmarks, parameterized over the indices of TEST_SIZES."""
    params = list(range(len(TEST_SIZES)))
    param_names = ['5K * 10^']

    def setup(self, arg):
        """Connect and start from a freshly created 'test.lib' library."""
        self.store = Arctic("127.0.0.1")
        store = self.store
        store.delete_library('test.lib')
        store.initialize_library('test.lib')
        self.lib = store['test.lib']

    def teardown(self, arg):
        """Drop the benchmark library and forget the handle to it."""
        self.store.delete_library('test.lib')
        self.lib = None

    def time_write_dataframe_random(self, idx):
        """Write the random dataframe selected by ``idx``."""
        frame = df_random[idx]
        self.lib.write('df_bench_random', frame)

    def time_write_series_random(self, idx):
        """Write the random series selected by ``idx``."""
        series = s_random[idx]
        self.lib.write('series_bench_random', series)

    def time_write_dataframe_compressible(self, idx):
        """Write the compressible dataframe selected by ``idx``."""
        frame = df_compress[idx]
        self.lib.write('df_bench_compressible', frame)

    def time_write_series_compressible(self, idx):
        """Write the compressible series selected by ``idx``."""
        series = s_compress[idx]
        self.lib.write('series_bench_compressible', series)
Exemplo n.º 18
0
 def initialize_db(self, lib):
     """Open the local Arctic library ``lib``, creating it as a tick store if missing.

     Does nothing when ``lib`` is falsy.  Otherwise sets ``self.dlib`` to the
     library name and ``self.dbconn`` to the library handle.
     """
     if not lib:
         return

     connection = Arctic('localhost')
     known_libraries = connection.list_libraries()
     if lib not in known_libraries:
         self.logger.info(
             'Data library \'%s\' does not exist -- creating it', lib)
         connection.initialize_library(lib, lib_type=TICK_STORE)

     self.dlib = lib
     self.dbconn = connection[lib]
Exemplo n.º 19
0
def ingest_trades(filename: str, library: str, symbol: str) -> None:
    """Load a trades CSV and write it to ``symbol`` in the Arctic ``library``.

    The CSV is read with COLUMN_NAMES as header, its ``time`` column is
    converted from epoch seconds, and the frame is stored with the metadata
    {'source': 'csv'}.
    """
    arctic_store = Arctic('localhost')
    logger.info(f"Saving to library: {library}, symbol: {symbol}")
    # Defaults to VersionStore
    arctic_store.initialize_library(library)
    target = arctic_store[library]

    trades = pd.read_csv(filename, names=COLUMN_NAMES)
    trades.time = pd.to_datetime(trades.time, unit='s')

    target.write(symbol, trades, metadata={'source': 'csv'})
class ArcticStoreDatabase(Database):
    """Database implementation backed by an Arctic chunk store."""

    def __init__(self):
        super().__init__()
        # Both are set by open().
        self.db = None
        self.store = None

    def open(self, store='chunkstore'):
        """Connect to the local Arctic and open ``store``, creating it if needed.

        A newly created library is initialized as a chunk store and gets a
        quota of ``maxDBStorage`` GiB.
        """
        self.db = Arctic('localhost')
        try:
            self.store = self.db[store]
        except Exception:
            # Was a bare ``except:``, which would also swallow
            # KeyboardInterrupt/SystemExit.  A failed lookup is still treated
            # as "library missing", as before.
            self.db.initialize_library(store, lib_type=CHUNK_STORE)
            self.store = self.db[store]
            self.store._arctic_lib.set_quota(maxDBStorage * 1024 * 1024 * 1024)

    def close(self):
        pass  #no need to close arctic connection

    def remove(self, key):
        """Delete ``key`` from the store."""
        self.store.delete(key)  #used for debugging

    def getMeatdata(self, key):
        """Return the metadata stored for ``key``.

        The misspelled name is kept for existing callers; ``getMetadata`` is
        the correctly spelled alias.
        """
        return self.store.read_metadata(key)

    # Correctly spelled alias of getMeatdata.
    getMetadata = getMeatdata

    def setMetadata(self, key, metadata):
        """Store ``metadata`` for ``key``."""
        self.store.write_metadata(key, metadata)

    def _save(self, key, data):
        """Append to an existing key, otherwise write it with its chunk size.

        The chunk size comes from ``chunkSizes`` and defaults to 'M'.
        """
        if self.has_key(key):
            self.store.append(key, data)
        else:
            self.store.write(key, data, chunk_size=chunkSizes.get(key, 'M'))

    def get(self, key, start=None, end=None, iterator=False):
        """Read ``key`` restricted to the closed range [start, end].

        Returns the store's iterator over the range when ``iterator`` is
        true, and the result of a plain read otherwise.
        """
        chunk_range = DateRange(start, end, CLOSED_CLOSED)
        if not iterator:
            return self.store.read(key, chunk_range=chunk_range)
        return self.store.iterator(key, chunk_range=chunk_range)

    def getLatestRow(self, key):
        """Read ``key`` from the 'end' date recorded in its metadata onwards."""
        latestDate = self.store.read_metadata(key)['end']
        return self.get(key, start=latestDate, end=None)

    def getFirstRow(self, key):
        """Read ``key`` up to the 'start' date recorded in its metadata."""
        firstDate = self.store.read_metadata(key)['start']
        return self.get(key, start=None, end=firstDate)

    def has_key(self, key):
        """Return whether the store has the symbol ``key``."""
        return self.store.has_symbol(key)

    def list_keys(self):
        """Return the symbols listed by the store."""
        return self.store.list_symbols()
Exemplo n.º 21
0
def insert_random_data(config, args, n_rows):
    """Re-create the benchmark library and fill it with generated datasets.

    One dataset is generated and written per symbol, under the names
    'sym0', 'sym1', ... up to ``args.symbols``.
    """
    arctic_store = Arctic(args.mongodb, app_name="benchmark")
    lib_name = lib_name_from_args(config)

    # Always start from an empty library.
    arctic_store.delete_library(lib_name)
    arctic_store.initialize_library(lib_name, segment='month')
    benchmark_lib = arctic_store[lib_name]

    for index in range(args.symbols):
        # NOTE(review): n_col is given n_rows as well -- confirm this is intended.
        dataset = gen_oneminute_dataset(n_row=n_rows, n_col=n_rows, dense=args.dense)
        symbol_name = 'sym' + str(index)
        benchmark_lib.write(symbol_name, dataset)
Exemplo n.º 22
0
def insert_random_data(config, args, n_rows):
    """Re-create the benchmark library and fill it with generated datasets.

    One dataset is generated and written per symbol, under the names
    'sym0', 'sym1', ... up to ``args.symbols``.
    """
    arctic_store = Arctic(args.mongodb, app_name="benchmark")
    lib_name = lib_name_from_args(config)

    # Always start from an empty library.
    arctic_store.delete_library(lib_name)
    arctic_store.initialize_library(lib_name, segment='month')
    benchmark_lib = arctic_store[lib_name]

    for index in range(args.symbols):
        # NOTE(review): n_col is given n_rows as well -- confirm this is intended.
        dataset = gen_oneminute_dataset(n_row=n_rows, n_col=n_rows, dense=args.dense)
        symbol_name = 'sym' + str(index)
        benchmark_lib.write(symbol_name, dataset)
Exemplo n.º 23
0
def _get_lib(lib_name: "lib name(str)" = "default",
             lib_type: "lib type" = VERSION_STORE):
    """Return the Arctic library ``lib_name``, creating it when it is missing.

    The connection uses the MONGO_* credentials from ``config`` on port
    27017; ``lib_type`` is only used when the library has to be created.
    """
    connection_settings = {
        'host': config.MONGO_HOST,
        'port': 27017,
        'username': config.MONGO_USER,
        'password': config.MONGO_PWD,
        'authSource': config.MONGO_AUTHDB,
    }
    mongo_client = MongoClient(**connection_settings)

    arctic_store = Arctic(mongo_client)
    already_there = arctic_store.library_exists(lib_name)
    if not already_there:
        arctic_store.initialize_library(lib_name, lib_type=lib_type)
    return arctic_store[lib_name]
Exemplo n.º 24
0
def init():
    """Open the 'bert2vec' Arctic library and the bert2vec.guizhou collection.

    Returns the tuple (library, mongo client, database, collection).
    """
    logger.info('init start')

    # Arctic connection; the library defaults to VersionStore.
    # NOTE(review): this address is hard-coded while the pymongo client below
    # takes its host/port from myconfig -- confirm both are meant to differ.
    arctic_store = Arctic("192.168.1.117:27018")
    arctic_store.initialize_library('bert2vec')
    bert_library = arctic_store['bert2vec']

    # Plain pymongo handles.
    mongo_client = pymongo.MongoClient(host=myconfig.MONGO_HOST,
                                       port=myconfig.MONGO_PORT)
    database = mongo_client.bert2vec
    collection = database.guizhou

    logger.info('init ok')
    return bert_library, mongo_client, database, collection
Exemplo n.º 25
0
def init():
    """Open the 'bert2vec' Arctic library and the bert2vec.guizhou collection.

    Returns the tuple (library, mongo client, database, collection).
    """
    logger.info('init start')

    # Arctic connection to localhost; the library defaults to VersionStore.
    arctic_store = Arctic('localhost')
    arctic_store.initialize_library('bert2vec')
    bert_library = arctic_store['bert2vec']

    # Plain pymongo handles (note the explicit port 27018).
    mongo_client = pymongo.MongoClient(host='localhost', port=27018)
    database = mongo_client.bert2vec
    collection = database.guizhou

    logger.info('init ok')
    return bert_library, mongo_client, database, collection
def main():
    """Command-line entry point: update the given symbols in an Arctic library.

    Parses the command line, configures the pandas display options, builds
    the updater (with an optional API key) and calls ``update`` once per
    comma-separated symbol.
    """
    parser = argparse.ArgumentParser(prog="store", description='Store data to DB')
    parser.add_argument('--host', help="MongoDB host", default=MONGO_HOST_DEFAULT, type=str)
    parser.add_argument('--updater', help="Updater", default=UPDATER_DEFAULT, type=str)
    parser.add_argument('-s', '--source', help="Source", default=SOURCE_DEFAULT, type=str)
    parser.add_argument('--symbols', help="Symbol", default=SYMBOLS_DEFAULT, type=str)
    parser.add_argument('--start', help="Start date", default='', type=str)
    parser.add_argument('--end', help="End date", default='', type=str)
    parser.add_argument('--freq', help="Freq", default='', type=str)
    parser.add_argument('--max_rows', help="max_rows", default=10, type=int)
    parser.add_argument('--max_columns', help="max_columns", default=6, type=int)
    parser.add_argument('--api_key', help="API key", default='', type=str)
    parser.add_argument('--expire_after', help="Cache expiration ('0': no cache, '-1': no expiration, 'HH:MM:SS.X': expiration duration)", default='24:00:00.0', type=str)
    args = parser.parse_args()

    # Use the fully qualified option names: the short forms 'max_rows' and
    # 'max_columns' match more than one option on pandas versions that also
    # have 'styler.render.*' options, and set_option then raises OptionError.
    pd.set_option('display.max_rows', args.max_rows)
    pd.set_option('display.expand_frame_repr', False)
    pd.set_option('display.max_columns', args.max_columns)

    # An empty string on the command line means "not given".
    start = pd.to_datetime(args.start) if args.start != '' else None
    end = pd.to_datetime(args.end) if args.end != '' else None
    freq = args.freq if args.freq != '' else None

    symbols = args.symbols.split(',')

    session = get_session(args.expire_after, 'cache')
    my_updater = updater(args.updater, session=session)
    if args.api_key != '':
        my_updater.set_credentials(api_key=args.api_key)

    store = Arctic(args.host)
    library_name = my_updater.library_name(args.source, freq)
    print(library_name)
    store.initialize_library(library_name)
    library = store[library_name]

    for symbol in symbols:
        update(library, my_updater, symbol, start, end, freq, args.source.lower())
Exemplo n.º 27
0
def read_candles():
    """Read 'XBTUSD' from the local 'crypto' library and print each element."""
    # Local MongoDB; the library defaults to VersionStore.
    arctic_store = Arctic('localhost')
    arctic_store.initialize_library('crypto')
    crypto_lib = arctic_store['crypto']

    stored_item = crypto_lib.read('XBTUSD')
    candles = stored_item.data

    for entry in candles:
        print(entry)
Exemplo n.º 28
0
def write_candles():
    """Fetch daily XBTUSD candles from Bitmex and store them in 'crypto'."""
    bitmex_client = broker.get_client(exc.BITMEX)
    candles = bitmex_client.trades_candle("XBTUSD", mex.candle_1d)
    # Flip the order in which the client returned the candles.
    candles.reverse()

    # Local MongoDB; the library defaults to VersionStore.
    arctic_store = Arctic('localhost')
    arctic_store.initialize_library('crypto')
    crypto_lib = arctic_store['crypto']

    source_info = {'source': 'Bitmex'}
    crypto_lib.write('XBTUSD', candles, metadata=source_info)
Exemplo n.º 29
0
def conn_database():
    """Connect to Arctic with the configured credentials.

    Returns the 'username.scratch' library.  User name and password are
    URL-quoted before they are placed in the connection string.
    """
    db_cfg = oc.cfg['database']
    username = db_cfg['username']
    password = db_cfg['password']
    url = db_cfg['url']
    port = db_cfg['port']
    auth_db = db_cfg['auth_db']

    quoted_user = urllib.parse.quote_plus(username)
    quoted_password = urllib.parse.quote_plus(password)

    connection_string = 'mongodb://%s:%s@%s:%s/%s' % (
        quoted_user, quoted_password, url, port, auth_db)
    conn = Arctic(connection_string)
    conn.initialize_library('username.scratch')

    return conn['username.scratch']
Exemplo n.º 30
0
    def convert_mkt_history_data(self):
        """Rebuild the 'jy_chn_equity_otvn_chunkstore' library period by period.

        The library is deleted and re-created as a chunk store, then each
        date range is converted with
        ``_convert_period_equity_mkt_data_to_arctic``; the last argument is
        True only for the first period.
        """
        arctic_store = Arctic(get_mongo_admin_conn_str())
        lib_name = "jy_chn_equity_otvn_chunkstore"

        arctic_store.delete_library(lib_name)
        arctic_store.initialize_library(lib_name, lib_type=CHUNK_STORE)
        chunk_lib = arctic_store[lib_name]

        # Date ranges are hard-coded for now; this could be expressed more elegantly.
        periods = [
            (date(1990, 1, 1), date(2000, 1, 15)),
            (date(2000, 1, 15), date(2010, 1, 15)),
            (date(2010, 1, 15), date(2020, 1, 1)),
        ]
        for position, (period_start, period_end) in enumerate(periods):
            # Per the author's measurements, for daily data a chunk_size of "M"
            # gives the best overall write and read efficiency.
            is_first_period = position == 0
            self._convert_period_equity_mkt_data_to_arctic(
                period_start, period_end, chunk_lib, "mkt_data", "M",
                is_first_period)
Exemplo n.º 31
0
def write_to_db(target_building, iterator):
    '''Write every sensor series of one building to its Arctic library.

    ``iterator`` yields (sensor, timestamps, data) triples; each one is
    stored under the sensor name as a frame indexed by 'date' with a single
    'data' column.
    '''
    conn = Arctic('localhost')

    # One library per building (a library is akin to a collection); create
    # it as a chunk store the first time the building is seen.
    library_missing = target_building not in conn.list_libraries()
    if library_missing:
        conn.initialize_library(target_building, lib_type=CHUNK_STORE)
        print('library for %s created' % target_building)

    building_lib = conn[target_building]

    for sensor, timestamps, data in iterator:
        frame = pd.DataFrame({'date': timestamps, 'data': data})
        frame.set_index('date', inplace=True)
        building_lib.write(sensor, frame)
    def __init__(self, database_name, collection_name, host = DEFAULT_MONGO_HOST):
        """Open the Arctic library named '<database_name>.<collection_name>'.

        ``database_name`` falls back to DEFAULT_DB when it is None.
        """
        if database_name is None:
            database_name = DEFAULT_DB

        self.database_name = database_name
        self.collection_name = collection_name
        self.host = host

        self.store = Arctic(host)
        self.library_name = database_name+"."+collection_name
        # Original author's open question: will this fail if already exists??
        self.store.initialize_library(self.library_name)
        self.library = self.store[self.library_name]
Exemplo n.º 33
0
    def __init__(self, library='YAHOO', timeframe='1D'):
        """Open the local Arctic library '<library>-<timeframe>'.

        The library is created when it is not listed yet.  ``self.tickers``
        holds the sorted symbols of the library.
        """
        self.library = library
        self.timeframe = timeframe

        # Local MongoDB; a new library defaults to VersionStore.
        arctic_store = Arctic('localhost')

        libName = library + '-' + timeframe
        existing_libraries = arctic_store.list_libraries()
        if libName not in existing_libraries:
            arctic_store.initialize_library(libName)

        self.lib = arctic_store[libName]
        self.tickers = self.lib.list_symbols()
        self.tickers.sort()
    def __init__(self,
                 database_name,
                 collection_name,
                 host=DEFAULT_MONGO_HOST):
        """Open the Arctic library named '<database_name>.<collection_name>'."""
        self.database_name = database_name
        self.collection_name = collection_name
        self.host = host

        self.store = Arctic(host)
        self.library_name = database_name + "." + collection_name
        # Original author's open question: will this fail if already exists??
        self.store.initialize_library(self.library_name)
        self.library = self.store[self.library_name]
Exemplo n.º 35
0
def main():
    """Exercise the historical tick store and the hashing helpers.

    Reads '2019-05' from 'HistTickStore', hashes the source of
    ``realTimeUpdate``, pretty-prints the 2001 tick data, runs the network
    hash example and starts the real-time update when the "RealTimeData"
    database does not exist yet.  Always returns 1.
    """
    mongo_client = MongoClient()
    histData = mongo_client.HistoricalData

    arctic_store = Arctic('localhost')
    arctic_store.initialize_library('HistTickStore')
    hist_library = arctic_store['HistTickStore']
    # The value read is not used afterwards; the read itself is kept.
    data2 = hist_library.read('2019-05')

    update_source = inspect.getsource(realTimeUpdate)
    hashPrimitiveAndStore(update_source, 0, 0)

    rtData = mongo_client.RealTimeData
    printer = pprint.PrettyPrinter(indent=4)
    printer.pprint(getHistTickData("2001", "2001", histData))
    networkHashExample()

    if "RealTimeData" not in mongo_client.list_database_names():
        realTimeUpdate()
    return 1
Exemplo n.º 36
0
class TimeSuiteAppend(object):
    """Append benchmark, parameterized over the indices of TEST_SIZES."""
    params = list(range(len(TEST_SIZES)))
    param_names = ['5K * 10^']

    def __init__(self):
        self.store = Arctic("127.0.0.1")

    def setup(self, idx):
        """Re-create 'test.lib' and seed 'test_df' with the frame for ``idx``."""
        store = self.store
        store.delete_library('test.lib')
        store.initialize_library('test.lib')
        self.lib = store['test.lib']

        seed_frame = df_random[idx]
        self.lib.write('test_df', seed_frame)

    def teardown(self, arg):
        """Drop the benchmark library and forget the handle to it."""
        self.store.delete_library('test.lib')
        self.lib = None

    def time_append_dataframe(self, idx):
        """Append the frame for ``idx`` to the seeded 'test_df' symbol."""
        frame = df_random[idx]
        self.lib.append('test_df', frame)
Exemplo n.º 37
0
class TimeSuiteAppend(object):
    """Append benchmark, parameterized over the indices of TEST_SIZES."""
    params = list(range(len(TEST_SIZES)))
    param_names = ['5K * 10^']

    def __init__(self):
        self.store = Arctic("127.0.0.1")

    def setup(self, idx):
        """Re-create 'test.lib' and seed 'test_df' with the frame for ``idx``."""
        store = self.store
        store.delete_library('test.lib')
        store.initialize_library('test.lib')
        self.lib = store['test.lib']

        seed_frame = df_random[idx]
        self.lib.write('test_df', seed_frame)

    def teardown(self, arg):
        """Drop the benchmark library and forget the handle to it."""
        self.store.delete_library('test.lib')
        self.lib = None

    def time_append_dataframe(self, idx):
        """Append the frame for ``idx`` to the seeded 'test_df' symbol."""
        frame = df_random[idx]
        self.lib.append('test_df', frame)
Exemplo n.º 38
0
pd.set_option('max_columns', 6)

from arctic_updater.updaters.truefx import TrueFXUpdater
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

my_updater = TrueFXUpdater()
symbol = 'EURUSD'
year, month = 2015, 11
%time df = my_updater._read_one_month(symbol, year, month)

# Arctic (MongoDB)
from arctic import Arctic
store = Arctic('localhost')
library_name = 'test'
store.initialize_library(library_name)
library = store[library_name]

%time library.write(symbol, df)
%time df_retrieved = library.read(symbol).data

# HDF5
filename = my_updater._filename(symbol, year, month, '.h5')
%time df.to_hdf(filename, "data", mode='w', complevel=0, complib='zlib', format='table')
%time df_retrieved = pd.read_hdf(filename)

# Make unique index
# http://stackoverflow.com/questions/34575126/create-a-dataframe-with-datetimeindex-with-unique-values-by-adding-a-timedelta/34576154#34576154
df = df.reset_index()

%time df['us'] =  (df['Date'].groupby((df['Date'] != df['Date'].shift()).cumsum()).cumcount()).values.astype('timedelta64[us]')
Exemplo n.º 39
0
    df = pd.DataFrame(collections.OrderedDict(sorted(data.items()))).T
    df = df.convert_objects(convert_numeric=True)
    return df


################################################
# VersionStore: Storing and updating stock data
################################################



# Connect to Arctic on localhost.
arctic = Arctic('localhost')

# Create a VersionStore library
# (any existing 'jblackburn.stocks' library is deleted first, so the demo
# starts from an empty library)
arctic.delete_library('jblackburn.stocks')
arctic.initialize_library('jblackburn.stocks')
# Bare expression: its value is only displayed in an interactive session.
arctic.list_libraries()


# Handle to the library created above.
stocks = arctic['jblackburn.stocks']

# get some prices
aapl = get_stock_history('aapl', '2015-01-01', '2015-02-01')
aapl

# store them in the library
stocks.write('aapl', aapl, metadata={'source': 'YAHOO'})
# Each line below reads the symbol again and looks at one part of the result:
# the data (plotted), the metadata and the version.
stocks.read('aapl').data['Adj Close'].plot()
stocks.read('aapl').metadata
stocks.read('aapl').version
Exemplo n.º 40
0
    data = ystockquote.get_historical_prices(ticker, start_date, end_date)
    df = pandas.DataFrame(collections.OrderedDict(sorted(data.items()))).T
    df = df.convert_objects(convert_numeric=True)
    return df


################################################
# VersionStore: Storing and updating stock data
################################################


# Connect to Arctic on localhost.
arctic = Arctic("localhost")

# Create a VersionStore library
# (any existing "jblackburn.stocks" library is deleted first, so the demo
# starts from an empty library)
arctic.delete_library("jblackburn.stocks")
arctic.initialize_library("jblackburn.stocks")
# Bare expression: its value is only displayed in an interactive session.
arctic.list_libraries()


# Handle to the library created above.
stocks = arctic["jblackburn.stocks"]

# get some prices
aapl = get_stock_history("aapl", "2015-01-01", "2015-02-01")
aapl

# store them in the library
stocks.write("aapl", aapl, metadata={"source": "YAHOO"})
# Each line below reads the symbol again and looks at one part of the result:
# the data (plotted), the metadata and the version.
stocks.read("aapl").data["Adj Close"].plot()
stocks.read("aapl").metadata
stocks.read("aapl").version
Exemplo n.º 41
0
    def delete(self, query):
        """
        Pass ``query`` to ``delete_one`` on the backing collection.
        """
        backing_collection = self._collection
        backing_collection.delete_one(query)


# Register CustomArcticLibType under its _LIBRARY_TYPE string so that Arctic
# can map that type string to the class.
register_library_type(CustomArcticLibType._LIBRARY_TYPE, CustomArcticLibType)

# Create an Arctic instance pointed at a mongo host
store = Arctic(mongo_host)

### Initialize the library
# Map username.custom_lib -> CustomArcticLibType
store.initialize_library("username.custom_lib", CustomArcticLibType._LIBRARY_TYPE)

# Now pull our username.custom_lib ; note that it has the:
#   - query(...)
#   - store(...)
#   - delete(...)
# API we defined above
lib = store["username.custom_lib"]


# Store some items in the custom library type
# (the first field is a string for three items and a list for the last one)
lib.store(Stuff("thing", dt(2012, 1, 1), object()))
lib.store(Stuff("thing2", dt(2013, 1, 1), object()))
lib.store(Stuff("thing3", dt(2014, 1, 1), object()))
lib.store(Stuff(["a", "b", "c"], dt(2014, 1, 1), object()))
Exemplo n.º 42
0
def get_stock_history(ticker, start_date, end_date):
    """Fetch historical prices for ``ticker`` and return them as a numeric DataFrame.

    Args:
        ticker: Symbol passed through to ``ystockquote.get_historical_prices``.
        start_date: Start of the requested range, passed through unchanged.
        end_date: End of the requested range, passed through unchanged.

    Returns:
        A ``pandas.DataFrame`` with one row per key of the mapping returned by
        ``ystockquote`` (presumably dates -- confirm against the library), rows
        sorted by key, and every column coerced to a numeric dtype. Values that
        cannot be parsed as numbers become NaN.
    """
    data = ystockquote.get_historical_prices(ticker, start_date, end_date)
    # Sort by key so rows come out ordered, then transpose so each key becomes a row.
    df = pandas.DataFrame(collections.OrderedDict(sorted(data.items()))).T
    # ``DataFrame.convert_objects`` was removed in pandas 1.0; ``to_numeric`` with
    # errors='coerce' is the supported way to turn string columns into numbers.
    # NOTE: unlike the old call, a column with no parseable value becomes all-NaN
    # instead of being left untouched.
    df = df.apply(pandas.to_numeric, errors='coerce')
    return df


################################################
# VersionStore: Storing and updating stock data
################################################

# Connect to the MongoDB instance on localhost.
arctic = Arctic('localhost')
lst = arctic.list_libraries()
# Only initialize the 'KRX' library when it is not already listed.
if 'KRX' not in lst:
	arctic.initialize_library('KRX')
	

#################################
# Dealing with lots of data
#################################


# Handle to the 'KRX' library ensured above
lib = arctic['KRX']

def load_all_stock_history_KRX():
    """Read the KRX symbol table from ``krx_market_symbols.csv`` and print its size.

    NOTE(review): only the opening lines of this function are visible in this
    snippet; anything it does after printing the symbol count is not covered here.
    """
    # Every column is read with dtype=object, i.e. without numeric type inference.
    krx = pd.read_csv('krx_market_symbols.csv', dtype=object)
    # Symbols come from the 'code_yahoo' column.
    stocks = [x for x in krx['code_yahoo']]
    print(len(stocks), " symbols")
Exemplo n.º 43
0
from arctic import Arctic 

def download_daily_bars(instrument, start, end):
    """Download price bars for ``instrument`` and return them as a DataFrame.

    The bars returned by ``get_historical_price`` are serialized to UTF-8 JSON
    and parsed back with ``pd.read_json``. The first rows are printed as a
    progress aid. An empty frame is returned unchanged; otherwise the frame is
    re-indexed by its 'Date' column.
    """
    raw_bars = get_historical_price(instrument, start, end)
    payload = json.dumps(raw_bars, ensure_ascii=False).encode('utf-8')
    frame = pd.read_json(payload)
    print(frame.head())
    # Nothing to index when no bars came back.
    return frame if frame.empty else frame.set_index('Date')

# Connect to the MongoDB instance on localhost.
store = Arctic('localhost')
# Only initialize the 'KRX_G' library when it is not already listed.
if 'KRX_G' not in store.list_libraries():
	store.initialize_library('KRX_G')

lib = store['KRX_G']
# Symbol table; every column is read with dtype=object (no numeric type inference).
krx = pd.read_csv('krx_market_symbols.csv', dtype=object)
# Symbols come from the 'code_google' column.
stocks = [x for x in krx['code_google']]
print(len(stocks), " symbols")
# Date range requested for every symbol.
begin = datetime.date(2000,1,1)
end = datetime.date(2016,5,30)

# Symbols to (re)download in this run.
missing = ['KRX:152550']

# Alternative driver over a slice of the full symbol list, currently disabled:
#for i, stock in enumerate(stocks[160:769]):
for i, stock in enumerate(missing):
	print("%d th code=%s" % (i, stock))
	now = datetime.datetime.now()
	df = download_daily_bars(stock,begin,end)
Exemplo n.º 44
0
from arctic import Arctic

# Connect to the local MongoDB instance
store = Arctic('localhost')

# Create the library - defaults to VersionStore
store.initialize_library('NASDAQ')

# Access the library
library = store['NASDAQ']

# Load some data - maybe from Quandl
import quandl
# NOTE(review): hard-coded API key committed to source. Consider loading it from
# the environment or a secrets store, and rotating this key.
quandl.ApiConfig.api_key = "Cxzxjy2eHKXgwMjVFhbH"
aapl = quandl.get("GOOG/NASDAQ_AAPL")

# Store the data in the library, tagging it with its source as metadata
library.write('AAPL', aapl, metadata={'source': 'Quandl'})

# Reading the data
# The item read back exposes the stored frame as .data and the metadata as .metadata.
item = library.read('AAPL')
aapl = item.data
metadata = item.metadata

Exemplo n.º 45
0
from datetime import datetime as dt

import pandas as pd

from arctic import Arctic

# Connect to the mongo-host / cluster
store = Arctic(mongo_host)

# Data is grouped into 'libraries'.
# Users may have one or more named libraries:
# (bare expression: the list is only echoed in an interactive session)
store.list_libraries()

# Create a library
store.initialize_library('username.scratch')

# Get a library
# library = store['username.<library>']
library = store['username.scratch']

# Store some data in the library
# One 'prices' column, indexed by three consecutive dates.
df = pd.DataFrame({'prices': [1, 2, 3]},
                  [dt(2014, 1, 1), dt(2014, 1, 2), dt(2014, 1, 3)])
library.write('SYMBOL', df)

# Read some data from the library
# (Note the returned object has an associated version number and metadata.)
library.read('SYMBOL')

# Store some data into the library
Exemplo n.º 46
0
class MongoDB(object):
    """Convenience wrapper around an Arctic connection to a local MongoDB.

    ``save`` writes a DataFrame symbol by symbol into an Arctic library
    (optionally appending to what is already stored) and ``read`` returns the
    stored data and metadata for a set of symbols.
    """

    # To start the server, cd to "C:\Program Files\MongoDB\Server\4.0\bin",
    #   then use mongod --dbpath "path to db" when starting the database server to point to the correct
    #   save location.

    # Use dataframe.to_dict('records') to convert to dicts to insert into the DB. The DB is stored in the
    #   MongoDB software.

    # Use Arctic from AHL to store time series data. It uses a MultiIndex pd.DataFrame to store, and other
    #   data goes into metadata as a dict. It will open its own server, so no need to run

    # https://www.mongodb.com/json-and-bson
    # https://stackoverflow.com/questions/20796714/how-do-i-start-mongo-db-from-windows

    # To back up the database, navigate to the bin folder of mongodb, then run "mongodump --out ..." where
    #   ... is the output folder to dump to.
    # To load into another database, navigate to the bin folder again, and use "mongorestore --drop ..."
    #   where ... is the folder where the database backup folders were dumped.
    # MUST RUN CMD AS ADMINISTRATOR!

    def __init__(self):
        """Open the Arctic connection to the MongoDB server on 127.0.0.1."""
        self.conn = Arctic("127.0.0.1")

    def verify_library(self, library):
        """Make sure ``library`` exists, interactively offering to create it.

        If the library is missing, the user is prompted on stdin; answering
        'y' (any case) initializes it, any other answer leaves the store
        untouched. Nothing is returned either way.
        """
        if library in self.conn.list_libraries():
            return

        # The library does not exist: alert and ask whether it should be created.
        print('"%s" library does not exist in server data path' % library)
        create = input('create "%s"? (y, n)' % library)
        if create.lower() == 'y':
            self.conn.initialize_library(library)

    @staticmethod
    def getMultiIndex(data, index, columns=None):
        """
        Method to convert dataframe to MultiIndex

        data: pd.DataFrame
            data to be MultiIndexed
        index: list
            list of strings containing names of columns to be used as MultiIndex (in order left to right)
        columns:
            currently unused; accepted for interface compatibility
        """
        return data.set_index(index)

    @timeMe
    def save(self, data, library, meta_data=None, append=True):
        """
        Method to save to mongodb library using arctic database.

        Parameters
        ----------

            data: pd.DataFrame (MultiIndex)
                DataFrame containing data for database in MultiIndex structure.
                Outer index must be the index used for the arctic "symbol" in the "library".
                Anything that is not a DataFrame, or a DataFrame without rows, is ignored.
            library: str
                which library to write to
            meta_data: dict
                Dictionary of meta data values to include in save.
                Keys match "symbol" from data outer index.
                A symbol with no entry (or a None entry) keeps the metadata already stored
                in the database, or gets None if the symbol is new.
                The dictionary is never modified.
            append: bool
                True will append data to existing data for each "symbol" in database
                False will replace data entirely

        """

        self.verify_library(library)

        if not isinstance(data, pd.DataFrame):
            return
        # Nothing to write; this also avoids indexing into an empty symbol list below.
        if len(data.index) == 0:
            return

        # Work on a private copy so neither the caller's dict nor a shared default is mutated.
        meta = dict(meta_data) if meta_data else {}

        # NOTE(review): if the user declined to create a missing library in
        # verify_library, this lookup is still attempted, exactly as before.
        lib = self.conn[library]

        # get unique symbols to write
        indices = list(data.index.unique())
        # take first index level as "symbol" for database
        if isinstance(indices[0], str):
            symbols = indices
        else:
            symbols = np.unique([x[0] for x in indices])
        existing = set(lib.list_symbols())

        for sym in symbols:
            data_cut = data.loc[sym]
            # a single matching row comes back as a Series; turn it back into a one-row frame
            if isinstance(data_cut, pd.Series):
                data_cut = pd.DataFrame(data_cut).T

            metadata = meta.get(sym)
            in_db = sym in existing

            if in_db:
                # get current data for symbol
                stored = lib.read(sym)
                # only fall back to the stored metadata when the caller supplied none
                if metadata is None:
                    metadata = stored.metadata

            # if the symbol is already in the database and we want to "update" the series
            # rather than simply replace it
            if in_db and append:
                data_post = pd.concat([stored.data, data_cut], axis=0)
                # remove duplicates and keep latest data
                if len(data_post.index.unique()) == 1:
                    data_post = data_post.drop_duplicates(keep='last')
                else:
                    data_post = data_post.groupby(data_post.index).last()
            else:
                data_post = data_cut

            # sort the data by the indices
            data_post = data_post.sort_index()

            lib.write(sym, data_post, metadata=metadata)

    @timeMe
    def read(self, library, symbols=None):
        """
        Method to read from mongodb library using arctic database.

        Parameters
        ----------
            library: str
                which library to read from
            symbols:
                list of "symbols" in "library" to read; None reads every symbol in the library

        Returns
        -------
            dict
                One entry per symbol. Each entry is a dict holding the stored data under the
                key 'data', updated with the symbol's metadata dict when metadata is present
                (so a metadata key named 'data' would replace the stored data in the entry).

        """

        lib = self.conn[library]
        if symbols is None:
            symbols = lib.list_symbols()

        data = {}
        for sym in symbols:
            item = lib.read(sym)
            entry = {'data': item.data}
            if item.metadata is not None:
                entry.update(item.metadata)
            data[sym] = entry

        return data