def test_multiprocessing_safety_parent_children_race(mongo_host, library_name):
    # Create Arctic and directly fork/start children (no wait)
    total_iterations = 12
    total_processes = 6
    total_writes_per_child = 20

    global MY_ARCTIC

    for i in range(total_iterations):
        processes = list()

        MY_ARCTIC = Arctic(mongo_host=mongo_host)
        for j in range(total_processes):
            p = Process(target=f, args=(library_name, total_writes_per_child, False))
            p.start()  # start directly, don't wait for all child processes to be created first
            processes.append(p)

        MY_ARCTIC.initialize_library(library_name, VERSION_STORE)  # this will unblock spinning children

        for p in processes:
            p.join()

        for p in processes:
            assert p.exitcode == 0

        MY_ARCTIC.reset()

    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)
def test_multiprocessing_safety(mongo_host, library_name):
    # Create/initialize library at the parent process, then spawn children, and start them aligned in time
    total_processes = 64
    total_writes_per_child = 100

    register_get_auth_hook(my_auth_hook)
    global MY_ARCTIC
    MY_ARCTIC = Arctic(mongo_host=mongo_host)
    MY_ARCTIC.initialize_library(library_name, VERSION_STORE)
    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)

    processes = [Process(target=f, args=(library_name, total_writes_per_child, True))
                 for _ in range(total_processes)]

    for p in processes:
        p.start()

    for p in processes:
        p.join()

    for p in processes:
        assert p.exitcode == 0

    assert isinstance(MY_ARCTIC.get_library(library_name), VersionStore)
class ArcticSaver(AbstractBaseSaver):
    """
    Serializer for Arctic VersionStore.
    """

    def __init__(self, host: str = 'localhost', library: str = 'test_log',
                 note: str = '') -> None:
        """
        Library given at init, collection determined by self.name_str.
        """
        self.host = host
        self.library = library
        self.db = Arctic(host)
        self.db.initialize_library(library)
        self.store = self.db[library]
        super().__init__(note)

    def save(self, df: pd.DataFrame, what: str, contract_str: str,
             note: str = '') -> None:
        self.store.write(self.name_str(what, contract_str), df)

    def keys(self) -> List[str]:
        return self.store.list_symbols()

    def read(self, key: str) -> pd.DataFrame:
        # VersionStore.read returns a VersionedItem; unwrap .data to match the annotation
        return self.store.read(key).data

    def __str__(self):
        return (f'ArcticSaver(host={self.host}, library={self.library}, '
                f'note={self.note})')
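# A minimal usage sketch for ArcticSaver above (hedged: assumes a local MongoDB
# and that AbstractBaseSaver provides name_str(); the 'vol'/'ES' arguments and
# the DataFrame are hypothetical illustration, not from the original source).
import pandas as pd

saver = ArcticSaver(library='test_log', note='demo')
frame = pd.DataFrame({'close': [1.0, 2.0, 3.0]})
saver.save(frame, 'vol', 'ES')          # symbol comes from name_str('vol', 'ES')
print(saver.keys())                     # all symbols currently in the library
restored = saver.read(saver.keys()[0])  # round-trips the stored DataFrame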
class Database(object):

    def __init__(self, sym, exchange):
        self.counter = 0
        self.data = list()
        self.sym = sym
        self.tz = tz.utc
        self.exchange = exchange

        if configs.RECORD_DATA:
            self.db = Arctic(configs.MONGO_ENDPOINT)
            self.db.initialize_library(configs.ARCTIC_NAME, lib_type=TICK_STORE)
            self.collection = self.db[configs.ARCTIC_NAME]
            print('%s is recording %s\n' % (self.exchange, self.sym))
        else:
            self.db = None
            self.collection = None

    def new_tick(self, msg):
        if self.db is not None:
            self.counter += 1
            msg['index'] = dt.now(tz=self.tz)
            msg['system_time'] = str(msg['index'])
            self.data.append(msg)
            if self.counter % configs.BATCH_SIZE == 0:
                print('%s added %i msgs to Arctic' % (self.sym, self.counter))
                self.collection.write(self.sym, self.data)
                self.counter = 0
                self.data.clear()
def show():
    # Connect to Local MONGODB
    store = Arctic('localhost')

    # Create the library - defaults to VersionStore
    store.initialize_library('Bitmex')

    # Access the library
    library = store['Bitmex']
    # library.write('XBTUSD', df, metadata={'source': 'Bitmex'})

    # Reading the data
    item = library.read('XBTUSD')
    xbtusd = item.data
    metadata = item.metadata
    print(xbtusd)
    print(metadata)

    xbtusd['ret'] = -1 + xbtusd['close'] / xbtusd['close'].shift(1)
    from math import sqrt
    xbtusd['vol10'] = sqrt(260) * xbtusd['ret'].rolling(10).std(ddof=0)
    xbtusd['vol30'] = sqrt(260) * xbtusd['ret'].rolling(30).std(ddof=0)
    # print(volList)

    # plt.plot(df.index, df['close'], label='price')
    plt.plot(xbtusd.index, xbtusd['vol10'], label='vol10')
    plt.plot(xbtusd.index, xbtusd['vol30'], label='vol30')
    # plt.plot(xbtusd['ret'])
    plt.ylabel('vol')
    plt.xlabel('Date')
    plt.legend(loc=0)
    plt.show()
def sync():
    store = Arctic('localhost')
    store.initialize_library('Bitmex')
    library = store['Bitmex']
    df = get_candle_pandas()
    print(df)
    library.write('XBTUSD', df, metadata={'source': 'Bitmex'})
class ArcticBinary:

    def __init__(self, lib_name: str = _ARCTIC_BINARY_LIBRARY, mongo_db: str = "auto"):
        """Assumes one instance operates on a single library.

        mongo_db:
            "auto"     - choose between Google and local automatically,
                         depending on whether the environment is Colab
            "google"   - use the Google-hosted Mongo
            "intranet" - use the Mongo in the server room
        """
        # For now, hardcode the Mongo connection string used by arctic
        mongo_db_conn_str = get_mongo_admin_conn_str()
        if mongo_db == "google":
            mongo_db_conn_str = get_google_mongo_conn_str()
        elif mongo_db == "intranet":
            mongo_db_conn_str = get_intranet_mongo_conn_str()
        self._store = Arctic(mongo_db_conn_str)
        if not self._store.library_exists(lib_name):
            self._store.initialize_library(lib_name, VERSION_STORE)
        self._lib = self._store[lib_name]

    def write_bin_object(self, bin_data: bytes, symbol: str):
        self._lib.write(symbol, bin_data)

    def read_bin_object(self, symbol: str) -> bytes:
        return self._lib.read(symbol).data

    def has_symbol(self, symbol: str) -> bool:
        return self._lib.has_symbol(symbol)
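# Usage sketch for ArcticBinary (assumes a reachable Mongo behind
# get_mongo_admin_conn_str(); the pickled payload and symbol name are
# illustrative only). VersionStore versions arbitrary picklable blobs, so each
# write_bin_object() call creates a new readable version of the symbol.
import pickle

store = ArcticBinary(lib_name="demo_binary_lib")
payload = pickle.dumps({"a": 1, "b": [2, 3]})
store.write_bin_object(payload, "demo_symbol")
if store.has_symbol("demo_symbol"):
    roundtripped = pickle.loads(store.read_bin_object("demo_symbol"))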
def __init__(self, TYPE):
    """
    :param TYPE: 'BS', 'IS', 'CF'
    """
    ############ SETTING #############
    self.config = GetConfig()
    self.TYPE = TYPE  # 'BS', 'IS', 'CF'
    self.MONGO = self.config.MONGO
    self.CSV = self.config.CSV
    self.RAW = False
    self.outdir = self.config.fund_dir
    self.encode = self.config.encode
    self.proxypool = self.config.proxypool

    ############ CHANGE ABOVE SETTING #############

    if self.MONGO:
        from arctic import Arctic
        # mongod --dbpath D:/idwzx/project/arctic
        a = Arctic(self.config.ahost)
        a.initialize_library('ashare_{}'.format(self.TYPE))
        self.lib = a['ashare_{}'.format(self.TYPE)]

    self.result_dict = {}
def save_to_chunkstore_per_symbol(self):
    lib_name = "jy_equity_mkt_data"
    arctic_store = Arctic(get_mongo_admin_conn_str())
    arctic_store.delete_library(lib_name)
    arctic_store.initialize_library(lib_name, lib_type=CHUNK_STORE)
    lib_chunk_store = arctic_store[lib_name]

    df = self.load_all_close_price()
    df2 = df.pivot_table(values="close_price", index="t", columns="o", aggfunc=np.mean)
    df2.index.rename("date", inplace=True)

    i = 0
    for col in df2.columns:
        df3 = df2.loc[:, col]
        df3 = df3.dropna(axis=0)
        lib_chunk_store.write(col, df3, chunker=DateChunker(), chunk_size="D")
        i += 1
        if i % 2 == 0:
            print(f"{i}:{col}")
def __init__(self, collection_name, database_name=None, host=None, port=None):
    database_name, host, _port_not_used = mongo_defaults(db=database_name, host=host, port=port)

    # Arctic doesn't accept a port
    store = Arctic(host)
    library_name = database_name + "." + collection_name
    store.initialize_library(library_name)  # will this fail if already exists??
    library = store[library_name]

    self.database_name = database_name
    self.collection_name = collection_name
    self.host = host

    self.store = store
    self.library_name = library_name
    self.library = library
class Database(object):

    def __init__(self, sym, exchange):
        self.counter = 0
        self.data = list()
        self.sym = sym
        self.tz = TIMEZONE
        self.exchange = exchange

        if RECORD_DATA:
            self.db = Arctic(MONGO_ENDPOINT)
            self.db.initialize_library(ARCTIC_NAME, lib_type=TICK_STORE)
            self.collection = self.db[ARCTIC_NAME]
            print('\n%s is recording %s\n' % (self.exchange, self.sym))
        else:
            self.db = None
            self.collection = None

    def new_tick(self, msg):
        """
        If RECORD_DATA is TRUE, add streaming ticks to a list.
        After the list has accumulated BATCH_SIZE ticks, insert the batch
        into the Arctic Tick Store.

        :param msg: incoming tick
        :return: void
        """
        if self.db is not None:
            self.counter += 1
            msg['index'] = dt.now(tz=self.tz)
            msg['system_time'] = str(msg['index'])
            self.data.append(msg)
            if self.counter % BATCH_SIZE == 0:
                print('%s added %i msgs to Arctic' % (self.sym, self.counter))
                self.collection.write(self.sym, self.data)
                self.counter = 0
                self.data.clear()
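# Feeding the recorder above -- a sketch assuming RECORD_DATA is enabled and
# BATCH_SIZE is small; the tick dict shape is hypothetical. Arctic's TickStore
# expects a list of dicts, each carrying an 'index' timestamp, which is exactly
# what new_tick() stamps onto each message before batching the write.
db = Database(sym='BTC-USD', exchange='coinbase')
for price in (100.0, 100.5, 101.0):
    db.new_tick({'price': price, 'size': 1.0, 'type': 'trade'})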
class TickBlotter(AbstractBaseBlotter):

    def __init__(self, save_to_file: bool = True, host: str = 'localhost',
                 library: str = 'tick_blotter',
                 collection: str = 'test_blotter') -> None:
        self.db = Arctic(host)
        self.db.initialize_library(library, lib_type=TICK_STORE)
        self.store = self.db[library]
        self.collection = collection

    def write_to_file(self, data: Dict[str, Any]) -> None:
        data['index'] = pd.to_datetime(data['time'], utc=True)
        self.store.write(self.collection, [data])

    def save(self) -> None:
        data = []
        for d in self.blotter:
            d.update({'index': pd.to_datetime(d['time'], utc=True)})
            data.append(d)
        self.store.write(self.collection, data)

    def delete(self, query: Dict) -> str:
        raise NotImplementedError

    def clear(self) -> str:
        raise NotImplementedError
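# Usage sketch for TickBlotter (assumes a local MongoDB; the trade dict is
# hypothetical). write_to_file() persists one tick immediately, while save()
# flushes everything accumulated in self.blotter in a single TickStore write.
blotter = TickBlotter(host='localhost', library='tick_blotter',
                      collection='test_blotter')
blotter.write_to_file({'time': '2020-01-02 09:30:00',
                       'symbol': 'ES', 'price': 3200.25, 'qty': 1})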
class Store:
    """
    freq in ['min', 'hour', 'day']
    """
    chunk_size = {'min': 'D', 'hour': 'D', 'day': 'M'}

    def __init__(self, library=default_library, db='localhost', **kwargs):
        self.conn = Arctic(db)

    def write(self, symbol, data, freq='min', what='TRADES'):
        lib_name = f'{what}_{freq}'
        if lib_name in self.conn.list_libraries():
            lib = self.conn[lib_name]
            lib.append(symbol, data)
        else:
            # lib_type belongs to initialize_library, not to lib.write
            self.conn.initialize_library(lib_name, lib_type=CHUNK_STORE)
            lib = self.conn[lib_name]
            lib.write(symbol, data, chunk_size=self.chunk_size[freq])

    def read(self, symbol, freq='min', what='TRADES', **kwargs):
        lib_name = f'{what}_{freq}'
        lib = self.conn[lib_name]
        return lib.read(symbol, **kwargs)

    def check_data_availability(self, symbol):
        pass
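# Usage sketch for Store (assumes default_library is defined in the module and
# a local MongoDB is running; the DataFrame is illustrative). The library name
# is derived from (what, freq), so TRADES minute bars land in 'TRADES_min'.
import pandas as pd

s = Store()
bars = pd.DataFrame({'close': [1.0, 2.0]},
                    index=pd.date_range('2020-01-01', periods=2, freq='T'))
bars.index.name = 'date'   # ChunkStore's DateChunker keys chunks off a date index
s.write('SPY', bars, freq='min', what='TRADES')
back = s.read('SPY', freq='min', what='TRADES')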
class TimeSuiteWrite(object):
    params = list(range(len(TEST_SIZES)))
    param_names = ['5K * 10^']

    def setup(self, arg):
        self.store = Arctic("127.0.0.1")
        self.store.delete_library('test.lib')
        self.store.initialize_library('test.lib')
        self.lib = self.store['test.lib']

    def teardown(self, arg):
        self.store.delete_library('test.lib')
        self.lib = None

    def time_write_dataframe_random(self, idx):
        self.lib.write('df_bench_random', df_random[idx])

    def time_write_series_random(self, idx):
        self.lib.write('series_bench_random', s_random[idx])

    def time_write_dataframe_compressible(self, idx):
        self.lib.write('df_bench_compressible', df_compress[idx])

    def time_write_series_compressible(self, idx):
        self.lib.write('series_bench_compressible', s_compress[idx])
def initialize_db(self, lib):
    if lib:
        db = Arctic('localhost')
        if lib not in db.list_libraries():
            self.logger.info('Data library \'%s\' does not exist -- creating it', lib)
            db.initialize_library(lib, lib_type=TICK_STORE)
        self.dlib = lib
        self.dbconn = db[lib]
def ingest_trades(filename: str, library: str, symbol: str) -> None:
    store = Arctic('localhost')
    logger.info(f"Saving to library: {library}, symbol: {symbol}")

    # Defaults to VersionStore
    store.initialize_library(library)
    library = store[library]

    df = pd.read_csv(filename, names=COLUMN_NAMES)
    df.time = pd.to_datetime(df.time, unit='s')
    library.write(symbol, df, metadata={'source': 'csv'})
class ArcticStoreDatabase(Database):
    def __init__(self):
        super().__init__()
        self.db = None
        self.store = None

    def open(self, store='chunkstore'):
        self.db = Arctic('localhost')
        try:
            self.store = self.db[store]
        except LibraryNotFoundException:  # avoid a bare except
            self.db.initialize_library(store, lib_type=CHUNK_STORE)
            self.store = self.db[store]
        self.store._arctic_lib.set_quota(maxDBStorage * 1024 * 1024 * 1024)

    def close(self):
        pass  # no need to close arctic connection

    def remove(self, key):
        self.store.delete(key)

    # used for debugging
    def getMetadata(self, key):
        return self.store.read_metadata(key)

    def setMetadata(self, key, metadata):
        self.store.write_metadata(key, metadata)

    def _save(self, key, data):
        if self.has_key(key):
            self.store.append(key, data)
        else:
            self.store.write(key, data, chunk_size=chunkSizes.get(key, 'M'))

    def get(self, key, start=None, end=None, iterator=False):
        if not iterator:
            return self.store.read(key, chunk_range=DateRange(start, end, CLOSED_CLOSED))
        else:
            return self.store.iterator(key, chunk_range=DateRange(start, end, CLOSED_CLOSED))

    def getLatestRow(self, key):
        latestDate = self.store.read_metadata(key)['end']
        return self.get(key, start=latestDate, end=None)

    def getFirstRow(self, key):
        firstDate = self.store.read_metadata(key)['start']
        return self.get(key, start=None, end=firstDate)

    def has_key(self, key):
        return self.store.has_symbol(key)

    def list_keys(self):
        return self.store.list_symbols()
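# Usage sketch for ArcticStoreDatabase (assumes the module-level maxDBStorage
# and chunkSizes from the original exist; the DataFrame is illustrative).
# _save() appends when the symbol already exists, so repeated calls extend the
# stored range, and get() slices it back out with a closed-closed DateRange.
import pandas as pd

db = ArcticStoreDatabase()
db.open('chunkstore')
df = pd.DataFrame({'data': [1, 2]},
                  index=pd.date_range('2020-01-01', periods=2, name='date'))
db._save('demo_key', df)
print(db.get('demo_key', start=None, end=None))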
def insert_random_data(config, args, n_rows):
    store = Arctic(args.mongodb, app_name="benchmark")
    lib_name = lib_name_from_args(config)

    store.delete_library(lib_name)
    store.initialize_library(lib_name, segment='month')

    lib = store[lib_name]

    for sym in range(args.symbols):
        df = gen_oneminute_dataset(n_row=n_rows, n_col=n_rows, dense=args.dense)
        lib.write('sym' + str(sym), df)
def _get_lib(lib_name: str = "default", lib_type: str = VERSION_STORE):
    client = MongoClient(host=config.MONGO_HOST,
                         port=27017,
                         username=config.MONGO_USER,
                         password=config.MONGO_PWD,
                         authSource=config.MONGO_AUTHDB)
    a = Arctic(client)
    if not a.library_exists(lib_name):
        a.initialize_library(lib_name, lib_type=lib_type)
    return a[lib_name]
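# Usage sketch for _get_lib (assumes the config module carries valid Mongo
# credentials; the library and symbol names are hypothetical). Because
# library_exists() is checked first, the call is safe to repeat from multiple
# entry points without re-initializing.
lib = _get_lib("demo_versionstore")
lib.write("demo_symbol", [1, 2, 3])       # VersionStore pickles arbitrary data
print(lib.read("demo_symbol").data)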
def init():
    # Connect to Local MONGODB
    logger.info('init start')
    store = Arctic("192.168.1.117:27018")
    # Create the library - defaults to VersionStore
    store.initialize_library('bert2vec')
    # Access the library
    library = store['bert2vec']

    c = pymongo.MongoClient(host=myconfig.MONGO_HOST, port=myconfig.MONGO_PORT)
    db = c.bert2vec
    coll = db.guizhou
    # lib, conn, db, coll
    logger.info('init ok')
    return library, c, db, coll
def init():
    # Connect to Local MONGODB
    logger.info('init start')
    store = Arctic('localhost')
    # Create the library - defaults to VersionStore
    store.initialize_library('bert2vec')
    # Access the library
    library = store['bert2vec']

    c = pymongo.MongoClient(host='localhost', port=27018)
    db = c.bert2vec
    coll = db.guizhou
    # lib, conn, db, coll
    logger.info('init ok')
    return library, c, db, coll
def main():
    parser = argparse.ArgumentParser(prog="store", description='Store data to DB')
    parser.add_argument('--host', help="MongoDB host", default=MONGO_HOST_DEFAULT, type=str)
    parser.add_argument('--updater', help="Updater", default=UPDATER_DEFAULT, type=str)
    parser.add_argument('-s', '--source', help="Source", default=SOURCE_DEFAULT, type=str)
    parser.add_argument('--symbols', help="Symbol", default=SYMBOLS_DEFAULT, type=str)
    parser.add_argument('--start', help="Start date", default='', type=str)
    parser.add_argument('--end', help="End date", default='', type=str)
    parser.add_argument('--freq', help="Freq", default='', type=str)
    parser.add_argument('--max_rows', help="max_rows", default=10, type=int)
    parser.add_argument('--max_columns', help="max_columns", default=6, type=int)
    parser.add_argument('--api_key', help="API key", default='', type=str)
    parser.add_argument('--expire_after',
                        help="Cache expiration ('0': no cache, '-1': no expiration, "
                             "'HH:MM:SS.X': expiration duration)",
                        default='24:00:00.0', type=str)
    args = parser.parse_args()

    pd.set_option('max_rows', args.max_rows)
    pd.set_option('expand_frame_repr', False)
    pd.set_option('max_columns', args.max_columns)

    start = pd.to_datetime(args.start) if args.start != '' else None
    end = pd.to_datetime(args.end) if args.end != '' else None
    freq = args.freq if args.freq != '' else None

    symbols = args.symbols.split(',')

    session = get_session(args.expire_after, 'cache')
    my_updater = updater(args.updater, session=session)
    if args.api_key != '':
        my_updater.set_credentials(api_key=args.api_key)

    store = Arctic(args.host)
    library_name = my_updater.library_name(args.source, freq)
    print(library_name)
    store.initialize_library(library_name)
    library = store[library_name]

    for symbol in symbols:
        update(library, my_updater, symbol, start, end, freq, args.source.lower())
def read_candles():
    # Connect to Local MONGODB
    store = Arctic('localhost')

    # Create the library - defaults to VersionStore
    store.initialize_library('crypto')

    # Access the library
    library = store['crypto']

    # Reading the data
    item = library.read('XBTUSD')
    xbtusd = item.data
    for x in xbtusd:
        print(x)
def write_candles():
    client = broker.get_client(exc.BITMEX)
    candles = client.trades_candle("XBTUSD", mex.candle_1d)
    candles.reverse()

    # Connect to Local MONGODB
    store = Arctic('localhost')

    # Create the library - defaults to VersionStore
    store.initialize_library('crypto')

    # Access the library
    library = store['crypto']

    library.write('XBTUSD', candles, metadata={'source': 'Bitmex'})
def conn_database():
    username = oc.cfg['database']['username']
    password = oc.cfg['database']['password']
    url = oc.cfg['database']['url']
    port = oc.cfg['database']['port']
    auth_db = oc.cfg['database']['auth_db']

    name = urllib.parse.quote_plus(username)
    passw = urllib.parse.quote_plus(password)

    conn = Arctic('mongodb://%s:%s@%s:%s/%s' % (name, passw, url, port, auth_db))
    conn.initialize_library('username.scratch')
    lib = conn['username.scratch']
    return lib
def convert_mkt_history_data(self):
    arctic_store = Arctic(get_mongo_admin_conn_str())
    lib_name = "jy_chn_equity_otvn_chunkstore"
    arctic_store.delete_library(lib_name)
    arctic_store.initialize_library(lib_name, lib_type=CHUNK_STORE)
    lib_chunk_store = arctic_store[lib_name]

    # Date ranges are hardcoded for now; a more elegant expression is possible
    for i, t_period in enumerate([(date(1990, 1, 1), date(2000, 1, 15)),
                                  (date(2000, 1, 15), date(2010, 1, 15)),
                                  (date(2010, 1, 15), date(2020, 1, 1))]):
        # Benchmarks showed that for daily data, "M" as the chunk_size gives the
        # best overall write and read performance
        self._convert_period_equity_mkt_data_to_arctic(
            t_period[0], t_period[1], lib_chunk_store, "mkt_data", "M", i == 0)
def write_to_db(target_building, iterator):
    '''write the data from a building'''
    conn = Arctic('localhost')

    # create a lib for the tgt_bldg; a lib is akin to a collection
    if target_building not in conn.list_libraries():
        conn.initialize_library(target_building, lib_type=CHUNK_STORE)
        print('library for %s created' % target_building)

    # connect to the lib for writing
    lib = conn[target_building]
    for sensor, timestamps, data in iterator:
        df = pd.DataFrame({'date': timestamps, 'data': data})
        df.set_index('date', inplace=True)
        lib.write(sensor, df)
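# Usage sketch for write_to_db (hypothetical building name and sensor stream,
# assuming a local MongoDB). The iterator yields (sensor_id, timestamps, values)
# triples; each sensor becomes one ChunkStore symbol keyed by its date index.
import pandas as pd

readings = [
    ('temp_sensor_1',
     pd.date_range('2020-01-01', periods=3, freq='H'),
     [20.1, 20.4, 20.2]),
]
write_to_db('demo_building', iter(readings))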
def __init__(self, database_name, collection_name, host=DEFAULT_MONGO_HOST):
    if database_name is None:
        database_name = DEFAULT_DB

    store = Arctic(host)
    library_name = database_name + "." + collection_name
    store.initialize_library(library_name)  # will this fail if already exists??
    library = store[library_name]

    self.database_name = database_name
    self.collection_name = collection_name
    self.host = host

    self.store = store
    self.library_name = library_name
    self.library = library
def __init__(self, library='YAHOO', timeframe='1D'):
    self.library = library
    self.timeframe = timeframe

    # Connect to Local MONGODB
    store = Arctic('localhost')

    # Create the library - defaults to VersionStore
    libs = store.list_libraries()
    libName = library + '-' + timeframe
    if libName not in libs:
        store.initialize_library(libName)

    # Access the library
    self.lib = store[libName]
    self.tickers = self.lib.list_symbols()
    self.tickers.sort()
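# Usage sketch for the reader above (assumes symbols were previously written
# into a 'YAHOO-1D' library; the class name DataStore is hypothetical since the
# snippet does not show it). Per-(library, timeframe) libraries keep symbol
# lists small and list_symbols() calls cheap.
ds = DataStore(library='YAHOO', timeframe='1D')
print(ds.tickers)   # sorted symbols available in 'YAHOO-1D'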
def __init__(self, database_name, collection_name, host=DEFAULT_MONGO_HOST):
    store = Arctic(host)
    library_name = database_name + "." + collection_name
    store.initialize_library(library_name)  # will this fail if already exists??
    library = store[library_name]

    self.database_name = database_name
    self.collection_name = collection_name
    self.host = host

    self.store = store
    self.library_name = library_name
    self.library = library
def main():
    client = MongoClient()
    histData = client.HistoricalData
    # HistImporter(histData)
    # test200201 = monthToCollection(13)

    store = Arctic('localhost')
    store.initialize_library('HistTickStore')
    histlibrary = store['HistTickStore']
    data2 = histlibrary.read('2019-05')

    hashPrimitiveAndStore(inspect.getsource(realTimeUpdate), 0, 0)
    rtData = client.RealTimeData

    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(getHistTickData("2001", "2001", histData))
    networkHashExample()

    # if not "HistoricalData" in client.list_database_names():
    if "RealTimeData" not in client.list_database_names():
        realTimeUpdate()
    return 1
class TimeSuiteAppend(object):
    params = list(range(len(TEST_SIZES)))
    param_names = ['5K * 10^']

    def __init__(self):
        self.store = Arctic("127.0.0.1")

    def setup(self, idx):
        self.store.delete_library('test.lib')
        self.store.initialize_library('test.lib')
        self.lib = self.store['test.lib']
        self.lib.write('test_df', df_random[idx])

    def teardown(self, arg):
        self.store.delete_library('test.lib')
        self.lib = None

    def time_append_dataframe(self, idx):
        self.lib.append('test_df', df_random[idx])
pd.set_option('max_columns', 6)

from arctic_updater.updaters.truefx import TrueFXUpdater

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

my_updater = TrueFXUpdater()
symbol = 'EURUSD'
year, month = 2015, 11
%time df = my_updater._read_one_month(symbol, year, month)

# Arctic (MongoDB)
from arctic import Arctic
store = Arctic('localhost')
library_name = 'test'
store.initialize_library(library_name)
library = store[library_name]
%time library.write(symbol, df)
%time df_retrieved = library.read(symbol).data

# HDF5
filename = my_updater._filename(symbol, year, month, '.h5')
%time df.to_hdf(filename, "data", mode='w', complevel=0, complib='zlib', format='table')
%time df_retrieved = pd.read_hdf(filename)

# Make unique index
# http://stackoverflow.com/questions/34575126/create-a-dataframe-with-datetimeindex-with-unique-values-by-adding-a-timedelta/34576154#34576154
df = df.reset_index()
%time df['us'] = (df['Date'].groupby((df['Date'] != df['Date'].shift()).cumsum()).cumcount()).values.astype('timedelta64[us]')
    data = ystockquote.get_historical_prices(ticker, start_date, end_date)
    df = pandas.DataFrame(collections.OrderedDict(sorted(data.items()))).T
    # note: convert_objects was removed in modern pandas; use df.apply(pandas.to_numeric) there
    df = df.convert_objects(convert_numeric=True)
    return df

################################################
# VersionStore: Storing and updating stock data
################################################

arctic = Arctic("localhost")

# Create a VersionStore library
arctic.delete_library("jblackburn.stocks")
arctic.initialize_library("jblackburn.stocks")
arctic.list_libraries()

stocks = arctic["jblackburn.stocks"]

# get some prices
aapl = get_stock_history("aapl", "2015-01-01", "2015-02-01")
aapl

# store them in the library
stocks.write("aapl", aapl, metadata={"source": "YAHOO"})
stocks.read("aapl").data["Adj Close"].plot()
stocks.read("aapl").metadata
stocks.read("aapl").version
    def delete(self, query):
        """
        Simple delete method
        """
        self._collection.delete_one(query)


# Hook the class in for the type string 'CustomArcticLibType'
register_library_type(CustomArcticLibType._LIBRARY_TYPE, CustomArcticLibType)

# Create an Arctic instance pointed at a mongo host
store = Arctic(mongo_host)

### Initialize the library
# Map username.custom_lib -> CustomArcticLibType
store.initialize_library("username.custom_lib", CustomArcticLibType._LIBRARY_TYPE)

# Now pull our username.custom_lib; note that it has the:
#  - query(...)
#  - store(...)
#  - delete(...)
# API we defined above
lib = store["username.custom_lib"]

# Store some items in the custom library type
lib.store(Stuff("thing", dt(2012, 1, 1), object()))
lib.store(Stuff("thing2", dt(2013, 1, 1), object()))
lib.store(Stuff("thing3", dt(2014, 1, 1), object()))
lib.store(Stuff(["a", "b", "c"], dt(2014, 1, 1), object()))
def get_stock_history(ticker, start_date, end_date):
    data = ystockquote.get_historical_prices(ticker, start_date, end_date)
    df = pandas.DataFrame(collections.OrderedDict(sorted(data.items()))).T
    df = df.convert_objects(convert_numeric=True)
    return df

################################################
# VersionStore: Storing and updating stock data
################################################

arctic = Arctic('localhost')
lst = arctic.list_libraries()
if 'KRX' not in lst:
    arctic.initialize_library('KRX')

#################################
# Dealing with lots of data
#################################

# KRX library
lib = arctic['KRX']

def load_all_stock_history_KRX():
    # krx = pd.read_csv('krx_market_symbols.csv', dtype=object)
    stocks = [x for x in krx['code_yahoo']]
    print(len(stocks), " symbols")
import datetime
import json

import pandas as pd

from arctic import Arctic


def download_daily_bars(instrument, start, end):
    bars = get_historical_price(instrument, start, end)
    dump = json.dumps(bars, ensure_ascii=False).encode('utf-8')
    df = pd.read_json(dump)
    print(df.head())
    if df.empty:
        return df
    df = df.set_index('Date')
    return df


store = Arctic('localhost')
if 'KRX_G' not in store.list_libraries():
    store.initialize_library('KRX_G')
lib = store['KRX_G']

krx = pd.read_csv('krx_market_symbols.csv', dtype=object)
stocks = [x for x in krx['code_google']]
print(len(stocks), " symbols")

begin = datetime.date(2000, 1, 1)
end = datetime.date(2016, 5, 30)

missing = ['KRX:152550']

# for i, stock in enumerate(stocks[160:769]):
for i, stock in enumerate(missing):
    print("%d th code=%s" % (i, stock))
    now = datetime.datetime.now()
    df = download_daily_bars(stock, begin, end)
from arctic import Arctic

# Connect to Local MONGODB
store = Arctic('localhost')

# Create the library - defaults to VersionStore
store.initialize_library('NASDAQ')

# Access the library
library = store['NASDAQ']

# Load some data - maybe from Quandl
import quandl
quandl.ApiConfig.api_key = "Cxzxjy2eHKXgwMjVFhbH"
aapl = quandl.get("GOOG/NASDAQ_AAPL")

# Store the data in the library
library.write('AAPL', aapl, metadata={'source': 'Quandl'})

# Reading the data
item = library.read('AAPL')
aapl = item.data
metadata = item.metadata
from datetime import datetime as dt

import pandas as pd

from arctic import Arctic

# Connect to the mongo-host / cluster
store = Arctic(mongo_host)

# Data is grouped into 'libraries'.
# Users may have one or more named libraries:
store.list_libraries()

# Create a library
store.initialize_library('username.scratch')

# Get a library
# library = m['username.<library>']
library = store['username.scratch']

# Store some data in the library
df = pd.DataFrame({'prices': [1, 2, 3]},
                  [dt(2014, 1, 1), dt(2014, 1, 2), dt(2014, 1, 3)])
library.write('SYMBOL', df)

# Read some data from the library
# (Note the returned object has an associated version number and metadata.)
library.read('SYMBOL')

# Store some data into the library
class MongoDB(object):
    # to start server - cd to "C:\Program Files\MongoDB\Server\4.0\bin"
    # then use mongod --dbpath "path to db" when starting the database server to point to the correct save location
    # use dataframe.to_dict('records') to convert to dicts to insert into the DB. The DB is stored in the mongoDB software
    # use Arctic from AHL to store time series data. It uses a MultiIndex pd.DataFrame to store, and other data goes
    # into metadata as a dict. It will open its own server, so no need to run
    # https://www.mongodb.com/json-and-bson
    # https://stackoverflow.com/questions/20796714/how-do-i-start-mongo-db-from-windows

    # To back up the database, navigate to the bin folder of mongodb, then run "mongodump --out ..." where ... is the
    # output folder to dump
    # To load into another base, navigate to the bin folder again, and use "mongorestore --drop ..." where ... is the
    # folder where the database backup folders were dumped
    # MUST RUN CMD AS ADMINISTRATOR!!!!!!!

    def __init__(self):
        self.conn = Arctic("127.0.0.1")

    def verify_library(self, library):
        """ Method to test if library exists in database """
        libs = self.conn.list_libraries()
        if library in libs:
            return
        # if the library doesn't exist, alert and ask if we should create it
        else:
            print('"%s" library does not exist in server data path' % library)
            create = input('create "%s"? (y, n)' % library)
            if create.lower() == 'y':
                self.conn.initialize_library(library)
            else:
                return

    @staticmethod
    def getMultiIndex(data, index, columns=None):
        """ Method to convert dataframe to MultiIndex

        data: pd.DataFrame
            data to be MultiIndexed
        index: list
            list of strings containing names of columns to be used as MultiIndex (in order left to right)
        columns:
        """
        data = data.set_index(index)
        return data

    @timeMe
    def save(self, data, library, meta_data=None, append=True):
        """ Method to save to mongodb library using arctic database.

        Parameters
        ----------
        data: pd.DataFrame (MultiIndex)
            DataFrame containing data for database in MultiIndex structure. Outer index must be
            the index used for the arctic "symbol" in the "library"
        library: str
            which library to write to
        meta_data: dict
            Dictionary of metadata values to include in save. If not provided will maintain
            existing metadata in database or set to None. Keys match "symbol" from data outer index.
        append: bool
            True will append data to existing data for each "symbol" in database
            False will replace data entirely
        """
        # avoid a shared mutable default argument; this dict is mutated below
        meta_data = {} if meta_data is None else meta_data
        self.verify_library(library)

        if isinstance(data, pd.DataFrame):
            # get unique symbols to write
            indicies = list(data.index.unique())
            # take first index as "symbol" for database
            symbols = indicies if isinstance(indicies[0], str) else np.unique([x[0] for x in indicies])
            symbol_list = self.conn[library].list_symbols()

            # fill in blank metadata for symbols not in metadata
            noMeta = np.array(symbols)[~np.in1d(symbols, list(meta_data.keys()))]
            meta_data.update({x: None for x in noMeta})

            for sym in symbols:
                data_cut = data.loc[sym]
                data_cut = pd.DataFrame(data_cut).T if isinstance(data_cut, pd.Series) else data_cut
                if sym in symbol_list:
                    # get current data for symbol
                    db = self.conn[library].read(sym)
                    db_data = db.data
                    # if metadata is present in database but is None in metadata fed in then take from database
                    meta_data[sym] = db.metadata

                # if the symbol is already in the database and we want to "update" the series rather than simply replace
                if sym in symbol_list and append:
                    data_post = pd.concat([db_data, data_cut], axis=0)
                    # remove duplicates and keep latest data
                    if len(data_post.index.unique()) == 1:
                        data_post = data_post.drop_duplicates(keep='last')
                    else:
                        data_post = data_post.groupby(data_post.index).last()
                else:
                    data_post = data_cut

                # sort the data by the indicies
                data_post = data_post.sort_index()
                self.conn[library].write(sym, data_post, metadata=meta_data[sym])

    @timeMe
    def read(self, library, symbols=None):
        """ Method to read from mongodb library using arctic database.

        Parameters
        ----------
        library: str
            which library to read from
        symbols: list
            of "symbols" in "library" to read
        """
        if symbols is None:
            symbols = self.conn[library].list_symbols()

        data = {}
        for sym in symbols:
            db = self.conn[library].read(sym)
            data[sym] = {}
            data[sym]['data'] = db.data
            if db.metadata is not None:
                data[sym].update(db.metadata)

        return data
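# Usage sketch for the MongoDB wrapper above (assumes a local mongod and the
# timeMe decorator from the original module; names and values are illustrative).
# The outer index level is the Arctic symbol, so one MultiIndex frame fans out
# into one versioned symbol per instrument. Note: verify_library() prompts on
# stdin before creating a missing library.
import pandas as pd

mdb = MongoDB()
idx = pd.MultiIndex.from_product([['AAPL', 'MSFT'],
                                  pd.date_range('2020-01-01', periods=2)],
                                 names=['symbol', 'date'])
frame = pd.DataFrame({'close': [300.0, 301.0, 160.0, 161.0]}, index=idx)
mdb.save(frame, 'demo_library', meta_data={'AAPL': {'source': 'demo'}}, append=False)
out = mdb.read('demo_library', symbols=['AAPL'])   # {'AAPL': {'data': ..., 'source': 'demo'}}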