def __init__(self, directory):
    """Open the "OHLCV" and "FD" pystore stores found under ``directory``.

    Parameters
    ----------
    directory : str
        Path handed to ``pystore.set_path`` before the stores are opened.
    """
    super().__init__()

    # Function-scope import, as in the original implementation.
    import pystore

    self.directory = directory
    pystore.set_path(self.directory)

    # Stores first, then the single collection used from each of them.
    self.ohlcv_store = pystore.store("OHLCV")
    self.fd_store = pystore.store("FD")
    self.ohlcv_eod_collection = self.ohlcv_store.collection("EOD")
    self.fd_q_collection = self.fd_store.collection("Q")

    # Cache whatever list_symbols() reports at construction time.
    self.available_symbols = self.list_symbols()
def store_in_pystore(self):
    """Write ``self.df`` to pystore as ``self.item_name``.

    The store and collection handles are (re)opened from ``self.store_name``
    and ``self.collection_name`` and kept on the instance. An existing item
    with the same name is overwritten.
    """
    self.store = store = pystore.store(self.store_name)
    self.collection = target = store.collection(self.collection_name)
    target.write(
        self.item_name,
        self.df,
        metadata={'source': 'tsetmc'},
        overwrite=True,
    )
def __init__(self, name: str, path: str):
    """Create a PystoreConnector object that points to a Pystore.

    Parameters
    ----------
    name : str
        name of the store
    path : str
        path to the pystore directory

    Raises
    ------
    ModuleNotFoundError
        If pystore is not installed (an install hint is printed first).
    """
    try:
        import pystore
    except ModuleNotFoundError:
        print("Install pystore, follow instructions at "
              "https://github.com/ranaroussi/pystore#dependencies")
        raise

    self.name, self.path = name, path
    pystore.set_path(self.path)
    self.store = pystore.store(self.name)
    self.libs: dict = {}
    self._initialize()
    self.models = ModelAccessor(self)

    # Stores written by older PastaStore versions may have an empty
    # oseries_models library; fill in the oseries -> models links.
    self._update_all_oseries_model_links()
def delete_pystore(path: str, name: str,
                   libraries: Optional[List[str]] = None) -> None:
    """Delete a pystore, or selected libraries (collections) from it.

    Parameters
    ----------
    path : str
        path to pystore
    name : str
        name of the pystore
    libraries : Optional[List[str]], optional
        list of library names to delete, by default None which deletes
        the whole store

    Raises
    ------
    ModuleNotFoundError
        If pystore is not installed (a hint is printed first).
    """
    try:
        import pystore
    except ModuleNotFoundError:
        print("Please install `pystore`!")
        raise

    print(f"Deleting pystore: '{name}' ...", end="")
    pystore.set_path(path)

    if libraries is not None:
        # Selective delete: remove only the requested collections.
        store = pystore.store(name)
        for lib in libraries:
            print()
            store.delete_collection(lib)
            print(f" - deleted: {lib}")
        return

    # No selection given: drop the store itself.
    pystore.delete_store(name)
    print(" Done!")
def read_data(symbol, interval):
    """Return the item ``interval`` of collection ``symbol`` as a DataFrame.

    The data is read from the 'binance' store under './pystore'.
    """
    pystore.set_path('./pystore')
    binance = pystore.store('binance')
    return binance.collection(symbol).item(interval).to_pandas()
def run():
    """Fetch data for each symbol/period pair and save it to pystore.

    For every symbol a collection is opened in the 'binance' store under
    './pystore'; for every period the result of ``get_data`` is written as
    an item named after the period, overwriting any existing item. Empty
    results are skipped.
    """
    symbols = ["BTCEUR"]
    periods = [
        Client.KLINE_INTERVAL_1DAY,
        Client.KLINE_INTERVAL_1HOUR,
        Client.KLINE_INTERVAL_15MINUTE,
    ]

    # The requested range is the same for every request, so build it once.
    first_year, last_year = 2018, 2020
    range_start = "01-01-{}".format(first_year)
    range_end = "12-31-{}".format(last_year)

    pystore.set_path('./pystore')
    store = pystore.store('binance')

    for symbol in symbols:
        collection = store.collection(symbol)
        for period in periods:
            data = get_data(symbol, period, range_start, range_end)
            if data.empty:
                continue
            collection.write(period, data, overwrite=True)
            print("saved {} {}".format(symbol, period))
def test_obs_from_pystore_item():
    """Build a GroundwaterObs from the first item of the first test collection.

    Returns
    -------
    The object produced by ``obs.GroundwaterObs.from_pystore_item``.
    """
    import pystore

    pystore.set_path("./tests/data/2019-Pystore-test")
    store = pystore.store("test_pystore")

    first_collection = store.collections[0]
    coll = store.collection(first_collection)

    item_names = list(coll.list_items())
    item = coll.item(item_names[0])

    return obs.GroundwaterObs.from_pystore_item(item)
def __init__(self, user_settings, storeName='AInvesting'):
    """Point pystore at the configured database path and open ``storeName``.

    Parameters
    ----------
    user_settings
        Settings object passed to ``getDatabasePath`` to resolve the path.
    storeName : str, optional
        Name of the store to open, by default 'AInvesting'.
    """
    database_path = getDatabasePath(user_settings)
    pystore.set_path(database_path)

    # Opening the store through pystore.store, as in the original code.
    self.store = pystore.store(storeName)

    # Starts empty; used as a cache.
    self.cacheDict = dict()
def __init__(self, start_date=None, end_date=None):
    """Open the 'finance' store and resolve the date range to work on.

    Parameters
    ----------
    start_date : optional
        Anything accepted by ``pd.Timestamp``. When not given (or falsy),
        the resolved ``end_date`` is used.
    end_date : optional
        Anything accepted by ``pd.Timestamp``. When not given (or falsy),
        today is used if the NYSE calendar reports it as a session,
        otherwise ``self.nyse.previous_close(today)``.
    """
    self.nyse: tc.TradingCalendar = tc.get_calendar('NYSE')
    self.store = pystore.store('finance')

    today = pd.Timestamp(date.today(), tz='UTC')

    # Explicit branches instead of `x and a or b`: the choice now depends
    # only on whether the argument was supplied, never on the truthiness
    # of the converted timestamp.
    if end_date:
        end_date = pd.Timestamp(end_date, tz='UTC')
    elif self.nyse.is_session(today):
        end_date = today
    else:
        end_date = self.nyse.previous_close(today)

    start_date = pd.Timestamp(start_date, tz='UTC') if start_date else end_date

    self.start_date = start_date
    self.end_date = end_date
def update_store(filename='Data.csv', name_map=name_map, path='./db',
                 date_format='%d/%m/%Y', overwrite=False):
    """Load a transactions CSV and store it per account in the 'accounts' store.

    Parameters
    ----------
    filename : str
        CSV file with the new transaction data.
    name_map : dict
        Maps CSV column names to the internal column names; only the mapped
        columns are kept.
    path : str
        Directory handed to ``pystore.set_path``.
    date_format : str
        ``strftime``-style format of the date column in the CSV.
    overwrite : bool
        If True, each account's 'transactions' item is rewritten. Otherwise
        the data is appended to an existing item, or written if none exists.

    Returns
    -------
    The opened 'accounts' pystore store.

    Notes
    -----
    Account names and numbers are read from 'accounts.txt' in the working
    directory, one ``<name> <number>`` pair per line.
    """
    pystore.set_path(path)
    accounts_store = pystore.store('accounts')

    account_map = dict()
    with open('accounts.txt') as accounts:
        for account in accounts:
            # A blank line (e.g. a trailing newline) would break the
            # two-value unpack below, so skip it.
            if not account.strip():
                continue
            name, number = account.split(' ')
            account_map[name] = int(number)

    # read in new data
    data = pd.read_csv(filename)
    # rename columns to match internal representation
    data.rename(columns=name_map, inplace=True)
    columns = name_map.values()

    # Replace the column rather than assigning through .loc[:, DATE]:
    # recent pandas sets .loc[:, col] in place and would keep the original
    # object dtype instead of switching to datetime64.
    data[DATE] = pd.to_datetime(data[DATE], format=date_format)

    # Keep only the desired columns; copy so the in-place edits below act
    # on an independent frame rather than on a slice of the raw data.
    data = data.loc[:, [*columns]].copy()
    data.fillna(0, inplace=True)  # fill any NaN/blank values with 0

    # remove the debit column, if there is one
    if DEBIT in columns:
        # merge the debit column into the credit column and remove
        data[CREDIT] = data[CREDIT] - data[DEBIT]
        data.drop(columns=DEBIT, inplace=True)

    # finish processing and save data into separate accounts
    for name, number in account_map.items():
        # add as a collection if not already present, assign for convenience
        collection = accounts_store.collection(name)
        has_transactions = 'transactions' in collection.list_items()

        if has_transactions:
            metadata = collection.item('transactions').metadata
        else:
            metadata = dict()
        metadata.update(number=number)

        # TODO: can we somehow mark the index as pre-sorted??
        transactions = (data[data[ACCOUNT_NO] == number]
                        .drop(columns=ACCOUNT_NO)
                        .set_index(DATE))

        if overwrite:
            collection.write('transactions', transactions,
                             metadata=metadata, overwrite=overwrite)
        elif has_transactions:
            collection.append('transactions', transactions,
                              metadata=metadata)
        else:
            collection.write('transactions', transactions,
                             metadata=metadata)

    return accounts_store
def get_pystore(data_dir, store_label='iex_data_store'):
    """Open (and return) a pystore store located under ``data_dir``.

    example:
        store = get_pystore(data_dir)

    :param data_dir: path object exposing ``as_posix()``
    :param store_label: name of the store to open
    :return: the pystore store
    """
    root = data_dir.as_posix()
    pystore.set_path(root)
    return pystore.store(store_label)
def __init__(self, name, root, config, subreddit):
    """Bind this object to collection ``name`` in the ``subreddit`` datastore.

    The datastore lives under ``<root>/data/store`` and is opened with the
    'fastparquet' engine argument. ``self.path`` is the join of the
    datastore's and the collection's own path attributes.
    """
    self.name = name
    self.root = root
    self.config = config
    self.subreddit = subreddit

    # datastore
    store_root = os.path.join(self.root, 'data', 'store')
    db.set_path(store_root)
    self.datastore = db.store(self.subreddit, 'fastparquet')

    # collection
    self.collection = self.datastore.collection(self.name)
    self.path = os.path.join(
        self.datastore.datastore,
        self.collection.collection,
    )
def restore_from_pystore(self, indexs):
    """Load ``self.df`` from pystore, or initialise an empty item.

    Parameters
    ----------
    indexs
        Column name(s) passed to ``DataFrame.set_index`` on the loaded data.

    If the item exists and its data has at least one column, it is loaded
    into ``self.df`` and indexed by ``indexs``. Otherwise ``self.df``
    becomes an empty DataFrame, which is written to the collection
    (overwriting any existing item).
    """
    self.store = pystore.store(self.store_name)
    self.collection = self.store.collection(self.collection_name)

    if self.item_name in self.collection.list_items():
        self.item = self.collection.item(self.item_name)
        if self.item.data.shape[1] > 0:
            self.df = self.item.to_pandas()
            self.df.set_index(indexs, inplace=True)
            return

    # Item missing, or present without any columns: reset to an empty frame.
    self.df = pd.DataFrame([])
    self.collection.write(
        self.item_name,
        self.df,
        metadata={'source': self.pystore_source},
        overwrite=True,
    )
def to_pystore(self, store_name, pystore_path, groupby, item_name=None,
               overwrite=False):
    """Write timeseries and metadata to Pystore format.

    Rows are grouped by ``groupby``: every group becomes a Collection and
    every observation in that group becomes an Item.

    Parameters
    ----------
    store_name : str
        name of the store
    pystore_path : str
        path where store should be saved
    groupby : str
        column name to group by (grouping by location, for example, gives
        one collection per location holding that location's timeseries)
    item_name : str, optional
        key in each observation's ``meta`` to use as item name; by default
        None, in which case the item is named after ``obs.name``
    overwrite : bool, optional
        if True overwrite current data in store, by default False
    """
    import pystore

    pystore.set_path(pystore_path)
    store = pystore.store(store_name)

    for group_key, group in self.groupby(by=groupby):
        # Access a collection (create it if not exist)
        collection = store.collection(group_key, overwrite=overwrite)

        # Every column except the observation and its meta dict is copied
        # into the item metadata.
        extra_columns = [col for col in group.columns
                         if col not in ("obs", "meta")]

        for i, o in enumerate(group.obs):
            imeta = o.meta.copy()
            if 'datastore' in imeta:
                imeta['datastore'] = str(imeta['datastore'])

            row = group.iloc[i]
            for icol in extra_columns:
                value = row.loc[icol]
                # numpy integers are not json serializable
                imeta[icol] = int(value) if isinstance(value, np.integer) else value

            item_key = o.name if item_name is None else o.meta[item_name]
            collection.write(item_key, o, metadata=imeta,
                             overwrite=overwrite)
def write_data_to_pystore(
    *,
    config: Box,
    data_frames: List[pd.DataFrame],
    name: str,
    metadata: Optional[Dict] = None,
) -> None:
    """Create or update the pandas.DataFrames as Pystore collection.items.

    The DataFrames must contain time series data with the index of type
    datetime64. The lowest index (min(index)) will be converted as YYYY-MM
    string and set as the item name. Each dataframe must only contain data
    of one day! This function doesn't check max(df.index).

    Parameters
    ----------
    config : Box
        Accepted for interface compatibility; not read by this function.
    data_frames : list of pandas.DataFrame
        Frames to store. Empty frames are skipped.
    name : str
        Name of the collection in the "discovergy" store.
    metadata : dict, optional
        Metadata attached when a new item is created.

    Note, PyStore will make sure there is a unique index:
    ~/.../site-packages/pystore/collection.py in append(self, item, data,
    npartitions, epochdate, threaded, reload_items, **kwargs)
        183 # combined = current.data.append(new)
        184 combined = dd.concat([current.data, new]).drop_duplicates(keep="last")

    PyStore:
    https://medium.com/@aroussi/fast-data-store-for-pandas-time-series-data-using-pystore-89d9caeef4e2
    """
    if metadata is None:
        metadata = {}
    if not data_frames:
        log.debug(f"Did not receive any data for {name}.")
        return

    store = pystore.store("discovergy")
    collection = store.collection(name)

    # Private, mutable copy of the existing item names. It is updated as
    # items are created below, so a later frame for the same month is
    # appended instead of hitting write(..., overwrite=False) on an item
    # that this very call has just created.
    item_names = set(collection.list_items())

    for df in data_frames:
        if not len(df):
            log.debug(f"Did not find any data in {df}. Skipping...")
            continue

        first_ts = min(df.index)
        item_name = f"{first_ts.year}-{first_ts.month:02d}"

        if item_name in item_names:
            # FIXME (a8): Create one partition per day. There must be a
            # better way. Issue is that pandas loads the full pd.DataFrame
            # into memory. That requires memory.
            npartitions = first_ts.day
            log.debug(f"Appended to {item_name} {first_ts}.")
            collection.append(item_name, df, npartitions=npartitions)
        else:
            log.debug("Created new Dask DF.")
            collection.write(item_name, df, metadata=metadata, overwrite=False)
            item_names.add(item_name)
def read_store_metadata(store, items='all', verbose=False):
    """Read only the metadata of the items in a pystore.

    Parameters
    ----------
    store : str
        value passed to ``pystore.store`` to open the store
    items : str, list of str, optional
        if 'all' read all items
        if 'first' read first item only
        if list of str, read those items
    verbose : bool, optional
        if True, report items whose metadata cannot be read

    Returns
    -------
    list : list of dictionaries
        one metadata dictionary per item, extended with 'item_name' and
        'collection_name'; items without readable metadata yield a
        dictionary whose 'item_name' is an empty string
    """
    store = pystore.store(store)
    meta_list = []

    for coll in store.collections:
        c = store.collection(coll)

        if items == "all":
            wanted = c.list_items()
        elif items == 'first':
            wanted = list(c.list_items())[0:1]
        else:
            wanted = items

        for i in wanted:
            metadata = pystore.utils.read_metadata(c._item_path(i))
            if metadata is not None:
                metadata['item_name'] = i
            else:
                if verbose:
                    print("Cannot read metadata for {0}/{1}".format(coll, i))
                metadata = {'item_name': ""}
            metadata['collection_name'] = coll
            meta_list.append(metadata)

    return meta_list
def store_to_obslist(store, ObsClass=GroundwaterObs, collection_names=None,
                     item_names=None, nameby="item", verbose=True,
                     progressbar=False):
    """Convert a pystore to a list of ObsClass objects.

    Parameters
    ----------
    store : str
        value passed to ``pystore.store`` to open the store
    ObsClass : type of Obs
        type of observation DataFrames, by default GroundwaterObs
    collection_names : list of str, optional
        collections to read; if None all collections in the store are read
    item_names : list of str, optional
        item (Observation) names to extract, other items are ignored;
        if None all items are read
    nameby : str
        pick whether obs are named by collection or item name
    verbose : bool, optional
        forwarded to ``collection_to_obslist``
    progressbar : bool, optional
        if True, wrap the collection loop in ``tqdm``

    Returns
    -------
    list : list of obs
        list of ObsClass DataFrames
    """
    store = pystore.store(store)

    collections = store.collections if collection_names is None else collection_names
    iterator = tqdm(collections) if progressbar else collections

    obs_list = []
    for coll in iterator:
        obs_list.extend(
            collection_to_obslist(store, coll, ObsClass=ObsClass,
                                  item_names=item_names, nameby=nameby,
                                  verbose=verbose)
        )
    return obs_list
def __init__(self, name: str, path: str):
    """Create a PystoreConnector object that points to a Pystore.

    Parameters
    ----------
    name : str
        name of the store
    path : str
        path to the pystore directory

    Raises
    ------
    ModuleNotFoundError
        If pystore is not installed (an install hint is printed first).
    """
    try:
        import pystore
    except ModuleNotFoundError:
        print("Install pystore, follow instructions at "
              "https://github.com/ranaroussi/pystore#dependencies")
        raise

    self.name, self.path = name, path
    pystore.set_path(self.path)
    self.store = pystore.store(self.name)
    self.libs: dict = {}
    self._initialize()
def delete_pystore_connector(conn=None, path: Optional[str] = None,
                             name: Optional[str] = None,
                             libraries: Optional[List[str]] = None) -> None:
    """Delete a pystore database, or selected libraries from it.

    Parameters
    ----------
    conn : PystoreConnector, optional
        PystoreConnector object; when given, its ``name`` and ``path``
        take precedence over the ``name``/``path`` arguments
    path : str, optional
        path to pystore
    name : str, optional
        name of the pystore
    libraries : Optional[List[str]], optional
        list of library names to delete, by default None which deletes
        the whole store

    Raises
    ------
    ValueError
        If ``conn`` is not given and ``name`` or ``path`` is missing.
    """
    import pystore

    if conn is None:
        if name is None or path is None:
            raise ValueError("Please provide 'name' and 'path' OR 'conn'!")
    else:
        name, path = conn.name, conn.path

    print(f"Deleting PystoreConnector database: '{name}' ...", end="")
    pystore.set_path(path)

    if libraries is not None:
        store = pystore.store(name)
        for lib in libraries:
            print()
            store.delete_collection(lib)
            print(f" - deleted: {lib}")
        return

    pystore.delete_store(name)
    print(" Done!")
def store_in_pystore(self):
    """Write ``self.df`` (index reset to columns) to pystore.

    The store and collection handles are (re)opened and kept on the
    instance; an existing item named ``self.item_name`` is overwritten and
    tagged with ``self.pystore_source`` as its 'source' metadata.
    """
    self.store = store = pystore.store(self.store_name)
    self.collection = target = store.collection(self.collection_name)

    flat_frame = self.df.reset_index()
    target.write(
        self.item_name,
        flat_frame,
        metadata={'source': self.pystore_source},
        overwrite=True,
    )
""" Keeping low-usage historic time-series data in parquet form on my laptop (via pystore) to avoid paying cloud hosting fees - https://pypi.org/project/PyStore/ Keeping high-usage daily data in MongoDB Atlas All exposed using an unified Flask API (likely GraphQL via Graphene) - https://github.com/graphql-python/graphene-mongo/blob/master/docs/tutorial.rst """ import pystore import pandas as pd pystore.set_path('../data/historic/') instruments = pystore.store('instruments') stocks = instruments.collection('stocks') options = instruments.collection('options') stocks.delete_item("AAPL") #stocks.write('AAPL', aapl[:-1], metadata={'source': 'Quandl'}) #stocks.append('AAPL', aapl[2:3], npartitions=stocks.item("AAPL").data.npartitions) """ use snapshots to protect data - e.g. stocks.create_snapshot('snapshot_name') snap_df = stocks.item('AAPL', snapshot='snapshot_name') collection.write('AAPL', snap_df.to_pandas(), metadata={'source': 'Quandl'}, overwrite=True)
else: value = 0 cost = 0 for stock in self._stocks.values(): value += stock.get_value(stored_balance) cost += stock.get_cost(brokerage) return value / cost - 1 def __str__(self): ''' ''' sep = '\n ' return 'Stocks{}{sep}profit={} ({:.2f}%){sep}Stocks:\n{sep}'.format( super().__str__(), self.get_profit(), 100*self.get_profit(relative=True), sep=sep) +\ '\n\n{sep}'.join('{}{}{!s}'.format(symbol, sep, stock) for \ symbol, stock in self._stocks.items()) \ .format(sep=sep) if __name__ == '__main__': pystore.set_path('./db') store = pystore.store('accounts') savings = Account(store, 'savings') with open('API_KEY.txt') as magical_key: apikey = magical_key.readline() stocks = StocksAccount(store, 'stocks', apikey=apikey) print(savings) print() print(stocks)
from functools import lru_cache from collections import OrderedDict import numpy as np import pandas as pd import pandas_datareader import pandas_datareader.data as web import pystore import unidecode import requests_cache logging.getLogger(__name__).addHandler(logging.NullHandler()) storage_path = os.path.expanduser("~/.prcc") pystore.set_path(storage_path) collection = pystore.store("data").collection("all") requests_cache.core.install_cache(os.path.join(storage_path, "cache"), "sqlite", expire_after=86400) _last_api_call = 0.0 _b3_indices = { # Índices Amplos "ibovespa": "IBOV", "ibrx100": "IBRX", "ibrx50": "IBXL", "ibra": "IBRA", # Índices de Governança "igcx": "IGCX", "itag": "ITAG",
import quandl
import pystore
import time

# Scratch script: open the 'CHRIS' collection of a local pystore datastore
# and list what it contains.
p = "./share/pystore"
pystore.set_path(p)
store = pystore.store('quandl_datastore')

it = 'CHRIS'

# Access a collection (create it if not exist)
collection = store.collection(it)

# List all collections in the datastore
print(store.list_collections())

jt1 = 'CME_0D1'
jt2 = 'CME_0D2'

# Example download + store. The API token must NOT be committed to source
# control; read it from the environment (import os first) instead:
#qd = quandl.get(it + '/' + jt1, authtoken=os.environ['QUANDL_API_KEY'])
# Store the data in the collection under the dataset code
#collection.write(jt1, qd[:-1], metadata={'source': 'Quandl'})

print(collection.list_items())

# `time` is already imported at the top; start the timer.
start = time.time()
def __init__(self):
    """Create the instance lock and open the "cellarstone_db" store.

    The store is opened under "./db/pystore".
    """
    self.lock = threading.Lock()

    pystore.set_path("./db/pystore")
    self.store = pystore.store("cellarstone_db")
"--expiration", default=None, type=str, help="Expiration in YYYYMMDD format. If none is provided, " "the system computes front expiration after next earnings") args = parser.parse_args() if args.verbose: logging.basicConfig(level=logging.INFO) else: logging.basicConfig(level=logging.WARNING) # Set storage path pystore.set_path(args.storage_path) store = pystore.store("ib") wrapper = RequestWrapper() wrapper.start_app(args.host, args.port, args.clientid) bar_size_str = args.bar_size.replace("mins", "m").replace(" ", "") duration = f"{args.duration[:-1]} {args.duration[-1]}" today = datetime.datetime.today() if args.expiration: expiration_dt = datetime.datetime.strptime(args.expiration, "%Y%m%d") if expiration_dt < today: query_time_str = expiration_dt.strftime("%Y%m%d %H:%M:%S") else: query_time_str = today.strftime("%Y%m%d %H:%M:%S")
import pystore
import pandas as pd
import numpy as np

# Scratch script: write a one-row frame to pystore, then append a second
# frame whose index continues where the first one ends.

# my_df = pd.DataFrame(["AAPL", "GOOG"], columns=["Symbol"])
my_df = pd.DataFrame([["abcd", "abcd", "abcd", "abcd"]],
                     columns=["W", "X", "Y", "Z"])
my_df.index = range(1, len(my_df) + 1)
print(my_df)

pystore.set_path("./db/pystore")
store = pystore.store("mydatastore")
collection = store.collection("test")

collection.write("AAA", my_df, overwrite=True)
item = collection.item("AAA")
print(item.to_pandas())
# print(collection.item("AAA").to_pandas())

# my_new_df = pd.DataFrame(["UBER"], columns=["Symbol"])
my_new_df = pd.DataFrame([["xyz", "xyz", "xyz", "xyz"]],
                         columns=["W", "X", "Y", "Z"])

# Continue numbering right after my_df, and size the range from the NEW
# frame's length so the assignment stays valid when the two frames differ
# in row count (the previous form only worked for equal lengths).
next_label = len(my_df) + 1
my_new_df.index = range(next_label, next_label + len(my_new_df))
print(my_new_df)

item = collection.item("AAA")
collection.append("AAA", my_new_df, npartitions=item.data.npartitions)
item = collection.item("AAA")
"""Test."""
import pandas as pd
import pystore
import yfinance as yf
from yahoo_fin.stock_info import tickers_dow
from typing import Tuple
import itertools

PYSTORE = pystore.store("timeseries")
BOE_COLLECTION = PYSTORE.collection("BOE")
MORTAGE_INSTRUMENTS_ITEM = "mortage_instruments"
DEFAULT_SYMBOL = "AAPL"
INCONSISTENT_SERIES = ["DOW"]


def get_symbols_down_jones():
    """Return the Dow Jones symbols reported by ``tickers_dow()``."""
    return tickers_dow()


def load_dow_jones() -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load the stored mortgage-instruments item from the BOE collection.

    Returns
    -------
    (instruments, data)
        ``instruments`` is parsed from the JSON stored under the item's
        "instruments" metadata key; ``data`` is the item's content as a
        pandas DataFrame.

    NOTE(review): the function name mentions Dow Jones but the body reads
    the BOE mortgage-instruments item -- confirm which one is intended.
    """
    mi_item = BOE_COLLECTION.item(MORTAGE_INSTRUMENTS_ITEM)
    mi = pd.read_json(mi_item.metadata["instruments"])
    # Call to_pandas(): returning the bound method itself would hand the
    # caller a callable instead of the DataFrame the annotation promises.
    return mi, mi_item.to_pandas()


def store_dow_jones():
    """Store BOE load_mortage_instruments into cache."""
    mortage_instruments = boe.mortage_instruments()
    series = boe.getFullSeries(mortage_instruments.SERIES)
default=60, choices=[60, 300, 900, 3600, 21600, 86400], help= 'granularity of candle data in seconds. choices: 1m, 5m, 15m, 6h, 24h') args = parser.parse_args() # use formatted datetime for filenames curr_datetime_str = dt.today().strftime('%Y-%m-%d-%H-%M-%S') # setup logging log_name = 'exchanges.cb' log_filename = 'logs/{:s}-{:s}'.format(args.market, curr_datetime_str) log = logger.setup(log_name, log_filename, logger.levels[args.loglevel]) # setup pystore for storing time series data ps_store = pystore.store('coinbase') ps_collection = ps_store.collection('candles.minute') ps_item = '{:s}'.format(args.market) ps_item_exists = ps_item in ps_collection.list_items() # track execution time to monitor avg request time exec_time = time.time() start_date = get_start_date(args.market) dates = _gen_date_frames(start_date) for count, (start, end) in enumerate(dates): # CB API limited to 3 reqs/sec but it's not accurate at all time.sleep(0.2)
def delete_pystore_item(self):
    """Delete ``self.item_name`` from its pystore collection.

    The store and collection handles are (re)opened from
    ``self.store_name`` / ``self.collection_name`` and kept on the instance.
    """
    self.store = store = pystore.store(self.store_name)
    self.collection = target = store.collection(self.collection_name)
    target.delete_item(self.item_name)