def test_query_edges_on_multiple_years(symbol, isvariablelength, timeframe):
    # original bug fixed by https://github.com/alpacahq/marketstore/pull/249
    client.destroy(tbk=f"{symbol}/{timeframe}/TICK")

    data = np.array(
        [
            (pd.Timestamp("2017-01-01 00:00").value / 10**9, 10.0),
            (pd.Timestamp("2018-01-01 00:00").value / 10**9, 11.0),
        ],
        dtype=[("Epoch", "i8"), ("Ask", "f4")],
    )
    client.write(data, f"{symbol}/{timeframe}/TICK", isvariablelength=isvariablelength)

    params = pymkts.Params(symbol, timeframe, "TICK", limit=1, limit_from_start=True)
    d_start = client.query(params).first().df()

    params = pymkts.Params(symbol, timeframe, "TICK", limit=1, limit_from_start=False)
    d_end = client.query(params).first().df()

    assert len(d_start) == 1
    assert len(d_end) == 1
    assert datetime(2017, 1, 1, 0, 0, 0, tzinfo=timezone.utc) == d_start.index[0]
    assert datetime(2018, 1, 1, 0, 0, 0, tzinfo=timezone.utc) == d_end.index[0]
def test_query_edges(symbol, isvariablelength, timeframe):
    client.destroy(tbk=f"{symbol}/{timeframe}/TICK")

    data = np.array(
        [
            (pd.Timestamp("2017-01-01 00:00").value / 10**9, 10.0),
            (pd.Timestamp("2017-02-01 00:00").value / 10**9, 11.0),
            (pd.Timestamp("2017-03-01 00:00").value / 10**9, 12.0),
        ],
        dtype=[("Epoch", "i8"), ("Ask", "f4")],
    )
    client.write(data, f"{symbol}/{timeframe}/TICK", isvariablelength=isvariablelength)

    params = pymkts.Params(symbol, timeframe, "TICK", limit=1, limit_from_start=True)
    d_start = client.query(params).first().df()

    params = pymkts.Params(symbol, timeframe, "TICK", limit=1, limit_from_start=False)
    d_end = client.query(params).first().df()

    assert len(d_start) == 1
    assert len(d_end) == 1
    assert datetime(2017, 1, 1, 0, 0, 0, tzinfo=timezone.utc) == d_start.index[0]
    assert datetime(2017, 3, 1, 0, 0, 0, tzinfo=timezone.utc) == d_end.index[0]
def test_grpc_query_all_symbols():
    # --- init ---
    symbol = "TEST"
    symbol2 = "TEST2"
    symbol3 = "TEST3"
    timeframe = "1Sec"
    attribute = "OHLCV"
    tbk = "{}/{}/{}".format(symbol, timeframe, attribute)
    tbk2 = "{}/{}/{}".format(symbol2, timeframe, attribute)
    tbk3 = "{}/{}/{}".format(symbol3, timeframe, attribute)

    # --- write ---
    data = np.array([
        (pd.Timestamp('2017-01-01 00:00:00').value / 10**9, 10.0, 20.0),
    ], dtype=[('Epoch', 'i8'), ('High', 'f4'), ('Low', 'f4')])
    data2 = np.array([
        (pd.Timestamp('2017-01-01 01:00:00').value / 10**9, 30.0, 40.0),
    ], dtype=[('Epoch', 'i8'), ('High', 'f4'), ('Low', 'f4')])
    data3 = np.array([
        (pd.Timestamp('2017-01-01 02:00:00').value / 10**9, 50.0, 60.0),
    ], dtype=[('Epoch', 'i8'), ('High', 'f4'), ('Low', 'f4')])
    client.write(data, tbk)
    client.write(data2, tbk2)
    client.write(data3, tbk3)
    time.sleep(0.5)

    # --- query all symbols using * ---
    resp = client.query(pymkts.Params("*", timeframe, attribute, limit=2))
    # TEST, TEST2, TEST3 (and maybe some other test buckets)
    assert len(resp.keys()) >= 3

    # --- query comma-separated symbol names ---
    print("{},{}/{}/{}".format(symbol, symbol2, timeframe, attribute))
    resp = client.query(
        pymkts.Params([symbol, symbol2], timeframe, attribute, limit=2))
    assert set(resp.keys()) == {tbk, tbk2}

    # --- tearDown ---
    client.destroy(tbk)
    client.destroy(tbk2)
    client.destroy(tbk3)
def get_value(client, query: str, column: str, start_dt: datetime, end_dt: datetime):
    symbol, timeframe, attribute = query.split("/")
    try:
        params = pymkts.Params(symbol, timeframe, attribute, limit=1,
                               start=start_dt, end=end_dt)
        df = client.query(params).first().df()
        if df is None or df.empty:
            # there is no result
            return (0, ERROR_VALUE_OF_LATENCY)

        value = df.tail(1).get(column)
        if value is None:
            logger.error("column %s does not exist", column)
            return (0, 0)

        latency = end_dt - df.index[-1]
        return (value, latency.total_seconds())
    except ConnectionError:
        logger.error("connection error")
    except Exception as e:
        if is_symbol_does_not_exist_error(e):
            logger.error("symbol does not exist: %s", query)
        else:
            logger.error(e)
    return (0, 0)
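# `is_symbol_does_not_exist_error` is defined elsewhere in this module. A minimal
# sketch is given below as an assumption, based on the server reply observed in
# test_symbol_does_not_exist further down ("no files returned from query parse");
# the real helper may inspect the error differently.
def is_symbol_does_not_exist_error(e: Exception) -> bool:
    # the JSON-RPC error message for a missing bucket contains this phrase
    return "no files returned from query parse" in str(e)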
def build_test(in_df, symbol, timeframe, start, end):
    param = pymkts.Params([symbol], timeframe, "TICK", start=start, end=end)
    out_df = client.query(param).first().df()
    processed_out_df = process_query_result(out_df, inplace=False)

    assert not out_df.empty
    try:
        pd.testing.assert_frame_equal(in_df, processed_out_df)
    except AssertionError:
        df1 = in_df
        df2 = processed_out_df
        if len(df1) != len(df2):
            print("lengths do not match, inspect manually")
            raise
        bad_locations = df1.index != df2.index
        # dilate the mismatch mask so each bad row is shown with its neighbors
        dilated_bad_locations = np.convolve(
            bad_locations.astype(int), [1, 1, 1], mode="same"
        ).astype(bool)
        # print("Show dilated bad locations".center(40, "-"))
        # print("\ninput df")
        # display(df1.loc[dilated_bad_locations, :])
        # print("\noutput df, postprocessed")
        # display(df2.loc[dilated_bad_locations, :])
        # print("\noutput df, raw")
        # display(out_df.loc[dilated_bad_locations, :])
        raise
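# `process_query_result` (also used as `utils.process_query_result` below) comes
# from the shared test utils. A minimal sketch of what these tests assume it does:
# fold the `Nanoseconds` column of variable-length records back into the
# DatetimeIndex so the result is comparable with the input frame. This is an
# assumption based on the Nanoseconds handling elsewhere in this file, not the
# exact implementation.
def process_query_result(df, inplace=False):
    out = df if inplace else df.copy()
    if "Nanoseconds" in out.columns:
        # merge the sub-second remainder into the index, then drop the column
        out.index = out.index + pd.to_timedelta(out["Nanoseconds"], unit="ns")
        out = out.drop(columns="Nanoseconds")
    return out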
def test_nanosec_range(symbol, data, start, end, limit, limit_from_start, response):
    # ---- given ----
    print(client.write(np.array(data, dtype=DATA_TYPE_NANOSEC),
                       "{}/1Sec/TICK".format(symbol),
                       isvariablelength=True))

    # ---- when ----
    reply = client.query(
        pymkts.Params(
            symbol,
            '1Sec',
            'TICK',
            start=start,
            end=end,
            limit=limit,
            limit_from_start=limit_from_start,
        ))
    client.destroy("{}/1Sec/TICK".format(symbol))

    # ---- then ----
    ret_df = reply.first().df()
    assert (response == ret_df.values).all()
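# DATA_TYPE_NANOSEC is a module-level fixture not shown here. A plausible
# definition is sketched below purely as an assumption: variable-length tick
# records carry an Epoch in seconds plus a Nanoseconds remainder, following the
# Epoch-first record layout used throughout these tests. The actual column set
# may differ.
DATA_TYPE_NANOSEC = [("Epoch", "i8"), ("Nanoseconds", "i4"), ("Ask", "f4")]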
def test_tickcandler(aggfunc, limit, limit_from_start,
                     exp_open, exp_high, exp_low, exp_close):
    # ---- given ----
    tbk = "{}/1Sec/TICK".format(symbol2)
    client.destroy(tbk)  # setup
    client.write(np.array(data2, dtype=data_type2), tbk, isvariablelength=False)

    # ---- when ----
    agg_reply = client.query(
        pymkts.Params(symbol2, '1Sec', 'TICK',
                      limit=limit,
                      limit_from_start=limit_from_start,
                      functions=aggfunc))

    # ---- then ----
    ret = agg_reply.first().df()
    assert ret["Open"][0] == exp_open
    assert ret["High"][0] == exp_high
    assert ret["Low"][0] == exp_low
    assert ret["Close"][0] == exp_close
def test_ticks_simple_cases(symbol, data, index, nanoseconds):
    client.destroy(tbk=f"{symbol}/1Sec/TICK")

    in_df = utils.build_dataframe(
        data,
        pd.to_datetime(index, format="%Y-%m-%d %H:%M:%S").tz_localize("utc"),
        nanoseconds=nanoseconds,
    )
    ret = write(in_df, symbol, extract_nanoseconds=nanoseconds is not None)
    print("Msg ret: {}".format(ret))

    param = pymkts.Params([symbol], "1Sec", "TICK")
    out_df = client.query(param).first().df()
    processed_out_df = utils.process_query_result(out_df, inplace=False)

    print("\ninput df")
    print(in_df)
    print("\noutput df, postprocessed")
    print(processed_out_df)
    print("\noutput df, raw")
    print(out_df)

    pd.testing.assert_frame_equal(in_df, processed_out_df)
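# `write` is a small helper defined elsewhere in this test module. A minimal
# sketch, assuming it mirrors the explicit pattern spelled out in
# test_no_data_available below: convert the frame to records (optionally
# splitting out the Nanoseconds field) and write them as variable-length data.
def write(in_df, symbol, extract_nanoseconds=False):
    records = utils.to_records(in_df, extract_nanoseconds=extract_nanoseconds)
    return client.write(records, f"{symbol}/1Sec/TICK", isvariablelength=True)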
def get(self, exchange):
    args = parser.parse_args()
    # exchange_symbol = 'binance-BTCUSDT'
    # args['interval'] = '1Min'
    datatype = 'OHLCV'
    cli = pymkts.Client('http://206.189.216.139:5993/rpc')

    # query pymkts
    allsymbols = cli.list_symbols()
    filtered_symbols = list(filter(lambda i: exchange in i, allsymbols))
    # args['exchange_symbol']
    param = pymkts.Params(allsymbols, args['interval'], datatype, limit=100)
    reply = cli.query(param)
    # note: `data` is queried but unused; only the filtered symbol list is returned
    data = reply.first().df()
    # close = data['Close']
    # rsi = talib.RSI(close, timeperiod=12)
    # {'data': data[0:100000].to_json(), 'rsi': rsi.to_json()[0:10000]}
    # return data.to_csv(sep='\t'), 201, {'Access-Control-Allow-Origin': '*'}
    return filtered_symbols, 201, {'Access-Control-Allow-Origin': '*'}
def test_write():
    # write -> query -> destroy
    # --- init ---
    # (destroy the same bucket this test writes; the original targeted
    # "TEST/1Min/Tick", which this test never writes)
    destroy("TEST/1Min/OHLCV")

    # --- write ---
    data = np.array(
        [(pd.Timestamp('2017-01-01 00:00').value / 10**9, 10.0, 20.0)],
        dtype=[('Epoch', 'i8'), ('High', 'f4'), ('Low', 'f4')])
    master_client.write(data, 'TEST/1Min/OHLCV')

    # --- wait until replication is done ---
    time.sleep(0.1)

    # --- query ---
    resp = replica_client.query(pymkts.Params('TEST', '1Min', 'OHLCV'))
    assert (resp.first().df().values == [10.0, 20.0]).all()

    # --- list_symbols ---
    symbols = replica_client.list_symbols()
    assert "TEST" in symbols

    # --- destroy ---
    destroy("TEST/1Min/OHLCV")
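# `destroy` is a helper shared by these replication tests. A minimal sketch,
# assuming it simply drops the bucket on the master and tolerates the case
# where the bucket does not exist yet (e.g. on the first run); the real helper
# may do more.
def destroy(tbk: str):
    try:
        master_client.destroy(tbk)
    except Exception as e:
        print(f"ignored error while destroying {tbk}: {e}")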
def test_corporate_actions():
    params = pymkts.Params('AAPL', '1D', 'ACTIONS')
    ca = client.query(params).first().df()

    assert len(ca) == 1
    assert ca.TextNumber[0] == 2103357
    assert ca.Rate[0] == 4.0
def test_write_unsorted_ticks_returns_sorted_ticks(symbol, data, index, nanoseconds):
    client.destroy(tbk=f"{symbol}/1Sec/TICK")

    in_df = utils.build_dataframe(
        data,
        pd.to_datetime(index, format="%Y-%m-%d %H:%M:%S").tz_localize("utc"),
        nanoseconds=nanoseconds,
    )
    ret = write(in_df, symbol, extract_nanoseconds=nanoseconds is not None)
    print("Msg ret: {}".format(ret))

    param = pymkts.Params([symbol], "1Sec", "TICK")
    out_df = client.query(param).first().df()
    processed_out_df = utils.process_query_result(out_df, inplace=False)

    print("\ninput df")
    print(in_df)
    print("\noutput df, postprocessed")
    print(processed_out_df)
    print("\noutput df, raw")
    print(out_df)

    pd.testing.assert_frame_equal(
        in_df.sort_index(kind="merge"),
        processed_out_df,
        # check_less_precise=True is no longer passed, as pandas warns:
        # FutureWarning: The 'check_less_precise' keyword in testing.assert_*_equal
        # is deprecated and will be removed in a future version. You can stop
        # passing 'check_less_precise' to silence this warning.
    )
def test_no_data_available(symbol, timeframe, data, index, nanoseconds, start, end):
    start = pd.Timestamp(start, tz="utc")
    end = pd.Timestamp(end, tz="utc")
    in_df = utils.build_dataframe(
        data,
        pd.to_datetime(index, format="%Y-%m-%d %H:%M:%S").tz_localize("utc"),
        columns=["Bid", "Ask"],
        nanoseconds=nanoseconds,
    )
    records = utils.to_records(in_df, extract_nanoseconds=nanoseconds is not None)
    tbk = f"{symbol}/{timeframe}/TICK"

    # ---- given ----
    ret = client.write(records, tbk, isvariablelength=True)
    print("Msg ret: {}".format(ret))
    param = pymkts.Params([symbol], timeframe, "TICK", start=start, end=end)

    # ---- when ----
    ret = client.query(param)
    out_df = ret.first().df()

    # ---- then ----
    assert out_df.empty
def test_tick():
    # --- init ---
    symbol = "TEST"
    timeframe = "1Sec"
    attribute = "Tick"
    destroy("{}/{}/{}".format(symbol, timeframe, attribute))

    # --- write ---
    data = np.array(
        [
            (pd.Timestamp('2017-01-01 00:00:00').value / 10 ** 9, 10.0, 20.0),
            (pd.Timestamp('2017-01-01 00:00:00').value / 10 ** 9, 30.0, 40.0),
        ],
        dtype=[('Epoch', 'i8'), ('Ask', 'f4'), ('Bid', 'f4')]
    )
    master_client.write(data, "{}/{}/{}".format(symbol, timeframe, attribute),
                        isvariablelength=True)

    # --- wait until replication is done ---
    time.sleep(0.1)

    # --- query ---
    resp = replica_client.query(pymkts.Params(symbol, timeframe, attribute))
    assert (resp.first().df().loc[:, ['Ask', 'Bid']].values
            == [[10.0, 20.0], [30.0, 40.0]]).all()

    # --- tearDown ---
    destroy("{}/{}/{}".format(symbol, timeframe, attribute))
def test_integrity_ticks(db, symbol, with_nanoseconds):
    # ---- given ----
    data = db[symbol]
    records = convert(data, with_nanoseconds=with_nanoseconds)
    tbk = get_tbk(symbol, TIMEFRAME, ATTRGROUP)

    # ---- when ----
    ret = client.write(records, tbk, isvariablelength=True)
    print("Msg ret: {}".format(ret))
    assert symbol in list(db.keys())

    param = pymkts.Params([symbol], TIMEFRAME, ATTRGROUP)
    ret_df = client.query(param).first().df()

    # drop the Nanoseconds column: it comes back with a precision error,
    # so it cannot be asserted against the input
    # assert (data == ret_df).all().all()  # fails if asserted here!
    data = data.drop(columns="Nanoseconds", errors="ignore")
    ret_df = ret_df.drop(columns="Nanoseconds", errors="ignore")

    # ---- then ----
    assert (data == ret_df).all().all()
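# `get_tbk` is assumed to build the time-bucket key the same way the rest of
# these tests do by hand; a minimal sketch:
def get_tbk(symbol: str, timeframe: str, attrgroup: str) -> str:
    # e.g. get_tbk("TEST", "1Sec", "TICK") -> "TEST/1Sec/TICK"
    return "{}/{}/{}".format(symbol, timeframe, attrgroup)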
def test_query_edges_on_overlapping_years(symbol, isvariablelength, timeframe):
    # original bug fixed by https://github.com/alpacahq/marketstore/pull/249
    client.destroy(tbk=f"{symbol}/{timeframe}/TICK")

    data = np.array(
        [
            (pd.Timestamp("2017-01-01 00:00").value / 10**9, 10.0),
            (pd.Timestamp("2018-01-01 00:00").value / 10**9, 11.0),
        ],
        dtype=[("Epoch", "i8"), ("Ask", "f4")],
    )
    client.write(data, f"{symbol}/{timeframe}/TICK", isvariablelength=isvariablelength)

    params = pymkts.Params(
        symbol,
        timeframe,
        "TICK",
        start=pd.Timestamp("2017-01-01 00:00"),
        end=pd.Timestamp("2018-01-02 00:00"),
    )
    d_all = client.query(params).first().df()
    print(d_all)

    assert d_all.shape[0] == 2
    assert datetime(2017, 1, 1, 0, 0, 0, tzinfo=timezone.utc) == d_all.index[0]
    assert datetime(2018, 1, 1, 0, 0, 0, tzinfo=timezone.utc) == d_all.index[-1]
def query(
    self,
    symbol,
    tbk: Tuple[str, str] = _tick_tbk_ids,
) -> pd.DataFrame:
    # XXX: causes crash
    # client.query(pymkts.Params(symbol, '*', 'OHCLV'
    result = self._client.query(pymkts.Params(symbol, *tbk))
    return result.first().df()
def test_symbol_does_not_exist(symbol, timeframe, start, end):
    start = pd.Timestamp(start, tz="utc")
    end = pd.Timestamp(end, tz="utc")
    param = pymkts.Params([symbol], "1Sec", "TICK", start=start, end=end)

    with pytest.raises(Exception) as excinfo:
        # resp = {'error': {'code': -32000, 'data': None,
        #                   'message': 'no files returned from query parse'},
        #         'id': '1', 'jsonrpc': '2.0'}  # noqa
        # raised from pymarketstore/jsonrpc.py:48: Exception
        client.query(param)

    assert "no files returned from query parse" in str(excinfo.value)
def test_query(stub):
    # --- given ---
    c = pymkts.GRPCClient()
    p = pymkts.Params('BTC', '1Min', 'OHLCV')

    # --- when ---
    c.query(p)

    # --- then ---
    assert c.stub.Query.called == 1
def test_driver():
    data = np.array([(pd.Timestamp('2017-01-01 00:00').value / 10**9, 10.0)],
                    dtype=[('Epoch', 'i8'), ('Ask', 'f4')])
    cli = pymkts.Client()
    cli.write(data, 'TEST/1Min/Tick')

    d2 = cli.query(pymkts.Params('TEST', '1Min', 'Tick')).first().df()
    print("Length of result: ", d2.shape[0])

    assert d2.shape[0] == 1
    assert (datetime(2017, 1, 1, 0, 0, 0, tzinfo=timezone.utc).timestamp()
            == d2.index[0].timestamp())
def get_data(self, symbol, timeframe, limits) -> pd.DataFrame:
    limit = None
    if all(isinstance(x, int) for x in limits):
        limit = max(*limits) if len(limits) > 1 else limits[0]
    return self.client.query(
        pymkts.Params(
            symbol,
            timeframe,
            attrgroup='OHLCV',
            limit=limit,
            end=pd.Timestamp(self.day.date(), tz='America/New_York'))).first().df()
def test_write_unsorted_random_data(symbol, size, start, window):
    client.destroy(tbk=f"{symbol}/1Sec/TICK")

    window = pd.Timedelta(window)
    start = pd.Timestamp(start, tz="utc")
    end = start + window
    np.random.seed(42)
    random.seed(42)

    # because we expect some leakage within 1 second due to the nanoseconds
    # field, we add some margin around the queried range (data that is not
    # too close to the central data)
    pre_in_df = utils.generate_dataframe(
        size,
        start - window,
        start - pd.Timedelta("1s"),
        random_data=True,
        sort_index=False,
    )
    in_df = utils.generate_dataframe(size, start, end,
                                     random_data=True, sort_index=False)
    post_in_df = utils.generate_dataframe(size,
                                          end + pd.Timedelta("1s"),
                                          end + window,
                                          random_data=True,
                                          sort_index=False)
    write(pre_in_df, symbol, extract_nanoseconds=True)
    write(in_df, symbol, extract_nanoseconds=True)
    write(post_in_df, symbol, extract_nanoseconds=True)

    param = pymkts.Params([symbol], "1Sec", "TICK", start=start, end=end)
    out_df = client.query(param).first().df()
    processed_out_df = utils.process_query_result(out_df, inplace=False)

    print("\ninput df")
    print(in_df)
    print("\noutput df, postprocessed")
    print(processed_out_df)
    print("\noutput df, raw")
    print(out_df)

    pd.testing.assert_frame_equal(
        in_df.sort_index(kind="merge"),
        processed_out_df,
        # check_less_precise=True is no longer passed, as pandas warns:
        # FutureWarning: The 'check_less_precise' keyword in testing.assert_*_equal
        # is deprecated and will be removed in a future version. You can stop
        # passing 'check_less_precise' to silence this warning.
    )
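# `utils.generate_dataframe` belongs to the shared test utils. A plausible
# sketch is given below purely as an assumption (column names and distribution
# are guesses, not taken from the original): `size` random Bid/Ask rows with
# uniformly drawn nanosecond timestamps in [start, end), optionally left
# unsorted so the server-side sort can be exercised.
def generate_dataframe(size, start, end, random_data=True, sort_index=True):
    # draw raw nanosecond epochs, then build a tz-aware index from them
    timestamps = pd.to_datetime(
        np.random.randint(start.value, end.value, size=size)
    ).tz_localize("utc")
    df = pd.DataFrame(
        {
            "Bid": np.random.rand(size) if random_data else np.zeros(size),
            "Ask": np.random.rand(size) if random_data else np.zeros(size),
        },
        index=timestamps,
    )
    return df.sort_index() if sort_index else df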
def assert_query_result(df, symbol, size, timeframe, start, end):
    start = pd.Timestamp(start).tz_convert("utc")
    end = pd.Timestamp(end).tz_convert("utc")
    param = pymkts.Params([symbol], timeframe, "TICK", start=start, end=end)
    out_df = client.query(param).first().df()
    processed_out_df = utils.process_query_result(out_df, inplace=False)

    assert not out_df.empty
    assert size == len(out_df)
    assert out_df.index.is_monotonic_increasing

    try:
        if timeframe == "1Sec":
            pd.testing.assert_frame_equal(df, processed_out_df,
                                          check_less_precise=True)
        else:
            # remove all nanosecond information for now because of the
            # precision issue on nanoseconds
            # BUG: when we look at the raw nanosecond values returned, they
            # show the same issue as for 1Sec: a value set to 0 comes back
            # flipped to 99999999xx due to the precision issue
            pd.testing.assert_frame_equal(
                rm_ns_from_idx(df),
                rm_ns_from_idx(processed_out_df),
                # check_less_precise=True is no longer passed, as pandas warns:
                # FutureWarning: The 'check_less_precise' keyword in
                # testing.assert_*_equal is deprecated and will be removed in a
                # future version. You can stop passing 'check_less_precise' to
                # silence this warning.
            )
    except AssertionError:
        if len(df) != len(out_df):
            print("lengths do not match, inspect manually")
            raise
        bad_locations = df.index != processed_out_df.index
        dilated_bad_locations = np.convolve(
            bad_locations.astype(int), [1, 1, 1], mode="same"
        ).astype(bool)

        print("Show dilated bad locations".center(40, "-"))
        print("\ninput df")
        print(df.loc[dilated_bad_locations, :])
        print("\noutput df, postprocessed")
        print(processed_out_df.loc[dilated_bad_locations, :])
        print("\noutput df, raw")
        print(out_df.loc[dilated_bad_locations, :])
        raise
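# `rm_ns_from_idx` is defined elsewhere; a minimal sketch, assuming it simply
# truncates the index to whole seconds so the nanosecond precision issue
# described above is excluded from the comparison.
def rm_ns_from_idx(df):
    out = df.copy()
    out.index = out.index.floor("s")
    return out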
def test_1sec_tf_tick(symbol, data):
    # ---- given ----
    client.write(np.array(data, dtype=DATA_TYPE_TICK),
                 "{}/1Sec/TICK".format(symbol),
                 isvariablelength=True)

    # ---- when ----
    reply = client.query(pymkts.Params(symbol, '1Sec', 'TICK', limit=10))

    # ---- then ----
    data_without_epochs = [record[1:] for record in data]
    assert (reply.first().df().values == data_without_epochs).all()
def test_column_coerce(symbol, data_type, data, coerce_to):
    # ---- given ----
    tbk = "{}/1Sec/TICK".format(symbol)
    client.destroy(tbk)  # setup
    print(client.write(np.array(data, dtype=data_type), tbk, isvariablelength=False))

    # ---- when ----
    reply_before = client.query(pymkts.Params(symbol, '1Sec', 'TICK', limit=10))

    # write the same data but with a different column dataType so that
    # coerce_column happens
    ret = client.write(np.array(data, dtype=coerce_to), tbk, isvariablelength=False)
    reply_after = client.query(pymkts.Params(symbol, '1Sec', 'TICK', limit=10))

    # ---- then ----
    # no error (when there is no write error, the JSON-RPC client returns
    # "{'responses': None}" and the gRPC client returns "")
    assert str(ret) in ["{'responses': None}", ""]
    # no data is updated
    assert (reply_before.first().df().values == reply_after.first().df().values).all()
def test_1sec_tf_candle(symbol, data):
    # ---- given ----
    print(client.write(np.array(data, dtype=DATA_TYPE_CANDLE),
                       "{}/1Sec/OHLCV".format(symbol),
                       isvariablelength=False))

    # ---- when ----
    reply = client.query(pymkts.Params(symbol, '1Sec', 'OHLCV', limit=10))

    # ---- then ----
    data_without_epochs = [record[1:] for record in data]
    assert (reply.first().df().values == data_without_epochs).all()
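# DATA_TYPE_TICK (used in test_1sec_tf_tick above) and DATA_TYPE_CANDLE are
# module-level fixtures not shown here. Plausible definitions are sketched
# below as assumptions only, following the Epoch-first record layout used
# throughout these tests; the real column names and widths may differ.
DATA_TYPE_TICK = [("Epoch", "i8"), ("Bid", "f4"), ("Ask", "f4")]
DATA_TYPE_CANDLE = [("Epoch", "i8"), ("Open", "f4"), ("High", "f4"),
                    ("Low", "f4"), ("Close", "f4"), ("Volume", "i8")]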
def test_to_query_request():
    p = pymkts.Params('TSLA', '1Min', 'OHLCV', 1500000000, 4294967296)
    assert p.to_query_request() == {
        'destination': 'TSLA/1Min/OHLCV',
        'epoch_start': 1500000000,
        'epoch_end': 4294967296,
    }

    p2 = pymkts.Params(symbols=['FORD', 'TSLA'],
                       timeframe='5Min',
                       attrgroup='OHLCV',
                       start=1000000000,
                       end=4294967296,
                       limit=200,
                       limit_from_start=False)
    assert p2.to_query_request() == {
        'destination': 'FORD,TSLA/5Min/OHLCV',
        'epoch_start': 1000000000,
        'epoch_end': 4294967296,
        'limit_record_count': 200,
        'limit_from_start': False,
    }
def test_build_query():
    # --- given ---
    c = pymkts.GRPCClient(endpoint="127.0.0.1:5995")
    p = pymkts.Params('TSLA', '1Min', 'OHLCV', 1500000000, 4294967296)

    # --- when ---
    query = c.build_query([p])

    # --- then ---
    assert query == MultiQueryRequest(requests=[
        QueryRequest(destination="TSLA/1Min/OHLCV",
                     epoch_start=1500000000,
                     epoch_end=4294967296)
    ])
def test_too_long_string():
    # --- given ---
    tbk = "STR2/1D/TICK"
    client.destroy(tbk)
    recarray = np.array(
        [(timestamp('2019-05-01 00:00:00'), "this_is_longer_than_16_characters")],
        dtype=dtype)

    # --- when ---
    rep = client.write(recarray=recarray, tbk=tbk, isvariablelength=False)

    symbol, timeframe, attribute_group = tbk.split("/")
    param = pymkts.Params(symbol, timeframe, attribute_group)
    reply = client.query(param)

    # --- then ---
    print(reply.first().df())
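# `timestamp` and `dtype` are module-level fixtures not shown here. Plausible
# definitions are sketched below as assumptions only: `timestamp` converts a
# date string to epoch seconds, matching the pd.Timestamp(...).value / 10**9
# pattern used elsewhere in these tests, and `dtype` would declare a
# 16-character string column for this test to overflow. The real column name
# and string width are guesses.
def timestamp(datestr: str) -> int:
    return int(pd.Timestamp(datestr).value / 10**9)

dtype = [("Epoch", "i8"), ("Name", "U16")]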
def test_build_query():
    c = pymkts.Client("127.0.0.1:5994")
    p = pymkts.Params('TSLA', '1Min', 'OHLCV', 1500000000, 4294967296)
    p2 = pymkts.Params('FORD', '5Min', 'OHLCV', 1000000000, 4294967296)
    query_dict = c.build_query([p, p2])

    test_query_dict = {}
    test_lst = []
    param_dict1 = {
        'destination': 'TSLA/1Min/OHLCV',
        'epoch_start': 1500000000,
        'epoch_end': 4294967296
    }
    test_lst.append(param_dict1)
    param_dict2 = {
        'destination': 'FORD/5Min/OHLCV',
        'epoch_start': 1000000000,
        'epoch_end': 4294967296
    }
    test_lst.append(param_dict2)
    test_query_dict['requests'] = test_lst
    assert query_dict == test_query_dict

    # a single Params (not wrapped in a list) is also accepted
    query_dict = c.build_query(p)
    assert query_dict == {'requests': [param_dict1]}