def test_to_csv(self):
    """
    Test to_csv.

    Writes racoon.test_df() with racoon.to_csv, reads the file back with
    pd.read_csv and checks that the shape of the frame is unchanged.
    """
    import os
    import tempfile

    df = racoon.test_df()
    # A private temporary directory replaces the fixed /tmp path: it cannot
    # clash with another run of this test and is removed when the block exits.
    with tempfile.TemporaryDirectory() as tmp_dir:
        fn = os.path.join(tmp_dir, "test_racoon.csv")
        racoon.to_csv(df, fn)
        dg = pd.read_csv(fn)
    assert df.shape == dg.shape
def minute_arrow():
    """
    Maintain the minutely key files.

    For each entry of ``tsdb_keys`` that does not end in "_timestamp", load
    its minutely parquet file, keep the rows whose ``date`` is later than
    28 days before today's UTC date, drop rows containing missing values and
    export the result through ``racoon.to_csv``.
    """
    logger.info("<keys_minutely>")
    for name in tsdb_keys:
        if not name.endswith("_timestamp"):
            frame = pd.read_parquet(f"/data/tsdb/minutely/{name}.parq")
            # Four weeks back from the current UTC calendar day.
            oldest = datetime.utcnow().date() - timedelta(weeks=4)
            recent = frame.query(f"date > '{oldest}'").dropna()
            racoon.to_csv(recent, f"/data/adbcsv/{name}_minutely.csv")
    logger.info("</keys_minutely>")
def day_arrow():
    """
    External day level wrapper.

    For every entry of the module-level ``keys`` whose name does not contain
    "timestamp", read ``/data/tsdb/daily/{key}.parq`` and export it through
    ``racoon.to_csv`` to ``/data/adbcsv/{key}_daily.csv``. A key whose
    parquet file does not exist is reported with a warning and skipped.
    """
    logger.info("<day_arrow>")
    for key in keys:
        if "timestamp" in key:
            continue
        fn = f"/data/tsdb/daily/{key}.parq"
        if not os.path.exists(fn):
            # warning() is the supported spelling; warn() is a deprecated alias.
            logger.warning(f"{fn} files are missing.")
            continue
        df = pd.read_parquet(fn)
        racoon.to_csv(df, f"/data/adbcsv/{key}_daily.csv")
    logger.info("</day_arrow>")
def minute_arrow():
    """
    Maintain stats_minutely.csv.

    Loads the minutely blockchain stats, keeps a fixed set of columns, adds
    three derived columns (base-10 log of difficulty, base-10 log of hash
    rate shifted by 12, and the block interval converted from minutes to
    whole seconds) and exports the frame through ``racoon.to_csv``.
    """
    logger.info("<stats_minutely>")
    wanted_columns = [
        "date",
        "timestamp",
        "market_price_usd",
        "trade_volume_btc",
        "blocks_size",
        "hash_rate",
        "difficulty",
        "miners_revenue_btc",
        "n_blocks_total",
        "minutes_between_blocks",
    ]
    frame = pd.read_parquet("/data/tsdb/minutely/blockchain_stats.parq")[wanted_columns]
    frame = frame.assign(
        log_difficulty=frame["difficulty"].apply(math.log10),
        # NOTE(review): the +12 offset presumably rescales the hash-rate
        # unit by 10**12 - confirm against the data source.
        log_hash_rate=frame["hash_rate"].apply(math.log10) + 12.0,
        arrival_rate=(frame["minutes_between_blocks"] * 60).astype(int),
    )
    racoon.to_csv(frame, "/data/adbcsv/stats_minutely.csv")
    logger.info("</stats_minutely>")
def gold_minute_arrow():
    """
    Maintain minutely spot aggregates for gold.

    Concatenates the minutely parquet file of every key in the module-level
    ``gold_spot_keys``, labelling each row with a ``source`` taken from the
    part of the key before the first underscore. Rows older than 72 hours
    before the current UTC minute, and rows with missing values, are
    dropped. Two files are written through ``racoon.to_csv``: the full
    window, and a "tail" holding the last remaining row of each source.

    Raises:
        IndexError: if ``gold_spot_keys`` is empty.
    """
    logger.info("<gold_spots_minutely>")
    # Local snapshot: the module-level list is only read, never rebound.
    spot_keys = list(gold_spot_keys)

    df = pd.read_parquet(f"/data/tsdb/minutely/{spot_keys[0]}.parq")
    df["source"] = spot_keys[0].split("_")[0]
    for spot_key in spot_keys[1:]:
        df = pd.concat(
            [df, pd.read_parquet(f"/data/tsdb/minutely/{spot_key}.parq")],
            ignore_index=True,
            sort=True,
        )
        # Only the rows appended just now have no source yet.
        df["source"] = df["source"].fillna(spot_key.split("_")[0])

    # Naive UTC "now" without the deprecated utcnow(); the value stays naive
    # because it is compared with df.date exactly as before.
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    cutoff = now.replace(second=0, microsecond=0) - datetime.timedelta(hours=3 * 24)
    df = df[df.date >= cutoff]
    df = df.dropna()
    racoon.to_csv(df, "/data/adbcsv/spots_btcxau_minutely.csv")

    # Missing values were already removed above, so no second dropna is needed.
    df = df.drop_duplicates(subset=["source"], keep="last")
    racoon.to_csv(df, "/data/adbcsv/spots_btcxau_minutely_tail.csv")
    logger.info("</gold_spots_minutely>")
def day_arrow():
    """
    Daily spot model.

    Fits a 4th-degree polynomial to log1p(spot) against a running row index,
    derives six padded model bands from the fit (each floored at 0.001),
    stores the latest band values and the position of the band closest to
    the latest spot under "spot_model_bands" / "color_index" in the
    "adbrocks" store, and exports the augmented frame through
    ``racoon.to_csv``.
    """
    logger.info("<day_spot_model>")
    frame = pd.read_parquet("/data/tsdb/daily/spot_xaubtc.parq")
    frame.columns = ["date", "spot"]
    frame["time"] = range(len(frame))
    frame["log1p_spot"] = frame.spot.apply(np.log1p)
    c4, c3, c2, c1, c0 = np.polyfit(frame.time, frame.log1p_spot, 4)

    # One (log-space pad, linear-space pad) pair per band.
    # Alternative pad sets kept from the original for reference:
    #log_pads = [-0.80, -0.40, 0.0, 0.5, 1.0, 1.1]
    #exp_pads = [0.66, 0.5, 0.25, -0.2, -1.0, -1.1]
    pads = [
        (-0.80, 0.662),
        (-0.40, 0.496),
        (0.0, 0.248),
        (0.5, -0.244),
        (1, -1.044),
        (1.2, -1.512),
    ]
    labels = [f"spot_model_{i}" for i in range(6)]

    # The fitted curve does not depend on the band, so evaluate it once.
    t = frame.time
    fitted = c4 * t**4 + c3 * t**3 + c2 * t**2 + c1 * t + c0
    for label, (log_pad, exp_pad) in zip(labels, pads):
        band = np.exp(fitted + log_pad) - 1 + exp_pad
        frame[label] = band.clip(lower=0.001)

    latest_bands = frame[labels].iloc[-1].tolist()
    latest_spot = frame.spot.iloc[-1]
    nearest_band = stats.closest(latest_bands, latest_spot)
    rock.rocks("adbrocks").put("spot_model_bands", latest_bands)
    rock.rocks("adbrocks").put("color_index", latest_bands.index(nearest_band))
    racoon.to_csv(frame, "/data/adbcsv/spot_model_xaubtc_daily.csv")
    logger.info("</day_spot_model>")
def _append_spot_frames(df, spot_keys, resolution):
    """
    Append the ``resolution`` parquet file of every spot key to ``df``.

    Rows that arrive without a ``source`` are labelled with the part of the
    key before the first underscore. Returns the combined frame.
    """
    for spot_key in spot_keys:
        df = pd.concat(
            [df, pd.read_parquet(f"/data/tsdb/{resolution}/{spot_key}.parq")],
            ignore_index=True,
            sort=True,
        )
        df["source"] = df["source"].fillna(spot_key.split("_")[0])
    return df


def bitcoin_minute_arrow():
    """
    Maintain minutely spot aggregates for bitcoin.

    Combines, for every key in the module-level ``bitcoin_spot_keys`` that
    does not contain "bisq", the minutely, hourly and daily parquet files.
    After each resolution is appended the frame is trimmed to a window
    ending at the current UTC minute: 2 days after the minutely data,
    7 days after the hourly data and 21 days after the daily data. Rows with
    missing values and duplicate (date, source) pairs are dropped, the frame
    is sorted by ``value`` and written through ``racoon.to_csv``; a second
    "tail" file keeps the last row of each source in that sorted order.

    Raises:
        IndexError: if no key remains after the "bisq" filter.
    """
    logger.info("<bitcoin_spots_minutely>")
    # Filter into a local list: the module-level key list must stay intact
    # for every other user of it.
    spot_keys = [key for key in bitcoin_spot_keys if "bisq" not in key]

    # Naive UTC "now" without the deprecated utcnow(); the value stays naive
    # because it is compared with df.date exactly as before.
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    base_cutoff = now.replace(second=0, microsecond=0)

    df = pd.read_parquet(f"/data/tsdb/minutely/{spot_keys[0]}.parq")
    df["source"] = spot_keys[0].split("_")[0]
    df = _append_spot_frames(df, spot_keys[1:], "minutely")
    df = df[df.date >= base_cutoff - datetime.timedelta(hours=2 * 24)]

    df = _append_spot_frames(df, spot_keys, "hourly")
    df = df[df.date >= base_cutoff - datetime.timedelta(hours=7 * 24)]

    df = _append_spot_frames(df, spot_keys, "daily")
    df = df[df.date >= base_cutoff - datetime.timedelta(hours=21 * 24)]

    df = df.dropna()
    df = df.drop_duplicates(subset=["date", "source"], keep="last")
    df = df[df.source != "bisq"]
    df = df.sort_values("value", ascending=True, axis=0)
    racoon.to_csv(df, "/data/adbcsv/spots_xaubtc_minutely.csv")

    # NOTE(review): after the sort by value, keep="last" selects the
    # highest-value row of each source, not the most recent one - confirm
    # this is what the "tail" file is meant to contain.
    df = df.drop_duplicates(subset=["source"], keep="last")
    racoon.to_csv(df, "/data/adbcsv/spots_xaubtc_minutely_tail.csv")
    logger.info("</bitcoin_spots_minutely>")