def alphavantage_import_eod_prices(
    ctx: Context,
    tickers_input: Optional[Reference[Ticker]],
    api_key: str,
    tickers: Optional[List] = None,
) -> Iterator[Records[AlphavantageEodPrice]]:
    assert api_key is not None
    tickers = prepare_tickers(tickers, tickers_input)
    if not tickers:
        return None
    ticker_latest_dates_imported = (
        ctx.get_state_value("ticker_latest_dates_imported") or {}
    )
    conn = JsonHttpApiConnection()

    def fetch_prices(params: Dict, tries: int = 0) -> Optional[Records]:
        if tries > 2:
            return None
        resp = conn.get(ALPHAVANTAGE_API_BASE_URL, params, stream=True)
        try:
            record = resp.json()
            # A json response means an error (valid data comes back as csv)
            if is_alphavantage_error(record):
                # TODO: Log this failure?
                print(f"Error for {params} {record}")
                return None
            if is_alphavantage_rate_limit(record):
                time.sleep(60)
                return fetch_prices(params, tries=tries + 1)
        except Exception:
            # Not json, so this is the expected csv payload
            pass
        records = list(read_csv(resp.iter_lines()))
        return records

    for ticker in tickers:
        assert isinstance(ticker, str)
        latest_date_imported = ensure_datetime(
            ticker_latest_dates_imported.get(ticker, MIN_DATETIME)
        )
        assert latest_date_imported is not None
        if utcnow() - ensure_utc(latest_date_imported) < timedelta(days=1):
            # Only check once a day
            continue
        params = prepare_params_for_ticker(ticker, ticker_latest_dates_imported)
        params["apikey"] = api_key
        records = fetch_prices(params)
        if records:
            # Symbol is not included in the csv, so add it to each record
            for r in records:
                r["symbol"] = ticker
            yield records
        # Update state
        ticker_latest_dates_imported[ticker] = utcnow()
        ctx.emit_state_value(
            "ticker_latest_dates_imported", ticker_latest_dates_imported
        )
        if not ctx.should_continue():
            break
def cast(self, obj: Any, strict: bool = False) -> Any:
    if strict:
        # A plain date (but not a datetime, which is a date subclass) is promoted
        # to a datetime at midnight; anything else must already be a datetime
        if isinstance(obj, date) and not isinstance(obj, datetime):
            obj = datetime(obj.year, obj.month, obj.day)
        if not isinstance(obj, datetime):
            raise TypeError(obj)
        return obj
    if is_nullish(obj):
        return None
    return ensure_datetime(obj)
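# Hedged usage illustration (not from the source) of cast() above, assuming it is a
# method of a datetime field-type class, here called DateTimeType for illustration:
#
#   ft = DateTimeType()
#   ft.cast("2021-01-15")                      # lenient: parsed via ensure_datetime()
#   ft.cast(None)                              # lenient: nullish, returns None
#   ft.cast(date(2021, 1, 15), strict=True)    # promoted to datetime(2021, 1, 15, 0, 0)
#   ft.cast("2021-01-15", strict=True)         # strict: raises TypeError (not a datetime)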
def import_orders(
    ctx: Context,
    api_key: str,
    store_id: str,
    from_date: Optional[date] = None,
    to_date: Optional[date] = None,
) -> Iterator[Records[bigcommerce.BigCommerceOrder]]:
    params = {
        "limit": ENTRIES_PER_PAGE,
        "min_date_created": from_date,
        "max_date_created": to_date,
        "sort": "date_modified:asc",
    }
    latest_modified_date_imported = ctx.get_state_value("latest_modified_date_imported")
    latest_modified_date_imported = ensure_datetime(latest_modified_date_imported)
    if latest_modified_date_imported:
        params["min_date_modified"] = latest_modified_date_imported
    page = 1
    while ctx.should_continue():
        params["page"] = page
        resp = HttpApiConnection().get(
            url="{}{}/v2/orders".format(BIGCOMMERCE_API_BASE_URL, store_id),
            params=params,
            headers={"X-Auth-Token": api_key, "Accept": "application/json"},
        )
        # Check if there is anything left to process
        if resp.status_code == HTTPStatus.NO_CONTENT:
            break
        json_resp = resp.json()
        assert isinstance(json_resp, list)
        latest_modified_date_imported = max(
            [r.get("date_modified", latest_modified_date_imported) for r in json_resp]
        )
        yield json_resp
        ctx.emit_state_value(
            "latest_modified_date_imported", latest_modified_date_imported
        )
        page += 1
def prepare_params_for_ticker(
    ticker: str, ticker_latest_dates_imported: Dict[str, datetime]
) -> Dict:
    latest_date_imported = ensure_datetime(
        ticker_latest_dates_imported.get(ticker, MIN_DATETIME)
    )
    if ensure_utc(latest_date_imported) <= utcnow() - timedelta(days=100):
        # More than 100 days worth, get the full history
        outputsize = "full"
    else:
        # Less than 100 days, compact will suffice
        outputsize = "compact"
    params = {
        "symbol": ticker,
        "outputsize": outputsize,
        "datatype": "csv",
        "function": "TIME_SERIES_DAILY_ADJUSTED",
    }
    return params
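# A quick hedged example (not from the source) of the params produced above for a
# ticker with no prior import state: the MIN_DATETIME fallback is more than 100 days
# stale, so the full price history is requested.
example_params = prepare_params_for_ticker("AAPL", {})
assert example_params == {
    "symbol": "AAPL",
    "outputsize": "full",
    "datatype": "csv",
    "function": "TIME_SERIES_DAILY_ADJUSTED",
}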
def import_observations(
    ctx: Context, api_key: str, series_id: str,
) -> Iterator[Records[FredObservation]]:
    latest_fetched_at = ctx.get_state_value("latest_fetched_at")
    if latest_fetched_at:
        # Two year curing window (to be safe)
        obs_start = ensure_datetime(latest_fetched_at) - timedelta(days=365 * 2)
    else:
        obs_start = MIN_DATE
    params = {
        "file_type": "json",
        "api_key": api_key,
        "series_id": series_id,
        "observation_start": obs_start,
        "offset": 0,
        "limit": 100000,
    }
    conn = JsonHttpApiConnection(date_format="%Y-%m-%d")
    endpoint_url = FRED_API_BASE_URL + "series/observations"
    while True:
        resp = conn.get(endpoint_url, params)
        json_resp = resp.json()
        assert isinstance(json_resp, dict)
        records = json_resp["observations"]
        if len(records) == 0:
            # All done
            break
        for r in records:
            # Add the series ID to the data so we know what the data is
            r["series_id"] = params["series_id"]
            # FRED quirk: returns an empty decimal number "." instead of null
            r["value"] = None if r["value"] == "." else r["value"]
        yield records
        num_returned = len(records)
        if num_returned < json_resp["limit"]:
            # We got back less than the limit, so we must be done (no other way to tell?)
            break
        params["offset"] += num_returned
    # We only update the date if we have fetched EVERYTHING available as of now
    ctx.emit_state_value("latest_fetched_at", utcnow())
def marketstack_import_tickers(
    ctx: Context, access_key: str, exchanges: List = ["XNYS", "XNAS"],
) -> Iterator[Records[MarketstackTicker]]:
    use_https = False  # TODO: when do we want this True?
    # default_from_date = ctx.get_param("from_date", MIN_DATE)
    assert access_key is not None
    assert isinstance(exchanges, list)
    last_imported_at = ensure_datetime(
        ctx.get_state_value("last_imported_at") or "2020-01-01 00:00:00"
    )
    assert last_imported_at is not None
    last_imported_at = ensure_utc(last_imported_at)
    if utcnow() - last_imported_at < timedelta(days=1):  # TODO: from config
        return
    conn = JsonHttpApiConnection()
    if use_https:
        endpoint_url = HTTPS_MARKETSTACK_API_BASE_URL + "tickers"
    else:
        endpoint_url = MARKETSTACK_API_BASE_URL + "tickers"
    for exchange in exchanges:
        params = {
            "limit": 1000,
            "offset": 0,
            "access_key": access_key,
            "exchange": exchange,
        }
        while ctx.should_continue():
            resp = conn.get(endpoint_url, params)
            json_resp = resp.json()
            assert isinstance(json_resp, dict)
            records = json_resp["data"]
            if len(records) == 0:
                # All done
                break
            # Add a flattened exchange indicator
            for r in records:
                r["exchange_acronym"] = r.get("stock_exchange", {}).get("acronym")
            yield records
            # Set up for the next page
            params["offset"] = params["offset"] + len(records)
    ctx.emit_state_value("last_imported_at", utcnow())
def import_order_products(
    ctx: Context,
    api_key: str,
    store_id: str,
    from_date: Optional[date] = None,
    to_date: Optional[date] = None,
) -> Iterator[Records[bigcommerce.BigCommerceOrderProduct]]:
    params = {
        "limit": 50,  # Keep small so we get good incremental progress on sub-calls
        "min_date_created": from_date,
        "max_date_created": to_date,
        "sort": "date_modified:asc",
    }
    latest_modified_date_imported = ctx.get_state_value("latest_modified_date_imported")
    latest_modified_date_imported = ensure_datetime(latest_modified_date_imported)
    if latest_modified_date_imported:
        params["min_date_modified"] = latest_modified_date_imported
    page = 1
    while ctx.should_continue():
        params["page"] = page
        resp = HttpApiConnection().get(
            url=f"{BIGCOMMERCE_API_BASE_URL}{store_id}/v2/orders",
            params=params,
            headers={"X-Auth-Token": api_key, "Accept": "application/json"},
        )
        # Check if there is anything left to process
        if resp.status_code == HTTPStatus.NO_CONTENT:
            break
        json_resp = resp.json()
        assert isinstance(json_resp, list)
        for order in json_resp:
            # Fetch the products for each order (rate-limited sub-call)
            resp = HttpApiConnection(ratelimit_params=dict(calls=30, period=10)).get(
                url=f"{BIGCOMMERCE_API_BASE_URL}{store_id}/v2/orders/{order['id']}/products",
                params={"limit": ENTRIES_PER_PAGE},
                headers={"X-Auth-Token": api_key, "Accept": "application/json"},
            )
            if resp.status_code == HTTPStatus.NO_CONTENT:
                continue
            # TODO: pagination, if order has more than LIMIT items
            order_products = resp.json()
            assert isinstance(order_products, list)
            yield order_products
        latest_modified_date_imported = max(
            [r.get("date_modified", latest_modified_date_imported) for r in json_resp]
        )
        ctx.emit_state_value(
            "latest_modified_date_imported", latest_modified_date_imported
        )
        page += 1
def alphavantage_import_company_overview(
    ctx: Context,
    tickers_input: Optional[Reference[Ticker]],
    api_key: str,
    tickers: Optional[List] = None,
) -> Iterator[Records[AlphavantageCompanyOverview]]:
    assert api_key is not None
    tickers = prepare_tickers(tickers, tickers_input)
    if tickers is None:
        # We didn't get an input block for tickers AND the config is empty,
        # so we are done
        return None
    ticker_latest_dates_imported = (
        ctx.get_state_value("ticker_latest_dates_imported") or {}
    )
    conn = JsonHttpApiConnection()
    batch_size = 50
    records = []
    tickers_updated = []

    def fetch_overview(params: Dict, tries: int = 0) -> Optional[Dict]:
        if tries > 2:
            return None
        resp = conn.get(ALPHAVANTAGE_API_BASE_URL, params)
        record = resp.json()
        # Alphavantage returns 200 and a json error message on failure
        if is_alphavantage_error(record):
            # TODO: Log this failure?
            # print(f"Error for ticker {params['symbol']}: {record}")
            return None
        if is_alphavantage_rate_limit(record):
            time.sleep(20)
            return fetch_overview(params, tries=tries + 1)
        return record

    for i, ticker in enumerate(tickers):
        assert isinstance(ticker, str)
        latest_date_imported = ensure_datetime(
            ticker_latest_dates_imported.get(ticker, MIN_DATETIME)
        )
        assert latest_date_imported is not None
        # Refresh at most once a day
        # TODO: make this configurable instead of hard-coded 1 day
        if utcnow() - ensure_utc(latest_date_imported) < timedelta(days=1):
            continue
        params = {
            "apikey": api_key,
            "symbol": ticker,
            "function": "OVERVIEW",
        }
        record = fetch_overview(params)
        if not record:
            continue
        # Clean up json keys to be more DB friendly
        record = {title_to_snake_case(k): v for k, v in record.items()}
        records.append(record)
        tickers_updated.append(ticker)
        if len(records) >= batch_size or i == len(tickers) - 1:
            yield records
            # Update state
            for updated_ticker in tickers_updated:
                ticker_latest_dates_imported[updated_ticker] = utcnow()
            ctx.emit_state_value(
                "ticker_latest_dates_imported", ticker_latest_dates_imported
            )
            if not ctx.should_continue():
                break
            records = []
            tickers_updated = []
def import_subscription_items(
    ctx: Context, api_key: str, curing_window_days: int = 90
) -> Iterator[Records[StripeSubscriptionItemRaw]]:
    """
    TODO: repeated code (duplicates the generic stripe_importer pagination,
    plus a nested fetch of subscription_items for each subscription)
    """
    latest_full_import_at = ctx.get_state_value("latest_full_import_at")
    latest_full_import_at = ensure_datetime(latest_full_import_at)
    current_starting_after = ctx.get_state_value("current_starting_after")
    params = {
        "limit": 100,
        "status": "all",
    }
    # if earliest_created_at_imported <= latest_full_import_at - timedelta(days=int(curing_window_days)):
    if latest_full_import_at and curing_window_days:
        # Import only records more recent than the latest import, offset by a curing
        # window (default 90 days) to capture updates to objects (refunds, etc)
        params["created[gte]"] = int(
            (latest_full_import_at - timedelta(days=int(curing_window_days))).timestamp()
        )
    if current_starting_after:
        params["starting_after"] = current_starting_after
    conn = JsonHttpApiConnection()
    endpoint_url = STRIPE_API_BASE_URL + "subscriptions"
    all_done = False
    while ctx.should_continue():
        resp = conn.get(endpoint_url, params, auth=HTTPBasicAuth(api_key, ""))
        json_resp = resp.json()
        assert isinstance(json_resp, dict)
        records = json_resp["data"]
        if len(records) == 0:
            # All done
            all_done = True
            break
        for record in records:
            item_params = {
                "limit": 100,
                "subscription": record["id"],
            }
            while True:
                items_url = STRIPE_API_BASE_URL + "subscription_items"
                items_resp = conn.get(
                    items_url, item_params, auth=HTTPBasicAuth(api_key, "")
                )
                items_json_resp = items_resp.json()
                assert isinstance(items_json_resp, dict)
                items = items_json_resp["data"]
                if len(items) == 0:
                    # All done
                    break
                yield items
                if not items_json_resp.get("has_more"):
                    break
                latest_item_id = items[-1]["id"]
                item_params["starting_after"] = latest_item_id
            if not ctx.should_continue():
                break
        latest_object_id = records[-1]["id"]
        if not json_resp.get("has_more"):
            all_done = True
            break
        params["starting_after"] = latest_object_id
        ctx.emit_state_value("current_starting_after", latest_object_id)
    else:
        # Don't update any state, we just timed out
        return
    # We only update state if we have fetched EVERYTHING available as of now
    if all_done:
        ctx.emit_state_value("latest_full_import_at", utcnow())
        # IMPORTANT: we reset the starting_after cursor so we start from the
        # beginning again on the next run
        ctx.emit_state_value("current_starting_after", None)
def stripe_importer(
    endpoint: str,
    ctx: Context,
    api_key: str,
    curing_window_days: Optional[int] = None,
    extra_params: Optional[Dict] = None,
):
    """
    Stripe only allows fetching records in one order: from newest to oldest, so we
    use its cursor-based pagination to iterate once through all historical records.
    Stripe doesn't have a way to request by "updated at" times, so we must refresh
    old records according to our own logic, using a "curing window" to re-import
    records up to one year (the default) old.
    """
    latest_full_import_at = ctx.get_state_value("latest_full_import_at")
    latest_full_import_at = ensure_datetime(latest_full_import_at)
    current_starting_after = ctx.get_state_value("current_starting_after")
    params = {
        "limit": 100,
    }
    if extra_params:
        params.update(extra_params)
    # if earliest_created_at_imported <= latest_full_import_at - timedelta(days=int(curing_window_days)):
    if latest_full_import_at and curing_window_days:
        # Import only records more recent than the latest import, offset by a curing
        # window (default 90 days) to capture updates to objects (refunds, etc)
        params["created[gte]"] = int(
            (latest_full_import_at - timedelta(days=int(curing_window_days))).timestamp()
        )
    if current_starting_after:
        params["starting_after"] = current_starting_after
    conn = JsonHttpApiConnection()
    endpoint_url = STRIPE_API_BASE_URL + endpoint
    all_done = False
    while ctx.should_continue():
        resp = conn.get(endpoint_url, params, auth=HTTPBasicAuth(api_key, ""))
        json_resp = resp.json()
        assert isinstance(json_resp, dict)
        records = json_resp["data"]
        if len(records) == 0:
            # All done
            all_done = True
            break
        # Return actual data
        yield records
        latest_object_id = records[-1]["id"]
        if not json_resp.get("has_more"):
            all_done = True
            break
        params["starting_after"] = latest_object_id
        ctx.emit_state_value("current_starting_after", latest_object_id)
    else:
        # Don't update any state, we just timed out
        return
    # We only update state if we have fetched EVERYTHING available as of now
    if all_done:
        ctx.emit_state_value("latest_full_import_at", utcnow())
        # IMPORTANT: we reset the starting_after cursor so we start from the
        # beginning again on the next run
        ctx.emit_state_value("current_starting_after", None)
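# A minimal sketch (not from the source) of how a concrete endpoint importer could
# delegate to the generic stripe_importer above. The import_charges name and the
# StripeChargeRaw schema are assumptions for illustration; only stripe_importer's
# signature is taken from the code above.
def import_charges(
    ctx: Context, api_key: str, curing_window_days: int = 90
) -> Iterator[Records[StripeChargeRaw]]:
    # "charges" is appended to STRIPE_API_BASE_URL inside stripe_importer
    yield from stripe_importer("charges", ctx, api_key, curing_window_days)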