def clean_record(
    record: Dict[str, Any], ensure_keys_snake_case: bool = True
) -> Dict[str, Any]:
    """Return a new dict with every value of ``record`` run through
    ``process_raw_value``.

    When ``ensure_keys_snake_case`` is true (the default) each key is also
    passed through ``title_to_snake_case``; otherwise keys are kept as given.
    The input mapping is not modified.
    """
    cleaned: Dict[str, Any] = {}
    for raw_key, raw_value in record.items():
        # Key first, then value -- same evaluation order as a dict display.
        out_key = title_to_snake_case(raw_key) if ensure_keys_snake_case else raw_key
        cleaned[out_key] = process_raw_value(raw_value)
    return cleaned
def dict_to_rough_schema(name: str, d: Dict, convert_to_snake_case=True, **kwargs):
    """Build a quick schema named ``name`` from a single example dict ``d``.

    Each key becomes a field name (run through ``title_to_snake_case`` unless
    ``convert_to_snake_case`` is false). Each field type is whatever
    ``pandas_series_to_field_type`` reports for a one-element Series holding
    the corresponding value. The ``(name, type)`` pairs are sorted before
    being handed to ``create_quick_schema`` along with any extra ``kwargs``.
    """

    def field_name(key):
        # Normalise the key only when the caller asked for it.
        return title_to_snake_case(key) if convert_to_snake_case else key

    pairs = [
        (field_name(key), pandas_series_to_field_type(pd.Series([value])))
        for key, value in d.items()
    ]
    pairs.sort()
    return create_quick_schema(name, pairs, **kwargs)
def test_snake_and_title_cases():
    """Check ``snake_to_title_case`` and ``title_to_snake_case`` against
    fixed input/expected-output pairs, in both directions."""
    # snake -> title: leading, trailing and doubled underscores are expected
    # to vanish, and the empty string maps to itself.
    snake_to_title_expectations = [
        ("_hello_world", "HelloWorld"),
        ("_hello__world", "HelloWorld"),
        ("_hello__world_", "HelloWorld"),
        ("_hello_world_goodbye", "HelloWorldGoodbye"),
        ("hello", "Hello"),
        ("", ""),
    ]
    for snake, expected_title in snake_to_title_expectations:
        assert snake_to_title_case(snake) == expected_title

    # title -> snake: covers camelCase, all-caps runs and embedded digits.
    title_to_snake_expectations = [
        ("HelloWorld", "hello_world"),
        ("Hello", "hello"),
        ("hello", "hello"),
        ("HELLO", "hello"),
        ("helloWorld", "hello_world"),
        ("helloWorldGoodbye", "hello_world_goodbye"),
        ("HELLOWorldGoodbye", "hello_world_goodbye"),
        ("HELLOWorld98Goodbye", "hello_world_98_goodbye"),
        ("HELLOWorld98goodbye", "hello_world_98goodbye"),
    ]
    for title, expected_snake in title_to_snake_expectations:
        assert title_to_snake_case(title) == expected_snake
def __tablename__(cls):
    """Return the table name for ``cls``: the snapflow metadata table prefix
    followed by the snake_cased class name."""
    snake_name = title_to_snake_case(cls.__name__)  # type: ignore
    return SNAPFLOW_METADATA_TABLE_PREFIX + snake_name
def alphavantage_import_company_overview(
    ctx: Context,
    tickers_input: Optional[Reference[Ticker]],
    api_key: str,
    tickers: Optional[List] = None,
) -> Iterator[Records[AlphavantageCompanyOverview]]:
    """Fetch Alphavantage "OVERVIEW" records for a set of tickers, in batches.

    Tickers are resolved with ``prepare_tickers(tickers, tickers_input)``; if
    that yields ``None`` the generator finishes without producing anything.
    A ticker whose last recorded import is less than one day old is skipped.
    Each fetched record has its keys converted with ``title_to_snake_case``.

    Records are yielded as lists of up to ``batch_size`` (50) dicts. After
    the consumer resumes the generator following a yield, the import time of
    every ticker in that batch is stored under the
    ``"ticker_latest_dates_imported"`` state key. A trailing partial batch is
    always flushed, even when the final tickers in the list were skipped or
    failed to fetch.

    Args:
        ctx: Execution context; used for reading/emitting state and for
            ``should_continue()`` checks between batches.
        tickers_input: Optional input block of tickers.
        api_key: Alphavantage API key (must not be ``None``).
        tickers: Optional explicit list of ticker symbols.

    Yields:
        Lists of overview records with snake_case keys.
    """
    assert api_key is not None
    tickers = prepare_tickers(tickers, tickers_input)
    if tickers is None:
        # We didn't get an input block for tickers AND
        # the config is empty, so we are done
        return
    ticker_latest_dates_imported = (
        ctx.get_state_value("ticker_latest_dates_imported") or {}
    )
    conn = JsonHttpApiConnection()
    batch_size = 50
    max_tries = 3
    rate_limit_pause_seconds = 20

    def fetch_overview(params: Dict) -> Optional[Dict]:
        """Request one overview, retrying on rate limits; None on failure."""
        for _ in range(max_tries):
            resp = conn.get(ALPHAVANTAGE_API_BASE_URL, params)
            record = resp.json()
            # Alphavantage returns 200 and json error message on failure
            if is_alphavantage_error(record):
                # TODO: Log this failure?
                return None
            if not is_alphavantage_rate_limit(record):
                return record
            # Rate limited: back off before the next attempt.
            time.sleep(rate_limit_pause_seconds)
        return None

    def save_progress(updated: List[str]) -> None:
        """Record the import time for ``updated`` tickers and emit the state."""
        imported_at = utcnow()
        for updated_ticker in updated:
            ticker_latest_dates_imported[updated_ticker] = imported_at
        ctx.emit_state_value(
            "ticker_latest_dates_imported", ticker_latest_dates_imported
        )

    records = []
    tickers_updated = []
    for ticker in tickers:
        assert isinstance(ticker, str)
        latest_date_imported = ensure_datetime(
            ticker_latest_dates_imported.get(ticker, MIN_DATETIME)
        )
        assert latest_date_imported is not None
        # Refresh at most once a day
        # TODO: make this configurable instead of hard-coded 1 day
        if utcnow() - ensure_utc(latest_date_imported) < timedelta(days=1):
            continue
        params = {
            "apikey": api_key,
            "symbol": ticker,
            "function": "OVERVIEW",
        }
        record = fetch_overview(params)
        if not record:
            continue
        # Clean up json keys to be more DB friendly
        record = {title_to_snake_case(k): v for k, v in record.items()}
        records.append(record)
        tickers_updated.append(ticker)
        if len(records) >= batch_size:
            yield records
            save_progress(tickers_updated)
            if not ctx.should_continue():
                return
            # Rebind rather than clear: the consumer still holds the
            # list that was just yielded.
            records = []
            tickers_updated = []

    # Flush whatever is left. Doing this after the loop (instead of checking
    # for the last index inside it) ensures the remainder is not lost when
    # the final ticker was skipped or its fetch failed.
    if records:
        yield records
        save_progress(tickers_updated)