def store_fundamentals(data, collection: pymongo.collection.Collection):
    """Upsert fundamentals documents into `collection`, keyed by instrument id.

    Each datum's "instrument" URL is parsed into an `instrument_id`, which
    replaces the raw URL in the stored document and serves as the upsert key,
    so repeated scrapes refresh documents rather than duplicating them.

    Args:
        data: Iterable of fundamentals dicts; falsy entries are skipped.
        collection: Target MongoDB collection.
    """
    if not data:
        return

    for datum in data:
        if not datum:
            continue

        instrument_id = parse_instrument_url(datum["instrument"])
        doc = {**omit("instrument", datum), "instrument_id": instrument_id}
        try:
            collection.replace_one({"instrument_id": instrument_id}, doc, upsert=True)
        except pymongo.errors.PyMongoError as e:
            # Best-effort: report the failure and continue with remaining data.
            print(f"Error storing fundamentals: {e}")
def try_update_instrument(index_coll, instrument_datum: dict):
    """Upsert an instrument document into the index collection.

    The stored document is keyed by "instrument_id" (copied from the datum's
    "id" field).  If the upsert hits a duplicate-key conflict, any existing
    documents sharing this instrument's symbol are deleted and the new
    document is inserted in their place.
    """
    instrument_id = instrument_datum["id"]
    new_doc = {**omit("id", instrument_datum), "instrument_id": instrument_id}
    try:
        index_coll.replace_one({"instrument_id": instrument_id}, new_doc, True)
    except pymongo.errors.DuplicateKeyError:
        # Another document with the same symbol must already exist in the
        # collection; drop it and store this one instead.
        symbol = instrument_datum["symbol"]
        res = index_coll.delete_many({"symbol": symbol})
        if res.deleted_count == 0:
            # Conflict reported but nothing matched the symbol — surface it.
            print((
                f"WARN: Duplicate key error for symbol {symbol} "
                f"(id {instrument_id}) but no other entry with that symbol in the "
                "index collection"))
        else:
            index_coll.insert_one(new_doc)
            print(
                f"Handled symbol conflict; re-trying update for instrument {instrument_id}"
            )
def cli(rabbitmq_host: str, rabbitmq_port: int, scraper_request_cooldown_seconds: float):
    """Scrape all tradable instruments from Robinhood and publish them.

    Walks the paginated ``/instruments/`` endpoint, stores each tradable
    instrument in the Mongo ``index`` collection, and publishes symbols and
    instrument ids to RabbitMQ in batches of 20, finishing each queue with a
    ``__DONE`` sentinel.  Honors the API's throttle cooldowns.

    Args:
        rabbitmq_host: RabbitMQ server hostname.
        rabbitmq_port: RabbitMQ server port.
        scraper_request_cooldown_seconds: Delay between successive page fetches.
    """
    print("init rabbitmq connection")
    rabbitmq_connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=rabbitmq_host, port=rabbitmq_port))
    rabbitmq_channel = rabbitmq_connection.channel()
    rabbitmq_channel.queue_declare(queue="instrument_ids")
    print("rabbitmq connection init'd")

    # Lock and flush the existing cache
    print("Locking the cache in preparation for update...")
    set_update_started()

    trader = Robinhood()
    url = "https://api.robinhood.com/instruments/"
    res = trader.get_url(url)

    db = get_db()
    index_coll = db["index"]
    index_coll.create_index("instrument_id", unique=True)

    total_ids = 0
    quotes = []
    instrument_ids = []
    while True:
        if res.get("detail"):
            # BUG FIX: a throttled response carries only "detail" (no
            # "results"), so it must be handled before we touch the payload,
            # and the request must be re-issued after the cooldown —
            # previously the same throttled response was reprocessed forever.
            cooldown_seconds = parse_throttle_res(res["detail"])
            print(
                "Instruments fetch request failed; waiting for {} second cooldown..."
                .format(cooldown_seconds))
            sleep(cooldown_seconds)
            res = trader.get_url(url)
            continue

        fetched_instruments: List[Dict[str, str]] = res["results"]
        tradable_instruments = get_tradable_instruments(fetched_instruments)
        total_ids += len(tradable_instruments)

        # BUG FIX: iterate the list we just built (the original referenced
        # the undefined name `tradable_instrument_ids`).
        for instrument_datum in tradable_instruments:
            try:
                index_coll.insert_one({
                    **omit("id", instrument_datum),
                    "instrument_id": instrument_datum["id"]
                })
            except pymongo.errors.DuplicateKeyError:
                # Already indexed from a previous run/page; skip.
                pass

            # BUG FIX: take the id and symbol from the current datum (the
            # originals `instrument_id` and `symbol` were undefined here).
            instrument_ids.append(instrument_datum["id"])
            quotes.append(instrument_datum["symbol"])

            # Publish in batches of 20 to keep message sizes reasonable.
            if len(quotes) == 20:
                rabbitmq_channel.basic_publish(exchange="",
                                               routing_key="symbols",
                                               body=",".join(quotes))
                rabbitmq_channel.basic_publish(exchange="",
                                               routing_key="instrument_ids",
                                               body=",".join(instrument_ids))
                quotes = []
                instrument_ids = []

        if res.get("next"):
            # There are more instruments to scrape.  Wait for the standard
            # cooldown and then continue by fetching the next request url.
            sleep(scraper_request_cooldown_seconds)
            url = res["next"]
            res = trader.get_url(url)
        else:
            # We're done scraping; flush any partial batch.
            rabbitmq_channel.basic_publish(exchange="",
                                           routing_key="symbols",
                                           body=",".join(quotes))
            rabbitmq_channel.basic_publish(exchange="",
                                           routing_key="instrument_ids",
                                           body=",".join(instrument_ids))

            # Publish a finished message over the channels to indicate that
            # there are no more items to process in this run.
            rabbitmq_channel.basic_publish(exchange="",
                                           routing_key="symbols",
                                           body="__DONE")
            rabbitmq_channel.basic_publish(exchange="",
                                           routing_key="instrument_ids",
                                           body="__DONE")

            # Mark the instrument scrape as finished
            set_instruments_finished()

            print(
                "Finished scraping; fetched a total of {} tradable instrument IDs."
                .format(total_ids))
            break

    rabbitmq_connection.close()