def build_replay_info(
        rp_gen: Generator = REPLAY_GEN,
        db_collection: pymongo.collection.Collection = replays_info) -> bool:
    '''
    Triggers the search for new replays at CONFIG.replay_path. Adds the
    descriptive information of the replays to a data collection within a
    MongoDB database, if they are not in the database already.

    *Args:*
        - rp_gen (Generator = REPLAY_GEN): sc2reader.resources.Replay
          generator that yields the replays found in the CONFIG.replay_path.
        - db_collection (pymongo.collection.Collection = replays_info):
          the collection where the function adds the new documents.

    *Returns:*
        - bool: True if new replays were found and added to the replay_info
          collection, False otherwise.
    '''
    replays_data_set = [
        asdict(replay_data)
        for replay_data in get_replays_data_set(rp_gen, db_collection)
        if replay_data is not None
    ]

    if replays_data_set:
        db_collection.insert_many(replays_data_set)
        return True
    else:
        print(f'No new replays at {CONFIG.replay_path}')
        return False
def insertion(collection: pymongo.collection.Collection, candles: List[Dict]):
    try:
        collection.insert_many(candles, ordered=False)
    except pymongo.errors.BulkWriteError as e:
        # Tolerate duplicate-key errors (code 11000); re-raise anything else.
        panic_list = list(
            filter(lambda x: x['code'] != 11000, e.details['writeErrors']))
        if len(panic_list) > 0:
            raise e
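# A minimal usage sketch of the duplicate-tolerant insert above. The client URI,
# database/collection names, and sample candle documents are assumptions for
# illustration; only the pymongo calls themselves are real API.
import pymongo

client = pymongo.MongoClient("mongodb://localhost:27017")
candles_coll = client["market_data"]["candles"]

sample_candles = [
    {"_id": "BTCUSD-2021-01-01T00:00", "open": 29000.0, "close": 29350.5},
    {"_id": "BTCUSD-2021-01-01T00:01", "open": 29350.5, "close": 29410.0},
]

# Inserting the same batch twice only raises for non-duplicate write errors:
# the second call hits code 11000 on every document and is silently ignored.
insertion(candles_coll, sample_candles)
insertion(candles_coll, sample_candles)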
def add_users(coll: pymongo.collection.Collection, ids: list):
    users = [{
        "_id": _id,
        "user": _id,
        "role": [],
        "workpans": [],
        "created": datetime.now().replace(microsecond=0),
        "type": "user",
        "email_validated": "no",
    } for _id in ids if isinstance(_id, ObjectId)]
    coll.insert_many(users)
    return ids
def store_quotes(quotes: list, collection: pymongo.collection.Collection):
    """ Creates entries in the database for the provided quotes. """

    def map_quote(quote: dict) -> dict:
        instrument_id = parse_instrument_url(quote["instrument"])
        plucked = {
            "instrument_id": instrument_id,
            **pluck(DESIRED_QUOTE_KEYS, quote)
        }
        plucked["updated_at"] = parse_updated_at(plucked["updated_at"])
        return plucked

    quotes = list(filter(lambda quote: quote is not None, quotes))

    def format_quote(quote: dict) -> dict:
        return {
            "symbol": quote["symbol"],
            "bid": quote["bid_price"],
            "ask": quote["ask_price"]
        }

    pprint(list(map(format_quote, quotes)))

    # Update the index collection with up-to-date tradability info
    timestamp = datetime.datetime.utcnow()

    def update_index_symbol(datum: dict) -> pymongo.operations.UpdateOne:
        data = {
            "timestamp": timestamp,
            "has_traded": datum.get("has_traded"),
            "updated_at": parse_updated_at(datum.get("updated_at")),
            "trading_halted": datum.get("trading_halted"),
        }
        instrument_id = parse_instrument_url(datum["instrument"])
        return pymongo.operations.UpdateOne({"instrument_id": instrument_id},
                                            {"$set": data})

    ops = list(map(update_index_symbol, quotes))
    INDEX_COL.bulk_write(ops, ordered=False)

    quotes = list(map(map_quote, quotes))
    try:
        collection.insert_many(quotes, ordered=False)
    except BulkWriteError as bwe:
        for err in bwe.details["writeErrors"]:
            if "duplicate key" not in err["errmsg"]:
                print("ERROR: Unhandled exception occurred during batch write:")
                pprint(err)
def store_popularities(popularity_map: dict, collection: pymongo.collection.Collection):
    """ Creates an entry in the database for each instrument's popularity. """
    timestamp = datetime.datetime.utcnow()
    pprint(popularity_map)

    mapped_documents = map(
        lambda key: {
            "timestamp": timestamp,
            "instrument_id": key,
            "popularity": popularity_map[key],
        },
        popularity_map.keys(),
    )

    collection.insert_many(mapped_documents)
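# A minimal sketch of calling store_popularities above. The database/collection
# names and the instrument-id -> popularity mapping are illustrative
# assumptions, not values from the original code.
import pymongo

popularity_coll = pymongo.MongoClient()["quotes_db"]["popularity"]

store_popularities(
    {"450dfc6d-5510-4d40-abfb-f633b7d9be3e": 12345,
     "50810c35-d215-4866-9758-0ada4ac79ffa": 678},
    popularity_coll,
)
# Each key becomes one document of the form:
# {"timestamp": <utcnow>, "instrument_id": "<key>", "popularity": <value>}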
def import_from_csv_into_collection(collection: pymongo.collection.Collection,
                                    filename: Union[str, bytes]) -> None:
    """Imports data into the specified collection from the specified CSV file.

    File existence is checked. The collection is cleared before the new data
    are imported into it. The function uses 'utf-8-sig' encoding, which can
    deal with the '\ufeff' character if it appears in the CSV file.

    :param collection: pymongo collection to import data into;
    :param filename: CSV file name to import data from;
    :return: None
    """
    exit_if_file_does_not_exist(filename)

    with open(file=filename, mode="rt", encoding='utf-8-sig') as file_handle:
        reader = csv.DictReader(file_handle)
        collection.delete_many({})
        collection.insert_many(reader)
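# Usage sketch for import_from_csv_into_collection above. The file path and
# database/collection names are placeholders; csv.DictReader yields one dict
# per row, so insert_many stores one document per CSV row.
import pymongo

client = pymongo.MongoClient()
users_coll = client["app_db"]["users"]

# Given users.csv with a header row such as "name,email", every data row is
# inserted as {"name": ..., "email": ...} after the collection is cleared.
import_from_csv_into_collection(users_coll, "users.csv")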
def mongodb_put_array(
    array: np.ndarray,
    coll: pymongo.collection.Collection,
    meta_id: ObjectId,
    name: str,
    chunk: tuple,
    chunk_size_bytes: int,
) -> None:
    """Insert a single chunk into MongoDB"""
    docs = array_to_docs(
        array,
        meta_id=meta_id,
        name=name,
        chunk=chunk,
        chunk_size_bytes=chunk_size_bytes,
    )
    assert docs
    coll.insert_many(docs)
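# array_to_docs is not shown above; this is a hypothetical sketch of what such
# a helper could look like, assuming it splits the raw array bytes into pieces
# of at most chunk_size_bytes and tags each piece with the metadata identifiers.
# The field names here are illustrative, not the original schema.
from typing import List
import numpy as np
from bson import ObjectId


def array_to_docs(array: np.ndarray, *, meta_id: ObjectId, name: str,
                  chunk: tuple, chunk_size_bytes: int) -> List[dict]:
    raw = array.tobytes()
    return [
        {
            "meta_id": meta_id,
            "name": name,
            "chunk": list(chunk),   # which logical chunk of the array this is
            "part": i,              # ordering of byte segments within the chunk
            "data": raw[offset:offset + chunk_size_bytes],
            "dtype": str(array.dtype),
            "shape": list(array.shape),
        }
        for i, offset in enumerate(range(0, len(raw), chunk_size_bytes))
    ]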
def record_json_to_mongodb(json_data: list,
                           collection: pymongo.collection.Collection,
                           logger: logging.Logger) -> bool:
    """ Records JSON data to MongoDB """
    stage_name = "MONGODB"
    result = collection.insert_many(json_data)
    if result.acknowledged:
        logger.info(f"{stage_name} - Recorded {len(json_data)} new results. "
                    f"Overall documents count - {collection.count_documents({})}")
        logger.debug(f"{stage_name} - Newly recorded IDS: "
                     f"{', '.join([str(id) for id in result.inserted_ids])}")
        return True
    else:
        logger.error(f"{stage_name} - JSON was not recorded to DB")
        return False
def upload_table(
    df: pd.DataFrame, collection: pymongo.collection.Collection, **kwargs
) -> pd.Series:
    """
    Uploads a DataFrame to a MongoDB collection.

    Parameters
    ----------
    df
        DataFrame whose rows become the inserted documents.
    collection
        Target pymongo collection.
    kwargs
        Extra keyword arguments forwarded to ``insert_many``.

    Returns
    -------
    Series with the inserted document IDs.
    """
    documents = df.to_dict("records")
    result = collection.insert_many(documents, **kwargs)
    return pd.Series(result.inserted_ids)
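# Usage sketch for upload_table above. The DataFrame contents and collection
# names are placeholders; each row becomes one document via to_dict("records"),
# and extra keyword arguments are forwarded to insert_many (e.g. ordered=False).
import pandas as pd
import pymongo

coll = pymongo.MongoClient()["analytics"]["prices"]
df = pd.DataFrame({"ticker": ["AAPL", "MSFT"], "close": [189.7, 421.3]})

inserted_ids = upload_table(df, coll, ordered=False)
print(inserted_ids)  # pandas Series of the newly assigned ObjectIds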
def record_json_to_mongodb(json_data: list,
                           collection: pymongo.collection.Collection,
                           max_retries: int,
                           logger: logging.Logger) -> Optional[bool]:
    """ Records JSON data to MongoDB """
    stage_name = "MONGODB"
    try_number = 0
    while True:
        result = collection.insert_many(json_data)
        if result.acknowledged:
            logger.info(
                f"{stage_name} - Recorded {len(json_data)} new results. "
                f"Overall documents count - {collection.count_documents({})}")
            logger.debug(
                f"{stage_name} - Newly recorded IDS: "
                f"{', '.join([str(id) for id in result.inserted_ids])}")
            return True
        else:
            try_number += 1
            err = f"{stage_name} - JSON was not recorded to DB, result is not acknowledged"
            retry(stage_name, try_number, max_retries, err, logger)
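# The retry helper used above is not shown. A minimal sketch of one possible
# implementation, assuming it sleeps between attempts and raises once
# max_retries is exhausted; the backoff interval is an illustrative choice.
import logging
import time


def retry(stage_name: str, try_number: int, max_retries: int, err: str,
          logger: logging.Logger, delay_seconds: float = 5.0) -> None:
    if try_number > max_retries:
        logger.error(f"{stage_name} - giving up after {max_retries} retries: {err}")
        raise RuntimeError(err)
    logger.warning(f"{stage_name} - attempt {try_number} failed ({err}), "
                   f"retrying in {delay_seconds}s")
    time.sleep(delay_seconds)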
def build_graph(users: list, edges: pymongo.collection.Collection, db=None):
    client = GraphClient(settings.graph_server_credits)
    for user in users:
        client.add_vertex(user.id)

    curr_edge = 0
    counter = 0
    buffer = []
    buff_ms = 800  # flush the edge buffer to MongoDB every 800 documents
    try:
        for p1, p2 in itertools.combinations(users, 2):
            features = heuristic.find_relations(p1, p2, deep=False, db=None)
            if len(features) != 0:
                weight = 0
                for feature in features:
                    weight += feature['plus_w']
                client.add_edge(p1.id, p2.id, curr_edge, weight)
                buffer.append({
                    'eid': curr_edge,
                    'features': features[::],
                    'vid_pair': sorted([p1.id, p2.id])
                })
                curr_edge += 1
                if len(buffer) == buff_ms:
                    edges.insert_many(buffer)
                    buffer[:] = []
            counter += 1
            if counter % 10000 == 0:
                print('Pairs:', counter)
                print('Curr edge:', curr_edge)
        if buffer:
            edges.insert_many(buffer)
    except Exception as e:
        # Flush whatever is buffered before reporting the failure; guard against
        # insert_many being called with an empty list.
        if buffer:
            edges.insert_many(buffer)
        print(e)
    return curr_edge