def update_regional_trends_collection():
    """Upsert one trends document per region into the regional-trends collection.

    Loads the regional CSV, preprocesses it, and for every region in
    ``REGIONS`` upserts a ``{REGION_KEY, trends}`` document built with
    ``build_national_trends``.

    Returns:
        dict: ``{"status", "regions", "updated", "errors", "n_docs", "msg"}``
        where ``regions`` lists the regions whose document actually changed.
    """
    n_docs = 0
    response = {"status": "ko", "regions": [], "updated": False, "errors": []}
    try:
        df = load_df(URL_REGIONAL)
        df = preprocess_regional_df(df)
        for r in REGIONS:
            _filter = {REGION_KEY: r}
            update = {
                "$set": {
                    REGION_KEY: r,
                    "trends": build_national_trends(df[df[REGION_KEY] == r])
                }
            }
            results = reg_trends_coll.update_one(_filter, update, upsert=True)
            if results.modified_count:
                n_docs += 1
                response["regions"].append(r)
        response["n_docs"] = n_docs
        # Fix: "updated" now reflects whether any document actually changed.
        # Previously it copied the last write's `acknowledged` flag, which is
        # True even for no-op updates (making the "Nothing to update" branch
        # unreachable) and raised NameError when REGIONS was empty.
        response["updated"] = n_docs > 0
        if response["updated"]:
            msg = f"{n_docs} docs updated in {reg_trends_coll.name}"
        else:
            msg = f"Nothing to update in {reg_trends_coll.name}"
        response["msg"] = msg
        # Fix: "ok" was previously set only on the nothing-to-update branch,
        # so a successful update was reported as "ko". Match the other
        # update_* functions, which report "ok" on every non-error path.
        response["status"] = "ok"
        app.logger.warning(msg)
    except Exception as e:
        response["errors"].append(f"{e}")
        app.logger.error(f"{e}")
    return response
def update_regional_breakdown_collection():
    """Upsert the single regional-breakdown document.

    Loads the regional CSV, preprocesses it, builds the breakdown with
    ``build_regional_breakdown``, and writes it into the (single-document)
    breakdown collection.

    Returns:
        dict: ``{"status", "updated", "errors"}`` plus ``"msg"`` describing
        the outcome.
    """
    response = {"status": "ko", "updated": False, "errors": []}
    try:
        df = load_df(URL_REGIONAL)
        df = preprocess_regional_df(df)
        breakdown = build_regional_breakdown(df)
        # Fix: the previous code fetched an existing doc via find().next()
        # just to obtain its _id, so an empty collection raised StopIteration
        # and the upsert never ran despite upsert=True being requested.
        # Since the collection holds a single document, an empty filter with
        # upsert=True handles both the update and the first-insert case.
        res = reg_bdown_coll.update_one({}, {"$set": breakdown}, upsert=True)
        msg = f"Updated regional breakdown in {reg_bdown_coll.name}"
        response["updated"], response["msg"] = res.acknowledged, msg
        response["status"] = "ok"
        app.logger.warning(msg)
    except Exception as e:
        response["errors"].append(f"{e}")
        app.logger.error(f"{e}")
    return response
def update_regional_collection():
    """Append any missing daily records to the regional-data collection.

    Compares the latest date in the remote CSV against the newest document
    in the collection; if the CSV is ahead, inserts only the newer records.

    Returns:
        dict: ``{"status", "updated", "errors", "msg", "n_docs"}`` where
        ``n_docs`` is the number of documents inserted (0 when up-to-date).
    """
    inserted_ids = []
    response = {"status": "ko", "updated": False, "errors": [], "msg": ""}
    try:
        df = load_df(URL_REGIONAL)
        df = preprocess_regional_df(df)
        latest_dt = df[DATE_KEY].max()
        # Newest document already stored; cursor.next() raising on an empty
        # collection is caught by the outer handler and reported in "errors".
        cursor = reg_data_coll.find().sort(DATE_KEY, -1).limit(1)
        latest_dt_db = cursor.next()[DATE_KEY]
        if latest_dt.date() == latest_dt_db.date():
            msg = "DB up-to-date"
            app.logger.warning(msg)
            # Fix: this path previously left status "ko" and msg "" even
            # though nothing failed; report success like the sibling
            # update_* functions do.
            response["status"], response["msg"] = "ok", msg
        else:
            # Keep only the rows newer than what the DB already has.
            df = df[df[DATE_KEY] > latest_dt_db]
            msg = f"Latest data missing in {reg_data_coll.name} ! Updating..."
            app.logger.warning(msg)
            new_records = df.to_dict(orient='records')
            r = reg_data_coll.insert_many(new_records, ordered=True)
            inserted_ids.extend(r.inserted_ids)
            msg = f"{len(inserted_ids)} docs updated in {reg_data_coll.name}"
            response["updated"] = True
            response["status"], response["msg"] = "ok", msg
            app.logger.warning(msg)
    except Exception as e:
        err = f"{e}"
        app.logger.error(err)
        response["errors"].append(err)
    response["n_docs"] = len(inserted_ids)
    return response
def create_regional_collection():
    """Drop and recreate the regional data collection from the remote CSV."""
    raw = pd.read_csv(URL_REGIONAL, parse_dates=[DATE_KEY])
    raw.drop(columns=COLUMNS_TO_DROP, inplace=True)
    augmented = preprocess_regional_df(raw)
    records = augmented.to_dict(orient='records')
    try:
        app.logger.info("Creating regional collection")
        reg_data_coll.drop()
        reg_data_coll.insert_many(records, ordered=True)
    except Exception as err:
        app.logger.error(err)
def create_regional_breakdown_collection():
    """Drop and recreate the regional breakdown data collection."""
    raw = pd.read_csv(
        URL_REGIONAL, parse_dates=[DATE_KEY], low_memory=False)
    raw.drop(columns=COLUMNS_TO_DROP, inplace=True)
    augmented = preprocess_regional_df(raw)
    breakdown = build_regional_breakdown(augmented)
    try:
        app.logger.info("Creating regional breakdown collection")
        reg_bdown_coll.drop()
        reg_bdown_coll.insert_one(breakdown)
    except Exception as err:
        app.logger.error(err)