def extract_data(export=False):
    """Extract all the data needed for the vbooks report"""

    df = get_books()
    df_todo = get_todo()

    out = {
        "dashboard": get_dashboard(df),
        "year_by_category": get_year_data(df),
        "month_by_category": get_month_data(df),
        "colors": {name: get_colors(data) for name, data in c.COLORS.items()},
    }

    # Add percents
    data = out["year_by_category"]
    out["year_percent"] = get_year_percent(data, cumsum=False)
    out["year_percent_cumsum"] = get_year_percent(data, cumsum=True)

    # Extract totals
    out["year"] = out["year_by_category"].pop("Total")
    out["month"] = out["month_by_category"].pop("Total")

    # Top authors
    out["top_authors"] = get_top(df, groupby=c.COL_AUTHOR)

    # TODO section
    out["todo_by_author"] = get_top(df_todo, c.COL_AUTHOR)
    out["todo_by_source"] = get_top(df_todo, c.COL_SOURCE)
    out["todo_by_ownership"] = get_top(df_todo, c.COL_OWNED)

    if export:
        u.get_vdropbox().write_yaml(out, f"{c.PATH_VBOOKS}/report_data.yaml")

    return out
def merge_flights_history(mdate):
    """Merge all daily flight parquets into one parquet per month"""

    vdp = get_vdropbox()

    # Check for monthly folders and get all parquets inside
    for folder in vdp.ls(c.PATH_HISTORY):
        is_date_folder = re.search(r"\d{4}_\d{2}", folder)

        # Only process folders (not files) from months prior to 'mdate'
        if is_date_folder and ("." not in folder) and (folder < f"{mdate:%Y_%m}"):
            log.info(f"Merging '{folder}' vflights history")

            sub_folder = f"{c.PATH_HISTORY}/{folder}"

            # Read all daily parquets
            dfs = []
            for file in vdp.ls(sub_folder):
                if file.endswith(".parquet"):
                    dfs.append(vdp.read_parquet(f"{sub_folder}/{file}"))

            # Export everything as a single parquet file
            df = pd.concat(dfs)
            vdp.write_parquet(df, f"{sub_folder}.parquet")
            log.success(f"Successfully merged '{folder}' vflights history")

            # Delete the original folder
            vdp.delete(sub_folder)
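# Illustrative sketch (not part of the original module): the filter above works
# because zero-padded "YYYY_MM" folder names sort lexicographically in
# chronological order, e.g. "2021_09" < "2021_10" < "2022_01". A stricter
# standalone predicate with the same intent, name hypothetical:
def _is_past_month_folder(folder, mdate):
    """True for 'YYYY_MM' folders strictly before the month of 'mdate'"""
    return bool(re.fullmatch(r"\d{4}_\d{2}", folder)) and folder < f"{mdate:%Y_%m}"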
def main(mdate=None, data=None):
    """Creates the expensor report"""

    # Avoid 'datetime.now()' as a default argument: it would be evaluated only
    # once, at import time, and become stale in long-lived processes
    if mdate is None:
        mdate = datetime.now()
    mdate = mdate.replace(day=1)

    vdp = u.get_vdropbox()

    # Read data
    if data is None:
        log.debug("Reading report_data from dropbox")
        data = vdp.read_yaml(f"{c.PATH_EXPENSOR}/report_data/{mdate.year}/{mdate:%Y_%m}.yaml")

    # Add title
    data["mdate"] = f"{mdate:%Y_%m}"
    data["title"] = f"{mdate:%Y_%m} Expensor"

    data["sections"] = {
        "evolution": "fa-chart-line",
        "comparison": "fa-poll",
        "pies": "fa-chart-pie",
        "liquid": "fa-tint",
        "investments": "fa-wallet",
        "fire": "fa-fire-alt",
        "sankey": "fa-stream",
    }

    # Create the report
    report = u.render_jinja_template("expensor.html", data)
    vdp.write_file(report, f"{c.PATH_EXPENSOR}/reports/{mdate.year}/{mdate:%Y_%m}.html")
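# Hedged sketch (assumption, not the actual 'u.render_jinja_template'): a
# helper like the one used above can be built with plain jinja2, loading
# templates from a local folder. The function name and the "templates" path
# are illustrative only.
from jinja2 import Environment, FileSystemLoader

def _render_jinja_template_sketch(template_name, data):
    """Render 'template_name' with 'data' and return the resulting HTML string"""
    env = Environment(loader=FileSystemLoader("templates"))
    return env.get_template(template_name).render(**data)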
def vbooks():
    """Creates the report"""

    data = extract_data()

    # Add title
    data["title"] = "VBooks"

    data["sections"] = {
        "evolution": "fa-chart-line",
        "percent": "fa-percent",
        "authors": "fa-user",
        "todo": "fa-list",
    }

    # Create report
    report = u.render_jinja_template("vbooks.html", data)
    u.get_vdropbox().write_file(report, f"{c.PATH_VBOOKS}/vbooks.html")
def backup_files():
    """Back up all files from URIS"""

    vdp = get_vdropbox()

    for kwargs in files_regexs:
        log.info("Scanning '{path}/{regex}'".format(**kwargs))
        one_backup(vdp, **kwargs)
def clean_backups():
    """Delete backups so that only one per month remains (except if newer than 30 days)"""

    vdp = get_vdropbox()

    df = get_all_backups(vdp)
    df = tag_duplicates(df)

    # Delete files tagged as 'delete'
    for uri in df[df["delete"]].index:
        vdp.delete(uri)
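# Hedged sketch (assumption, the real 'tag_duplicates' may differ): one way to
# implement the docstring above is to keep the newest backup of each file per
# month plus anything from the last 30 days, and tag the rest for deletion.
# The 'filename' and 'date' columns are assumptions about the dataframe shape.
import pandas as pd

def _tag_duplicates_sketch(df):
    """Add a boolean 'delete' column to a backups dataframe"""
    df = df.sort_values("date").copy()
    df["month"] = df["date"].dt.strftime("%Y-%m")
    keep_monthly = ~df.duplicated(subset=["filename", "month"], keep="last")
    keep_recent = df["date"] > pd.Timestamp.now() - pd.Timedelta(days=30)
    df["delete"] = ~(keep_monthly | keep_recent)
    return df.drop(columns="month")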
def send_summary(mdate, channel):
    """Send gcalendar report"""

    vdp = get_vdropbox()
    df = get_daily_data(vdp, mdate)

    # Prepare slack message
    data = get_n_week(df)
    block = create_slack_block(data)

    # Send slack
    send_slack(channel=channel, blocks=[block])
def money_lover():
    """Retrieves the money lover transactions, transforms them and exports the result"""

    vdp = get_vdropbox()

    # Read
    df = get_money_lover_df(vdp)

    # Transform
    df = transform_transactions(df)

    # Export
    vdp.write_excel(df, c.FILE_TRANSACTIONS)
def flights(mdate):
    """Query all flights for 'mdate' and export them, unless already done"""

    filename = c.FILE_FLIGHTS_DAY.format(date=mdate)

    vdp = get_vdropbox()

    if vdp.file_exists(filename):
        log.warning(f"File '{filename}' already exists, skipping flights task")

    # Only query if the file does not exist
    else:
        df = retrive_all_flights()
        vdp.write_parquet(df, filename)
def get_airports_pairs():
    """Get a set of all airport combinations"""

    vdp = get_vdropbox()
    df_airports = vdp.read_excel(c.FILE_AIRPORTS)

    out = set()
    for _, row in df_airports.iterrows():
        # Add both directions so that routes can be looked up either way
        out.add((row[c.COL_ORIGIN], row[c.COL_DESTINATION]))
        out.add((row[c.COL_DESTINATION], row[c.COL_ORIGIN]))

    log.info("Airports retrieved from dropbox")
    return out
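# Usage sketch with hypothetical airport codes: since both directions are
# added, membership checks on the returned set work regardless of route
# direction.
def _demo_airports_pairs():
    pairs = {("BCN", "LHR"), ("LHR", "BCN")}  # shape of get_airports_pairs() output
    assert ("BCN", "LHR") in pairs
    assert ("LHR", "BCN") in pairs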
def export_calendar_events(mdate):
    """Export all events as a parquet"""

    vdp = get_vdropbox()
    download_token(vdp)

    # Get events
    calendars = read_calendars()
    df = get_all_events(calendars, mdate)

    # Export events
    vdp.write_parquet(df, PATH_GCAL_DATA)

    upload_token(vdp)
def get_data():
    """Retrieve all dataframes"""

    # Get dfs
    log.debug("Reading excels from gdrive")
    dfs = {
        name: read_df_gdrive(c.FILE_DATA, name, cols)
        for name, cols in c.DFS_ALL_FROM_DATA.items()
    }

    # Add transactions
    log.debug("Reading data from dropbox")
    vdp = get_vdropbox()
    dfs[c.DF_TRANS] = vdp.read_excel(c.FILE_TRANSACTIONS).set_index(c.COL_DATE)

    return dfs
def main(dfs, mdate=None, export_data=False):
    """Create the report data"""

    # Avoid 'datetime.now()' as a default argument: it would be evaluated only
    # once, at import time
    if mdate is None:
        mdate = datetime.now()
    mdate = mdate.replace(day=1)

    # Filter dates
    dfs = filter_by_date(dfs, mdate)

    # Get config info
    vdp = get_vdropbox()
    yml = vdp.read_yaml(c.FILE_CONFIG)

    out = {}

    # Expenses, incomes, result and savings ratio
    log.debug("Extracting expenses, incomes, result and savings ratio")
    for period in ["month", "year"]:
        out[period] = get_basic_traces(dfs, period[0].upper() + "S", mdate)

    # Liquid, worth and invested
    log.debug("Adding liquid, worth and invested")
    data = [(c.DF_LIQUID, c.LIQUID), (c.DF_WORTH, c.INVEST), (c.DF_INVEST, c.INVEST)]
    for name, yml_name in data:
        out["month"].update(get_investment_or_liquid(dfs, yml[yml_name], name))

    out["month"].update(get_total_investments(out))
    out["month"].update(get_salaries(dfs, mdate))

    out["comp"] = get_comparison_traces(dfs)
    out["pies"] = get_pie_traces(dfs, mdate)
    out["dash"] = get_dashboard(out, mdate)
    out["ratios"] = get_ratios(out)
    out["bubbles"] = get_bubbles(dfs, mdate)
    out["sankey"] = extract_sankey(out)
    out["colors"] = add_colors(dfs, yml)

    if export_data:
        vdp.write_yaml(out, f"{c.PATH_EXPENSOR}/report_data/{mdate.year}/{mdate:%Y_%m}.yaml")

    return out
def run_etl():
    """Run the ETL for today"""

    # Get dropbox connector
    vdp = u.get_vdropbox()

    download_log(vdp)
    detect_env()

    log.info("Starting vtasks")
    result = u.timeit(flow.run)(mdate=date.today())
    log.info("End of vtasks")

    copy_log(vdp)

    if not result.is_successful():
        log.error("ETL has failed")
        raise ValueError("ETL has failed")
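# Hedged sketch (assumption, not the actual 'u.timeit'): a wrapper like the
# one used above can be a plain decorator that logs the elapsed time of the
# wrapped callable.
from functools import wraps
from time import time

def _timeit_sketch(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        t0 = time()
        result = func(*args, **kwargs)
        log.info(f"'{func.__name__}' done in {time() - t0:.2f} seconds")
        return result
    return wrapper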
def gcal_report(mdate):
    """Creates the report"""

    # Work with the start of the month
    mdate = mdate.replace(day=1)

    vdp = get_vdropbox()
    df = get_daily_data(vdp, mdate)
    data = extract_data(vdp, df)

    # Add title
    data["title"] = "Calendar"

    data["sections"] = {
        "evolution": "fa-chart-line",
        "pies": "fa-chart-pie",
    }

    # Create report
    report = render_jinja_template("gcalendar.html", data)
    vdp.write_file(report, f"{PATH_GCAL}/gcalendar.html")
def extract_gcal_confusions(exclude_other=True, merge_study=True, min_alpha=0.1):
    """Detect potential confusions in google calendar events"""

    vdp = get_vdropbox()
    dfg = vdp.read_parquet(PATH_GCAL_DATA)

    df_aux = clear_potential_confusions(dfg, exclude_other, merge_study)
    df_matrix = get_confusion_matrix(df_aux, col_text="summary", col_category="calendar")
    df_confusions = filter_confusions(df_matrix, min_alpha)

    num_confusions = df_confusions.shape[0]
    if num_confusions > 0:
        log.warning(f"There are {num_confusions} confusions in google calendar. Exporting them")
        vdp.write_excel(df_confusions, PATH_CONFUSIONS)
    else:
        log.success("There are no confusions in google calendar")
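# Hedged sketch (assumption, the real helpers may differ): a "confusion" can
# be read as an event summary that shows up in more than one calendar. With
# pandas, per-summary calendar shares are a row-normalized crosstab; rows with
# more than 'min_alpha' of their mass outside their main calendar would then
# be flagged.
import pandas as pd

def _confusion_matrix_sketch(df, col_text="summary", col_category="calendar"):
    """Return per-summary calendar shares (each row sums to 1)"""
    return pd.crosstab(df[col_text], df[col_category], normalize="index")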