Example #1
def extract_data(export=False):
    """Extract the data used in the VBooks report"""

    df = get_books()
    df_todo = get_todo()

    out = {
        "dashboard": get_dashboard(df),
        "year_by_category": get_year_data(df),
        "month_by_category": get_month_data(df),
        "colors": {name: get_colors(data)
                   for name, data in c.COLORS.items()},
    }

    # Add percentages
    data = out["year_by_category"]
    out["year_percent"] = get_year_percent(data, cumsum=False)
    out["year_percent_cumsum"] = get_year_percent(data, cumsum=True)

    # Extract totals
    out["year"] = out["year_by_category"].pop("Total")
    out["month"] = out["month_by_category"].pop("Total")

    # Top Authors
    out["top_authors"] = get_top(df, groupby=c.COL_AUTHOR)

    # To-do section
    out["todo_by_author"] = get_top(df_todo, c.COL_AUTHOR)
    out["todo_by_source"] = get_top(df_todo, c.COL_SOURCE)
    out["todo_by_ownership"] = get_top(df_todo, c.COL_OWNED)

    if export:
        u.get_vdropbox().write_yaml(out, f"{c.PATH_VBOOKS}/report_data.yaml")

    return out
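
# get_top is not shown in this excerpt; a minimal pandas sketch of what it is
# assumed to do (count rows per group and keep the largest groups). The helper
# name comes from the code above, but the body and the n parameter are
# illustrative guesses, not the real implementation.
import pandas as pd

def get_top(df, groupby, n=10):
    # Count rows per group and return the n biggest groups
    return df.groupby(groupby).size().sort_values(ascending=False).head(n)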
Example #2
def merge_flights_history(mdate):
    """Merge the daily flight parquets of each monthly folder into one parquet"""

    vdp = get_vdropbox()

    # Check for monthly folders and get all parquets inside
    for folder in vdp.ls(c.PATH_HISTORY):

        is_date_folder = re.search(r"\d{4}_\d{2}", folder)
        if is_date_folder and ("."
                               not in folder) and (folder < f"{mdate:%Y_%m}"):

            log.info(f"Merging '{folder}' vflights history")

            sub_folder = f"{c.PATH_HISTORY}/{folder}"

            # Read all daily parquets
            dfs = []
            for file in vdp.ls(sub_folder):
                if file.endswith(".parquet"):
                    dfs.append(vdp.read_parquet(f"{sub_folder}/{file}"))

            # Export them as a single parquet file
            df = pd.concat(dfs)
            vdp.write_parquet(df, f"{sub_folder}.parquet")
            log.success(f"Successfully merged '{folder}' vflights history")

            # Delete original folder
            vdp.delete(sub_folder)
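
# Why the plain string comparison `folder < f"{mdate:%Y_%m}"` above is safe:
# zero-padded "YYYY_MM" names sort lexicographically in the same order as
# chronologically. A quick self-check:
from datetime import date

assert "2023_12" < f"{date(2024, 3, 1):%Y_%m}" < "2024_04"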
Example #3
def main(mdate=None, data=None):
    """Creates the report"""

    # Default to now at call time (a datetime.now() default is frozen at import)
    if mdate is None:
        mdate = datetime.now()
    mdate = mdate.replace(day=1)

    vdp = u.get_vdropbox()

    # Read data
    if data is None:
        log.debug("Reading report_data from dropbox")
        data = vdp.read_yaml(f"{c.PATH_EXPENSOR}/report_data/{mdate.year}/{mdate:%Y_%m}.yaml")

    # Add title
    data["mdate"] = f"{mdate:%Y_%m}"
    data["title"] = f"{mdate:%Y_%m} Expensor"
    data["sections"] = {
        "evolution": "fa-chart-line",
        "comparison": "fa-poll",
        "pies": "fa-chart-pie",
        "liquid": "fa-tint",
        "investments": "fa-wallet",
        "fire": "fa-fire-alt",
        "sankey": "fa-stream",
    }

    # Create report
    report = u.render_jinja_template("expensor.html", data)
    vdp.write_file(report, f"{c.PATH_EXPENSOR}/reports/{mdate.year}/{mdate:%Y_%m}.html")
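
# u.render_jinja_template is an internal helper; a minimal sketch of what it is
# assumed to do with the standard jinja2 API (the "templates" directory is an
# illustrative assumption):
from jinja2 import Environment, FileSystemLoader

def render_jinja_template(template_name, data):
    env = Environment(loader=FileSystemLoader("templates"))
    return env.get_template(template_name).render(**data)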
Example #4
def vbooks():
    """Creates the report"""

    data = extract_data()

    # Add title
    data["title"] = "VBooks"
    data["sections"] = {
        "evolution": "fa-chart-line",
        "percent": "fa-percent",
        "authors": "fa-user",
        "todo": "fa-list",
    }

    # Create report
    report = u.render_jinja_template("vbooks.html", data)
    u.get_vdropbox().write_file(report, f"{c.PATH_VBOOKS}/vbooks.html")
Example #5
def backup_files():
    """Back up all files from URIS"""

    vdp = get_vdropbox()

    for kwargs in files_regexs:
        log.info("Scanning '{path}/{regex}'".format(**kwargs))
        one_backup(vdp, **kwargs)
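
# The .format(**kwargs) call above implies that each entry of files_regexs is a
# dict with at least "path" and "regex" keys; the values below are illustrative:
files_regexs = [
    {"path": "/backups/databases", "regex": r".*\.sqlite"},
]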
Example #6
def clean_backups():
    """Delete backups so that only one per month remain (except if newer than 30d)"""

    vdp = get_vdropbox()

    df = get_all_backups(vdp)
    df = tag_duplicates(df)

    # Delete files tagged as 'delete'
    for uri in df[df["delete"]].index:
        vdp.delete(uri)
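
# tag_duplicates is internal; a minimal pandas sketch of the rule stated in the
# docstring (keep the newest backup of each month and anything from the last 30
# days). Indexing by uri and the "date" column are assumptions.
import pandas as pd

def tag_duplicates(df):
    df = df.sort_values("date")
    month = df["date"].dt.to_period("M")
    recent = df["date"] > pd.Timestamp.now() - pd.Timedelta(days=30)
    # Mark every backup except the last of its month, unless it is recent
    df["delete"] = month.duplicated(keep="last") & ~recent
    return df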
Example #7
def send_summary(mdate, channel):
    """Send gcalendar report"""

    vdp = get_vdropbox()
    df = get_daily_data(vdp, mdate)

    # Prepare slack message
    data = get_n_week(df)
    block = create_slack_block(data)

    # Send slack
    send_slack(channel=channel, blocks=[block])
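
# create_slack_block is internal; Slack's Block Kit takes plain dicts, so a
# minimal sketch could look like this (the mrkdwn layout is an assumption):
def create_slack_block(data):
    return {
        "type": "section",
        "text": {"type": "mrkdwn", "text": f"*Weekly summary*\n{data}"},
    }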
Example #8
def money_lover():
    """Retrives all dataframes and update DFS global var"""

    vdp = get_vdropbox()

    # Read
    df = get_money_lover_df(vdp)

    # Transform
    df = transform_transactions(df)

    # Export
    vdp.write_excel(df, c.FILE_TRANSACTIONS)
Example #9
def flights(mdate):
    """Query all flights for one day and export them as a parquet"""

    filename = c.FILE_FLIGHTS_DAY.format(date=mdate)

    vdp = get_vdropbox()

    if vdp.file_exists(filename):
        log.warning(f"File '{filename}' already exists, skipping flights task")

    # Only query if the file does not exist
    else:
        df = retrive_all_flights()
        vdp.write_parquet(df, filename)
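
# FILE_FLIGHTS_DAY is assumed to be a template with a {date} placeholder, since
# it is filled with .format(date=mdate). An illustrative value consistent with
# the monthly folders merged in merge_flights_history could be:
FILE_FLIGHTS_DAY = "/vflights/history/{date:%Y_%m}/{date:%Y_%m_%d}.parquet"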
Example #10
def get_airports_pairs():
    """Get a set of all airports combinations"""

    vdp = get_vdropbox()
    df_airports = vdp.read_excel(c.FILE_AIRPORTS)

    out = set()
    for _, row in df_airports.iterrows():
        out.add((row[c.COL_ORIGIN], row[c.COL_DESTINATION]))
        out.add((row[c.COL_DESTINATION], row[c.COL_ORIGIN]))

    log.info("Airports retrived from dropbox")

    return out
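
# Since both directions are inserted, lookups need no particular orientation.
# An equivalent design choice would store each pair once as a frozenset, which
# is symmetric by construction; a hypothetical variant:
def get_airports_pairs_frozen(df_airports):
    return {
        frozenset((row[c.COL_ORIGIN], row[c.COL_DESTINATION]))
        for _, row in df_airports.iterrows()
    }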
Example #11
def export_calendar_events(mdate):
    """Export all events as a parquet"""

    vdp = get_vdropbox()

    download_token(vdp)

    # Get events
    calendars = read_calendars()
    df = get_all_events(calendars, mdate)

    # Export events
    vdp.write_parquet(df, PATH_GCAL_DATA)

    upload_token(vdp)
Example #12
def get_data():
    """Retrive dataframes"""

    # Get dfs
    log.debug("Reading excels from gdrive")
    dfs = {
        x: read_df_gdrive(c.FILE_DATA, x, cols)
        for x, cols in c.DFS_ALL_FROM_DATA.items()
    }

    # Add transactions
    log.debug("Reading data from dropbox")
    vdp = get_vdropbox()
    dfs[c.DF_TRANS] = vdp.read_excel(c.FILE_TRANSACTIONS).set_index(c.COL_DATE)

    return dfs
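
# The dict comprehension above implies that DFS_ALL_FROM_DATA maps sheet names
# to the columns to read from each one; a purely illustrative example:
DFS_ALL_FROM_DATA = {
    "liquid_m": ["Date", "Amount"],
    "worth_m": ["Date", "Amount"],
}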
Example #13
def main(dfs, mdate=None, export_data=False):
    """Create the report"""

    # Default to now at call time (a datetime.now() default is frozen at import)
    if mdate is None:
        mdate = datetime.now()
    mdate = mdate.replace(day=1)

    # Filter dates
    dfs = filter_by_date(dfs, mdate)

    # Get config info
    vdp = get_vdropbox()
    yml = vdp.read_yaml(c.FILE_CONFIG)

    out = {}

    # Expenses, incomes, result and savings ratio
    log.debug("Extracting expenses, incomes, result and savings ratio")
    for period in ["month", "year"]:
        out[period] = get_basic_traces(dfs, period[0].upper() + "S", mdate)

    # Liquid, worth and invested
    log.debug("Adding liquid, worth and invested")
    data = [(c.DF_LIQUID, c.LIQUID), (c.DF_WORTH, c.INVEST), (c.DF_INVEST, c.INVEST)]
    for name, yml_name in data:
        out["month"].update(get_investment_or_liquid(dfs, yml[yml_name], name))

    out["month"].update(get_total_investments(out))
    out["month"].update(get_salaries(dfs, mdate))

    out["comp"] = get_comparison_traces(dfs)
    out["pies"] = get_pie_traces(dfs, mdate)
    out["dash"] = get_dashboard(out, mdate)
    out["ratios"] = get_ratios(out)
    out["bubbles"] = get_bubbles(dfs, mdate)
    out["sankey"] = extract_sankey(out)

    out["colors"] = add_colors(dfs, yml)

    if export_data:
        vdp.write_yaml(out, f"{c.PATH_EXPENSOR}/report_data/{mdate.year}/{mdate:%Y_%m}.yaml")

    return out
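
# filter_by_date is internal; a minimal sketch, assuming every dataframe has a
# datetime index and that filtering means dropping rows after the report month:
import pandas as pd

def filter_by_date(dfs, mdate):
    return {name: df[df.index <= pd.Timestamp(mdate)] for name, df in dfs.items()}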
Example #14
def run_etl():
    """Run the ETL for today"""

    # Get dropbox connector
    vdp = u.get_vdropbox()

    download_log(vdp)

    detect_env()

    log.info("Starting vtasks")
    result = u.timeit(flow.run)(mdate=date.today())
    log.info("End of vtasks")

    copy_log(vdp)

    if not result.is_successful():
        log.error("ETL has failed")
        raise ValueError("ETL has failed")
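
# u.timeit is internal; a minimal sketch of a wrapper with the calling
# convention used above (u.timeit(func)(**kwargs) runs func and logs its
# duration before returning the result):
import time
from functools import wraps

def timeit(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        start = time.time()
        result = func(*args, **kwargs)
        log.info(f"{func.__name__} done in {time.time() - start:.2f} seconds")
        return result
    return wrapper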
Example #15
def gcal_report(mdate):
    """Creates the report"""

    # First day of the report month
    mdate = mdate.replace(day=1)

    vdp = get_vdropbox()

    df = get_daily_data(vdp, mdate)
    data = extract_data(vdp, df)

    # Add title
    data["title"] = "Calendar"
    data["sections"] = {
        "evolution": "fa-chart-line",
        "pies": "fa-chart-pie",
    }

    # Create report
    report = render_jinja_template("gcalendar.html", data)
    vdp.write_file(report, f"{PATH_GCAL}/gcalendar.html")
Example #16
def extract_gcal_confusions(exclude_other=True, merge_study=True, min_alpha=0.1):
    """Export calendar events whose text is confused between calendars"""

    vdp = get_vdropbox()

    dfg = vdp.read_parquet(PATH_GCAL_DATA)

    df_aux = clear_potential_confusions(dfg, exclude_other, merge_study)
    df_matrix = get_confusion_matrix(df_aux,
                                     col_text="summary",
                                     col_category="calendar")
    df_confusions = filter_confusions(df_matrix, min_alpha)

    num_confusions = df_confusions.shape[0]

    if num_confusions > 0:
        log.warning(f"There are {num_confusions} confusions in google calendar. Exporting them")
        vdp.write_excel(df_confusions, PATH_CONFUSIONS)
    else:
        log.success("There are no confusions in google calendar")
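
# get_confusion_matrix is internal; one way to build it with pandas, assuming a
# "confusion" is the same event text appearing under more than one calendar:
import pandas as pd

def get_confusion_matrix(df, col_text, col_category):
    counts = pd.crosstab(df[col_text], df[col_category])
    # Normalize each row so cells are the share of that text in each calendar
    return counts.div(counts.sum(axis=1), axis=0)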