def main(args: argparse.Namespace) -> ResultValue:
    log = logging.getLogger('Main')
    log.info(" >>")
    rv: ResultValue = ResultKo(Exception("Error"))
    try:
        data_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 "..", "data", "reduced_report_data.csv")
        result = load_data_file(data_file=data_file)

        delta_cols = ["CASI TOTALI - A", "DECEDUTI"]
        for col in delta_cols:
            if result.is_ok():
                result = calculate_daily_diffs(cast(pd.DataFrame, result()),
                                               in_col=col,
                                               out_col="D - {c}".format(c=col))
            else:
                break

        if result.is_ok():
            result = save_data_file(
                cast(pd.DataFrame, result()),
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             "..", "data", "report_data.csv"),
                overwrite=True)
        if result.is_ok():
            rv = ResultOk(None)

        # Reload the file that was just written (the result is not used further).
        data_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 "..", "data", "report_data.csv")
        result = load_data_file(data_file=data_file)
    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        rv = ResultKo(ex)
    log.info(" ({rv}) <<".format(rv=rv))
    return rv

def save_data_file(df: pd.DataFrame,
                   data_file_out: str,
                   sorting_col: str = "REPORT DATE",
                   overwrite: bool = False) -> ResultValue:
    log = logging.getLogger('save_data_file')
    log.info(" >>")
    try:
        mode = 'w'
        header = True
        column_list = df.columns.values
        df.sort_values(by=[sorting_col], inplace=True)
        if os.path.isfile(data_file_out):
            if not overwrite:
                mode = 'a'
                header = False
            else:
                header = True
            with open(data_file_out) as fh:
                csv_reader = csv.reader(fh)
                csv_headings = next(csv_reader)
                if csv_headings != list(column_list):
                    ex = Exception(
                        "Columns different from file header\n {l1}\n {l2}\n".format(
                            l1=column_list, l2=csv_headings))
                    log.error("Error checking the file header - {e}".format(e=ex))
                    return ResultKo(ex)
        log.info("Save to: {f} headers: {h}".format(f=data_file_out, h=header))
        df.to_csv(data_file_out, mode=mode, header=header, index=False)
    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(df)

def save_df_to_csv(df: pd.DataFrame, csv_file_name: str,
                   column_list: List[str], sorting_col: str) -> ResultValue:
    log = logging.getLogger('save_df_to_csv')
    log.info(" >>")
    try:
        mode = 'w'
        header = True
        df = df.loc[:, column_list]
        df.sort_values(by=[sorting_col], inplace=True)
        if os.path.isfile(csv_file_name):
            header = False
            mode = 'a'
            with open(csv_file_name) as fh:
                csv_reader = csv.reader(fh)
                csv_headings = next(csv_reader)
                if csv_headings != column_list:
                    ex = Exception(
                        "Columns different from file header\n {l1}\n {l2}\n".format(
                            l1=column_list, l2=csv_headings))
                    log.error("Error checking the file header - {e}".format(e=ex))
                    return ResultKo(ex)
        log.info("Save to: {f} headers: {h}".format(f=csv_file_name, h=header))
        df.to_csv(csv_file_name, mode=mode, header=header, index=False)
    except Exception as ex:
        log.error(" failed - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(True)

def age_distribution(df: pd.DataFrame, ax: mp.axes.Axes,
                     gender: str = "F") -> ResultValue:
    log = logging.getLogger('age_distribution')
    log.info(" >>")
    try:
        if gender.upper() not in ["M", "F", "B"]:
            msg = "Gender {v} value not known".format(v=gender)
            log.error(msg)
            return ResultKo(Exception(msg))
        by_age = df.groupby(["fascia_anagrafica"]).sum()
        by_age.reset_index(level=0, inplace=True)
        by_age["totals"] = by_age["sesso_femminile"] + by_age["sesso_maschile"]
        values = by_age["sesso_femminile" if gender == "F" else
                        ("sesso_maschile" if gender == "M" else "totals")]
        labels = by_age["fascia_anagrafica"]
        # "colors" is expected to be defined at module level.
        ax.pie(values, labels=labels, autopct='%1.1f%%', colors=colors)
        ax.set_title("Distribuzione per eta'", fontsize=18)
    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(True)

def save_content_to_file(file_name: str, content: bytes) -> ResultValue:
    log = logging.getLogger('save_content_to_file')
    rv: ResultValue = ResultKo(Exception("Error"))
    try:
        with open(file_name, "wb") as fh:
            fh.write(content)
    except Exception as ex:
        log.error("save_content_to_file failed - {ex}".format(ex=ex))
        rv = ResultKo(ex)
    else:
        rv = ResultOk(True)
    return rv

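# Illustrative usage sketch (not part of the original module): save_content_to_file()
# just writes raw bytes to disk; the target path below is a placeholder.
def _example_save_bytes() -> None:
    outcome = save_content_to_file("/tmp/example.bin", b"example payload")
    print("Saved" if outcome.is_ok() else "Save failed: {e}".format(e=outcome()))
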
def load_date_range_reports(begin: dt.datetime, to: dt.datetime,
                            context: dict) -> ResultValue:
    log = logging.getLogger('load_date_range_reports')
    log.info(" >>")
    try:
        for single_date in daterange(begin, to):
            df = append_new_data(single_date.strftime("%d/%m/%Y"), context)
            if df.is_in_error():
                return ResultKo(Exception("Failure in append_new_data."))
    except Exception as ex:
        log.error(" failed - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    # df already holds the last ResultValue returned by append_new_data.
    return df

def company_distribution(df: pd.DataFrame, ax: mp.axes.Axes) -> ResultValue:
    log = logging.getLogger('company_distribution')
    log.info(" >>")
    try:
        def autopct_format(values):
            def my_format(pct):
                total = sum(values)
                val = int(round(pct * total / 100.0))
                return '{v:d}'.format(v=val)
            return my_format

        colors = [
            "#9aff33", "#34ff33", "#33ff98", "#33fffe", "#339aff",
            "#3371ff", "#5b33ff", "#c133ff", "#ff33d7"
        ]
        by_company = df.groupby(["fornitore"]).sum()
        by_company.reset_index(level=0, inplace=True)
        values = by_company["numero_dosi"]
        labels = by_company["fornitore"]
        ax.pie(values, labels=labels, colors=colors,
               autopct=autopct_format(values))
        ax.set_title("Vaccini consegnati", fontsize=18)
    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(True)

def plot_delivered_vaccines_quantity(df_delivered: pd.DataFrame,
                                     ax: mp.axes.Axes) -> ResultValue:
    log = logging.getLogger('plot_delivered_vaccines_quantity')
    log.info(" >>")
    rv: ResultValue = ResultKo(Exception("Error"))
    try:
        line_label = "Dosi consegnate - somma"
        line_color = "#ff5733"
        df_delivered.sort_values(by="data_consegna", inplace=True)
        by_date = df_delivered.groupby(["data_consegna"]).sum()
        by_date.reset_index(level=0, inplace=True)
        by_date["cumulata"] = by_date["numero_dosi"].cumsum()
        x_del = by_date["data_consegna"]
        y_del = by_date["cumulata"]

        remove_tick_lines('x', ax)
        remove_tick_lines('y', ax)
        set_axes_common_properties(ax, no_grid=True)

        ax.xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%y"))
        ax.xaxis.set_minor_formatter(mdates.DateFormatter("%d/%m"))
        ax.xaxis.set_major_locator(mdates.DayLocator(interval=2))

        ax.scatter(x_del, y_del, s=30, marker='.')
        line = ax.plot(x_del, y_del, 'b-', linewidth=2,
                       color=line_color, label=line_label)
        ax.set_xticklabels(x_del, rotation=80)

        handles, labels = ax.get_legend_handles_labels()
        patch = mpatches.Patch(color=line_color, label=line_label)
        handles.append(patch)
        plt.legend(handles=handles, loc='upper left')

        rv = ResultOk(line)
    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        rv = ResultKo(ex)
    log.info(" <<")
    return rv

def get_web_file(url: str) -> ResultValue:
    log = logging.getLogger('get_web_file')
    log.info(" >>")
    log.info("Url: {u}".format(u=url))
    rv: ResultValue = ResultKo(Exception("Error"))
    result_content: bytes = bytearray()
    try:
        result = requests.get(url)
        if result.status_code not in ok_statuses:
            log.info("Get data failed. Received error code: {er}".format(
                er=str(result.status_code)))
        else:
            result_content = result.content
            rv = ResultOk(result_content)
    except Exception as ex:
        log.error(" failed - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info("get_web_file ({rv}) <<".format(rv=rv))
    return rv

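# Illustrative usage sketch (not part of the original module): chain get_web_file()
# with save_content_to_file() defined above; the URL and local path are placeholders.
def _example_fetch_and_store() -> None:
    downloaded = get_web_file("https://example.org/report.pdf")
    if downloaded.is_ok():
        stored = save_content_to_file("/tmp/report.pdf", cast(bytes, downloaded()))
        print("Stored" if stored.is_ok() else "Store failed")
    else:
        print("Download failed")
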
def create_delivered_dataframe(data_file: str) -> ResultValue:
    log = logging.getLogger('create_delivered_dataframe')
    log.info(" >>")
    try:
        df = pd.read_csv(data_file, sep=',', parse_dates=["data_consegna"])
    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(df)

def main(args: argparse.Namespace) -> bool:
    log = logging.getLogger('Main')
    log.info(" >>")
    rv: ResultValue = ResultKo(Exception("Error"))
    try:
        date_format = '%d/%m/%Y'
        data_file_name = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "..", "data",
            "reduced_report_data.csv")
        if args.date_range is not None:
            begin_dt = dt.datetime.strptime(args.date_range[0], date_format)
            end_dt = dt.datetime.strptime(args.date_range[1], date_format)
            if end_dt < begin_dt:
                log.error("Wrong date range: {b} < {e}".format(b=begin_dt, e=end_dt))
                return False
            columns_report_charts = [
                "REPORT DATE", "Regione", "Ricoverati con sintomi",
                "Terapia intensiva", "Totale attualmente positivi", "DECEDUTI",
                "Isolamento domiciliare", "CASI TOTALI - A",
                "Totale tamponi effettuati", "SCHEMA VERSION"
            ]
            temp_content_dir = os.path.join(os.sep, 'tmp')
            rv = load_date_range_reports(begin=begin_dt,
                                         to=end_dt,
                                         context={
                                             "temp_dir": temp_content_dir,
                                             "data file": data_file_name,
                                             "columns": columns_report_charts,
                                             "save": True,
                                             "sort column": "REPORT DATE"
                                         })
        elif args.get_date_range is not None and args.get_date_range == True:
            df = pd.read_csv(data_file_name, sep=',')
            msg = "Data minima: {dmin} - data massima: {dmax} - numero righe: {nr}".format(
                nr=df.shape[0],
                dmin=df["REPORT DATE"].min(),
                dmax=df["REPORT DATE"].max())
            print(msg)
            log.info(msg)
            rv = ResultOk(True)
        else:
            msg = "Nothing to do!"
            log.info(msg)
            rv = ResultOk(True)
    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        return False
    log.info(" (Is ok: {rv}) <<".format(rv=rv.is_ok()))
    return rv.is_ok()

def download_csv_file(url: str, data_file: str) -> ResultValue:
    log = logging.getLogger('download_csv_file')
    log.info(" >>")
    rv: ResultValue = ResultKo(Exception("Error"))
    try:
        result = requests.get(url)
        if result.status_code in [200]:
            with open(data_file, "w") as text_file:
                text_file.write(result.text)
            rv = ResultOk(True)
        else:
            msg = "Error downloading the data file: {e}.".format(e=result.reason)
            log.error(msg)
            rv = ResultKo(Exception(msg))
    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        rv = ResultKo(ex)
    log.info(" <<")
    return rv

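# Illustrative usage sketch (not part of the original module): download_csv_file()
# with the same open-data URL used by the vaccine download entry point; the
# destination path is a placeholder.
def _example_download_csv() -> None:
    url = ("https://raw.githubusercontent.com/italia/covid19-opendata-vaccini/"
           "master/dati/somministrazioni-vaccini-latest.csv")
    outcome = download_csv_file(url=url, data_file="/tmp/somministrazioni.csv")
    print("Downloaded" if outcome.is_ok() else "Download failed")
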
def translate_to_date(report_date: List[str]) -> ResultValue:
    log = logging.getLogger('data_downloader')
    date = None
    months_names = {
        "gennaio": 1, "febbraio": 2, "marzo": 3, "aprile": 4,
        "maggio": 5, "giugno": 6, "luglio": 7, "agosto": 8,
        "settembre": 9, "ottobre": 10, "novembre": 11, "dicembre": 12
    }
    if len(report_date) >= 3:
        try:
            day = report_date[0]
            year = report_date[2]
            month = months_names.get(report_date[1].lower())
            if month is not None:
                date = dt.datetime(year=int(year), month=int(month), day=int(day))
            else:
                ex = Exception("Unknown month: {m}".format(m=report_date[1]))
                log.error("Error in date translation - {e}".format(e=ex))
                return ResultKo(ex)
        except Exception as ex:
            log.error("Exception - {e}".format(e=ex))
            return ResultKo(ex)
    else:
        exc = Exception("Wrong format: {d}".format(d=report_date))
        log.error("Error in date translation - {e}".format(e=exc))
        return ResultKo(exc)
    return ResultOk(date)

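# Illustrative usage sketch (not part of the original module): translate_to_date()
# expects the Italian report date already split into [day, month name, year] tokens.
def _example_translate_date() -> None:
    parsed = translate_to_date(["3", "dicembre", "2020"])
    if parsed.is_ok():
        print(parsed())  # expected: 2020-12-03 00:00:00
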
def create_dataframe(pdf_url: str, local_file_path: str,
                     pdf_version: str) -> ResultValue:
    log = logging.getLogger('create_dataframe')
    log.info(" >>")
    ret_data_frame: ResultValue = ResultKo(Exception("Error"))
    try:
        file_downloaded_rv = get_web_file(pdf_url)
        if file_downloaded_rv.is_ok():
            if save_content_to_file(local_file_path,
                                    cast(bytes, file_downloaded_rv())).is_ok():
                to_df_rv = pdf_to_dataframe(local_file_path)
                if to_df_rv.is_ok():
                    df, report_date = to_df_rv()
                    ret_data_frame = refactor_region_df(df, report_date, pdf_version)
    except Exception as ex:
        log.error(" failed - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" ({rv}) <<".format(rv=ret_data_frame))
    return ret_data_frame

def calculate_daily_diffs(df: pd.DataFrame, in_col: str,
                          out_col: str) -> ResultValue:
    log = logging.getLogger('calculate_daily_diffs')
    log.info("({oc}) >>".format(oc=out_col))
    try:
        regions_list = df["Regione"].unique()
        for region in regions_list:
            mask = df["Regione"] == region
            df.loc[mask, out_col] = df.loc[mask, in_col].diff(periods=1)
    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(df)

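# Illustrative usage sketch (not part of the original module): calculate_daily_diffs()
# adds a per-region day-over-day difference column; the tiny DataFrame below is
# invented only to show the expected input shape.
def _example_daily_diffs() -> None:
    sample = pd.DataFrame({
        "Regione": ["Lombardia", "Lombardia", "Veneto", "Veneto"],
        "CASI TOTALI - A": [100, 130, 50, 65],
    })
    result = calculate_daily_diffs(sample, in_col="CASI TOTALI - A",
                                   out_col="D - CASI TOTALI - A")
    if result.is_ok():
        # The first row of each region is NaN, then the daily delta (30 and 15).
        print(result())
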
def create_dataframe(data_file: str) -> ResultValue:
    log = logging.getLogger('create_dataframe')
    log.info(" >>")
    try:
        df = pd.read_csv(data_file, sep=',',
                         parse_dates=["data_somministrazione"])
        df["totali"] = df["sesso_maschile"] + df["sesso_femminile"]
    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(df)

def load_data_file(data_file: str) -> ResultValue:
    log = logging.getLogger('load_data_file')
    log.info(" >>")
    try:
        df = pd.read_csv(data_file, sep=',',
                         parse_dates=["REPORT DATE"],
                         dtype={
                             "Ricoverati con sintomi": np.int64,
                             "CASI TOTALI - A": np.int64
                         })
    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(df)

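# Illustrative usage sketch (not part of the original module): load the aggregated
# report CSV and print its date range; the path below is a placeholder.
def _example_load_report() -> None:
    loaded = load_data_file(data_file="/tmp/report_data.csv")
    if loaded.is_ok():
        df = cast(pd.DataFrame, loaded())
        print(df["REPORT DATE"].min(), df["REPORT DATE"].max())
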
def get_version_from_date(date: dt.datetime) -> ResultValue:
    log = logging.getLogger('get_version_from_date')
    log.info(" >>")
    version = ""
    if date >= dt.datetime.strptime("03/12/2020", '%d/%m/%Y'):
        version = "v6"
    elif date >= dt.datetime.strptime("25/06/2020", '%d/%m/%Y'):
        version = "v1"
    elif date >= dt.datetime.strptime("01/05/2020", '%d/%m/%Y'):
        version = "v5"
    else:
        ex = Exception("Unable to find a valid version for {d}".format(d=date))
        log.error("Error {e}".format(e=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(version)

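# Illustrative usage sketch (not part of the original module): the report layout
# version is chosen purely from the report date thresholds above.
def _example_version_lookup() -> None:
    for day in ["01/06/2020", "01/07/2020", "04/12/2020"]:
        version = get_version_from_date(dt.datetime.strptime(day, '%d/%m/%Y'))
        if version.is_ok():
            print(day, "->", version())  # expected: v5, v1, v6
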
def append_new_data(report_date: str, context: dict) -> ResultValue:
    log = logging.getLogger('append_new_data')
    log.info(" >>")
    try:
        date = dt.datetime.strptime(report_date, '%d/%m/%Y')
        version = get_version_from_date(date)
        if version.is_in_error():
            return ResultKo(version())

        pdf_file_name = "dpc-covid19-ita-scheda-regioni-{y}{m}{d}.pdf".format(
            y=date.year,
            m=str(date.month).rjust(2, '0'),
            d=str(date.day).rjust(2, '0'))
        pdf_url = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/schede-riepilogative/regioni/{fn}".format(
            fn=pdf_file_name)
        log.info("Url: {u}".format(u=pdf_url))

        content = get_web_file(pdf_url)
        if content.is_in_error():
            return ResultKo(content())

        file_name = os.path.join(context["temp_dir"], pdf_file_name)
        if save_content_to_file(file_name, cast(bytes, content())).is_in_error():
            return ResultKo(Exception("Error in save_content_to_file."))

        rv = pdf_to_dataframe(file_name)
        if rv.is_in_error():
            return ResultKo(Exception("Error in pdf_to_dataframe."))
        df, report_read_date = rv()

        df_regions = refactor_region_df(df, report_read_date, version())
        if df_regions.is_in_error():
            return ResultKo(df_regions())

        if context["save"] == True:
            rv = save_df_to_csv(df_regions(), context["data file"],
                                context["columns"], context["sort column"])
            if rv.is_in_error():
                return ResultKo(rv())
    except Exception as ex:
        log.error("append_new_data failed - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(df_regions())

def main(args: argparse.Namespace) -> ResultValue: log = logging.getLogger('Main') log.info(" >>") data_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "data", "report_data.csv") result = load_data_file(data_file=data_file) if result.is_in_error(): return ResultKo(Exception("load data failed.")) df = result() region_name = 'Lombardia' mask = df['Regione'] == region_name region_df = df.loc[mask, :] region_df = region_df.sort_values(["REPORT DATE"]) x = region_df["REPORT DATE"] y = region_df["DECEDUTI"] y_tot = region_df["CASI TOTALI - A"] chart_composite(x, y, y_tot, region_name) log.info(" <<") return ResultOk(True)
def chart_vaccinations_male_female(df: pd.DataFrame,
                                   ax: mp.axes.Axes) -> ResultValue:
    log = logging.getLogger('chart_vaccinations_male_female')
    log.info(" >>")
    try:
        num_male = df["sesso_maschile"].sum()
        num_female = df["sesso_femminile"].sum()
        parts = [num_female, num_male]
        labels = ["Donne", "Uomini"]
        female_color = "#f1a29b"
        male_color = "#9bd7f1"
        ax.pie(parts,
               labels=labels,
               colors=[female_color, male_color],
               autopct='%1.1f%%')
        ax.set_title("Distribuzione per genere", fontsize=18)
    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(True)

def chart_vaccinations_fornitore(df: pd.DataFrame,
                                 ax: mp.axes.Axes) -> ResultValue:
    log = logging.getLogger('chart_vaccinations_fornitore')
    log.info(" >>")
    try:
        by_company = df.groupby(["fornitore"]).sum()
        by_company["totals"] = by_company["sesso_maschile"] + by_company["sesso_femminile"]
        by_company.reset_index(level=0, inplace=True)
        values = by_company["totals"]
        labels = by_company["fornitore"]
        ax.pie(values,
               labels=labels,
               colors=["#dfeef4", "#c2e7f6", "#7fd2f3"],
               autopct='%1.1f%%')
        ax.set_title("Distribuzione per fornitore", fontsize=18)
    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(True)

def plot_vaccinations_by_time(df: pd.DataFrame, df_delivered: pd.DataFrame,
                              ax: mp.axes.Axes,
                              which: str = "first") -> ResultValue:
    log = logging.getLogger('plot_vaccinations_by_time')
    log.info(" >>")
    try:
        ln_one_color = "#f08814"
        ln_two_color = "#92b7e9"
        ln_one_label = "Cumulata numero vaccinazioni"
        ln_two_label = "Distribuzione giornaliera"

        grp_by_time = df.groupby("data_somministrazione").sum()
        x = grp_by_time.index.values
        y = grp_by_time["prima_dose"]
        y_cum_sum = grp_by_time["prima_dose"].cumsum()

        set_axes_common_properties(ax, no_grid=False)
        ax.get_yaxis().set_major_formatter(
            mp.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
        remove_tick_lines('x', ax)
        remove_tick_lines('y', ax)
        ax.set_xticks(x)
        ax.set_xticklabels(x, rotation=80)
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%y"))
        ax.xaxis.set_minor_formatter(mdates.DateFormatter("%d/%m"))
        ax.xaxis.set_major_locator(mdates.DayLocator(interval=2))
        ax.set_ylabel(ln_one_label, fontsize=14)
        ax.set_xlabel("Data", fontsize=14)
        ax.set_title("Vaccinazioni nel tempo - prima dose", fontsize=18)
        ax.tick_params(axis='y', colors=ln_one_color)
        ax.yaxis.label.set_color(ln_one_color)

        ax.scatter(x, y_cum_sum, color=ln_one_color, s=30, marker='.')
        ln_one = ax.plot(x, y_cum_sum, 'b-', linewidth=2,
                         color=ln_one_color, label=ln_one_label)

        result = plot_delivered_vaccines_quantity(df_delivered, ax)
        if result.is_in_error():
            log.error(result())
            return result
        line_three = result()

        ax_dec = ax.twinx()
        remove_tick_lines('y', ax_dec)
        remove_tick_lines('x', ax_dec)
        set_axes_common_properties(ax_dec, no_grid=True)
        ax_dec.scatter(x, y, color=ln_two_color, s=30, marker='.')
        ln_two = ax_dec.plot(x, y, 'b-', linewidth=2,
                             color=ln_two_color, label=ln_two_label)
        ax_dec.set_ylabel(ln_two_label, fontsize=14)
        ax_dec.yaxis.label.set_color(ln_two_color)
        ax_dec.tick_params(axis='y', colors=ln_two_color)

        lns = ln_one + ln_two + line_three
        labs = [l.get_label() for l in lns]
        ax.legend(lns, labs, loc='upper left')
    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(True)

def chart_composite(x: pd.Series, y_one: pd.Series, y_two: pd.Series,
                    region_name: str) -> ResultValue:
    log = logging.getLogger('chart_composite')
    log.info(" >>")
    try:
        locale.setlocale(locale.LC_ALL, 'it_IT.UTF-8')
        fig = plt.figure(figsize=(20, 10))
        gs1 = gridspec.GridSpec(1, 1, hspace=0.2, wspace=0.1, figure=fig)
        ax = []
        ax.append(fig.add_subplot(gs1[0, 0]))
        idx = 0

        set_axes_common_properties(ax[0], no_grid=False)
        ax[idx].get_yaxis().set_major_formatter(
            mp.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
        remove_tick_lines('x', ax[idx])
        remove_tick_lines('y', ax[idx])
        ax[idx].set_xticks(x)
        ax[idx].set_xticklabels(x, rotation=80)
        ax[idx].xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%y"))
        ax[idx].xaxis.set_minor_formatter(mdates.DateFormatter("%d/%m"))
        ax[idx].xaxis.set_major_locator(mdates.DayLocator(interval=7))
        ax[idx].set_ylabel("Numero", fontsize=14)
        ax[idx].set_xlabel("Data", fontsize=14)
        ax[idx].set_title("{reg} - {title} ".format(
            title="Deceduti/Ammalati - totale", reg=region_name), fontsize=18)

        ax[idx].scatter(x, y_one, color="#b9290a", s=30, marker='.')
        ln_one = ax[idx].plot(x, y_one, 'b-', linewidth=2,
                              color="#f09352", label="Totale ammalati")

        dec_color = "#8f0013"
        ax_dec = ax[idx].twinx()
        remove_tick_lines('y', ax_dec)
        set_axes_common_properties(ax_dec, no_grid=True)
        ax_dec.scatter(x, y_two, color=dec_color, s=30, marker='.')
        ln_two = ax_dec.plot(x, y_two, 'b-', linewidth=2,
                             color=dec_color, label="Totale deceduti")
        ax_dec.set_ylabel("Totale deceduti", fontsize=14)
        ax_dec.yaxis.label.set_color(dec_color)
        ax_dec.tick_params(axis='y', colors=dec_color)

        lns = ln_one + ln_two
        labs = [l.get_label() for l in lns]
        ax[idx].legend(lns, labs, loc='upper left')
    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(plt)

def refactor_region_df(df: pd.DataFrame, report_date: dt.datetime,
                       pdf_version: str = "v1") -> ResultValue:
    log = logging.getLogger('refactor_region_df')
    log.info(" ({ver} - {dt}) >>".format(dt=report_date, ver=pdf_version))
    log.debug("\n{d}".format(d=str(df)))
    df_res = None
    try:
        df_res = df
        if pdf_version == "v1":
            df_res.rename(columns={
                df_res.columns[0]: "Regione",
                df_res.columns[1]: "Ricoverati con sintomi",
                df_res.columns[2]: "Terapia intensiva",
                df_res.columns[3]: "Isolamento domiciliare",
                df_res.columns[4]: "Totale attualmente positivi",
                df_res.columns[5]: "DIMESSI/GUARITI",
                df_res.columns[6]: "DECEDUTI",
                df_res.columns[7]: "CASI TOTALI - A",
                df_res.columns[8]: "INCREMENTO CASI TOTALI (rispetto al giorno precedente)",
                df_res.columns[9]: "Casi identificatidal sospettodiagnostico",
                df_res.columns[10]: "Casi identificatida attività discreening",
                df_res.columns[11]: "CASI TOTALI - B",
                df_res.columns[12]: "Totale casi testati",
                df_res.columns[13]: "Totale tamponi effettuati",
                df_res.columns[14]: "INCREMENTO TAMPONI"
            }, inplace=True)
        elif pdf_version in ["v6"]:
            df_res.rename(columns={
                df_res.columns[0]: "Regione",
                df_res.columns[1]: "Ricoverati con sintomi",
                df_res.columns[2]: "Terapia intensiva",
                df_res.columns[3]: "Terapia intensiva / Ingressi delgiorno",
                df_res.columns[4]: "Isolamento domiciliare",
                df_res.columns[5]: "Totale attualmente positivi",
                df_res.columns[6]: "DIMESSI/GUARITI",
                df_res.columns[7]: "DECEDUTI",
                df_res.columns[8]: "CASI TOTALI - A",
                df_res.columns[9]: "INCREMENTO CASI TOTALI (rispetto al giorno precedente)",
                df_res.columns[10]: "Totale persone testate",
                df_res.columns[11]: "Totale tamponi effettuati",
                df_res.columns[12]: "INCREMENTO TAMPONI"
            }, inplace=True)
        elif pdf_version in ["v5"]:
            df_res.rename(columns={
                df_res.columns[0]: "Regione",
                df_res.columns[1]: "Ricoverati con sintomi",
                df_res.columns[2]: "Terapia intensiva",
                df_res.columns[3]: "Isolamento domiciliare",
                df_res.columns[4]: "Totale attualmente positivi",
                df_res.columns[5]: "DIMESSI/GUARITI",
                df_res.columns[6]: "DECEDUTI",
                df_res.columns[7]: "CASI TOTALI - A",
                df_res.columns[8]: "INCREMENTO CASI TOTALI (rispetto al giorno precedente)",
                df_res.columns[9]: "Totale tamponi effettuati",
                df_res.columns[10]: "Casi testati"
            }, inplace=True)
        else:
            ex = Exception("Unknown pdf version: {pv}".format(pv=pdf_version))
            log.error("Error - {ex}".format(ex=ex))
            return ResultKo(ex)
        df_res["REPORT DATE"] = report_date
        df_res["SCHEMA VERSION"] = pdf_version
        log.debug("\n{d}".format(d=str(df_res)))
    except Exception as ex:
        log.error(" failed - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(df_res)

def pdf_to_dataframe(pdf_file_name: str) -> ResultValue:
    log = logging.getLogger('pdf_to_dataframe')
    log.info(" ({fn}) >>".format(fn=pdf_file_name))
    df = None
    report_date: dt.datetime = dt.datetime(1964, 8, 3, 0, 0)
    try:
        df = tabula.read_pdf(pdf_file_name, pages='all')
        csv_file = os.path.splitext(pdf_file_name)[0] + ".csv"
        tabula.convert_into(pdf_file_name, csv_file,
                            output_format="csv", pages='all')
        list_reg = []
        with open(csv_file, "r") as fh:
            start = False
            end = False
            reg = re.compile(r"(\d{1,3}) (\d)")
            for line in fh:
                if line.startswith("Lombardia"):
                    start = True
                if line.startswith("TOTALE"):
                    end = True
                    start = False
                if start:
                    # Normalize the region rows: drop thousands separators and
                    # plus signs, then re-introduce commas between numbers.
                    line = line.replace(".", "")
                    line = line.replace("+ ", "")
                    line = reg.sub("\\1,\\2", line)
                    line = line.replace("\n", "")
                    list_reg.append(line)
                if 'Aggiornamento casi Covid-19' in line:
                    parts = line.split(" - ")
                    if len(parts) > 1:
                        report_date_s = parts[0]
                        if parts[0][0] == "\"":
                            report_date_s = parts[0][1:]
                        log.debug(report_date_s)
                        report_date_rv = translate_to_date(report_date_s.split(" "))
                        if report_date_rv.is_in_error():
                            msg = "Error in date translation."
                            log.error(msg)
                            return ResultKo(Exception(msg))
                        else:
                            report_date = report_date_rv()
                elif 'AGGIORNAMENTO ' in line:
                    parts = line.split(" ")
                    if len(parts) > 1:
                        report_date = dt.datetime.strptime(parts[1], '%d/%m/%Y')
        log.info("RDate: {rd}".format(rd=report_date))
        df = pd.DataFrame([line.split(",") for line in list_reg])
    except Exception as ex:
        log.info("pdf_to_dataframe failed - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" (report_date={rd}) <<".format(rd=report_date))
    return ResultOk((df, report_date))

def chart_single_line(x: pd.Series, y: pd.Series, context: dict) -> ResultValue:
    log = logging.getLogger('chart_single_line')
    log.info(" >>")
    try:
        if context.get('region name') is None:
            msg = "Error: region name field is mandatory."
            log.error(msg)
            return ResultKo(Exception(msg))
        else:
            region_name = context["region name"]
        if context.get('title') is None:
            msg = "Error: title field is mandatory."
            log.error(msg)
            return ResultKo(Exception(msg))
        else:
            title = context["title"]

        fig = plt.figure(figsize=(20, 10))
        gs1 = gridspec.GridSpec(1, 1, hspace=0.2, wspace=0.1, figure=fig)
        ax = []
        ax.append(fig.add_subplot(gs1[0, 0]))
        idx = 0

        set_axes_common_properties(ax[0], no_grid=False)
        remove_tick_lines('x', ax[idx])
        remove_tick_lines('y', ax[idx])
        ax[idx].set_xticks(x)
        ax[idx].set_xticklabels(x, rotation=80)
        ax[idx].xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%y"))
        ax[idx].xaxis.set_minor_formatter(mdates.DateFormatter("%d/%m"))
        ax[idx].xaxis.set_major_locator(mdates.DayLocator(interval=7))
        ax[idx].set_ylabel("Numero", fontsize=14)
        ax[idx].set_xlabel("Data", fontsize=14)
        ax[idx].set_title("{reg} - {title} ".format(title=title, reg=region_name),
                          fontsize=18)

        ax[idx].scatter(x, y, color="#b9290a", s=30, marker='.', label=title)
        ax[idx].plot(x, y, 'b-', linewidth=2, color="#f09352")

        if context.get('dad begin date') is not None:
            ax[idx].axvline(context.get('dad begin date'), color="#048f9e")
            ax[idx].text(0.50, 0.25, 'Inizio dad scuole superiori',
                         horizontalalignment='center',
                         verticalalignment='center',
                         transform=ax[idx].transAxes,
                         rotation=90, color="#048f9e", fontsize=12)
            ax[idx].axvline(context.get('school opening date'), color="#048f9e")
            ax[idx].text(0.95, 0.25, 'Riapertura scuole',
                         horizontalalignment='center',
                         verticalalignment='center',
                         transform=ax[idx].transAxes,
                         rotation=90, color="#048f9e", fontsize=12)

        ax[idx].legend(fontsize=12, loc='upper left')
    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        return ResultKo(ex)
    log.info(" <<")
    return ResultOk(plt)

def main(args: argparse.Namespace) -> ResultValue:
    log = logging.getLogger('Main')
    log.info(" >>")
    rv: ResultValue = ResultKo(Exception("Error"))
    try:
        today = dt.datetime.now().strftime("%Y%m%d")
        if args.download_vaccinazioni == True:
            file_name = "{dt}_vaccinazioni.csv".format(dt=today)
            url = "https://raw.githubusercontent.com/italia/covid19-opendata-vaccini/master/dati/somministrazioni-vaccini-latest.csv"
        if args.download_consegne == True:
            file_name = "{dt}_vaccini_consegnati.csv".format(dt=today)
            url = "https://raw.githubusercontent.com/italia/covid19-opendata-vaccini/master/dati/consegne-vaccini-latest.csv"
        if args.download_vaccinazioni == True or args.download_consegne == True:
            data_file = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "..", "data",
                file_name)
            rv = download_csv_file(url=url, data_file=data_file)
            if rv.is_in_error():
                msg = "Data download error: {e}".format(e=rv())
                log.error(msg)
            else:
                msg = "Data downloaded."
                log.info(msg)

        if args.chart == True:
            data_file = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "..", "data",
                "vaccinazioni.csv")
            rv = create_dataframe(data_file=data_file)
            if rv.is_in_error():
                log.error(rv())
                return ResultKo(rv())
            df = rv()

            data_file = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "..", "data",
                "vaccini_consegnati.csv")
            rv = create_delivered_dataframe(data_file=data_file)
            if rv.is_in_error():
                log.error(rv())
                return ResultKo(rv())
            df_delivered = rv()

            region_name = "Lombardia"
            mask_region = (df["nome_area"] == region_name)
            df_region = df.loc[mask_region, [
                "data_somministrazione", "totali", 'fascia_anagrafica',
                "sesso_maschile", "sesso_femminile", "fornitore",
                "prima_dose", "seconda_dose"
            ]]
            mask_region = (df_delivered["nome_area"] == region_name)
            df_delivered_region = df_delivered.loc[
                mask_region, ["fornitore", "numero_dosi", "data_consegna"]]

            fig = plt.figure(figsize=(20, 10))
            gs1 = gridspec.GridSpec(1, 1, hspace=0.2, wspace=0.1, figure=fig)
            ax = []
            ax.append(fig.add_subplot(gs1[0, 0]))
            idx = 0
            rv = plot_delivered_vaccines_quantity(df_delivered_region, ax=ax[idx])
            if rv.is_ok():
                plt.savefig(os.path.join(os.sep, "tmp", "vaccini_fig.png"),
                            bbox_inches='tight',
                            pad_inches=0.2)
    except Exception as ex:
        log.error("Exception caught - {ex}".format(ex=ex))
        rv = ResultKo(ex)
    log.info(" ({rv}) <<".format(rv=rv.is_ok()))
    return rv