Example No. 1
def prepare_data_features_kr(df_feat_kr):
    '''
    Prepare the DataFrame to be used with the model.
    Reduce the timeframe to usable dates without NaN values in the feature columns.
    '''
    df_out = df_feat_kr.copy()

    # forward-fill missing age values
    df_out["age_pos"] = df_out["age_pos"].ffill()

    # drop rows that still contain NaN in any required feature column
    df_out.dropna(inplace=True,
                  subset=[
                      "date", 'nb_cases', 'T_min', 'T_max', 'H_mean',
                      'W_speed', 'pos', 'test', 'day_num', 'age_pos'
                  ])
    # verify that the remaining dates are consecutive (no gaps)
    date_old = add_days(df_out.index[0].strftime("%Y-%m-%d"), -1)
    dates_index = df_out.index.strftime("%Y-%m-%d")
    for date_curr in dates_index:
        if date_curr != add_days(date_old, 1):
            print("ERROR : ", date_curr)
        assert date_curr == add_days(date_old, 1)
        date_old = date_curr
    return df_out
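A minimal usage sketch (names are assumptions: df_feat_kr stands for the raw KR feature DataFrame with a DatetimeIndex and the columns listed above):

# hypothetical input: raw KR feature DataFrame with a DatetimeIndex
df_model_kr = prepare_data_features_kr(df_feat_kr)
# the result is NaN-free and covers a continuous range of dates
print(df_model_kr.index.min(), df_model_kr.index.max(), len(df_model_kr))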
Example No. 2
def extrap_missing_kr(df_feat_kr_tmp, list_missing):
    '''
    Extrapolate (forward-fill) data for dates missing from the DataFrame.
    '''
    def fun_apply_extrap(date_curr):
        return date_curr in list_missing

    if len(list_missing) == 0:
        return df_feat_kr_tmp
    # build a full, gap-free date range covering the data
    str_date_min = min(df_feat_kr_tmp["date"])
    str_date_min = add_days(str_date_min, -1)
    list_range = generate_list_dates(str_date_min,
                                     max(df_feat_kr_tmp["date"]))

    # reindex onto the full range, forward-filling values for the added days
    new_index = pd.DatetimeIndex(list_range)
    df_feat_kr_tmp = df_feat_kr_tmp.reindex(new_index, method='ffill')

    df_feat_kr_tmp["date"] = \
        pd.to_datetime(df_feat_kr_tmp.index).strftime("%Y-%m-%d").tolist()

    df_feat_kr_tmp["extrap"] = False
    
    df_feat_kr_tmp["extrap"] = df_feat_kr_tmp["date"].apply(fun_apply_extrap)

    return df_feat_kr_tmp
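A short usage sketch (list_missing is assumed to be a list of "YYYY-MM-DD" strings, e.g. produced by a hypothetical gap-detection step):

# hypothetical example: fill two missing days by forward-filling the previous row
list_missing = ["2020-05-02", "2020-05-03"]
df_feat_kr_filled = extrap_missing_kr(df_feat_kr, list_missing)
print(df_feat_kr_filled[df_feat_kr_filled["extrap"]])   # only the extrapolated rows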
Example No. 3
def check_update_df_feat_kr(date_now=None, force_update=False):
    '''
    Check whether the KR feature data needs updating and compute the
    date ranges to request for the cases and age downloads.
    '''
    if date_now is None:
        date_now = datetime.datetime.now().strftime("%Y-%m-%d")
    
    if force_update:
        # force a full update from the very first available dates
        return True, True, DATE_FIRST_CASES_GOUV_KR, \
                DATE_FIRST_FEAT_OK_KR, date_now
    
    flag_update = True  # does the cases data need updating?
    flag_update_age = True  # does the age data need updating?
    
    if os.path.isfile(PATH_DF_FEAT_KR):
        df_feat_kr = pd.read_csv(PATH_DF_FEAT_KR)
        # first date to download for cases
        date_first_extrap = get_first_day_extrap_kr(df_feat_kr)
        if date_first_extrap is not None: 
            # take first date of extrap to try to update it
            date_req_start = date_first_extrap
        else:
            # normal mode : take last date +1
            date_req_start = add_days(df_feat_kr["date"].max(), 1)
        # first date to download for ages
        date_req_start_age = \
            df_feat_kr[df_feat_kr["daily_age"].isna() & \
                (df_feat_kr["date"] > DATE_FIRST_FEAT_OK_KR)]["date"].min()
        if pd.isna(date_req_start_age):
            date_req_start_age = date_req_start
    else:
        date_req_start = DATE_FIRST_CASES_GOUV_KR
        date_req_start_age = DATE_FIRST_FEAT_OK_KR
    
    if date_req_start > date_now:
        flag_update = False
            
    if date_req_start_age > date_now:
        flag_update_age = False
    
    if not flag_update:
        date_req_start = None
        
    if not flag_update_age:
        date_req_start_age = None
    
    if flag_update or flag_update_age:
        date_req_end = date_now
    else:
        date_req_end = None
    print("Updating Data KR...")
    print("update cases : ", flag_update)
    print("update age : ", flag_update_age)
    print("date_req_start: ", date_req_start)
    print("date_req_start_age: ", date_req_start_age)
    print("date_req_end: ", date_req_end)
    return flag_update, flag_update_age, \
        date_req_start, date_req_start_age, date_req_end
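A usage sketch of the returned values (PATH_DF_FEAT_KR and the DATE_FIRST_* constants come from the surrounding module; download_cases_kr is a hypothetical placeholder for the actual downloader):

# decide what to download before hitting the data sources
flag_update, flag_update_age, date_start, date_start_age, date_end = \
    check_update_df_feat_kr()
if flag_update:
    # hypothetical downloader: fetch cases between date_start and date_end
    download_cases_kr(date_start, date_end)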
Example No. 4
def update_pred_pos(df_feat_fr, from_disk=False):
    '''
    Update prediction data for positive cases in France.
    '''

    # check if last prediction is after last known date
    if os.path.isfile(PATH_DF_PLOT_PRED):
        df_plot_pred = pd.read_csv(PATH_DF_PLOT_PRED)
        df_plot_pred.index = df_plot_pred["date"]
        if df_plot_pred["date"].min() <= df_feat_fr["date"].max():
            from_disk = False

    # if no prediction or if from disk
    if (not settings.PREDICT) | from_disk:
        df_plot_pred = pd.read_csv(PATH_DF_PLOT_PRED)
        df_plot_pred.index = df_plot_pred["date"]
        return df_plot_pred

    # prepare features
    dataset, data_std, data_mean = prepare_dataset(df_feat_fr)
    # predict next days
    if settings.MODEL_TFLITE:
        json_list_list_x = prepare_to_lambda_future(dataset)
        resp = requests.post(URL_PREDICT, json=json_list_list_x)
        print("status code : ", resp.status_code)
        if resp.status_code == 200:
            y_multi_pred = retrieve_from_lambda(resp)
        else:
            print("AWS Lamdba future pred ERROR!")
            df_plot_pred = pd.read_csv(PATH_DF_PLOT_PRED)
            df_plot_pred.index = df_plot_pred["date"]
            return df_plot_pred
    else:
        # prepare data : very last days
        x_multi = np.array([dataset[-PAST_HISTORY:, :]])
        # load model
        multi_step_model = tf.keras.models.load_model(PATH_MDL_MULTI_STEP)
        y_multi_pred = multi_step_model.predict(x_multi)

    # convert back to positive-case counts (de-normalize)
    y_pos_pred = y_multi_pred * data_std[4] + data_mean[4]
    # predicted positives for the next FUTURE_TARGET days from the last day: date, pos, total (sum)
    str_date_pred_0 = df_feat_fr.date.max()
    str_date_pred_1 = add_days(str_date_pred_0, FUTURE_TARGET)
    list_dates_pred = generate_list_dates(str_date_pred_0, str_date_pred_1)
    # build the output DataFrame indexed by prediction date
    df_plot_pred = pd.DataFrame(index=list_dates_pred,
                                columns=["date"],
                                data=list_dates_pred)

    df_plot_pred["pos"] = y_pos_pred[0].astype(int)

    arr_nb_pred = df_plot_pred["pos"].cumsum().values
    df_plot_pred["nb_cases"] = df_feat_fr["nb_cases"].max() + arr_nb_pred

    # save for future pred
    df_plot_pred.to_csv(PATH_DF_PLOT_PRED, index=False)

    return df_plot_pred
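A hedged usage sketch (df_feat_fr is the prepared France feature DataFrame; settings.PREDICT, settings.MODEL_TFLITE and the PATH_* constants are defined elsewhere in the module):

# compute (or reload from disk) the next-days positive-case prediction for France
df_pred_fr = update_pred_pos(df_feat_fr)
print(df_pred_fr[["date", "pos", "nb_cases"]].tail(FUTURE_TARGET))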
Example No. 5
def update_pos(df_feat_fr):
    '''
    Update plot data for positive cases in France.
    '''
    # keep only the last NB_DAY_PLOT days: date, pos, total (cumulative sum)
    str_date_0 = add_days(df_feat_fr.date.max(), -NB_DAY_PLOT)
    df_plot = df_feat_fr[df_feat_fr["date"] >= str_date_0].copy()
    return df_plot
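A one-line usage sketch (df_feat_fr and NB_DAY_PLOT as above):

# restrict the plot data to the most recent NB_DAY_PLOT days
df_plot_fr = update_pos(df_feat_fr)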
Example No. 6
def update_pred_pos_all_kr(df_feat_kr, from_disk=False):
    '''
    Update prediction data for positive cases in Korea (KR) over all plotted days.
    '''
    if os.path.isfile(PATH_DF_PLOT_PRED_ALL_KR):
        df_plot_pred = pd.read_csv(PATH_DF_PLOT_PRED_ALL_KR)
        df_plot_pred.index = df_plot_pred["date"]
        if df_plot_pred["date"].max() < df_feat_kr["date"].max():
            from_disk = False
    else:
        from_disk = False

    if (not settings.PREDICT) | from_disk:
        df_plot_pred_all = pd.read_csv(PATH_DF_PLOT_PRED_ALL_KR)
        df_plot_pred_all.index = df_plot_pred_all["date"]
        return df_plot_pred_all

    # prepare features
    dataset, data_std, data_mean = prepare_dataset_kr(df_feat_kr)

    # predict
    if settings.MODEL_TFLITE:
        json_list_list_x = prepare_to_lambda(dataset)
        resp = requests.post(URL_PREDICT_KR, json=json_list_list_x)
        print("status code : ", resp.status_code)
        if resp.status_code == 200:
            y_multi_pred = retrieve_from_lambda(resp)
        else:
            print("AWS Lamdba future pred ERROR!")
            df_plot_pred_all = pd.read_csv(PATH_DF_PLOT_PRED_ALL_KR)
            df_plot_pred_all.index = df_plot_pred_all["date"]
            return df_plot_pred_all
    else:

        # load model
        multi_step_model = tf.keras.models.load_model(PATH_MDL_MULTI_STEP_KR)

        list_x = []

        # prepare data: one PAST_HISTORY-long input window per FUTURE_TARGET period
        nb_max = NB_PERIOD_PLOT
        for I in range(nb_max, 0, -1):
            I_start = I * FUTURE_TARGET - PAST_HISTORY
            if I_start < 0:
                break
            I_end = I * FUTURE_TARGET
            list_x.append(np.array([dataset[I_start:I_end, :]]))

        # model prediction
        for I, x_multi in enumerate(list_x):
            if I:
                y_multi_pred = np.concatenate(
                    [y_multi_pred,
                     multi_step_model.predict(x_multi)], axis=1)
            else:
                y_multi_pred = multi_step_model.predict(x_multi)

    # convert back to positive-case counts (de-normalize)
    y_pos_pred = y_multi_pred * data_std[4] + data_mean[4]

    # list of dates
    K_days = y_pos_pred.shape[1]
    print("K_days = ", K_days)
    print("y_pos_pred.shape = ", y_pos_pred.shape)
    str_date_pred_1 = df_feat_kr.date.max()
    str_date_pred_0 = add_days(str_date_pred_1, -1 * K_days)
    list_dates_pred = generate_list_dates(str_date_pred_0, str_date_pred_1)

    # create df output
    df_plot_pred_all = pd.DataFrame(index=list_dates_pred,
                                    columns=["date"],
                                    data=list_dates_pred)
    # daily
    df_plot_pred_all["pos"] = y_pos_pred[0].astype(int)

    # Total : cumulate sum
    list_nb_cases = []
    for I in range(0, df_plot_pred_all["pos"].shape[0], FUTURE_TARGET):
        str_date_nb_0 = add_days(str_date_pred_0, I)
        nb_0 = df_feat_kr[
            df_feat_kr["date"] == str_date_nb_0]["nb_cases"].iloc[0]
        arr_nb = nb_0 + \
            df_plot_pred_all.iloc[I:I+FUTURE_TARGET]["pos"].cumsum().values
        list_nb_cases = list_nb_cases + arr_nb.tolist()
    df_plot_pred_all["nb_cases"] = list_nb_cases

    # save for future pred
    df_plot_pred_all.to_csv(PATH_DF_PLOT_PRED_ALL_KR, index=False)

    return df_plot_pred_all
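A hedged usage sketch (df_feat_kr is the prepared KR feature DataFrame; NB_PERIOD_PLOT, FUTURE_TARGET and the PATH_* constants come from the module):

# back-fill the prediction curve over all plotted periods for Korea
df_pred_all_kr = update_pred_pos_all_kr(df_feat_kr)
print(df_pred_all_kr[["date", "pos", "nb_cases"]].head())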