def add_all_preds(df_county):
    """
    Add single-predictor predictions for each of the past {ndays} days.

    For every method in the module-level `methods` and every offset t in
    1..ndays, the frame returned by
    `exponential_modeling.leave_t_day_out(df_county, t)` is (for all methods
    except 'ensemble') passed to `fit_and_predict.fit_and_predict`, and the
    resulting `predicted_{outcome}_{method}_{horizon}` column is copied into
    `df_county` under
    `all_{outcome}_pred_{month}_{day}_{method}_{horizon}`, where month/day
    are taken from `today - timedelta(t)`.

    Reads the module-level names `methods`, `ndays`, `today`, `horizon`,
    `outcome` and `very_important_vars`.

    Input:
        df_county: pd.DataFrame
    Output:
        the same df_county object, with the new columns added
    """
    cutoff = date(2020, 3, 16)

    for method in methods:
        pred_key = f'predicted_{outcome}_{method}_{horizon}'

        for t in tqdm(range(1, ndays + 1)):
            d = today - timedelta(t)
            # the demographic predictor is not run for days before the cutoff
            if d < cutoff and method == 'demographic':
                continue

            held_out = exponential_modeling.leave_t_day_out(df_county, t)

            # 'demographic' is the shared exponential model fed with the
            # demographic variables; 'ensemble' is not fitted here at all.
            if method == 'demographic':
                overrides = {'method': 'shared_exponential',
                             'demographic_vars': very_important_vars}
            elif method != 'ensemble':
                overrides = {'method': method}
            else:
                overrides = None

            if overrides is not None:
                held_out = fit_and_predict.fit_and_predict(
                    held_out,
                    target_day=np.arange(1, horizon + 1),
                    outcome=outcome,
                    mode='predict_future',
                    output_key=pred_key,
                    **overrides)

            # NOTE(review): for 'ensemble' the column is read without a fit
            # above -- presumably it is already present in the frame; confirm.
            out_key = f'all_{outcome}_pred_{d.month}_{d.day}_{method}_{horizon}'
            df_county[out_key] = held_out[pred_key]

    return df_county
Exemplo n.º 2
0
def compute_pmdl_weight(df, methods, outcome):
    """
    Compute one weight per method from its one-day-ahead forecasts over the
    last 7 observed days.

    For t = 1..7 the frame returned by
    `exponential_modeling.leave_t_day_out(df, t)` is forecast one day ahead
    with `fit_and_predict.get_forecasts`; that forecast is stored in column
    7 - t so that it lines up with the matching entry of the last-7-days
    observations. Observations and forecasts are compared on a log(x + 1)
    scale by `pmdl_weight`.

    Input:
        df: pd.DataFrame
            each entry of df[outcome] is assumed to be a sequence holding at
            least 7 values -- TODO confirm against callers
        methods: iterable of hashable method identifiers
        outcome: str
    Output:
        dict mapping each method to the value returned by `pmdl_weight`
    """
    y = np.array([series[-7:] for series in df[outcome].values])
    weights = {}

    for method in methods:
        y_preds = np.zeros(y.shape)

        for t in range(1, 8):
            df2 = exponential_modeling.leave_t_day_out(df, t)
            df2 = fit_and_predict.get_forecasts(df2,
                                                outcome=outcome,
                                                method=method,
                                                output_key='y_preds',
                                                target_day=np.array([1]))
            y_preds[:, 7 - t] = np.array(
                [forecast[0] for forecast in df2['y_preds'].values])

        # Clamp negative forecasts before the log: log(x + 1) is NaN/-inf
        # for x <= -1, which would make the weight meaningless.
        weights[method] = pmdl_weight(np.log(y + 1),
                                      np.log(np.maximum(y_preds, 0) + 1))

    return weights
    

        
        
        
        
        
        
    
Exemplo n.º 3
0
def compute_pmdl_weight(df, methods, outcome, target_day):
    """
    Compute one weight per model from its 3-day-ahead predictions over the
    last 7 observed days.

    For t = 1..7 the frame returned by
    `exponential_modeling.leave_t_day_out(df, t + 2)` is predicted 3 days
    ahead with `fit_and_predict.fit_and_predict`; the last value of each
    row's prediction is stored in column 7 - t. Observations and predictions
    (clamped at 0) are compared on a log(x + 1) scale by `pmdl_weight`.

    Input:
        df: pd.DataFrame
        methods: list of dictionary
            each has a 'model_type' entry and optionally 'demographic_vars'
        outcome: str
        target_day: accepted but not used in this function; the horizon is
            fixed at 3
    Output:
        dict mapping the position of each model in `methods` to the value
        returned by `pmdl_weight`
    """
    n_days = 7
    horizon = 3

    y = np.array([series[-n_days:] for series in df[outcome].values])
    weights = {}

    for idx, spec in enumerate(methods):
        extra_vars = spec['demographic_vars'] if 'demographic_vars' in spec else []

        y_preds = np.zeros(y.shape)
        for t in range(1, n_days + 1):
            held_out = exponential_modeling.leave_t_day_out(
                df, t + horizon - 1)
            held_out = fit_and_predict.fit_and_predict(
                held_out,
                outcome=outcome,
                method=spec['model_type'],
                mode='predict_future',
                target_day=np.array([horizon]),
                output_key='y_preds',
                demographic_vars=extra_vars)

            row_preds = held_out['y_preds'].values
            y_preds[:, n_days - t] = np.array(
                [row_pred[-1] for row_pred in row_preds])

        log_truth = np.log(y + 1)
        log_pred = np.log(np.maximum(y_preds, 0) + 1)
        weights[idx] = pmdl_weight(log_truth, log_pred)

    return weights
Exemplo n.º 4
0
def previous_prediction_errors(df,
                               target_day: np.ndarray = np.array([1]),
                               outcome: str = 'deaths',
                               methods: list = [advanced_model, linear],
                               look_back_day: int = 5,
                               output_key: str = None):
    """
    Calculating prediction errors of previous days
    Input:
        df: pd.DataFrame
        target_day: np.ndarray
        outcome: str
        methods: list
        look_back_day: int
            the prediction errors are computed for the last {look_back_day} days
        output_key: str
            name of the column the errors are written to; it is used as
            given, so the default None produces a column labelled None
    Output:
        df, with a new column {output_key} holding one dictionary per row:
        the keys are the days in target_day and each value is a list with
        {look_back_day} signed relative errors, actual / max(pred, 1) - 1
    """

    # Group the requested horizons by how many days must be held out:
    # holding out (day + back_day) days and predicting `day` days ahead
    # targets the observation that is back_day + 1 entries from the end.
    previous_start_days = defaultdict(list)
    for day in target_day:
        for back_day in range(look_back_day):
            previous_start_days[day + back_day].append(day)

    previous_model_errors = [defaultdict(list) for _ in range(len(df))]

    for t, previous_target_days in previous_start_days.items():
        df_old = exponential_modeling.leave_t_day_out(df, t)

        # re-run the ensemble as it would have been run t days ago
        old_predictions = fit_and_predict_ensemble(
            df_old,
            target_day=np.array(previous_target_days),
            outcome=outcome,
            methods=methods,
            mode='predict_future',
            output_key='old_predictions',
        )['old_predictions'].values

        outcome_column = df[outcome]
        for i, row_errors in enumerate(previous_model_errors):
            row_predictions = old_predictions[i]
            observed = outcome_column.iloc[i]
            for j, td in enumerate(previous_target_days):
                pred = row_predictions[j]
                # td - t - 1 is negative: it indexes from the end of the series
                actual_outcome = observed[td - t - 1]
                # max(pred, 1) avoids dividing by predictions below 1
                row_errors[td].append(actual_outcome / max(pred, 1) - 1)

    df[output_key] = previous_model_errors

    return df
def fit_and_predict_ensemble(df,
                             target_day: np.ndarray = np.array([1]),
                             outcome: str = 'deaths',
                             methods: list = [shared_exponential, linear],
                             mode: str = 'predict_future',
                             output_key: str = None,
                             verbose: bool = False):
    """
    Ensemble prediction: a per-row weighted average of the individual models
    Input:
        df: pd.DataFrame
        target_day: array
        outcome: str
        methods: list of dictionary
            each dictionary specifies the type ('model_type') and, optionally,
            the 'demographic_vars' of one model
        mode: str
            with 'predict_future' the weights are computed on df itself,
            otherwise on df with the last target_day[-1] days left out
        output_key: str
            defaults to predicted_{outcome}_ensemble_{target_day[-1]}
        verbose: bool
            passed on to every model; also prints the average normalized
            weight of each model
    Output:
        df with the ensemble prediction stored in column {output_key}
    """
    if output_key is None:
        output_key = f'predicted_{outcome}_ensemble_{target_day[-1]}'

    # predictions of every ensemble member, keyed by its position in methods
    predictions = {}
    for idx, spec in enumerate(methods):
        extra_vars = spec['demographic_vars'] if 'demographic_vars' in spec else []
        member_key = f'y_preds_{idx}'
        fitted = fit_and_predict(df,
                                 outcome=outcome,
                                 method=spec['model_type'],
                                 mode=mode,
                                 target_day=target_day,
                                 output_key=member_key,
                                 demographic_vars=extra_vars,
                                 verbose=verbose)
        predictions[idx] = fitted[member_key].values

    use_df = (df if mode == 'predict_future' else
              exponential_modeling.leave_t_day_out(df, target_day[-1]))

    weights = pmdl_weight.compute_pmdl_weight(use_df,
                                              methods=methods,
                                              outcome=outcome,
                                              target_day=target_day)

    n_rows = len(use_df)
    sum_weights = np.zeros(n_rows)
    for member_weights in weights.values():
        sum_weights = sum_weights + np.array(member_weights)

    # accumulate the normalized, weighted member predictions row by row
    weighted_preds = [np.zeros(len(target_day)) for _ in range(n_rows)]
    for row in range(len(df)):
        blended = weighted_preds[row]
        for idx, member_weights in weights.items():
            blended += (np.array(predictions[idx][row]) *
                        member_weights[row] / sum_weights[row])

    # print out the relative contribution of each model
    if verbose:
        print('--- Model Contributions ---')
        model_weight_counter = Counter()
        for idx, member_weights in weights.items():
            total_share = sum(member_weights[row] / sum_weights[row]
                              for row in range(n_rows))
            model_weight_counter[idx] = total_share / n_rows
        for idx, share in model_weight_counter.most_common():
            print(str(methods[idx]) + ': ' + str(share))

    df[output_key] = weighted_preds
    return df
def fit_and_predict_ensemble(
        df,
        target_day: np.ndarray = np.array([1]),
        outcome: str = 'deaths',
        methods: list = [exponential, shared_exponential, demographics],
        mode: str = 'predict_future',
        output_key: str = None):
    """
    Ensemble prediction: a per-row weighted average of the individual models
    Input:
        df: pd.DataFrame
        target_day: array
        outcome: str
        methods: list of dictionary
            each dictionary specifies the type ('model_type') and, optionally,
            the 'demographic_vars' of one model
        mode: str
            with 'predict_future' the weights are computed on df itself,
            otherwise on df with the last target_day[-1] days left out
        output_key: str
            defaults to predicted_{outcome}_ensemble_{target_day[-1]}
    Output:
        df with the ensemble prediction stored in column {output_key}
    """
    if output_key is None:
        output_key = f'predicted_{outcome}_ensemble_{target_day[-1]}'

    # predictions of every ensemble member, keyed by its position in methods
    predictions = {}
    for idx, spec in enumerate(methods):
        extra_vars = spec['demographic_vars'] if 'demographic_vars' in spec else []
        member_key = f'y_preds_{idx}'
        fitted = fit_and_predict(df,
                                 outcome=outcome,
                                 method=spec['model_type'],
                                 mode=mode,
                                 target_day=target_day,
                                 output_key=member_key,
                                 demographic_vars=extra_vars)
        predictions[idx] = fitted[member_key].values

    use_df = (df if mode == 'predict_future' else
              exponential_modeling.leave_t_day_out(df, target_day[-1]))

    weights = pmdl_weight.compute_pmdl_weight(use_df,
                                              methods=methods,
                                              outcome=outcome)

    n_rows = len(use_df)
    sum_weights = np.zeros(n_rows)
    for member_weights in weights.values():
        sum_weights = sum_weights + np.array(member_weights)

    # accumulate the normalized, weighted member predictions row by row
    weighted_preds = [np.zeros(len(target_day)) for _ in range(n_rows)]
    for row in range(len(df)):
        blended = weighted_preds[row]
        for idx, member_weights in weights.items():
            blended += (np.array(predictions[idx][row]) *
                        member_weights[row] / sum_weights[row])

    df[output_key] = weighted_preds
    return df
def fit_and_predict(train_df,
                    test_df,
                    outcome,
                    method,
                    mode,
                    target_day=np.array([1]),
                    demographic_vars=[]):
    """
    Trains a method (method) to predict a current number of days ahead (target_day)
    Predicts the values of {outcome} for test_df and writes them to the column
    f'predicted_{outcome}_{method}_{target_day[-1]}' of test_df
    (for 'shared_exponential' with a non-empty demographic_vars the column
    name gets the extra suffix '_demographics')

    Input:
    train_df, test_df: dfs with county level deaths and cases
    outcome: string, the column to predict
    method: string, one of 'exponential', 'shared_exponential', 'ensemble'
        ('AR' is deprecated and raises NotImplementedError)
    mode: either 'predict_future' or 'eval_mode'
    predict_future is predicting deaths on FUTURE days, so target_day=np.array([1])) means it predicts tomorrow's deaths
    eval_mode is for evaluating the performance of the classifier. target_day=np.array([k])) will predict the current days death count
    using information from k days ago. target_day= np.array([1,2,3,...,k]) will predict todays deaths, yesterdays deaths, deaths k-1 days ago
    using information from k days ago.
    target_day = np.array([1,2,..,n]) predicts these number of days ahead (can just be np.array([3])) for example if you just want 3 days ahead)
    demographic_vars: list of variables handed to the shared exponential model

    Output:
    test_df

    Raises:
    AssertionError for an unknown mode, NotImplementedError for method 'AR',
    ValueError for any other unknown method
    """

    assert mode in ('predict_future', 'eval_mode'), 'unknown mode'

    if method == 'AR':
        # The autoregressive baseline is deprecated; the code that used to
        # follow this raise was unreachable and has been removed.
        print('currently deprecated')
        raise NotImplementedError('currently deprecated')

    elif method == 'exponential':
        preds = exponential_modeling.exponential_fit(test_df[outcome].values,
                                                     mode=mode,
                                                     target_day=target_day)
        test_df[f'predicted_{outcome}_{method}_{target_day[-1]}'] = preds
        return test_df

    elif method == 'shared_exponential':
        # Fit a poisson GLM with shared parameters across counties. Input to the poisson GLM is demographic_vars and log(previous_days_deaths+1)
        cur_day_predictions = exponential_modeling.fit_and_predict_shared_exponential(
            train_df,
            test_df,
            mode,
            outcome=outcome,
            demographic_vars=demographic_vars,
            target_day=target_day)
        save_name = f'predicted_{outcome}_{method}_{target_day[-1]}'
        if len(demographic_vars) > 0:
            save_name += '_demographics'
        test_df[save_name] = cur_day_predictions
        return test_df

    elif method == 'ensemble':
        shared_preds = exponential_modeling.fit_and_predict_shared_exponential(
            train_df,
            test_df,
            mode=mode,
            outcome=outcome,
            demographic_vars=demographic_vars,
            target_day=target_day)
        exp_preds = exponential_modeling.exponential_fit(
            test_df[outcome].values, mode=mode, target_day=target_day)

        # In eval mode the weights must only see the data that was available
        # target_day[-1] days ago.
        if mode == 'predict_future':
            use_df = test_df
        else:
            use_df = exponential_modeling.leave_t_day_out(
                test_df, target_day[-1])
        weights = pmdl_weight.compute_pmdl_weight(
            use_df,
            methods=['exponential', 'shared_exponential'],
            outcome=outcome)
        weights_sum = weights['exponential'] + weights['shared_exponential']

        # per-row weighted average of the two predictors
        preds = [
            exp_preds[i] * weights['exponential'][i] / weights_sum[i] +
            np.array(shared_preds[i]) * weights['shared_exponential'][i] /
            weights_sum[i] for i in range(len(test_df))
        ]
        test_df[f'predicted_{outcome}_{method}_{target_day[-1]}'] = preds
        return test_df

    else:
        print('Unknown method')
        raise ValueError(f'Unknown method: {method!r}')