示例#1
0
def qf_single_state_prediction(state, lookback, horizon, predictors):
    """
    RQF WITHOUT CLUSTER SERIES

    For every city of ``state`` that has no saved metrics file yet, fit one
    model per forecast step (1..horizon) on the first 70% of the lagged data,
    pickle the evaluation metrics and plot the predictions.

    :param state: 2-letter code for state
    :param lookback: number of steps of history to use
    :param horizon: number of weeks ahead to predict
    :param predictors: predictor variables; must include 'casos' (dropped
        before lagging) and 'casos_est' (the target)
    :return: None
    """
    # The city lookup is given the full name for "CE" and the raw value otherwise.
    s = 'Ceará' if state == "CE" else state
    cities = list(get_cities_from_state(s))
    target = 'casos_est'

    for city in cities:
        # One path for both the "already done" check and the save below, so
        # that a finished city is actually recognised on the next run.
        metrics_path = '{}/{}/qf_metrics_{}.pkl'.format(
            'saved_models/quantile_forest_no_cluster', state, city)
        if os.path.isfile(metrics_path):
            print(city, 'done')
            continue

        data = combined_data(city, DATA_TYPES)
        # Non in-place drop: avoids mutating a slice of the combined frame.
        data = data[predictors].drop('casos', axis=1)

        data_lag = build_lagged_features(data, lookback)
        # dropna() returns a new frame; the result has to be kept.
        data_lag = data_lag.dropna()

        # targets[d] is the target series moved d-1 steps back, so that row i
        # holds the value observed d-1 steps after row i.
        targets = {}
        for d in range(1, horizon + 1):
            shifted = data_lag[target].shift(-(d - 1))
            # Trim the trailing rows that the shift filled with NaN.
            targets[d] = shifted if d == 1 else shifted[:-(d - 1)]

        X_data = data_lag.drop(target, axis=1)
        # Chronological 70/30 split (no shuffling).
        X_train, X_test = train_test_split(X_data, train_size=0.7,
                                           test_size=0.3, shuffle=False)

        city_name = get_city_names([city, 0])[0][1]
        preds = np.empty((len(data_lag), horizon))
        metrics = pd.DataFrame(index=('mean_absolute_error', 'explained_variance_score',
                                      'mean_squared_error', 'mean_squared_log_error',
                                      'median_absolute_error', 'r2_score'))
        for d in range(1, horizon + 1):
            tgt = targets[d][:len(X_train)]
            tgtt = targets[d][len(X_train):]

            model = rolling_forecasts(X_train, target=tgt, horizon=horizon)
            pred = model.predict(X_data[:len(targets[d])], quantile=50)

            # Pad with NaN so every horizon column has len(data_lag) rows.
            dif = len(data_lag) - len(pred)
            if dif > 0:
                pred = list(pred) + ([np.nan] * dif)
            preds[:, (d - 1)] = pred

            # Test row j is paired with tgtt[j], the same row/target pairing
            # used for training, so the leading rows of X_test are used.
            # NOTE(review): metrics use predict() without `quantile`, unlike
            # the quantile=50 predictions stored in `preds` - confirm intended.
            pred_m = model.predict(X_test[:len(tgtt)])
            metrics[d] = calculate_metrics(pred_m, tgtt)

        os.makedirs(os.path.dirname(metrics_path), exist_ok=True)
        metrics.to_pickle(metrics_path)
        plot_prediction(preds, targets[1], city_name, len(X_train))
示例#2
0
def alocate_data(state):
    """
    Pickle the combined data of every city of ``state`` under TMP_PATH.

    Cities for which ``combined_data`` raises ``TypeError`` are reported and
    left out of the result.

    :param state: state identifier passed to ``get_cities_from_state``
    :return: list of the cities that did not raise ``TypeError``
    """
    cities_list = list(get_cities_from_state(state))
    failed = []
    for geocode in cities_list:
        try:
            combined_data(geocode, data_types=DATA_TYPES).to_pickle(
                '{}/city_{}.pkl'.format(TMP_PATH, geocode))
        except TypeError:
            print("Skipping: ", geocode)
            failed.append(geocode)
    # Removal happens after the loop so the list is not mutated while iterated.
    for geocode in failed:
        cities_list.remove(geocode)
    return cities_list
示例#3
0
def lasso_single_state_prediction(state, lookback, horizon, predictors):
    """
    LASSO WITHOUT CLUSTER SERIES

    For every city of ``state`` that has no saved metrics file yet, fit one
    LassoLarsCV model per forecast step (1..horizon) on the first 70% of the
    lagged data, pickle the evaluation metrics and plot the predictions.

    :param state: 2-letter code for state
    :param lookback: number of steps of history to use
    :param horizon: number of weeks ahead to predict
    :param predictors: predictor variables; must include 'casos' (dropped
        before lagging) and 'casos_est' (the target)
    :return: None
    """
    # Same lookup rule as qf_single_state_prediction: the city lookup is given
    # the full name for "CE" and the raw value otherwise.
    s = 'Ceará' if state == "CE" else state
    cities = list(get_cities_from_state(s))
    target = 'casos_est'

    for city in cities:
        # One path for both the "already done" check and the save below.
        metrics_path = '{}/{}/lasso_metrics_{}.pkl'.format(
            'saved_models/lasso_no_cluster', state, city)
        if os.path.isfile(metrics_path):
            print(city, 'done')
            continue

        data = combined_data(city, DATA_TYPES)
        # Non in-place drop: avoids mutating a slice of the combined frame.
        data = data[predictors].drop('casos', axis=1)

        data_lag = build_lagged_features(data, lookback)
        # dropna() returns a new frame; the result has to be kept.
        data_lag = data_lag.dropna()

        # targets[d] is the target series moved d-1 steps back, so that row i
        # holds the value observed d-1 steps after row i.
        targets = {}
        for d in range(1, horizon + 1):
            shifted = data_lag[target].shift(-(d - 1))
            # Trim the trailing rows that the shift filled with NaN.
            targets[d] = shifted if d == 1 else shifted[:-(d - 1)]

        X_data = data_lag.drop(target, axis=1)
        # Chronological 70/30 split (no shuffling).
        X_train, X_test = train_test_split(X_data, train_size=0.7,
                                           test_size=0.3, shuffle=False)

        city_name = get_city_names([city, 0])[0][1]
        # NaN-filled so horizons skipped after a failed fit are not plotted
        # from uninitialised memory.
        preds = np.full((len(data_lag), horizon), np.nan)
        metrics = pd.DataFrame(index=('mean_absolute_error',
                                      'explained_variance_score',
                                      'mean_squared_error',
                                      'mean_squared_log_error',
                                      'median_absolute_error', 'r2_score'))
        for d in range(1, horizon + 1):
            # NOTE(review): `normalize` is deprecated/removed in newer
            # scikit-learn releases - confirm against the installed version.
            model = LassoLarsCV(max_iter=15, n_jobs=-1, normalize=False)

            tgt = targets[d][:len(X_train)]
            tgtt = targets[d][len(X_train):]
            try:
                model.fit(X_train, tgt)
            except ValueError as err:
                print('-----------------------------------------------------')
                print(city, 'ERRO', err)
                print('-----------------------------------------------------')
                break
            pred = model.predict(X_data[:len(targets[d])])

            # Pad with NaN so every horizon column has len(data_lag) rows.
            dif = len(data_lag) - len(pred)
            if dif > 0:
                pred = list(pred) + ([np.nan] * dif)
            preds[:, (d - 1)] = pred
            pred_m = model.predict(X_test[:(len(tgtt))])
            metrics[d] = calculate_metrics(pred_m, tgtt)

        # Written once per city; skipped when not even the first horizon was
        # fitted, so an empty file never marks the city as done.
        if not metrics.empty:
            os.makedirs(os.path.dirname(metrics_path), exist_ok=True)
            metrics.to_pickle(metrics_path)
        plot_prediction(preds,
                        targets[1],
                        city_name,
                        len(X_train),
                        path='lasso_no_cluster')
    return None
示例#4
0
    if formula is None:
        formula = "casos~1"
    model = pf.GASX(data=data, ar=ar, sc=sc, family=family(), formula=formula)
    return model


if __name__ == "__main__":
    # Script entry point: fit the model returned by build_model on one city's
    # combined data and save the in-sample fit and the forecast as PNG files.
    city = 3304557
    prediction_window = 5  # weeks

    # combined_data is the only data source used here; earlier revisions
    # apparently fetched and joined the exogenous series by hand.
    full_data = combined_data(city)

    gasx = build_model(full_data.dropna(), ar=4, sc=6, formula='casos~1+numero')
    fit_result = gasx.fit('PML')
    print(fit_result.summary())

    # Each savefig call captures the figure drawn by the plot call before it.
    gasx.plot_fit()
    plt.savefig('GASX_in_sample.png')
    gasx.plot_parameters()
    gasx.plot_predict(h=prediction_window, past_values=12)
    plt.savefig('GASX_prediction.png')
from datetime import datetime
import matplotlib.pyplot as plt
from infodenguepredict.data.infodengue import get_alerta_table, combined_data


def build_model(data):
    """
    Build a two-regime Markov-switching autoregression of order 2 for ``casos``.

    :param data: frame providing the ``casos`` column (endogenous series) and
        the ``p_rt1``, ``p_inc100k`` and ``numero`` columns (exogenous)
    :return: the unfitted MarkovAutoregression model
    """
    markov_ar = sm.tsa.regime_switching.markov_autoregression.MarkovAutoregression
    exog_columns = ['p_rt1', 'p_inc100k', 'numero']
    return markov_ar(endog=data.casos,
                     k_regimes=2,
                     exog=data[exog_columns],
                     order=2)


if __name__ == "__main__":
    # Script entry point: plot the case series and its (partial)
    # autocorrelation, then fit the Markov autoregression and predict.
    prediction_window = 5  # weeks
    data = combined_data(3304557)  # Nova Iguaçu: 3303609
    data.casos.plot()
    # Graph data autocorrelation
    fig, axes = plt.subplots(1, 2, figsize=(15, 4))

    # `.ix` was removed from pandas; `.iloc[1:]` skips the first row by
    # position (assumes the frame is indexed by date, not by integers).
    casos_series = data['casos'].iloc[1:]
    fig = graphics.tsa.plot_acf(casos_series, lags=52, ax=axes[0])
    fig = graphics.tsa.plot_pacf(casos_series, lags=52, ax=axes[1])

    model = build_model(data)
    fit = model.fit()  # 'BBVI',iterations=1000,optimizer='RMSProp')
    print(fit.summary())
    #todo: fix model, not fitting
    plt.figure()
    predict = fit.predict(start='2017-01-01', end='2017-03-01')
    # NOTE(review): conf_int() requires `predict` to be a prediction-results
    # object; confirm what fit.predict returns for this model.
    predict_ci = predict.conf_int()
    predictdy = fit.predict(start='2017-01-01', end='2017-02-26')