Пример #1
0
def slave(dfs):
    """Fit the ARIMA model on every work item received from the master.

    Loops forever: receives an index from rank 0, extracts the matching
    entry of ``dfs`` through ``utils.gluco_extract``, runs ``_worker`` on it
    and sends ``(idx, out[0], out[1])`` back to rank 0 with tag 0.  The loop
    ends when a message tagged ``EXIT`` is received.

    Args:
        dfs: container indexed by the values the master sends; each item is
            handed to ``utils.gluco_extract``.

    Returns:
        None.  Any exception escaping the loop is reported on stdout and the
        function returns instead of propagating it.
    """
    try:
        while True:
            status_ = MPI.Status()
            idx = COMM.recv(source=0, tag=MPI.ANY_TAG, status=status_)
            # The tag, not the payload, tells whether the master wants us
            # to stop.
            if status_.tag == EXIT:
                return
            # do the work
            print(NAME + ": slave received", RANK, idx)
            df = utils.gluco_extract(dfs[idx], return_df=True)
            try:
                out = _worker(df)
            except Exception:
                # Fit failed for the current patient: still answer the
                # master so it does not wait forever for this index.
                # ``Exception`` (not a bare ``except``) lets
                # KeyboardInterrupt/SystemExit stop the process.
                out = (None, None, None)
                print(NAME + ": slave fit failed", RANK, idx)
            COMM.send((idx, out[0], out[1]), dest=0, tag=0)

    # ``StandardError`` was removed in Python 3; naming it here would turn
    # every failure into a NameError raised by the handler itself.
    except Exception as exc:
        print("Quitting ... TB:", str(exc))
Пример #2
0
# Load the data set from the pickle file; the context manager closes the
# file handle as soon as the payload has been read.
# NOTE(review): unpickling can execute arbitrary code -- only load files
# that come from a trusted source.
with open('../../data/dfs_py3.pkl', 'rb') as pkl_file:
    dfs_full = pkl.load(pkl_file)

# ## 1.1 Remove short acquisitions

# In[31]:

# Keep only patients with more than 3.5 days of acquisition
thresh = datetime.timedelta(days=3.5)

# List of patients that satisfy inclusion criterion
ok_keys = []

for k, df in dfs_full.items():
    time, gluco = gluco_extract(df)
    try:
        delta = time[-1] - time[0]
        if delta > thresh:
            ok_keys.append(k)
    except Exception:
        # Best effort: a patient whose acquisition span cannot be computed
        # or compared (e.g. no samples) is left out of the selection.
        # ``Exception`` keeps Ctrl-C working, unlike a bare ``except``.
        continue
# Filter short time-series
dfs = {k: dfs_full[k] for k in ok_keys}

# In[32]:

# Example: plot a single patient
k = 0
idx = list(dfs.keys())[k]
print(idx)
Пример #3
0
# Experiment settings
ph = 18  # prediction horizon
n_splits = 15
burn_in = 300  # burn-in samples used to learn the best order via cv

# State-space model
# -- measures matrix
H = np.array([1, 0])
# -- transition matrix (double integration model)
F = np.array([
    [2, -1],
    [1, 0],
])

# Identifiers of the patients to process
patients = list(dfs.keys())

for idx in patients:
    df = utils.gluco_extract(dfs[idx], return_df=True)

    # Learn the best order via cv
    # lambda2_range = np.logspace(-12, -4, 10)
    lambda2_range = np.logspace(-12, -4, 3)
    sigma2_range = np.linspace(1, 40, 3)
    # sigma2_range = np.linspace(1, 40, 10)
    out = kf.grid_search(df,
                         lambda2_range,
                         sigma2_range,
                         burn_in=burn_in,
                         n_splits=15,
                         F=F,
                         H=H,
                         return_mean_vld_error=True,
                         return_initial_state_mean=True,
Пример #4
0
def main(args):
    """Run ARIMA experiments on every patient that is not completed yet.

    For each selected patient the candidate ARIMA orders are ranked with
    ``arima.grid_search``; the samples after the first ``burn_in`` ones are
    then forecast with ``arima.moving_window``, trying the ranked orders in
    turn and keeping the first one that does not raise.  The error summary
    is printed and pickled to ``<idx>.pkl`` (relative to the current working
    directory) and the signal/residual plots are saved on a best-effort
    basis.

    Args:
        args: object exposing ``data_folder`` (path of the pickled data set)
            and ``threshold`` (value forwarded to ``utils.filter_patients``;
            ``None`` selects the default of 3.5 days).
    """
    ### TODO: deleteme ###
    # Collect the identifiers of the patients whose results already exist.
    # NOTE(review): results are written to the current working directory
    # below, while this scans a hard-coded absolute path -- confirm that the
    # two are meant to be the same place.
    results_dir = '/home/samu/projects/glicemie/experiments/cgm-tools/scripts'
    completed = set()
    for fname in os.listdir(results_dir):
        if not fname.endswith('.pkl'):
            continue
        stem = fname[:-len('.pkl')]
        # The previous ``x[-3] + '.csv'`` picked a single character, so no
        # patient was ever recognised as completed.  Results are dumped as
        # ``idx + '.pkl'``, hence the stem itself is an identifier; the
        # '.csv' variant is kept because the original code mapped result
        # names to '<name>.csv' identifiers.
        completed.add(stem)
        completed.add(stem + '.csv')
    ### TODO: deleteme ###

    # Load full data set from pickle file (see data_wrangler.py)
    # NOTE(review): unpickling can execute arbitrary code -- only load
    # trusted files.
    with open(args.data_folder, 'rb') as data_file:
        dfs_full = pkl.load(data_file)

    # Keep only patients with more than `THRESHOLD` days of CGM acquisition
    _threshold = args.threshold
    if _threshold is None:
        _threshold = datetime.timedelta(days=3.5)  # default
    dfs = utils.filter_patients(dfs_full, _threshold)

    # ----------------- TEST ----------------------------- #
    # Experiment parameters
    burn_in = 300  # burn-in samples used to learn the best order via cv
    n_splits = 15
    # burn_in = 144  # burn-in samples used to learn the best order via cv
    # n_splits = 8
    w_size = 36  # Window-size
    ph = 18  # prediction horizon

    # Get patients list
    patients = list(dfs.keys())
    n_patients = len(patients)

    # Count from 1 so the progress message reads "1/N" ... "N/N".
    for count, idx in enumerate(patients, start=1):
        if idx in completed:
            print("{} already completed".format(idx))
            continue

        print("Evaluating patient: {} ({}/{}) ...".format(
            idx, count, n_patients))
        df = utils.gluco_extract(dfs[idx], return_df=True)

        # Learn the best order via cv; only the ranking is used below.
        _, order_rank, _ = arima.grid_search(df,
                                             burn_in=burn_in,
                                             n_splits=n_splits,
                                             p_bounds=(1, 4),
                                             d_bounds=(1, 2),
                                             q_bounds=(1, 4),
                                             ic_score='AIC',
                                             return_order_rank=True,
                                             return_final_index=True,
                                             verbose=False)

        print("Order rank:\n{}".format(order_rank))

        df = df.iloc[burn_in:]  # don't mix-up training/test

        errs = None
        # Try the order from best to worst
        for order in order_rank:
            p, d, q = order
            try:  # perform moving-window arma
                print('Using ARIMA({}, {}, {}) ...'.format(p, d, q))
                errs, forecast = arima.moving_window(df,
                                                     w_size=w_size,
                                                     ph=ph,
                                                     p=p,
                                                     d=d,
                                                     q=q,
                                                     start_params=None,
                                                     verbose=False)
                print('ARIMA({}, {}, {}) success'.format(p, d, q))
                break  # greedy behavior: take the first that works
            except Exception as e:
                print('ARIMA({}, {}, {}) failure'.format(p, d, q))
                print('arima.moving_window raised the following exception')
                print(e)

        if errs is None:
            # Every candidate order failed: nothing to report or plot.
            continue

        # Save results reports
        error_summary = utils.forecast_report(errs)
        print(error_summary)
        # dump it into a pkl, closing the file once written
        with open(idx + '.pkl', 'wb') as out_file:
            pkl.dump(error_summary, out_file)

        try:
            # Plot signal and its fit
            plotting.cgm(df,
                         forecast['ts'],
                         title='Patient ' + idx,
                         savefig=True)

            # Plot residuals
            plotting.residuals(df,
                               forecast['ts'],
                               skip_first=w_size,
                               skip_last=ph,
                               title='Patient ' + idx,
                               savefig=True)
        except Exception as e:
            # Plots are optional; report the reason and move on.
            print("Plotting failed for patient {}".format(idx))
            print(e)
Пример #5
0
def worker(idx):
    """Fit and evaluate a cross-validated LSTM for one patient.

    The patient's series is split at ``burn_in`` samples into a training and
    a test part, scaled to [0, 1] with parameters learned on the training
    part only, and turned into windowed {X, Y} data sets.  A grid search
    over the number of LSTM units is fit on the training data, the test
    data is forecast with ``lstm.online_forecast`` and the error summary is
    pickled to ``ROOT/results/<idx>.pkl``.  A diagnostic figure is saved to
    ``ROOT/results/<idx>.png`` on a best-effort basis.

    Args:
        idx: key of the patient in the module-level ``dfs``; also used
            (concatenated with a file extension) to name the output files.

    Returns:
        Elapsed wall-clock time in seconds, as measured by ``time.time()``.
    """
    import os
    my_rank_ = comm.Get_rank()

    t1_ = time.time()
    burn_in = 300  # burn-in samples used to learn the best order via cv
    w_size = 36

    # print("Evaluating patient {}".format(idx))
    # Train/test split
    df = utils.gluco_extract(dfs[idx], return_df=True)
    train_df0 = df.iloc[:burn_in]
    test_df0 = df.iloc[burn_in:]

    # preprocess the dataset
    # BEWARE! Do not use the test set to learn the scaling parameters:
    # the scaler is fit on the training split and only applied to the test.
    scaler = MinMaxScaler(feature_range=(0, 1))
    train_data = scaler.fit_transform(train_df0)
    test_data = scaler.transform(test_df0)

    # Create LSTM suitable {X, Y} dataset
    X_tr, Y_tr = lstm.create_XY_dataset(train_data, window_size=w_size)
    X_ts, Y_ts = lstm.create_XY_dataset(test_data, window_size=w_size)

    # Create cross-validated LSTM model
    param_grid = {'n_units': [4, 8, 16]}
    keras_regressor = KerasRegressor(build_fn=lstm.create_model,
                                     batch_size=1,
                                     verbose=0,
                                     nb_epoch=50)
    model = GridSearchCV(keras_regressor, param_grid=param_grid)

    tic = time.time()
    # Fit the model
    model.fit(X_tr, Y_tr)
    print("Fitting time: {} seconds".format(time.time() - tic))

    # Predict the ph and save the errors
    tic = time.time()
    errs, forecast = lstm.online_forecast(X_ts,
                                          Y_ts,
                                          model,
                                          scaler,
                                          ph=18,
                                          verbose=True)
    print("Predicting time: {} seconds".format(time.time() - tic))
    error_summary = utils.forecast_report(errs)
    print(error_summary)
    # Use a context manager so the result file is flushed and closed.
    with open(os.path.join(ROOT, 'results', idx + '.pkl'), 'wb') as out_file:
        pkl.dump(error_summary, out_file)
    #model.save(os.path.join(ROOT, 'results', idx+'_model_.h5'))

    # -- Plotting -- #
    try:
        import statsmodels.api as sm
        import numpy as np
        import matplotlib
        matplotlib.use('agg')
        import matplotlib.pyplot as plt
        Y_pred_tr = model.predict(X_tr)
        Y_pred_ts = model.predict(X_ts)  # maybe its just forecast['ts']
        Y_pred_tr_plot = scaler.inverse_transform(Y_pred_tr)
        Y_pred_ts_plot = scaler.inverse_transform(Y_pred_ts)

        # Time axis shared by the test predictions and their residuals.
        # NOTE(review): assumes create_XY_dataset drops the first `w_size`
        # samples of each split -- confirm against lstm.create_XY_dataset.
        test_index = df.index[burn_in + w_size:]

        fig = plt.figure(figsize=(10, 6), dpi=300)
        try:
            plt.subplot(211)
            plt.plot(df.index, df.values, label='real cgm')
            plt.plot(df.index[w_size:burn_in],
                     Y_pred_tr_plot.ravel(),
                     '--',
                     label='y_tr')
            # Test predictions get their own legend entry (was 'y_tr').
            plt.plot(test_index,
                     Y_pred_ts_plot.ravel(),
                     '--',
                     label='y_ts')
            plt.legend()

            residuals = (Y_pred_ts_plot.ravel() -
                         df.values[burn_in + w_size:].ravel())
            # Mean *absolute* error: the signed mean lets positive and
            # negative residuals cancel out.
            mae = np.mean(np.abs(residuals))
            rmse = np.sqrt(np.mean(residuals**2))
            DW = sm.stats.durbin_watson(residuals)

            plt.subplot(212)
            # Residuals are plotted on the same timestamps they were
            # computed for (previously shifted back by `w_size` samples).
            plt.plot(test_index, residuals)
            plt.title("MAE {:2.5f} | RMSE {:2.5f} | DW {:2.5f}".format(
                mae, rmse, DW))
            plt.tight_layout()
            plt.savefig(os.path.join(ROOT, 'results', idx + '.png'))
        finally:
            # Release the figure so repeated calls do not pile up figures.
            plt.close(fig)
    except Exception as exc:
        # The figure is optional: report the reason and carry on.
        print('Plotting failed')
        print(exc)

    t2_ = time.time()

    if VERBOSITY:
        print(' ---> processor %s has calculated for %s' %
              (my_rank_, t2_ - t1_))
    return t2_ - t1_