Exemplo n.º 1
0
def build_model(
    X,
    y,
    X_train,
    y_train,
    X_test,
    y_test,
    seed,
    method,
):
    """Build, evaluate and persist the meta classifier.

    Instantiates ``model.Model``, asks it for an ensemble built from the full
    data set, runs its prediction step on the train/test split,
    cross-validates on the full data set, and finally dumps the ensemble to
    ``meta_clf.pkl`` (a relative path).

    Args:
        X: Full feature set; forwarded to ``model_ensemble`` and
            ``cross_validate``.
        y: Targets paired with ``X``.
        X_train: Training features forwarded to ``model_predict``.
        y_train: Training targets forwarded to ``model_predict``.
        X_test: Test features forwarded to ``model_predict``.
        y_test: Test targets forwarded to ``model_predict``.
        seed: Forwarded to ``model_predict`` (as ``seed=``) and to
            ``cross_validate``.
        method: Forwarded to ``model_ensemble`` as ``method=``.

    Returns:
        The object returned by ``model_ensemble`` (the one that was dumped).
    """
    # The instance must NOT be bound to the name ``model``: assigning to that
    # name anywhere in the function makes it local for the whole body, so
    # ``model.Model()`` would raise UnboundLocalError instead of reaching the
    # module-level ``model``.
    pipeline = model.Model()
    evc_meta = pipeline.model_ensemble(X, y, method=method)
    pipeline.model_predict(
        evc_meta, X_train, y_train, X_test, y_test, seed=seed)
    pipeline.cross_validate(evc_meta, X, y, seed)

    print("Start dumping Meta classifier...")
    joblib.dump(evc_meta, "meta_clf.pkl")
    print("Done dumping Meta classifier ! \n")
    return evc_meta
Exemplo n.º 2
0
def task():
    """Cross-validate several classifiers on the Oct-Dec 2019 Waze features.

    Steps:
      1. Load the per-month feature pickles from ``output/waze/`` and stack
         them into one frame, filling missing values with 0.
      2. Load the cached ``predict_proba`` features if present; otherwise
         compute them with ``model.predict_proba`` and
         ``model.extract_features`` and cache the result.
      3. Load the cached labels if present; otherwise derive them with
         ``label_mapper`` and cache the result.
      4. For each classifier name, run ``model.cross_validate`` and print the
         configuration followed by the average of every cross-validation
         metric.

    Returns:
        None. Results are printed; intermediate artefacts are pickled under
        ``output/``.
    """
    year, months = 2019, [10, 11, 12]
    select = None
    radius, aperture_size, incident_interval, time_step = '100', '6', '25', '1'
    config = {
        'year': year,
        'months': months,
        'radius': radius,
        'aperture_size': aperture_size,
        'incident_interval': incident_interval,
        'time_step': time_step,
        'select': select
    }
    # NOTE: `select` is only reported with the config; no row truncation is
    # applied in this function.

    # DataFrame.append was removed in pandas 2.0. A single concat over all
    # months is the supported equivalent and avoids re-copying the
    # accumulated frame once per month.
    monthly_frames = [
        pd.read_pickle(
            f'output/waze/{year}_{month}_{radius}_{aperture_size}_features.pkl'
        )
        for month in months
    ]
    df = pd.concat(monthly_frames, sort=False).fillna(0)

    # Cache files are keyed by the LAST month of the range. This keeps the
    # file names the earlier implementation produced (it reused the loop
    # variable after the loop), so existing caches stay valid.
    cache_prefix = f'output/{year}_{months[-1]}_{radius}_{aperture_size}'
    proba_path = f'{cache_prefix}_predict_proba.pkl'
    labels_path = f'{cache_prefix}_y.pkl'

    if os.path.exists(proba_path):
        x = pd.read_pickle(proba_path)
    else:
        x = model.predict_proba(df, int(incident_interval), time_step)
        x = model.extract_features(x, df)
        x.to_pickle(proba_path)

    incident_df = load_incidents(aperture_size)
    if os.path.exists(labels_path):
        y = pd.read_pickle(labels_path)
    else:
        y = label_mapper(x, incident_df)
        pd.Series(y).to_pickle(labels_path)

    models = [
        'LogisticRegression', 'DecisionTreeClassifier',
        'RandomForestClassifier'
    ]
    for m in models:
        print('model', m)
        # cross_validate also returns predictions; only the metrics are used.
        cv_results, _ = model.cross_validate(x, y, m)
        for k, v in config.items():
            print(k, ',', v)
        for k, v in cv_results.items():
            print(k, ',', np.average(v))
        print()
Exemplo n.º 3
0
def predict(year, month, radius, aperture_size, incident_interval, time_step,
            select):
    """Score one month of Waze features and print cross-validation averages.

    Reads ``output/waze/{year}_{month}_{radius}_{aperture_size}_features.pkl``,
    optionally slices it with ``[-int(select):]``, runs
    ``model.predict_proba`` on it, maps the result to labels via
    ``label_mapper`` and the incidents from ``load_incidents``, and
    cross-validates with ``model.cross_validate``.

    Printed output, in order: the unique labels with their counts, every
    argument as ``name value``, and the average of each cross-validation
    entry.

    Args:
        year, month, radius, aperture_size: Components of the feature file
            name; ``aperture_size`` is also passed to ``load_incidents``.
        incident_interval: Converted with ``int()`` before being passed to
            ``model.predict_proba``.
        time_step: Passed to ``model.predict_proba`` unchanged.
        select: ``None`` to use the whole frame, otherwise a value accepted
            by ``int()`` that sets the slice start.

    Returns:
        None.
    """
    settings = dict(
        year=year,
        month=month,
        radius=radius,
        aperture_size=aperture_size,
        incident_interval=incident_interval,
        time_step=time_step,
        select=select,
    )

    features_path = (
        f'output/waze/{year}_{month}_{radius}_{aperture_size}_features.pkl')
    features = pd.read_pickle(features_path)
    if select is not None:
        tail_len = int(select)
        features = features[-tail_len:]

    interval = int(incident_interval)
    probabilities = model.predict_proba(features, interval, time_step)
    incidents = load_incidents(aperture_size)
    labels = label_mapper(probabilities, incidents)
    label_counts = np.unique(labels, return_counts=True)
    print(label_counts)

    scores = model.cross_validate(probabilities, labels)
    for name, value in settings.items():
        print(name, value)
    for metric, values in scores.items():
        print(metric, np.average(values))
Exemplo n.º 4
0
    train_data, train_labels = load_data('../data/data-splits/data.train',
                                         n_features=n_features,
                                         neg_labels=True,
                                         bias=True,
                                         preprocessor=preprocessor)
    test_data, test_labels = load_data('../data/data-splits/data.test',
                                       n_features=n_features,
                                       neg_labels=True,
                                       bias=True,
                                       preprocessor=preprocessor)
    cv_data = np.array_split(np.hstack((train_data, train_labels)), 5)

    cv_acc = 0
    if max_param is None:
        cv_acc, max_param = cross_validate(cv_data, weights, update_weights,
                                           params, update_params)

    max_weights = train(train_data,
                        train_labels,
                        weights,
                        update_weights,
                        max_param,
                        update_params,
                        epochs=10)
    train_acc = classify(train_data, train_labels, max_weights)

    test_acc = classify(test_data, test_labels, max_weights)

    def predictor(row):
        label = np.sign(np.dot(row, max_weights))
        if label == -1: