Exemplo n.º 1
0
def test_stateful_transform():
    """Centering must use statistics learned at fit time, not transform time."""
    train = patsy.demo_data("x1", "x2", "y")
    train['x1'][:] = 1  # so center() learns a training mean of exactly 1
    test = patsy.demo_data("x1", "x2", "y")
    test['x1'][:] = 0

    # fit the centering transform on the training data only
    transformer = PatsyTransformer("center(x1) + x2")
    transformer.fit(train)
    transformed = transformer.transform(test)
    # 0 minus the training mean (1) should be -1 for the whole first column
    assert_array_equal(transformed[:, 0], -1)
Exemplo n.º 2
0
def test_stateful_transform():
    """transform() must reuse the mean computed during fit(), not re-fit."""
    fit_data = patsy.demo_data("x1", "x2", "y")
    fit_data['x1'][:] = 1
    # training mean of x1 is therefore 1
    new_data = patsy.demo_data("x1", "x2", "y")
    new_data['x1'][:] = 0

    # center x1 relative to the fitted data
    centerer = PatsyTransformer("center(x1) + x2")
    centerer.fit(fit_data)
    result = centerer.transform(new_data)
    # if the test-data mean (0) had been used, this column would be all zeros
    assert_array_equal(result[:, 0], -1)
Exemplo n.º 3
0
def test_error_on_y_transform():
    """Formulas with an outcome (left-hand side) must be rejected."""
    demo = patsy.demo_data("x1", "x2", "x3", "y")
    transformer = PatsyTransformer("y ~ x1 + x2")
    expected_msg = ("encountered outcome variables for a model"
                    " that does not expect them")
    # both entry points must raise the same PatsyError
    for entry_point in (transformer.fit, transformer.fit_transform):
        assert_raise_message(patsy.PatsyError, expected_msg, entry_point, demo)
Exemplo n.º 4
0
def test_stateful_transform_dataframe():
    """Centering a DataFrame column must use the mean learned at fit time.

    Also verifies that ``return_type='dataframe'`` produces a pandas
    DataFrame with formula-term column names.
    """
    data_train = pd.DataFrame(patsy.demo_data("x1", "x2", "y"))
    # Assign the column directly: the original chained assignment
    # (data_train['x1'][:] = 1) raises SettingWithCopyWarning and is a
    # silent no-op under pandas copy-on-write (the default in pandas >= 3.0),
    # which would invalidate the test's premise.
    data_train['x1'] = 1
    # mean of x1 is 1
    data_test = pd.DataFrame(patsy.demo_data("x1", "x2", "y"))
    data_test['x1'] = 0

    # center x1 using statistics from the training data
    est = PatsyTransformer("center(x1) + x2", return_type='dataframe')
    est.fit(data_train)
    data_trans = est.transform(data_test)

    # make sure result is a pandas dataframe
    assert type(data_trans) is pd.DataFrame

    # make sure that the mean of the *training* data (1), not the test
    # data (0), was removed: 0 - 1 == -1 everywhere
    assert_array_equal(data_trans['center(x1)'], -1)
Exemplo n.º 5
0
def test_stateful_transform_dataframe():
    """DataFrame round-trip: fit-time mean is applied at transform time.

    Also checks the ``return_type='dataframe'`` output type.
    """
    data_train = pd.DataFrame(patsy.demo_data("x1", "x2", "y"))
    # Direct column assignment replaces the chained form
    # (data_train['x1'][:] = 1), which triggers SettingWithCopyWarning and
    # does nothing at all under pandas copy-on-write (pandas >= 3.0 default).
    data_train['x1'] = 1
    # mean of x1 is 1
    data_test = pd.DataFrame(patsy.demo_data("x1", "x2", "y"))
    data_test['x1'] = 0

    # center x1 on the training statistics
    est = PatsyTransformer("center(x1) + x2", return_type='dataframe')
    est.fit(data_train)
    data_trans = est.transform(data_test)

    # make sure result is a pandas dataframe
    assert type(data_trans) is pd.DataFrame

    # mean of training (1), not test (0), data was removed: expect -1
    assert_array_equal(data_trans['center(x1)'], -1)
Exemplo n.º 6
0
def test_intercept_transformer():
    """add_intercept controls whether a constant column is prepended."""
    demo = patsy.demo_data("x1", "x2", "x3", "y")

    # default: only the two requested features, no intercept column
    plain = PatsyTransformer("x1 + x2")
    plain.fit(demo)
    assert_equal(plain.transform(demo).shape[1], 2)

    # with add_intercept=True an all-ones column appears first
    with_intercept = PatsyTransformer("x1 + x2", add_intercept=True)
    with_intercept.fit(demo)
    transformed = with_intercept.transform(demo)
    assert_array_equal(transformed[:, 0], 1)
    assert_equal(with_intercept.transform(demo).shape[1], 3)
Exemplo n.º 7
0
def test_scope_transformer():
    """Functions defined in the enclosing scope are usable inside formulas."""
    demo = patsy.demo_data("x1", "x2", "x3", "y")

    # keep the name `myfunc`: the formula strings below reference it by name
    def myfunc(x):
        return np.full_like(x, 42)

    est = PatsyTransformer("x1 + myfunc(x2)")
    est.fit(demo)
    assert_array_equal(est.transform(demo)[:, 1], 42)

    # same check through the combined fit_transform path
    est = PatsyTransformer("x1 + myfunc(x2)")
    assert_array_equal(est.fit_transform(demo)[:, 1], 42)

    # feature names should mirror the formula terms, call included
    assert_equal(est.feature_names_, ["x1", "myfunc(x2)"])
Exemplo n.º 8
0
def test_coxph_model():
    """A PatsyTransformer + CoxPHFitterModel pipeline predicts lifetimes."""
    dd_data = lifelines.datasets.load_dd()

    # design-matrix step (no intercept, per the trailing -1) feeding a
    # Cox proportional-hazards model
    pipeline = make_pipeline(
        PatsyTransformer('un_continent_name + regime + start_year -1',
                         return_type='dataframe'),
        CoxPHFitterModel(duration_column='duration', event_col='observed'))

    # hold out part of the data for prediction
    train_df, test_df = train_test_split(dd_data)

    # the fitter reads its duration/event columns from y, hence y=train_df
    pipeline.fit(train_df, y=train_df)

    # expected lifetime for the first held-out row should exceed 4
    predicted_lifetime = pipeline.predict(test_df[0:1])
    assert (predicted_lifetime > 4)
Exemplo n.º 9
0
def test_scope_transformer():
    """Check that a function from the enclosing scope is usable in a formula.

    NOTE(review): this block appears to be a corrupted merge of three
    unrelated snippets -- the scope-transformer test, a VGG16 cell-feature
    extraction routine, and a lifelines CoxPH pipeline demo. The names
    ``feature_extraction_model``, ``cfg``, ``random_sample_index``,
    ``cells_lists`` and ``llc`` are not defined anywhere in this file, so
    everything after the second assert cannot run as written -- confirm
    against the original sources before relying on it.
    """
    data = patsy.demo_data("x1", "x2", "x3", "y")

    # `myfunc` is referenced by name inside the formula strings below.
    def myfunc(x):
        tmp = np.ones_like(x)
        tmp.fill(42)
        return tmp

    est = PatsyTransformer("x1 + myfunc(x2)")
    est.fit(data)
    data_trans = est.transform(data)
    assert_array_equal(data_trans[:, 1], 42)

    est = PatsyTransformer("x1 + myfunc(x2)")
    data_trans = est.fit_transform(data)
    assert_array_equal(data_trans[:, 1], 42)
    # NOTE(review): unrelated snippet begins here -- all names below are
    # undefined in this file.
    feature_extraction_model.load_weights(cfg.VGG16_pretrained_model_path,
                                          by_name=True)
    sampled_cell_Indexs = random_sample_index(
        cells_lists, 2000)  #random sample 2000 cell features
    feature_lists = []
    for index in sampled_cell_Indexs:
        a_cell_feature = feature_extraction_model.predict(cells_lists[index])
        feature_lists.append(a_cell_feature)
    feature_lists = np.array(feature_lists)
    C = llc(feature_lists)  # the codes
    f = np.sum(C, axis=1)  # the single vector of the patient.
    #f=np.max(C,axis=1)#the single vector of the patient.
    # NOTE(review): second unrelated snippet -- a CoxPH pipeline demo;
    # `data` is rebound here, discarding the patsy demo data above.
    data = lifelines.datasets.load_dd()

    # create sklearn pipeline
    coxph_surv_ppl = make_pipeline(PatsyTransformer('un_continent_name + regime + start_year -1', \
                                                    return_type='dataframe'),
                                   CoxPHFitterModel(duration_column='duration', event_col='observed'))

    # split data to train and test
    data_train, data_test = train_test_split(data)

    # fit CoxPH model
    coxph_surv_ppl.fit(data_train, y=data_train)
    # use pipeline to predict expected lifetime
    exp_lifetime = coxph_surv_ppl.predict(data_test[0:1])
    print('expected lifetime: ' + str(exp_lifetime))

    # or you can extract the model from the pipeline to access more methods
    coxmodel = coxph_surv_ppl.named_steps['coxphfittermodel'].estimator
    coxmodel.print_summary()
Exemplo n.º 11
0
def test_intercept_transformer():
    """The intercept column is present only when add_intercept is set."""
    sample = patsy.demo_data("x1", "x2", "x3", "y")

    # no intercept requested: two feature columns exactly
    no_icpt = PatsyTransformer("x1 + x2")
    no_icpt.fit(sample)
    assert_equal(no_icpt.transform(sample).shape[1], 2)

    # intercept requested: three columns, the first being all ones
    est = PatsyTransformer("x1 + x2", add_intercept=True)
    est.fit(sample)
    out = est.transform(sample)
    assert_array_equal(out[:, 0], 1)
    assert_equal(est.transform(sample).shape[1], 3)
Exemplo n.º 12
0
# Flag deliveries whose scheduled slot lies entirely within working hours.
# NOTE(review): the code enforces 9 am-6 pm (>= 9 and <= 18) but the original
# comment claimed "8 am to 6 pm" -- confirm the intended lower bound.
data["Is_delivery_working_hours"] = (data["time_slot_from"].str[:2].astype(
    int) >= 9) & (data["time_slot_to"].str[:2].astype(int) <= 18)
# Whether the committed delivery day is Friday (treated as the weekend here)
data["Is_committed_delivery_friday"] = (
    data["commitment_date_weekday"] == "Friday").astype(int)

# DataFrame of predictor variables shared by every classifier below
feature_df = data[[
    "Schedule_loc_type", "schedule_channel", "Is_committed_delivery_friday",
    "Is_delivery_working_hours"
]]
# Target: whether the delivery was accurate
y_df = data["accurate"]

# Two patsy design matrices: one adds a location-type x channel interaction
# term, the other omits it (for models assuming feature independence).
interaction_transformer = PatsyTransformer(
    "C(Schedule_loc_type) + C(schedule_channel)+ Is_committed_delivery_friday + Is_delivery_working_hours + C(Schedule_loc_type):C(schedule_channel)"
)
non_interaction_transformer = PatsyTransformer(
    "C(Schedule_loc_type) + C(schedule_channel)+ Is_committed_delivery_friday + Is_delivery_working_hours"
)

naive_bayes_clf = Pipeline(
    [("encoding", non_interaction_transformer), ("nb_clf", GaussianNB())]
)  # Because we assume conditional independence among different predictor variables in Naive Bayes, interaction terms are not necessary
logistic_clf = Pipeline([('int_feature', interaction_transformer),
                         ('log_clf',
                          LogisticRegression(C=10.0,
                                             fit_intercept=True,
                                             random_state=100))])
supportvector_clf = Pipeline([('int_feature', interaction_transformer),
                              ('svm_clf',