def test_treefoo_duh(self):
    """Labeled and unlabeled prediction runs must score identically.

    The holdout frame is predicted once with ``labeled=True``.  It is then
    passed through ``contract_df`` and ``widen_df_with_other_cols`` and
    predicted again with ``labeled=False``.  Both prediction sets are scored
    against the ``y_test`` returned by the labeled run.
    """
    bundle, datasets, stations_df = bltu.make_basic_minimal_model()
    holdout = datasets['holdout_df']

    # First pass: labeled holdout data.
    labeled_predictions, y_test, _ = blc.run_model_predict(
        bundle, holdout, stations_df, labeled=True)
    labeled_score = bltu.get_basic_proportion_correct(
        y_test, labeled_predictions)

    # Second pass: same rows, contracted and re-widened, run as unlabeled.
    rewidened = blc.widen_df_with_other_cols(
        blc.contract_df(holdout), s.ALL_COLUMNS)
    unlabeled_predictions, _, _ = blc.run_model_predict(
        bundle, rewidened, stations_df, labeled=False)
    unlabeled_score = bltu.get_basic_proportion_correct(
        y_test, unlabeled_predictions)

    # The evaluation must not depend on which path produced the input.
    assert unlabeled_score == labeled_score
def test_basic(self):
    """Smoke test: unlabeled prediction over two raw CSV rows must not raise.

    NOTE(review): another ``test_basic`` is defined further down; if both
    live in the same class, this one is shadowed and never runs -- confirm.
    """
    raw_rows = (
        '10/1/2015 00:00:02,W 26 St & 10 Ave,Subscriber,1973,1\n'
        '10/1/2015 00:00:02,E 39 St & 2 Ave,Subscriber,1990,1'
    )
    widened = blc.hydrate_and_widen(raw_rows)
    bundle, _, stations_df = bltu.make_basic_minimal_model()

    # No assertions on the values: the call and the three-way unpacking
    # succeeding is the whole check.
    _, _, _ = blc.run_model_predict(
        bundle, widened, stations_df, labeled=False)
def test_assertion_happens_for_nan(self):
    """``make_simple_df_from_raw`` must raise for a frame holding ``'nan'``.

    The frame has a single ``s.NEW_END_NEIGHBORHOOD`` column with the values
    ``'foo'`` and ``'nan'``, encoded as ``str``.  Any exception raised while
    calling the function or unpacking its result counts as success.
    """
    _, _, stations_df = bltu.make_basic_minimal_model()
    frame = pd.DataFrame({s.NEW_END_NEIGHBORHOOD: ['foo', 'nan']})
    encodings = {s.NEW_END_NEIGHBORHOOD: str}

    try:
        # The unpacking stays inside the try so that a failure there is
        # treated the same way as a failure in the call itself.
        _, _ = pl.make_simple_df_from_raw(frame, stations_df, encodings)
    except Exception:
        raised = True
    else:
        raised = False

    assert raised
def test_treefoo_with_pure_input_data(self):
    """Smoke test: raw CSV text hydrated, widened, and predicted unlabeled.

    Unlike the ``hydrate_and_widen`` path, this test calls
    ``hydrate_csv_to_df`` and ``widen_df_with_other_cols`` as two explicit
    steps before running the model.
    """
    # Two rows with no header line.  A variant that started with the header
    # 'starttime,start station name,usertype,birth year,gender' was
    # previously kept here as commented-out code.
    raw_rows = (
        '10/1/2015 00:00:02,W 26 St & 10 Ave,Subscriber,1973,1\n'
        '10/1/2015 00:00:02,E 39 St & 2 Ave,Subscriber,1990,1'
    )
    hydrated = blc.hydrate_csv_to_df(raw_rows)
    bundle, _, stations_df = bltu.make_basic_minimal_model()
    widened = blc.widen_df_with_other_cols(hydrated, s.ALL_COLUMNS)

    # The call and the three-way unpacking succeeding is the whole check.
    _, _, _ = blc.run_model_predict(
        bundle, widened, stations_df, labeled=False)
def test_basic(self):
    """Smoke test: the classifier's ``decision_path`` accepts prepared input.

    A single raw CSV row is hydrated and widened, converted to the feature
    matrix with ``df_to_np_for_clf`` (``labeled=False``), and handed to
    ``bundle['clf'].decision_path``.  The test passes if nothing raises.

    No interactive debugger breakpoint is set here: the test has to run
    unattended, for example under CI or with output capturing enabled.
    """
    csvdata = '10/1/2015 00:00:02,E 39 St & 2 Ave,Subscriber,1990,1'
    df = blc.hydrate_and_widen(csvdata)
    bundle, _, stations_df = bltu.make_basic_minimal_model()

    # Only the second element of the returned pair is needed.
    _, X_test = blc.df_to_np_for_clf(bundle, df, stations_df, labeled=False)

    clf = bundle['clf']
    # The return value is not inspected; the call completing is the check.
    clf.decision_path(X_test)
def test_predict_matches_proba(self):
    """Top-1 ``predict_proba`` picks must equal ``run_model_predict`` output.

    Also checks that the ``rank_k_proba_scores`` metric values are
    non-decreasing when ordered by their key (presumably the rank ``k`` --
    confirm against ``run_model_predict``).
    """
    bundle, datasets, stations_df = bltu.make_basic_minimal_model()
    holdout = datasets['holdout_df']
    classifier = bundle['clf']

    prepared = blc.predict_prepare(bundle, holdout, stations_df, labeled=True)
    top1_outputs = blmu.get_sorted_predict_proba_predictions(
        prepared['y_predict_proba'], classifier.classes_, k=1)

    y_predictions, _, metrics = blc.run_model_predict(
        bundle, holdout, stations_df, labeled=True)

    # Metric values in ascending key order.
    by_key = sorted(
        metrics['rank_k_proba_scores'].items(), key=lambda item: item[0])
    scores = [value for _, value in by_key]

    # Each score must be no larger than the one that follows it.
    assert all([lo <= hi for lo, hi in zip(scores, scores[1:])])

    # The first entry of every top-k row is the top-1 prediction.
    assert [row[0] for row in top1_outputs] == list(y_predictions)