Exemplo n.º 1
0
def test_cash_module():
    """Fit an AlphaML ``Classifier`` on ``lyqdata.xlsx`` and score it by AUC.

    Each run reads the training rows from the first sheet of
    ``lyqdata.xlsx``, fits an MLP-only ``Classifier`` (SMBO optimizer,
    ensemble selection), predicts probabilities for the rows of
    ``lyqtestdata.xlsx`` and appends the resulting ROC AUC to a list.  The
    list is printed after every run and pickled to ``result.pkl`` once all
    runs are done.

    The function takes no parameters and returns ``None``.  It reads
    ``args.ensemble_size`` and ``args.run_count`` from a name ``args`` that
    is not defined inside this function, so it has to exist at module level.
    """
    from alphaml.engine.components.data_manager import DataManager
    from alphaml.estimators.classifier import Classifier
    from sklearn.metrics import roc_auc_score
    import pickle
    import xlrd

    def _read_sheet(path, first_row):
        """Return ``(features, labels)`` from the first sheet of *path*.

        Rows ``first_row`` up to the last row are used.  Column 0 of each row
        is cast to ``int`` and used as the label; the remaining columns form
        the feature vector.
        """
        sheet = xlrd.open_workbook(path).sheet_by_index(0)
        row_indices = range(first_row, sheet.nrows)
        features = [sheet.row_values(r, start_colx=1) for r in row_indices]
        labels = [int(sheet.cell_value(r, 0)) for r in row_indices]
        return features, labels

    result = []
    # A single run; the loop index itself is never needed.
    for _ in range(1):
        # NOTE(review): training rows start at index 2 while the test rows
        # below start at index 1 -- confirm against the spreadsheets' headers.
        X_train, y_train = _read_sheet("lyqdata.xlsx", first_row=2)

        dm = DataManager(X_train, y_train)
        cls = Classifier(
            # Only the MLP is searched here; other candidates that were tried:
            # 'liblinear_svc', 'libsvm_svc', 'random_forest',
            # 'logistic_regression'.
            include_models=['mlp'],
            optimizer='smbo',
            cross_valid=False,
            ensemble_method='ensemble_selection',
            ensemble_size=args.ensemble_size,
            save_dir='data/save_models'
        )
        cls.fit(dm, metric='auc', runcount=args.run_count)

        X_test, y_test = _read_sheet("lyqtestdata.xlsx", first_row=1)

        pred = cls.predict_proba(X_test)
        # Score with column 1 of the probability matrix, kept as a 2-D slice
        # (presumably the positive-class probability -- TODO confirm).
        result.append(roc_auc_score(y_test, pred[:, 1:2]))
        print(result)

    with open('result.pkl', 'wb') as f:
        pickle.dump(result, f)
Exemplo n.º 2
0
def test_cash_module():
    """Fit an AlphaML ``Classifier`` on a keratoconus sheet and score by AUC.

    Each run reads every row from index 1 onward of the first sheet of
    ``ybai_Keratoconus_TJ_20190425.xlsx`` (column 0 is the integer label, the
    remaining columns are features), holds out a stratified 20% as a test
    set, fits a bagging ``Classifier`` with the SMBO optimizer on the rest,
    and appends the ROC AUC of the predicted probabilities against the
    one-hot encoded test labels to a list.  The list is printed and written
    to ``result.pkl`` after every run.

    The function takes no parameters and returns ``None``.  It reads
    ``args.ensemble_size`` and ``args.run_count`` from a name ``args`` that
    is not defined inside this function, so it has to exist at module level.
    """
    from alphaml.engine.components.data_manager import DataManager
    from alphaml.estimators.classifier import Classifier
    from sklearn.metrics import roc_auc_score
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import OneHotEncoder
    # Imported locally so the function does not depend on a module-level
    # ``np`` that may not exist.
    import numpy as np
    import pickle
    import xlrd

    result = []
    # A single run; the loop index itself is never needed.
    for _ in range(1):
        workbook = xlrd.open_workbook("ybai_Keratoconus_TJ_20190425.xlsx")
        sheet = workbook.sheet_by_index(0)
        row_indices = range(1, sheet.nrows)
        X_train = [sheet.row_values(r, start_colx=1) for r in row_indices]
        y_train = [int(sheet.cell_value(r, 0)) for r in row_indices]

        # Fit the encoder on all labels before splitting, so it knows every
        # class present in the file.
        encoder = OneHotEncoder()
        encoder.fit(np.reshape(y_train, (len(y_train), 1)))
        X_train, X_test, y_train, y_test = train_test_split(
            X_train, y_train, test_size=0.2, stratify=y_train)

        dm = DataManager(X_train, y_train)
        cls = Classifier(
            # include_models is not passed here; a previously listed set was:
            # 'liblinear_svc', 'libsvm_svc', 'xgboost', 'random_forest',
            # 'logistic_regression', 'mlp'.
            optimizer='smbo',
            ensemble_method='bagging',
            ensemble_size=args.ensemble_size,
        )
        cls.fit(dm, metric='auc', runcount=args.run_count)

        pred = cls.predict_proba(X_test)
        print(pred)
        # One-hot encode the held-out labels before scoring against ``pred``.
        y_test = encoder.transform(
            np.reshape(y_test, (len(y_test), 1))).toarray()
        result.append(roc_auc_score(y_test, pred))
        print(result)

        # Persist the scores collected so far; the file is overwritten on
        # every run.
        with open('result.pkl', 'wb') as f:
            pickle.dump(result, f)
Exemplo n.º 3
0
from time import time

# Suppress every warning for the remainder of the run.
warnings.filterwarnings("ignore")

# Command-line options: how many features to generate and which dataset to
# load.
parser = argparse.ArgumentParser()
parser.add_argument("--generated_feature", default=1, type=int)
parser.add_argument("--dataset", type=str)
args = parser.parse_args()

# load_data returns three values; only the first two are used below.
x, y, c = load_data(args.dataset)
dm = DataManager(x, y)

# Baseline: a default LogisticRegression fitted on the training split and
# scored on the validation split.
lr = LogisticRegression()
lr.fit(dm.train_X, dm.train_y)
y_pred = lr.predict(dm.val_X)
baseline_accuracy = accuracy_score(dm.val_y, y_pred)
print("original lr accu:", baseline_accuracy, flush=True)

# Optional feature generation; skipped when the requested count is not
# positive.
if args.generated_feature > 0:
    af = AutoFeature("accuracy", "auto_cross")
    af.fit(dm, args.generated_feature)
    dm = af.transform(dm)

# Fit the AlphaML classifier and time only the fit call.
clf = Classifier()
start_time = time()
clf.fit(dm, metric="accuracy", runcount=50)
elapsed = time() - start_time
print("alphaml time:", elapsed)
print("dataset:", args.dataset)

alphaml_score = clf.score(dm.val_X, dm.val_y)
print("generated data:", args.generated_feature, ", alphaml score:",
      alphaml_score)