from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import auc, roc_curve
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVC
import gcforest.data_load_phy as load2
import gcforest.data_load as load
from gcforest.gcforest import GCForest
from gcforest.utils.log_utils import get_logger
import json
import pandas as pd

LOGGER = get_logger('cascade_clf.lib.plot_roc_all')


# NOTE(review): the three helpers below reference `tf` (TensorFlow 1.x API:
# tf.truncated_normal / tf.nn.conv2d), but no `import tensorflow as tf` is
# visible in this chunk -- confirm the import exists elsewhere in the file.

def weight_variable(shape):
    """Return a trainable weight Variable of `shape`, drawn from a
    truncated normal distribution with stddev 0.1."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


def bias_variable(shape):
    """Return a trainable bias Variable of `shape`, filled with 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))


def conv2d(x, w):
    """2-D convolution of input `x` with kernel `w`: unit strides in every
    dimension and 'SAME' (zero) padding, so spatial size is preserved."""
    return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
Requirements: This package is developed with Python 2.7, please make sure all the dependencies are installed, which is specified in requirements.txt
ATTN: This package is free for academic usage. You can run it at your own risk. For other purposes, please contact Prof. Zhi-Hua Zhou
ATTN2: This package was developed by Mr.Ji Feng([email protected]). The readme file and demo roughly explains how to use the codes. For any problem concerning the codes, please feel free to contact Mr.Feng.
"""
# NOTE(review): the text above is the tail of a module docstring whose opening
# quotes lie before this chunk -- this fragment is incomplete from here.
import sys, os, os.path as osp
import argparse
import numpy as np
import xgboost as xgb
# Make the bundled gcforest package importable when run from the repo root.
sys.path.insert(0, 'lib')
from gcforest.utils.log_utils import get_logger, update_default_level, update_default_logging_dir
from gcforest.fgnet import FGNet, FGTrainConfig
from gcforest.utils.config_utils import load_json
from gcforest.exp_utils import concat_datas
from gcforest.datasets import get_dataset

# Module-level logger. NOTE(review): "tarin_xgb" looks like a typo for
# "train_xgb", but the name is a runtime string used as a logger id --
# confirm before renaming, as log configs may reference it.
LOGGER = get_logger("tools.tarin_xgb")


def parse_args():
    """Parse command-line arguments.

    Returns an argparse namespace with a required `model` entry:
    the path to the gcForest net model (JSON) file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', dest='model', type=str, required=True,
                        help='gcfoest Net Model File')
    args = parser.parse_args()
    return args


def train_xgb(X_train, y_train, X_test, y_test):
    # Train an XGBoost model on the given split.
    # NOTE(review): this function is truncated here -- the remainder of its
    # body lies outside this chunk.
    n_trees = 1000
                        dest='log_dir', type=str, default=None, help='Log file directory')
    # NOTE(review): the lines above are the tail of a parse_args() definition
    # whose opening lies before this chunk -- incomplete from here.
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    # Entry point: train a cascade (gcForest) classifier from a JSON model config.
    args = parse_args()
    config = load_json(args.model)
    if args.log_dir is not None:
        update_default_logging_dir(args.log_dir)
    # Imports deferred until after logging is configured.
    from gcforest.cascade.cascade_classifier import CascadeClassifier
    from gcforest.datasets import get_dataset
    LOGGER = get_logger("tools.train_cascade")
    LOGGER.info("tools.train_cascade")
    # Dump the full config into the log for reproducibility.
    LOGGER.info(
        "\n" + json.dumps(config, sort_keys=True, indent=4, separators=(',', ':')))
    data_train = get_dataset(config["dataset"]["train"])
    data_test = get_dataset(config["dataset"]["test"])
    cascade = CascadeClassifier(config["cascade"])
    # Fall back to the training features when no held-out test split is attached.
    if not hasattr(data_train, 'test'):
        data_train.test = data_train.X
    # NOTE(review): `a` and `b` are undocumented extra returns of
    # fit_transform; `b` is treated below as the test-set prediction --
    # confirm against CascadeClassifier.fit_transform.
    opt_layer_id, X_train, y_train, X_test, y_test, a, b = cascade.fit_transform(
        data_train.X, data_train.y, data_test.X, data_test.y, data_train.test)
    # y_proba_cv = cascade.predict_test(data_train.test, data_train.test_id, opt_layer_id)
    cascade.save_test_result(data_train.test_id, b)
    parser.add_argument('--model', dest='model', type=str, required=True,
                        help='gcfoest Net Model File')
    parser.add_argument('--save_outputs', dest='save_outputs', action="store_true",
                        help="Save outputs")
    parser.add_argument('--log_dir', dest='log_dir', type=str, default=None,
                        help='Log file directory')
    # NOTE(review): the lines above are the body of a parse_args() definition
    # whose `def` line lies before this chunk -- incomplete from here.
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    # Entry point: train a fine-grained (FGNet) gcForest model from a JSON config.
    args = parse_args()
    config = load_json(args.model)
    # NOTE(review): `logging` is not imported in this chunk -- confirm
    # `import logging` exists elsewhere in the file.
    update_default_level(logging.DEBUG)
    if args.log_dir is not None:
        update_default_logging_dir(args.log_dir)
    # Imports deferred until after logging is configured.
    from gcforest.fgnet import FGNet, FGTrainConfig
    from gcforest.exp_utils import prec_ets, prec_rf, prec_log, prec_xgb, concat_datas
    from gcforest.datasets import get_dataset
    LOGGER = get_logger("tools.train_fg")
    LOGGER.info("tools.train_fg")
    # Dump the full config into the log for reproducibility.
    LOGGER.info("\n" + json.dumps(config, sort_keys=True, indent=4, separators=(',', ':')))
    train_config = FGTrainConfig(config["train"])
    # Saving layer outputs requires a configured on-disk cache directory.
    if args.save_outputs:
        assert train_config.data_cache.cache_dir is not None, \
            "Data cache dir must be set in model's json config when save_outputs option is on!!"
    data_train = get_dataset(config["dataset"]["train"])
    data_test = get_dataset(config["dataset"]["test"])
    net = FGNet(config["net"], train_config.data_cache)
    net.fit_transform(data_train.X, data_train.y, data_test.X, data_test.y, train_config)
    # NOTE(review): this chunk is truncated -- the body of the `if` below
    # lies outside this view.
    if args.save_outputs:
import argparse
import numpy as np
import sys
import os
from keras.datasets import mnist
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Make the project's bundled gcforest package importable.
sys.path.insert(0, "/home/qiang/repo/python/cascade_clf/lib")
from gcforest.gcforest import GCForest
from gcforest.utils.config_utils import load_json
from gcforest.utils.log_utils import get_logger
from gcforest.datasets import t2d, obesity, cirrhosis

LOGGER = get_logger('gcforest.cascade.cascade_classifier')


def parse_args():
    """Parse command-line options for this script.

    Returns an argparse namespace with a single optional ``model`` entry:
    the path to the gcForest net model (JSON) file, or ``None`` when the
    flag is omitted.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--model", dest="model", type=str, default=None,
                     help="gcfoest Net Model File")
    return cli.parse_args()


if __name__ == "__main__":
    args = parse_args()