def train_svm(name='svm', dataset='wine', C=1.0, problem='classification', **kwargs):
    """Train the project's `SVM` wrapper on `dataset` and return it with its test accuracy.

    :param name: base model name; joined with `dataset` to form the saved identifier.
    :param dataset: dataset key passed to `get_dataset`.
    :param C: SVM regularization strength.
    :param problem: task type forwarded to `SVM` (e.g. 'classification').
    :param kwargs: extra keyword arguments forwarded to the `SVM` constructor.
    :return: tuple ``(svm, acc)`` — the trained model and its test accuracy.

    NOTE(review): a second ``train_svm`` defined later in this file shadows this
    one at import time; only one of the two is reachable by name.
    """
    data = get_dataset(dataset, split=True, discrete=False, one_hot=True)
    # Unused fields ('feature_names', 'is_categorical') are intentionally not unpacked.
    train_x, train_y, test_x, test_y = \
        data['train_x'], data['train_y'], data['test_x'], data['test_y']
    # NOTE(review): `rebalance` is read from module scope, not a parameter —
    # confirm it is defined at file level before calling.
    if rebalance:
        print("balancing training data")
        train_x, train_y = sample_balance(train_x, train_y)
        print("#data after balancing:", len(train_y))
    one_hot_encoder = data['one_hot_encoder']
    model_name = '-'.join([dataset, name])
    svm = SVM(name=model_name, problem=problem, C=C,
              one_hot_encoder=one_hot_encoder, **kwargs)
    svm.train(train_x, train_y)
    svm.evaluate(train_x, train_y, stage='train')
    # `test` is assumed to return (accuracy, loss, auc); only accuracy is used here.
    acc, loss, auc = svm.test(test_x, test_y)
    return svm, acc
def train_nn(name='nn', dataset='wine', neurons=(20,), alpha=0.01, problem='classification', **kwargs):
    """Train the project's `NeuralNet` wrapper on `dataset` and return it with its test accuracy.

    :param name: base model name; joined with `dataset` and the layer sizes.
    :param dataset: dataset key passed to `get_dataset`.
    :param neurons: hidden-layer sizes, e.g. ``(20,)``.
    :param alpha: L2 regularization strength.
    :param problem: task type forwarded to `NeuralNet` (e.g. 'classification').
    :param kwargs: extra keyword arguments forwarded to the `NeuralNet` constructor.
    :return: tuple ``(nn, acc)`` — the trained model and its test accuracy.

    NOTE(review): a second ``train_nn`` defined later in this file shadows this
    one at import time; only one of the two is reachable by name.
    """
    data = get_dataset(dataset, split=True, discrete=False, one_hot=True)
    # Unused fields ('feature_names', 'is_categorical') are intentionally not unpacked.
    train_x, train_y, test_x, test_y = \
        data['train_x'], data['train_y'], data['test_x'], data['test_y']
    # NOTE(review): `rebalance` is read from module scope, not a parameter —
    # confirm it is defined at file level before calling.
    if rebalance:
        print("balancing training data")
        train_x, train_y = sample_balance(train_x, train_y)
        print("#data after balancing:", len(train_y))
    one_hot_encoder = data['one_hot_encoder']
    model_name = '-'.join([dataset, name] + [str(neuron) for neuron in neurons])
    nn = NeuralNet(name=model_name, problem=problem, neurons=neurons, max_iter=5000,
                   alpha=alpha, one_hot_encoder=one_hot_encoder, **kwargs)
    nn.train(train_x, train_y)
    nn.evaluate(train_x, train_y, stage='train')
    # `test` is assumed to return (accuracy, loss, auc); only accuracy is used here.
    acc, loss, auc = nn.test(test_x, test_y)
    return nn, acc
def train_nn(name='nn', dataset='wine', neurons=(20,), alpha=0.01, **kwargs):
    """Train a scikit-learn MLP (wrapped in `SKClassifier`) on `dataset` and save it.

    :param name: base model name; joined with `dataset` and the layer sizes.
    :param dataset: dataset key passed to `get_dataset`.
    :param neurons: hidden-layer sizes passed as ``hidden_layer_sizes``.
    :param alpha: L2 regularization strength for `MLPClassifier`.
    :param kwargs: extra keyword arguments forwarded to `MLPClassifier`.
    :return: None; the trained model is persisted via ``nn.save()``.

    NOTE(review): this definition shadows an earlier ``train_nn`` in this file;
    only this one is reachable by name after import.
    """
    from sklearn.neural_network import MLPClassifier
    data = get_dataset(dataset, split=True, discrete=False, one_hot=True)
    # Unused fields ('feature_names', 'is_categorical') are intentionally not unpacked.
    train_x, train_y, test_x, test_y = \
        data['train_x'], data['train_y'], data['test_x'], data['test_y']
    # NOTE(review): `rebalance` is read from module scope, not a parameter —
    # confirm it is defined at file level before calling.
    if rebalance:
        print("balancing training data")
        train_x, train_y = sample_balance(train_x, train_y)
        print("#data after balancing:", len(train_y))
    one_hot_encoder = data['one_hot_encoder']
    model_name = '-'.join([dataset, name] + [str(neuron) for neuron in neurons])
    model = MLPClassifier(hidden_layer_sizes=neurons, max_iter=5000, alpha=alpha, **kwargs)
    # standardize=True: MLPs are sensitive to feature scale.
    nn = SKClassifier(model, name=model_name, standardize=True,
                      one_hot_encoder=one_hot_encoder)
    nn.train(train_x, train_y)
    nn.evaluate(train_x, train_y, stage='train')
    nn.test(test_x, test_y)
    nn.save()
def train_svm(name='svm', dataset='wine', C=1.0, problem='classification', **kwargs):
    """Train a scikit-learn SVC (wrapped in `SKClassifier`) on `dataset` and save it.

    :param name: base model name; joined with `dataset` to form the saved identifier.
    :param dataset: dataset key passed to `get_dataset`.
    :param C: SVC regularization strength.
    :param problem: accepted for signature compatibility with the other trainers;
        not forwarded to `SVC` in this implementation.
    :param kwargs: extra keyword arguments forwarded to `SVC`.
    :return: None; the trained model is persisted via ``svm.save()``.

    NOTE(review): this definition shadows an earlier ``train_svm`` in this file;
    only this one is reachable by name after import.
    """
    from sklearn.svm import SVC
    data = get_dataset(dataset, split=True, discrete=False, one_hot=True)
    # Unused fields ('feature_names', 'is_categorical') are intentionally not unpacked.
    train_x, train_y, test_x, test_y = \
        data['train_x'], data['train_y'], data['test_x'], data['test_y']
    # NOTE(review): `rebalance` is read from module scope, not a parameter —
    # confirm it is defined at file level before calling.
    if rebalance:
        print("balancing training data")
        train_x, train_y = sample_balance(train_x, train_y)
        print("#data after balancing:", len(train_y))
    one_hot_encoder = data['one_hot_encoder']
    model_name = '-'.join([dataset, name])
    # probability=True enables predict_proba (slower fit, required downstream).
    model = SVC(C=C, probability=True, **kwargs)
    svm = SKClassifier(model, name=model_name, one_hot_encoder=one_hot_encoder)
    svm.train(train_x, train_y)
    svm.evaluate(train_x, train_y, stage='train')
    svm.test(test_x, test_y)
    svm.save()
def train_tree(name='tree', dataset='wine', max_depth=None, min_samples_leaf=0.005, **kwargs):
    """Train the project's `Tree` model on `dataset`, export it to JSON, and save it.

    :param name: base model name; joined with `dataset` to form the saved identifier.
    :param dataset: dataset key passed to `get_dataset`.
    :param max_depth: maximum tree depth (None = unbounded).
    :param min_samples_leaf: minimum leaf size (fraction when < 1).
    :param kwargs: extra keyword arguments forwarded to the `Tree` constructor.
    :return: None; the model is exported to ``models/<name>.json`` and saved.
    """
    data = get_dataset(dataset, split=True, discrete=False, one_hot=True)
    # Unused field 'feature_names' is intentionally not unpacked.
    train_x, train_y, test_x, test_y, one_hot_encoder = \
        data['train_x'], data['train_y'], data['test_x'], data['test_y'], data['one_hot_encoder']
    # NOTE(review): `rebalance` is read from module scope, not a parameter —
    # confirm it is defined at file level before calling.
    if rebalance:
        print("balancing training data")
        train_x, train_y = sample_balance(train_x, train_y)
        print("#data after balancing:", len(train_y))
    model_name = '-'.join([dataset, name])
    tree = Tree(name=model_name, max_depth=max_depth, min_samples_leaf=min_samples_leaf,
                one_hot_encoder=one_hot_encoder, **kwargs)
    tree.train(train_x, train_y)
    tree.evaluate(train_x, train_y, stage='train')
    tree.test(test_x, test_y)
    tree.describe()
    tree.export(get_path('models', '{}.json'.format(model_name)))
    tree.save()
def train_rule(name='rule', dataset='breast_cancer', rule_max_len=2, **kwargs):
    """Train a Bayesian rule list on a discretized `dataset` and save it.

    :param name: base model name; joined with `dataset` to form the saved identifier.
    :param dataset: dataset key passed to `get_dataset` (loaded with discrete=True).
    :param rule_max_len: maximum number of conditions per rule.
    :param kwargs: extra keyword arguments forwarded to the `RuleList` constructor.
    :return: None; the model is described and persisted via ``brl.save()``.
    """
    from iml.models.rule_model import RuleList

    data = get_dataset(dataset, split=True, discrete=True)
    train_x = data['train_x']
    train_y = data['train_y']
    test_x = data['test_x']
    test_y = data['test_y']
    feature_names = data['feature_names']
    # `rebalance` comes from module scope — optionally oversample to balance classes.
    if rebalance:
        print("balancing training data")
        train_x, train_y = sample_balance(train_x, train_y)
        print("#data after balancing:", len(train_y))
    discretizer = data['discretizer']
    brl = RuleList(name='-'.join([dataset, name]), rule_maxlen=rule_max_len,
                   discretizer=discretizer, **kwargs)
    brl.train(train_x, train_y)
    brl.evaluate(train_x, train_y, stage='train')
    brl.test(test_x, test_y)
    brl.describe(feature_names=feature_names)
    brl.save()