Example #1
def train(snapshotroot, ensembleType, numTrees, depth, seed=0):
    x, y = datasets.load_iris(return_X_y=True)

    xtrain, ytrain, xtest, ytest = balanced_shuffle(x, y, 100)
    xtrain, ytrain, xval, yval = balanced_shuffle(xtrain, ytrain, 50)

    metric = "multi_logloss"

    earlyStop = max(1, int(0.1 * numTrees))

    clf = ensembleType(boosting_type="gbdt",
                       max_depth=depth,
                       num_leaves=2**depth,
                       n_estimators=numTrees,
                       objective="multiclass",  # "multi_logloss" is an eval metric, not a valid objective
                       random_state=seed)
    clf.fit(xtrain,
            ytrain,
            eval_set=[(xval, yval)],
            eval_metric=metric,
            early_stopping_rounds=earlyStop,
            verbose=False)

    best_score = clf.best_score_["valid_0"][metric]
    print(f"best iteration = {clf.best_iteration_}, best_score = {best_score}")

    ypred = clf.predict(xtest)

    acc = (ypred == ytest).mean()

    return acc, np.array(clf.evals_result_["valid_0"][metric])
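The balanced_shuffle helper is not shown in this snippet. A minimal sketch, assuming it draws a class-balanced subset of the requested size and returns the remainder as the second split (names and behavior inferred from the call sites, not from the original source):

import numpy as np

def balanced_shuffle(x, y, size, seed=0):
    # Assumed behavior: pick `size` samples with equal counts per class,
    # returning (x_picked, y_picked, x_rest, y_rest).
    rng = np.random.RandomState(seed)
    picked = []
    classes = np.unique(y)
    for c in classes:
        idx = rng.permutation(np.where(y == c)[0])
        picked.extend(idx[:size // len(classes)])
    picked = np.asarray(picked)
    rest = np.setdiff1d(np.arange(len(y)), picked)
    return x[picked], y[picked], x[rest], y[rest]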
Example #2
def main():
    # load data
    iris = load_iris()
    lb, y = binarize_labels(iris.target)  # stage 1
    nn = build_nn()  # stage 2
    theta_start = initialize(nn)  # stage 3
    theta = gradient_descent(nn, theta_start, iris.data, y)
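binarize_labels, build_nn, initialize, and gradient_descent are project-local helpers that are not shown. For orientation, binarize_labels presumably wraps scikit-learn's LabelBinarizer; a sketch matching this call's return order (an assumption, not the original code; note that the next example unpacks the same helper in the opposite order, T, lb, so the snippets likely come from different revisions):

from sklearn.preprocessing import LabelBinarizer

def binarize_labels(target):
    # Assumed: one-hot encode the integer labels and also return the
    # fitted binarizer so predictions can be inverse-transformed later.
    lb = LabelBinarizer()
    y = lb.fit_transform(target)
    return lb, y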
Example #3
def main():
    iris = load_iris()
    T, lb = binarize_labels(iris.target)
    with ex.optionset("big") as o:
        nn, theta = o.create_neural_network(in_size=iris.data.shape[1], out_size=T.shape[1])

    theta = many_epochs_decrease_lr(nn, theta, iris.data, T)
Example #4
def train(snapshotroot, ensembleType, numTrees, depth, seed=0):
    x, y = datasets.load_iris(return_X_y=True)

    xtrain, ytrain, xtest, ytest = balanced_shuffle(x, y, 100)

    clf = ensembleType(random_state=seed,
                       n_estimators=numTrees,
                       max_features="sqrt",
                       max_depth=depth)
    clf.fit(xtrain, ytrain)

    acc = clf.score(xtest, ytest)

    return acc
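A plausible invocation, assuming ensembleType is a scikit-learn ensemble class such as RandomForestClassifier (the snapshotroot argument is unused in this variant; all values below are illustrative):

from sklearn.ensemble import RandomForestClassifier

acc = train(snapshotroot="/tmp/snapshots",
            ensembleType=RandomForestClassifier,
            numTrees=100,
            depth=5)
print(f"test accuracy = {acc}")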
Example #5
def train(snapshotroot, ensembleType, numTrees, depth, seed=0):
    x, y = datasets.load_iris(return_X_y=True)

    xtrain, ytrain, xtest, ytest = balanced_shuffle(x, y, 100)
    xtrain, ytrain, xval, yval = balanced_shuffle(xtrain, ytrain, 50)
    
    metric="mlogloss"

    earlyStop=max(1,int(0.1*numTrees))

    #clf = xgb.XGBClassifier(max_depth=depth, tree_method="exact", n_estimators=numTrees, random_state=seed)
    clf = ensembleType(max_depth=depth, use_label_encoder=False, tree_method="exact", n_estimators=numTrees, random_state=seed)
    clf.fit(xtrain, ytrain, eval_set=[(xtrain, ytrain), (xval, yval)], eval_metric=metric, verbose=False, early_stopping_rounds=earlyStop)

    print(f"best iteration = {clf.best_iteration}, best_score = {clf.best_score}, best_ntree_limit = {clf.best_ntree_limit}")

    results = clf.evals_result()
    ypred = clf.predict(xtest)

    acc = (ypred == ytest).mean()

    return acc, np.array(results["validation_1"][metric])
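The commented-out line above suggests this was run with xgboost's XGBClassifier; a sketch of the corresponding call, assuming xgboost 1.x (where early_stopping_rounds is still a fit() argument) and illustrative hyperparameters:

import xgboost as xgb

acc, val_curve = train("/tmp/snapshots", xgb.XGBClassifier, numTrees=100, depth=3)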
Example #6
import numpy as np
import pandas as pd
import datasets as dt # https://github.com/vauxgomes/datasets

from lad.lad import LADClassifier

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, cross_validate

# Load
df = dt.load_iris()

X = df[df.columns[:-1]]
y = df[df.columns[-1]]

# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)

# Classifier
clf = LADClassifier(mode='eager')
clf.fit(X_train, y_train)

y_hat = clf.predict(X_test)

print(classification_report(y_test, y_hat))

print(clf)

# scores = cross_validate(LADClassifier(mode='eager'), X, y, scoring=['accuracy'])

# print(np.mean(scores['test_accuracy']))
Example #7
graph = pydotplus.graph_from_dot_data(dot)
graph.write_png('sample.png')


ASSOCIATION RULES:
import pandas as pd
data = pd.read_csv('MLRSMBAEX2-DataSet.csv')


-------------------- DIMENSIONALITY REDUCTION ----------------------------
from sklearn import datasets
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

dataset = datasets.load_iris()
x = dataset.data
x = pd.DataFrame(x)

y = dataset.target
y = pd.DataFrame(y)

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x = sc.fit_transform(x)
x = pd.DataFrame(x)

from sklearn.decomposition import PCA
pca = PCA()
x = pca.fit_transform(x)
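PCA() with no n_components keeps all four components, so nothing has been reduced yet. A common follow-up, not in the original, is to inspect the explained-variance ratios and keep only the leading components:

# Fraction of variance captured by each principal component.
print(pca.explained_variance_ratio_)

# Components are ordered by explained variance, so slicing the first two
# columns is equivalent to refitting with PCA(n_components=2).
x_reduced = x[:, :2]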
Example #8
In [82]: from sklearn import cluster, datasets

In [83]: iris = datasets.load_iris()

In [84]: k_means = cluster.KMeans(n_clusters=3)

In [85]: k_means.fit(iris.data) 
Out[85]: 
KMeans(n_clusters=3)

In [86]: print(k_means.labels_[::10])
[1 1 1 1 1 2 2 2 2 2 0 0 0 0 0]

In [87]: print(iris.target[::10])
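Cluster IDs are arbitrary relabelings of the true classes, so comparing the two printouts element-wise is misleading; a permutation-invariant score (not part of the original transcript, output omitted):

In [88]: from sklearn.metrics import adjusted_rand_score

In [89]: print(adjusted_rand_score(iris.target, k_means.labels_))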
Example #9
def test_load_iris_wellformed():
    iris = load_iris()
    assert_dataset_wellformed(iris)
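assert_dataset_wellformed is a project-local helper. A minimal sketch of the kind of invariants it presumably checks on the iris Bunch (an assumption, not the actual test suite):

def assert_dataset_wellformed(ds):
    # Assumed checks: attributes exist and shapes are mutually consistent.
    assert ds.data.shape == (150, 4)
    assert ds.target.shape == (150,)
    assert list(ds.target_names) == ['setosa', 'versicolor', 'virginica']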
Example #10
def run_experiment(settings):
    ############################################################################
    exponential_family = EinsumNetwork.BinomialArray

    K = 2

    # structure = 'poon-domingos'
    structure = 'binary-trees'

    # 'poon-domingos'
    pd_num_pieces = [2]

    # 'binary-trees'
    depth = 1
    num_repetitions = 2

    width = 4

    num_epochs = 2
    batch_size = 3
    online_em_frequency = 1
    online_em_stepsize = 0.05

    print_weights = True

    ############################################################################

    exponential_family_args = None
    if exponential_family == EinsumNetwork.BinomialArray:
        exponential_family_args = {'N': 80}
    if exponential_family == EinsumNetwork.CategoricalArray:
        exponential_family_args = {'K': 256}
    if exponential_family == EinsumNetwork.NormalArray:
        exponential_family_args = {'min_var': 1e-6, 'max_var': 0.1}

    iris = datasets.load_iris()
    train_x = iris.data * 10
    train_labels = iris.target
    
    # self generated data
    # train_x = np.array([np.array([1, 1, 1, 1]) for i in range(50)] + [np.array([3, 3, 3, 3]) for i in range(50)] + [np.array([8, 8, 8, 8]) for i in range(50)])
    # train_labels = [0 for i in range(50)] + [1 for i in range(50)] + [2 for i in range(50)]

    if exponential_family == EinsumNetwork.NormalArray:
        train_x /= 255.
        train_x -= .5

    classes = np.unique(train_labels).tolist()

    train_x = torch.from_numpy(train_x).to(torch.device(device))

    # Make EinsumNetwork
    ######################################
    if structure == 'poon-domingos':
        pd_delta = [[width / d] for d in pd_num_pieces]
        graph = Graph.poon_domingos_structure(shape=(width,), delta=pd_delta)
    elif structure == 'binary-trees':
        graph = Graph.random_binary_trees(num_var=train_x.shape[1], depth=depth, num_repetitions=num_repetitions)
    else:
        raise AssertionError("Unknown Structure")

    args = EinsumNetwork.Args(
            num_var=train_x.shape[1],
            num_dims=1,
            num_classes=1,
            num_sums=K,
            num_input_distributions=K,
            exponential_family=exponential_family,
            exponential_family_args=exponential_family_args,
            online_em_frequency=online_em_frequency,
            online_em_stepsize=online_em_stepsize)

    einet = EinsumNetwork.EinsumNetwork(graph, args)
    print(einet)

    init_dict = get_init_dict(einet, train_x)
    einet.initialize(init_dict)
    einet.to(device)

    einet = einet.float()

    num_params = EinsumNetwork.eval_size(einet)

    # Train
    ######################################

    train_N = train_x.shape[0]

    start_time = time.time()

    for epoch_count in range(num_epochs):
        idx_batches = torch.randperm(train_N, device=device).split(batch_size)

        total_ll = 0.0
        for idx in idx_batches:
            batch_x = train_x[idx, :].float()
            outputs = einet.forward(batch_x)
            ll_sample = EinsumNetwork.log_likelihoods(outputs)
            log_likelihood = ll_sample.sum()
            log_likelihood.backward()

            einet.em_process_batch()
            total_ll += log_likelihood.detach().item()
        einet.em_update()
        print(f'[{epoch_count}]   total log-likelihood: {total_ll/train_N}')

    end_time = time.time()

    ################
    # Experiment 1 #
    ################
    einet.eval()
    train_ll = EinsumNetwork.eval_loglikelihood_batched(einet, train_x.float(), batch_size=batch_size)
    print()
    print("Experiment 1: Log-likelihoods  --- train LL {}".format(
            train_ll / train_N))

    ################
    # Experiment 2 #
    ################
    train_labels = torch.tensor(train_labels).to(torch.device(device))

    acc_train = EinsumNetwork.eval_accuracy_batched(einet, classes, train_x.float(), train_labels, batch_size=batch_size)
    print()
    print("Experiment 2: Classification accuracies  --- train acc {}".format(
            acc_train))

    print()
    print(f'Network size: {num_params} parameters')
    print(f'Training time: {end_time - start_time}s')

    if print_weights:
        for l in einet.einet_layers:
            print()
            if isinstance(l, FactorizedLeafLayer.FactorizedLeafLayer):
                print(l.ef_array.params)
            else:
                print(l.params)

    return {
        'train_ll': train_ll / train_N,
        'train_acc': acc_train,
        'network_size': num_params,
        'training_time': end_time - start_time,
    }
Example #11
def train(snapshotroot, device, forestType, numTrees, depth):
    x, y = datasets.load_iris()

    xtrain, ytrain, xtest, ytest = balanced_shuffle(x, y, 100)
    xtrain, ytrain, xval, yval = balanced_shuffle(xtrain, ytrain, 50)

    # Transfer this data to the device
    xtrain = torch.from_numpy(xtrain).type(torch.float32).to(device)
    ytrain = torch.from_numpy(ytrain).type(torch.long).to(device)
    xval = torch.from_numpy(xval).type(torch.float32).to(device)
    yval = torch.from_numpy(yval).type(torch.long).to(device)
    xtest = torch.from_numpy(xtest).type(torch.float32).to(device)
    ytest = torch.from_numpy(ytest).type(torch.long).to(device)

    net = Net(forestType, numTrees, depth).to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    optimizer = optim.Adam(net.parameters(), lr=0.05)

    numEpochs = 500
    batchSize = 50

    indices = [i for i in range(xtrain.shape[0])]

    bestEpoch = numEpochs - 1
    bestAccuracy = 0.0
    bestLoss = 1000.0

    valLosses = np.zeros([numEpochs])

    for epoch in range(numEpochs):
        random.shuffle(indices)

        xtrain = xtrain[indices, :]
        ytrain = ytrain[indices]

        runningLoss = 0.0
        count = 0
        for xbatch, ybatch in batches(xtrain, ytrain, batchSize):
            optimizer.zero_grad()

            outputs = net(xbatch)
            loss = criterion(outputs, ybatch)

            loss.backward()

            optimizer.step()

            runningLoss += loss.item()  # accumulate a float, not a graph-holding tensor
            count += 1

        meanLoss = runningLoss / count

        snapshotFile = os.path.join(snapshotroot, f"epoch_{epoch}")
        torch.save(net.state_dict(), snapshotFile)

        runningLoss = 0.0
        count = 0

        with torch.no_grad():
            net.train(False)
            #for xbatch, ybatch in batches(xval, yval, batchSize):
            for xbatch, ybatch in zip([xval], [yval]):
                outputs = net(xbatch)
                loss = criterion(outputs, ybatch)

                runningLoss += loss.item()
                count += 1

            net.train(True)

        valLoss = runningLoss / count

        if valLoss < bestLoss:
            bestLoss = valLoss
            bestEpoch = epoch

        #print(f"Info: Epoch = {epoch}, loss = {meanLoss}, validation loss = {valLoss}")
        valLosses[epoch] = valLoss

    snapshotFile = os.path.join(snapshotroot, f"epoch_{bestEpoch}")

    net = Net(forestType, numTrees, depth)
    net.load_state_dict(torch.load(snapshotFile, map_location="cpu"))
    net = net.to(device)

    totalCorrect = 0
    count = 0

    with torch.no_grad():
        net.train(False)
        #for xbatch, ybatch in batches(xtest, ytest, batchSize):
        for xbatch, ybatch in zip([xtest], [ytest]):
            outputs = net(xbatch)
            outputs = torch.argmax(outputs, dim=1)

            tmpCorrect = torch.sum(outputs == ybatch)

            totalCorrect += tmpCorrect
            count += xbatch.shape[0]

    accuracy = float(totalCorrect) / float(count)
    print(
        f"Info: Best epoch = {bestEpoch}, test accuracy = {accuracy}, misclassification rate = {1.0 - accuracy}"
    )

    return accuracy, valLosses
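The batches generator used in the training loop above is not shown; a minimal sketch, assuming it yields consecutive aligned mini-batches:

def batches(x, y, batchSize):
    # Assumed: slice the (already shuffled) tensors into consecutive chunks.
    for begin in range(0, x.shape[0], batchSize):
        yield x[begin:begin + batchSize], y[begin:begin + batchSize]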
Example #12
def run_experiment(settings):
    ############################################################################
    exponential_family = EinsumNetwork.BinomialArray

    K = 1

    # structure = 'poon-domingos'
    structure = 'binary-trees'

    # 'poon-domingos'
    pd_num_pieces = [2]

    # 'binary-trees'
    depth = 1
    num_repetitions = 2

    width = 4

    num_epochs = 2
    batch_size = 3
    online_em_frequency = 1
    online_em_stepsize = 0.05
    SGD_learning_rate = 0.05

    ############################################################################

    exponential_family_args = None
    if exponential_family == EinsumNetwork.BinomialArray:
        exponential_family_args = {'N': 80}
    if exponential_family == EinsumNetwork.CategoricalArray:
        exponential_family_args = {'K': 256}
    if exponential_family == EinsumNetwork.NormalArray:
        exponential_family_args = {'min_var': 1e-6, 'max_var': 0.1}

    iris = datasets.load_iris()
    train_x = iris.data * 10
    train_labels = iris.target

    # self generated data
    # train_x = np.array([np.array([1, 1, 1, 1]) for i in range(50)] + [np.array([3, 3, 3, 3]) for i in range(50)] + [np.array([8, 8, 8, 8]) for i in range(50)])
    # train_labels = np.array([0 for i in range(50)] + [1 for i in range(50)] + [2 for i in range(50)])

    if exponential_family == EinsumNetwork.NormalArray:
        train_x /= 255.
        train_x -= .5

    classes = np.unique(train_labels).tolist()

    train_x = torch.from_numpy(train_x).to(torch.device(device))

    # Make EinsumNetwork
    ######################################

    einets = []
    ps = []
    for c in classes:
        if structure == 'poon-domingos':
            pd_delta = [[width / d] for d in pd_num_pieces]
            graph = Graph.poon_domingos_structure(shape=(width,),
                                                  delta=pd_delta)
        elif structure == 'binary-trees':
            graph = Graph.random_binary_trees(num_var=train_x.shape[1],
                                              depth=depth,
                                              num_repetitions=num_repetitions)
        else:
            raise AssertionError("Unknown Structure")

        args = EinsumNetwork.Args(
            num_var=train_x.shape[1],
            num_dims=1,
            num_classes=1,
            num_sums=K,
            num_input_distributions=K,
            exponential_family=exponential_family,
            exponential_family_args=exponential_family_args,
            online_em_frequency=online_em_frequency,
            online_em_stepsize=online_em_stepsize)

        einet = EinsumNetwork.EinsumNetwork(graph, args)
        print(einet)

        init_dict = get_init_dict(einet,
                                  train_x,
                                  train_labels=train_labels,
                                  einet_class=c)
        einet.initialize(init_dict)
        einet.to(device)

        einet = einet.float()
        einets.append(einet)

        ps.append(np.count_nonzero(train_labels == c))

    ps = [p / sum(ps) for p in ps]
    ps = torch.tensor(ps).to(torch.device(device))
    mixture = EinetMixture(ps, einets, classes=classes)

    num_params = mixture.eval_size()

    # Train
    ######################################
    """ Generative training """

    start_time = time.time()

    for (einet, c) in zip(einets, classes):
        train_x_c = train_x[[l == c for l in train_labels]]

        train_N = train_x_c.shape[0]

        for epoch_count in range(num_epochs):
            idx_batches = torch.randperm(train_N,
                                         device=device).split(batch_size)

            total_ll = 0.0
            for idx in idx_batches:
                batch_x = train_x_c[idx, :].float()
                outputs = einet.forward(batch_x)
                ll_sample = EinsumNetwork.log_likelihoods(outputs)
                log_likelihood = ll_sample.sum()
                log_likelihood.backward()

                einet.em_process_batch()
                total_ll += log_likelihood.detach().item()
            einet.em_update()
            print(
                f'[{epoch_count}]   total log-likelihood: {total_ll/train_N}')

    end_time = time.time()
    """ Discriminative training """
    def discriminative_learning():
        sub_net_parameters = None
        for einet in mixture.einets:
            if sub_net_parameters is None:
                sub_net_parameters = list(einet.parameters())
            else:
                sub_net_parameters += list(einet.parameters())
        sub_net_parameters += list(mixture.parameters())

        optimizer = torch.optim.SGD(sub_net_parameters, lr=SGD_learning_rate)
        loss_function = torch.nn.CrossEntropyLoss()

        train_N = train_x.shape[0]

        start_time = time.time()

        for epoch_count in range(num_epochs):
            idx_batches = torch.randperm(train_N,
                                         device=device).split(batch_size)

            total_loss = 0
            for idx in idx_batches:
                batch_x = train_x[idx, :].float()
                optimizer.zero_grad()
                outputs = mixture.forward(batch_x)
                target = torch.tensor([
                    classes.index(train_labels[i]) for i in idx
                ]).to(torch.device(device))
                loss = loss_function(outputs, target)
                loss.backward()
                optimizer.step()
                total_loss += loss.detach().item()

            print(f'[{epoch_count}]   total loss: {total_loss}')

        end_time = time.time()

    ################
    # Experiment 1 #
    ################
    mixture.eval()
    train_N = train_x.shape[0]  # full training-set size; the loop above left train_N at the last class's count
    train_ll = EinsumNetwork.eval_loglikelihood_batched(mixture,
                                                        train_x.float(),
                                                        batch_size=1)
    print()
    print("Experiment 1: Log-likelihoods  --- train LL {}".format(train_ll /
                                                                  train_N))

    ################
    # Experiment 2 #
    ################
    train_labels = torch.tensor(train_labels).to(torch.device(device))

    acc_train = EinsumNetwork.eval_accuracy_batched(mixture,
                                                    classes,
                                                    train_x.float(),
                                                    train_labels,
                                                    batch_size=1)
    print()
    print("Experiment 2: Classification accuracies  --- train acc {}".format(
        acc_train))

    print()
    print(f'Network size: {num_params} parameters')
    print(f'Training time: {end_time - start_time}s')

    return {
        'train_ll': train_ll / train_N,
        'train_acc': acc_train,
        'network_size': num_params,
        'training_time': end_time - start_time,
    }
Example #13
            print('corrupt jpg file %s' % filepath)
        #image_paths_paint.append(filepath)
    if idx == NUM_SKETCH:
        break
    if idx % 1000 == 0:
        print(idx)
print(image_paths_paint[0])
print(len(image_paths_paint))


paint_ids = np.empty(len(image_paths_paint), dtype=int)
paint_ids.fill(1)
real_ids = np.empty(len(image_paths_real), dtype=int)
real_ids.fill(0)

data_x = image_paths_real+image_paths_paint
data_y = real_ids.tolist() + paint_ids.tolist()




# train with SVM
from sklearn import datasets
iris = datasets.load_iris()
digits = datasets.load_digits()

from sklearn import svm
clf = svm.SVC(gamma=0.001, C=100.)

clf.fit(digits.data[:-1], digits.target[:-1])
print(clf.predict(digits.data[-1:]))
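Predicting only the final digit gives little signal about model quality; a held-out split gives a more meaningful check (a sketch, not part of the original tutorial):

from sklearn.model_selection import train_test_split

Xtr, Xte, ytr, yte = train_test_split(digits.data, digits.target, random_state=0)
clf = svm.SVC(gamma=0.001, C=100.)
clf.fit(Xtr, ytr)
print(clf.score(Xte, yte))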