def train(snapshotroot, ensembleType, numTrees, depth, seed=0):
    # balanced_shuffle is a helper defined elsewhere in this project
    x, y = datasets.load_iris(return_X_y=True)
    xtrain, ytrain, xtest, ytest = balanced_shuffle(x, y, 100)
    xtrain, ytrain, xval, yval = balanced_shuffle(xtrain, ytrain, 50)

    metric = "multi_logloss"
    earlyStop = max(1, int(0.1 * numTrees))

    # "multi_logloss" is the evaluation metric; the objective itself is "multiclass"
    clf = ensembleType(boosting_type="gbdt", max_depth=depth, num_leaves=2**depth,
                       n_estimators=numTrees, objective="multiclass", random_state=seed)
    clf.fit(xtrain, ytrain, eval_set=[(xval, yval)], eval_metric=metric,
            early_stopping_rounds=earlyStop, verbose=False)

    best_score = clf.best_score_["valid_0"][metric]
    print(f"best iteration = {clf.best_iteration_}, best_score = {best_score}")

    ypred = clf.predict(xtest)
    acc = (ypred == ytest).mean()

    return acc, np.array(clf.evals_result_["valid_0"][metric])
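# A minimal, hypothetical driver for the LightGBM variant of train() above.
# The snapshot directory name and hyperparameters are illustrative assumptions,
# and lightgbm must be installed for LGBMClassifier to exist.
import lightgbm as lgb

if __name__ == "__main__":
    accs = []
    for seed in range(3):
        acc, val_curve = train("snapshots", lgb.LGBMClassifier,
                               numTrees=100, depth=3, seed=seed)
        accs.append(acc)
    print("mean test accuracy:", sum(accs) / len(accs))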
def main():
    # load data
    iris = load_iris()
    lb, y = binarize_labels(iris.target)

    # stage 1
    nn = build_nn()

    # stage 2
    theta_start = initialize(nn)

    # stage 3
    theta = gradient_descent(nn, theta_start, iris.data, y)
def main():
    iris = load_iris()
    T, lb = binarize_labels(iris.target)

    with ex.optionset("big") as o:
        nn, theta = o.create_neural_network(in_size=iris.data.shape[1],
                                            out_size=T.shape[1])

    theta = many_epochs_decrease_lr(nn, theta, iris.data, T)
def train(snapshotroot, ensembleType, numTrees, depth, seed=0):
    # balanced_shuffle is a helper defined elsewhere in this project
    x, y = datasets.load_iris(return_X_y=True)
    xtrain, ytrain, xtest, ytest = balanced_shuffle(x, y, 100)

    clf = ensembleType(random_state=seed, n_estimators=numTrees,
                       max_features="sqrt", max_depth=depth)
    clf.fit(xtrain, ytrain)

    acc = clf.score(xtest, ytest)
    return acc
def train(snapshotroot, ensembleType, numTrees, depth, seed=0):
    # balanced_shuffle is a helper defined elsewhere in this project
    x, y = datasets.load_iris(return_X_y=True)
    xtrain, ytrain, xtest, ytest = balanced_shuffle(x, y, 100)
    xtrain, ytrain, xval, yval = balanced_shuffle(xtrain, ytrain, 50)

    metric = "mlogloss"
    earlyStop = max(1, int(0.1 * numTrees))

    # clf = xgb.XGBClassifier(max_depth=depth, tree_method="exact", n_estimators=numTrees, random_state=seed)
    clf = ensembleType(max_depth=depth, use_label_encoder=False, tree_method="exact",
                       n_estimators=numTrees, random_state=seed)
    clf.fit(xtrain, ytrain, eval_set=[(xtrain, ytrain), (xval, yval)], eval_metric=metric,
            verbose=False, early_stopping_rounds=earlyStop)

    print(f"best iteration = {clf.best_iteration}, best_score = {clf.best_score}, "
          f"best_ntree_limit = {clf.best_ntree_limit}")

    results = clf.evals_result()
    ypred = clf.predict(xtest)
    acc = (ypred == ytest).mean()

    return acc, np.array(results["validation_1"][metric])
import numpy as np
import pandas as pd
import datasets as dt  # https://github.com/vauxgomes/datasets

from lad.lad import LADClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, cross_validate

# Load
df = dt.load_iris()
X = df[df.columns[:-1]]
y = df[df.columns[-1]]

# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)

# Classifier
clf = LADClassifier(mode='eager')
clf.fit(X_train, y_train)

y_hat = clf.predict(X_test)

print(classification_report(y_test, y_hat))
print(clf)

# scores = cross_validate(LADClassifier(mode='eager'), X, y, scoring=['accuracy'])
# print(np.mean(scores['test_accuracy']))
graph = pydotplus.graph_from_dot_data(dot)
graph.write_png('sample.org')

ASSOCIATION RULES:

import pandas as pd
data = pd.read_csv('MLRSMBAEX2-DataSet.csV')

-------------------- DIMENSIONALITY REDUCTION --------------------

from sklearn import datasets
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

dataset = datasets.load_iris()
x = dataset.data
x = pd.DataFrame(x)
y = dataset.target
y = pd.DataFrame(y)

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x = sc.fit_transform(x)
x = pd.DataFrame(x)

from sklearn.decomposition import PCA
pca = PCA()
x = pca.fit_transform(x)
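# Follow-up sketch (not part of the exercise above): after fitting PCA it is
# common to inspect explained_variance_ratio_ and keep only enough components
# to cover, say, 95% of the variance. The 0.95 threshold is an illustrative choice.
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

x = StandardScaler().fit_transform(datasets.load_iris().data)

pca = PCA().fit(x)
print(pca.explained_variance_ratio_)  # share of total variance per component

n_components = int(np.searchsorted(np.cumsum(pca.explained_variance_ratio_), 0.95)) + 1
x_reduced = PCA(n_components=n_components).fit_transform(x)
print(x_reduced.shape)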
In [82]: from sklearn import cluster, datasets

In [83]: iris = datasets.load_iris()

In [84]: k_means = cluster.KMeans(k=3)

In [85]: k_means.fit(iris.data)
Out[85]:
KMeans(copy_x=True, init='k-means++', k=3, max_iter=300, n_init=10,
       n_jobs=1, precompute_distances=True,
       random_state=<mtrand.RandomState object at 0x7f4d860642d0>,
       tol=0.0001, verbose=0)

In [86]: print k_means.labels_[::10]
[1 1 1 1 1 2 2 2 2 2 0 0 0 0 0]

In [87]: print iris.target[::10]
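# Note: the transcript above uses an old scikit-learn API (KMeans(k=3)) and
# Python 2 print statements; in current scikit-learn the argument is n_clusters.
# Cluster IDs are also arbitrary, so a permutation-invariant score is a fairer
# comparison with iris.target than reading the labels side by side. A sketch:
from sklearn import cluster, datasets
from sklearn.metrics import adjusted_rand_score

iris = datasets.load_iris()
k_means = cluster.KMeans(n_clusters=3, n_init=10, random_state=0)
k_means.fit(iris.data)

# Adjusted Rand index is invariant to how the cluster IDs are numbered.
print(adjusted_rand_score(iris.target, k_means.labels_))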
def test_load_iris_wellformed():
    iris = load_iris()
    assert_dataset_wellformed(iris)
def run_experiment(settings):
    ############################################################################
    exponential_family = EinsumNetwork.BinomialArray

    K = 2

    # structure = 'poon-domingos'
    structure = 'binary-trees'

    # 'poon-domingos'
    pd_num_pieces = [2]

    # 'binary-trees'
    depth = 1
    num_repetitions = 2
    width = 4

    num_epochs = 2
    batch_size = 3
    online_em_frequency = 1
    online_em_stepsize = 0.05

    # print_weights = False
    print_weights = True
    ############################################################################

    exponential_family_args = None
    if exponential_family == EinsumNetwork.BinomialArray:
        exponential_family_args = {'N': 80}
    if exponential_family == EinsumNetwork.CategoricalArray:
        exponential_family_args = {'K': 256}
    if exponential_family == EinsumNetwork.NormalArray:
        exponential_family_args = {'min_var': 1e-6, 'max_var': 0.1}

    iris = datasets.load_iris()
    train_x = iris.data * 10
    train_labels = iris.target

    # self generated data
    # train_x = np.array([np.array([1, 1, 1, 1]) for i in range(50)] + [np.array([3, 3, 3, 3]) for i in range(50)] + [np.array([8, 8, 8, 8]) for i in range(50)])
    # train_labels = [0 for i in range(50)] + [1 for i in range(50)] + [2 for i in range(50)]

    if exponential_family == EinsumNetwork.NormalArray:
        train_x /= 255.
        train_x -= .5

    classes = np.unique(train_labels).tolist()

    train_x = torch.from_numpy(train_x).to(torch.device(device))

    # Make EinsumNetwork
    ######################################
    if structure == 'poon-domingos':
        pd_delta = [[width / d] for d in pd_num_pieces]
        graph = Graph.poon_domingos_structure(shape=(width,), delta=pd_delta)
    elif structure == 'binary-trees':
        graph = Graph.random_binary_trees(num_var=train_x.shape[1], depth=depth,
                                          num_repetitions=num_repetitions)
    else:
        raise AssertionError("Unknown Structure")

    args = EinsumNetwork.Args(
        num_var=train_x.shape[1],
        num_dims=1,
        num_classes=1,
        num_sums=K,
        num_input_distributions=K,
        exponential_family=exponential_family,
        exponential_family_args=exponential_family_args,
        online_em_frequency=online_em_frequency,
        online_em_stepsize=online_em_stepsize)

    einet = EinsumNetwork.EinsumNetwork(graph, args)
    print(einet)

    init_dict = get_init_dict(einet, train_x)
    einet.initialize(init_dict)
    einet.to(device)
    einet = einet.float()

    num_params = EinsumNetwork.eval_size(einet)

    # Train
    ######################################
    train_N = train_x.shape[0]

    start_time = time.time()

    for epoch_count in range(num_epochs):
        idx_batches = torch.randperm(train_N, device=device).split(batch_size)

        total_ll = 0.0
        for idx in idx_batches:
            batch_x = train_x[idx, :].float()
            outputs = einet.forward(batch_x)
            ll_sample = EinsumNetwork.log_likelihoods(outputs)
            log_likelihood = ll_sample.sum()
            log_likelihood.backward()

            einet.em_process_batch()
            total_ll += log_likelihood.detach().item()

        einet.em_update()
        print(f'[{epoch_count}] total log-likelihood: {total_ll/train_N}')

    end_time = time.time()

    ################
    # Experiment 1 #
    ################
    einet.eval()
    train_ll = EinsumNetwork.eval_loglikelihood_batched(einet, train_x.float(),
                                                        batch_size=batch_size)
    print()
    print("Experiment 1: Log-likelihoods --- train LL {}".format(train_ll / train_N))

    ################
    # Experiment 2 #
    ################
    train_labels = torch.tensor(train_labels).to(torch.device(device))
    acc_train = EinsumNetwork.eval_accuracy_batched(einet, classes, train_x.float(),
                                                    train_labels, batch_size=batch_size)
    print()
    print("Experiment 2: Classification accuracies --- train acc {}".format(acc_train))

    print()
    print(f'Network size: {num_params} parameters')
    print(f'Training time: {end_time - start_time}s')

    if print_weights:
        for l in einet.einet_layers:
            print()
            if isinstance(l, FactorizedLeafLayer.FactorizedLeafLayer):
                print(l.ef_array.params)
            else:
                print(l.params)

    return {
        'train_ll': train_ll / train_N,
        'train_acc': acc_train,
        'network_size': num_params,
        'training_time': end_time - start_time,
    }
def train(snapshotroot, device, forestType, numTrees, depth):
    # balanced_shuffle is a helper defined elsewhere in this project
    x, y = datasets.load_iris(return_X_y=True)
    xtrain, ytrain, xtest, ytest = balanced_shuffle(x, y, 100)
    xtrain, ytrain, xval, yval = balanced_shuffle(xtrain, ytrain, 50)

    # Transfer this data to the device
    xtrain = torch.from_numpy(xtrain).type(torch.float32).to(device)
    ytrain = torch.from_numpy(ytrain).type(torch.long).to(device)
    xval = torch.from_numpy(xval).type(torch.float32).to(device)
    yval = torch.from_numpy(yval).type(torch.long).to(device)
    xtest = torch.from_numpy(xtest).type(torch.float32).to(device)
    ytest = torch.from_numpy(ytest).type(torch.long).to(device)

    net = Net(forestType, numTrees, depth).to(device)

    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(net.parameters(), lr=0.05)

    numEpochs = 500
    batchSize = 50

    indices = [i for i in range(xtrain.shape[0])]

    bestEpoch = numEpochs - 1
    bestAccuracy = 0.0
    bestLoss = 1000.0

    valLosses = np.zeros([numEpochs])

    for epoch in range(numEpochs):
        random.shuffle(indices)

        xtrain = xtrain[indices, :]
        ytrain = ytrain[indices]

        runningLoss = 0.0
        count = 0

        for xbatch, ybatch in batches(xtrain, ytrain, batchSize):
            optimizer.zero_grad()

            outputs = net(xbatch)
            loss = criterion(outputs, ybatch)

            loss.backward()
            optimizer.step()

            runningLoss += loss
            count += 1

        meanLoss = runningLoss / count

        snapshotFile = os.path.join(snapshotroot, f"epoch_{epoch}")
        torch.save(net.state_dict(), snapshotFile)

        runningLoss = 0.0
        count = 0

        with torch.no_grad():
            net.train(False)

            # for xbatch, ybatch in batches(xval, yval, batchSize):
            for xbatch, ybatch in zip([xval], [yval]):
                outputs = net(xbatch)
                loss = criterion(outputs, ybatch)

                runningLoss += loss
                count += 1

            net.train(True)

        valLoss = runningLoss / count

        if valLoss < bestLoss:
            bestLoss = valLoss
            bestEpoch = epoch

        # print(f"Info: Epoch = {epoch}, loss = {meanLoss}, validation loss = {valLoss}")

        valLosses[epoch] = valLoss

    snapshotFile = os.path.join(snapshotroot, f"epoch_{bestEpoch}")

    net = Net(forestType, numTrees, depth)
    net.load_state_dict(torch.load(snapshotFile, map_location="cpu"))
    net = net.to(device)

    totalCorrect = 0
    count = 0

    with torch.no_grad():
        net.train(False)

        # for xbatch, ybatch in batches(xtest, ytest, batchSize):
        for xbatch, ybatch in zip([xtest], [ytest]):
            outputs = net(xbatch)
            outputs = torch.argmax(outputs, dim=1)

            tmpCorrect = torch.sum(outputs == ybatch)

            totalCorrect += tmpCorrect
            count += xbatch.shape[0]

    accuracy = float(totalCorrect) / float(count)

    print(f"Info: Best epoch = {bestEpoch}, test accuracy = {accuracy}, misclassification rate = {1.0 - accuracy}")

    return accuracy, valLosses
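# The training loop above iterates with a batches() helper that is not shown in
# this snippet. A minimal, hypothetical stand-in, assuming it simply yields
# contiguous (x, y) mini-batches of at most batchSize rows:
def batches(x, y, batchSize):
    for begin in range(0, x.shape[0], batchSize):
        end = begin + batchSize
        yield x[begin:end], y[begin:end]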
def run_experiment(settings):
    ############################################################################
    exponential_family = EinsumNetwork.BinomialArray

    K = 1

    # structure = 'poon-domingos'
    structure = 'binary-trees'

    # 'poon-domingos'
    pd_num_pieces = [2]

    # 'binary-trees'
    depth = 1
    num_repetitions = 2
    width = 4

    num_epochs = 2
    batch_size = 3
    online_em_frequency = 1
    online_em_stepsize = 0.05
    SGD_learning_rate = 0.05
    ############################################################################

    exponential_family_args = None
    if exponential_family == EinsumNetwork.BinomialArray:
        exponential_family_args = {'N': 80}
    if exponential_family == EinsumNetwork.CategoricalArray:
        exponential_family_args = {'K': 256}
    if exponential_family == EinsumNetwork.NormalArray:
        exponential_family_args = {'min_var': 1e-6, 'max_var': 0.1}

    iris = datasets.load_iris()
    train_x = iris.data * 10
    train_labels = iris.target

    # print(train_x[0])
    # print(train_labels[0])

    # self generated data
    # train_x = np.array([np.array([1, 1, 1, 1]) for i in range(50)] + [np.array([3, 3, 3, 3]) for i in range(50)] + [np.array([8, 8, 8, 8]) for i in range(50)])
    # train_labels = np.array([0 for i in range(50)] + [1 for i in range(50)] + [2 for i in range(50)])

    if exponential_family == EinsumNetwork.NormalArray:
        train_x /= 255.
        train_x -= .5

    classes = np.unique(train_labels).tolist()

    train_x = torch.from_numpy(train_x).to(torch.device(device))

    # Make EinsumNetwork
    ######################################
    einets = []
    ps = []
    for c in classes:
        if structure == 'poon-domingos':
            pd_delta = [[width / d] for d in pd_num_pieces]
            graph = Graph.poon_domingos_structure(shape=(width,), delta=pd_delta)
        elif structure == 'binary-trees':
            graph = Graph.random_binary_trees(num_var=train_x.shape[1], depth=depth,
                                              num_repetitions=num_repetitions)
        else:
            raise AssertionError("Unknown Structure")

        args = EinsumNetwork.Args(
            num_var=train_x.shape[1],
            num_dims=1,
            num_classes=1,
            num_sums=K,
            num_input_distributions=K,
            exponential_family=exponential_family,
            exponential_family_args=exponential_family_args,
            online_em_frequency=online_em_frequency,
            online_em_stepsize=online_em_stepsize)

        einet = EinsumNetwork.EinsumNetwork(graph, args)
        print(einet)

        init_dict = get_init_dict(einet, train_x, train_labels=train_labels, einet_class=c)
        einet.initialize(init_dict)
        einet.to(device)
        einet = einet.float()
        einets.append(einet)

        ps.append(np.count_nonzero(train_labels == c))

    ps = [p / sum(ps) for p in ps]
    ps = torch.tensor(ps).to(torch.device(device))

    mixture = EinetMixture(ps, einets, classes=classes)

    num_params = mixture.eval_size()

    # Train
    ######################################

    """ Generative training """
    start_time = time.time()

    for (einet, c) in zip(einets, classes):
        train_x_c = train_x[[l == c for l in train_labels]]
        train_N = train_x_c.shape[0]

        for epoch_count in range(num_epochs):
            idx_batches = torch.randperm(train_N, device=device).split(batch_size)

            total_ll = 0.0
            for idx in idx_batches:
                batch_x = train_x_c[idx, :].float()
                outputs = einet.forward(batch_x)
                ll_sample = EinsumNetwork.log_likelihoods(outputs)
                log_likelihood = ll_sample.sum()
                log_likelihood.backward()

                einet.em_process_batch()
                total_ll += log_likelihood.detach().item()

            einet.em_update()
            print(f'[{epoch_count}] total log-likelihood: {total_ll/train_N}')

    end_time = time.time()

    """ Discriminative training """
    def discriminative_learning():
        sub_net_parameters = None
        for einet in mixture.einets:
            if sub_net_parameters is None:
                sub_net_parameters = list(einet.parameters())
            else:
                sub_net_parameters += list(einet.parameters())
        sub_net_parameters += list(mixture.parameters())

        optimizer = torch.optim.SGD(sub_net_parameters, lr=SGD_learning_rate)
        loss_function = torch.nn.CrossEntropyLoss()

        train_N = train_x.shape[0]

        start_time = time.time()

        for epoch_count in range(num_epochs):
            idx_batches = torch.randperm(train_N, device=device).split(batch_size)

            total_loss = 0
            for idx in idx_batches:
                batch_x = train_x[idx, :].float()
                optimizer.zero_grad()
                outputs = mixture.forward(batch_x)
                target = torch.tensor([classes.index(train_labels[i]) for i in idx]).to(torch.device(device))
                loss = loss_function(outputs, target)
                loss.backward()
                optimizer.step()
                total_loss += loss.detach().item()

            print(f'[{epoch_count}] total loss: {total_loss}')

        end_time = time.time()

    ################
    # Experiment 1 #
    ################
    mixture.eval()
    train_ll = EinsumNetwork.eval_loglikelihood_batched(mixture, train_x.float(), batch_size=1)
    print()
    print("Experiment 1: Log-likelihoods --- train LL {}".format(train_ll / train_N))

    ################
    # Experiment 2 #
    ################
    train_labels = torch.tensor(train_labels).to(torch.device(device))
    acc_train = EinsumNetwork.eval_accuracy_batched(mixture, classes, train_x.float(),
                                                    train_labels, batch_size=1)
    print()
    print("Experiment 2: Classification accuracies --- train acc {}".format(acc_train))

    print()
    print(f'Network size: {num_params} parameters')
    print(f'Training time: {end_time - start_time}s')

    return {
        'train_ll': train_ll / train_N,
        'train_acc': acc_train,
        'network_size': num_params,
        'training_time': end_time - start_time,
    }
# fragment: continues a loop over image files defined earlier in the script
print('corrupt jpg file %s' % filepath)
# image_paths_paint.append(filepath)

if idx == NUM_SKETCH:
    break
if idx % 1000 == 0:
    print(idx)

print(image_paths_paint[0])
print(len(image_paths_paint))

paint_ids = np.empty(len(image_paths_real), dtype=int)
paint_ids.fill(1)
real_ids = np.empty(len(image_paths_real), dtype=int)
real_ids.fill(0)

data_x = image_paths_real + image_paths_paint
data_y = real_ids.tolist() + paint_ids.tolist()

# train with SVM
from sklearn import datasets
iris = datasets.load_iris()
digits = datasets.load_digits()

from sklearn import svm
clf = svm.SVC(gamma=0.001, C=100.)
clf.fit(digits.data[:-1], digits.target[:-1])
print(clf.predict(digits.data[-1:]))