Example no. 1
def P3_d():
    num = 100
    test_num = 100
    count = 0
    C = 50
    label = np.array([0, 1, 2, 3, 4])
    label_train = np.repeat(label, num)
    Mat_train = np.zeros((5 * num * 30, 64)).astype(float)

    for item in category:
        path = "../Problem3/train-" + str(num) + "/" + item + "/"
        collect(path, Mat_train, count)
        count += num

    Km = KMeans(n_clusters=C, max_iter=1000).fit(Mat_train)

    table = np.zeros((5 * num, 30, C))
    HS = np.zeros((5 * num, C))
    SS = np.zeros((5 * num, C))
    SM = np.zeros((5 * num, C))

    for i in range(5 * num):
        table[i] = create_table(Mat_train[i * 30:(i + 1) * 30], Km)
        HS[i] = HardSum(table[i])
        SS[i] = SoftSum(table[i])
        SM[i] = SoftMax(table[i])


    # testing data
    count_test = 0
    Mat_test = np.zeros((5 * test_num * 30, 64)).astype(float)
    label_test = np.repeat(label, test_num)
    table_test = np.zeros((5 * test_num, 30, C))
    HS_test = np.zeros((5 * test_num, C))
    SS_test = np.zeros((5 * test_num, C))
    SM_test = np.zeros((5 * test_num, C))

    for test_item in category:
        path = "../Problem3/test-" + str(test_num) + "/" + test_item + "/"
        collect(path, Mat_test, count_test)
        count_test += test_num

    for i in range(5 * test_num):
        table_test[i] = create_table(Mat_test[i * 30:(i + 1) * 30], Km)
        HS_test[i] = HardSum(table_test[i])
        SS_test[i] = SoftSum(table_test[i])
        SM_test[i] = SoftMax(table_test[i])

    print("doing KNN...")
    nei = 11
    neigh_HS = KNC(n_neighbors=nei).fit(HS, label_train)
    print("HardSum: " + str(neigh_HS.score(HS_test, label_test)))
    neigh_SS = KNC(n_neighbors=nei).fit(SS, label_train)
    print("SoftSum: " + str(neigh_SS.score(SS_test, label_test)))
    neigh_SM = KNC(n_neighbors=nei).fit(SM, label_train)
    print("SoftMax: " + str(neigh_SM.score(SM_test, label_test)))
Example no. 2
def run_training(corruption_chance, perplexity, batch_size):

  global train_data, test_data
  corrupt = lambda x: 0 if np.random.uniform() <= corruption_chance else x
  train_data = np.vectorize(corrupt)(train_data)
  test_data = np.vectorize(corrupt)(test_data)

  def hook(args):
    print(args)
    if np.isnan(args[2]):
      raise Exception
    if isinstance(args[0], PTSNE) and args[2] <= 0.0:
      raise Exception

  vae = VAE(
    [n_input_dimensions],
    get_gaussian_network_builder(vae_encoder_layers, n_latent_dimensions),
    gaussian_prior_supplier,
    gaussian_supplier,
    get_bernoulli_network_builder(vae_decoder_layers, n_input_dimensions),
    bernoulli_supplier)

  ptsne = PTSNE(
    [n_input_dimensions],
    get_feed_forward_network_builder(vptsne_layers, batch_normalization=False),
    perplexity=perplexity)

  vptsne = VPTSNE(
    vae,
    get_feed_forward_network_builder(vptsne_layers, batch_normalization=False),
    perplexity=perplexity)

  ptsne.fit(train_data, n_iters=1500, batch_size=batch_size, hook_fn=hook)
  vptsne.fit(train_data, n_iters=1500, n_vae_iters=10000, batch_size=batch_size, vae_batch_size=1000, hook_fn=hook)

  knn_score = KNC(n_neighbors=1).fit(
    ptsne.transform(train_data), train_labels).score(
    ptsne.transform(test_data), test_labels)
  knn_score_vptsne = KNC(n_neighbors=1).fit(
    vptsne.transform(train_data), train_labels).score(
    vptsne.transform(test_data), test_labels)

  tw = trustworthiness(
    test_data,
    ptsne.transform(test_data),
    n_neighbors=12)
  tw_vptsne = trustworthiness(
    test_data,
    vptsne.transform(test_data),
    n_neighbors=12)

  train_data = np.copy(non_corrupted_train_data)
  test_data = np.copy(non_corrupted_test_data)

  return knn_score, tw, knn_score_vptsne, tw_vptsne
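A hedged usage sketch for run_training, assuming the module-level globals (train_data, test_data, their non-corrupted copies, the label arrays and the network builders) are already set up as the function expects:

# hypothetical driver: sweep corruption levels at a fixed perplexity and batch size
for corruption_chance in (0.0, 0.1, 0.25):
    knn, tw, knn_v, tw_v = run_training(corruption_chance, perplexity=30, batch_size=200)
    print(corruption_chance, knn, tw, knn_v, tw_v)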
Example no. 3
def main():
    df = pd.read_csv("/home/saxobeat/PythonML/MLCodes/Spambase/Dataset/spamdata.csv")
    features = df.iloc[:,0:57].values
    labels = df.iloc[:,57].values
    X_train, X_test, y_train, y_test = tts(features, labels, test_size=0.25, shuffle=True, random_state=8)
    models = []
    models.append(('LR', LR(solver='lbfgs', max_iter=2000, tol=0.0001)))
    models.append(('LDA', LDA()))
    models.append(('DTC', DTC()))
    models.append(('KNC', KNC()))
    models.append(('MNB', MNB()))
    models.append(('RFC', RFC(n_estimators=100)))
    models.append(('SVC', SVC(gamma='scale', kernel='rbf', probability=True)))
    plt.plot([0, 1], [0, 1], 'k', linestyle='--')  # chance diagonal
    for name,model in models:
        model.fit(X_train, y_train)
        y_pred = model.predict_proba(X_test)
        y_score = y_pred[:,1]
        fpr, tpr, thresholds = roc_curve(y_test, y_score)
        label = "{}({})".format(name,auc(fpr, tpr))
        plt.plot(fpr,tpr,label=label)
        plt.legend()
        # plt.legend(name)

    plt.title("Reciever Operating Characteristics")
    plt.grid()
    plt.cool()
    plt.xlabel("Fasle Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.savefig("roc.pdf")
Example no. 4
def run():
    data = np.load('../data/data.npy')
    X = data[:284805, :-1]
    y = data[:284805, -1]

    for method in ['nb', 'knn', 'svm']:
        for features in ['original', 'lmm', 'chi2']:
            print(method, features, sep='\t', end='\t')
            startTime = time.time()
            if method == 'nb':
                model = GaussianNB()
            elif method == 'knn':
                model = KNC()
            else:
                model = SVC(probability=True, kernel='linear', tol=1)
            if features == 'original':
                pred = cross_validation(model, X, y)
            elif features == 'lmm':
                clf = trLMM(helperModel=model)
                pred = cross_validation(clf, X, y)
            else:
                Xtmp = np.abs(X)
                Xtmp = SelectKBest(chi2, k=10).fit_transform(Xtmp, y)
                pred = cross_validation(model, Xtmp, y)
            np.save('../result/pred_' + method + '_' + features, pred)

            seconds = time.time() - startTime

            np.save('../result/seconds_' + method + '_' + features, seconds)

            score = evaluation_aucPR(pred, y)

            print(score)
Example no. 5
 def __init__(self,
              n_neighbors=5,
              weights='uniform',
              algorithm='auto',
              leaf_size=30,
              p=2,
              metric='minkowski',
              metric_params=None,
              n_jobs=None):
     self.p = p
     self.weights = weights
     self.metric = metric
     self.algorithm = algorithm
     self.n_jobs = n_jobs
     self.metric_params = metric_params
     self.leaf_size = leaf_size
     self.n_neighbors = n_neighbors
     self.model = KNC(weights=self.weights,
                      metric_params=self.metric_params,
                      p=self.p,
                      metric=self.metric,
                      leaf_size=self.leaf_size,
                      n_jobs=self.n_jobs,
                      n_neighbors=self.n_neighbors,
                      algorithm=self.algorithm)
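The wrapper above only constructs the underlying estimator; presumably the class also forwards calls to it. A minimal delegation sketch under that assumption (these methods are not shown in the original):

 def fit(self, X, y):
     # hypothetical: delegate training to the wrapped KNC
     self.model.fit(X, y)
     return self

 def predict(self, X):
     # hypothetical: delegate prediction to the wrapped KNC
     return self.model.predict(X)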
Example no. 6
def model(X_train, y_train, X_test=[], y_test=[], method="LR"):
    # X_train - model inputs for training
    # X_test - model inputs for testing
    # y_train - model outputs for training
    # y_test - model outputs for testing
    # method - which machine-learning model to use

    if method == "LR":
        lr = LR()
    elif method == "KNC":
        lr = KNC()
    elif method == "RFC":
        lr = RFC()
    elif method == "GBC":
        lr = GBC()
    elif method == "DTC":
        lr = DTC()
    elif method == "MLPClassifier":
        lr = MLPClassifier()
    elif method == "LinearSVC":
        lr = LinearSVC()
    elif method == "SVC":
        lr = SVC()
    else:
        print("unknown method")
        return False

    if not isinstance(X_test, list) and not isinstance(y_test, list):
        lr = lr.fit(X_train, y_train.iloc[:, 0])
        y_mod_train = lr.predict(X_train)
        y_mod_test = lr.predict(X_test)
        # average - parameter for computing the F-measure (micro, macro, weighted, samples)
        f1_train = f1_score(y_train, y_mod_train, average='macro')
        f1_test = f1_score(y_test, y_mod_test, average='macro')
        out = {
            "model": lr,
            "f1_train": f1_train,
            "f1_test": f1_test,
            "y_mod_train": y_mod_train,
            "y_mod_test": y_mod_test
        }
        scores_train = cross_val_score(lr,
                                       X_train,
                                       y_train.iloc[:, 0],
                                       cv=5,
                                       scoring='f1_macro')
        for i in range(len(scores_train)):
            out["cros" + str(i)] = scores_train[i]
        return out

    else:
        # compute cross-validation scores only
        scores_train = cross_val_score(lr,
                                       X_train,
                                       y_train.iloc[:, 0],
                                       cv=5,
                                       scoring='f1_macro')
        return np.mean(scores_train)
Example no. 7
 def NLMmodelexp1():
     modelExperiment(
         nlmInsampleData, nlmOutsampleData, 'NLMdata/', fullFV,
         [LR(), DT(), KNC(), RF(),
          ABC(), GNB(), QDA()], [
              'LogisticRegression', 'DTree', 'KNN', 'RandomForest',
              'AdaBoosted', 'GaussianNB', 'QuadraticDiscriminantAnalysis'
          ], 'NLMmodelExperiment1.csv', 'NLMclassifier_plot1.png', True)
Example no. 8
def simple_pipeline_demo():
    return Pipeline([
        ('norm_image', normalize_transform),
        ('threshold', thresh_step),
        ('shape_analysis', shape_step),
        ('feature', feature_step),
        ('KNN', KNC(n_neighbors=1))
        # use just the single nearest neighbor (more neighbors often generalize better)
    ])
Example no. 9
def KNN(X_train, y_train, X_test, nbs):
    pred = []
    for n in nbs:
        # algorithm='auto' (the default) picks the best neighbor-search structure
        neigh = KNC(n_neighbors=n)
        neigh.fit(X_train, y_train)
        preda = list(neigh.predict(X_test))
        pred.append(preda)  # collect the predictions for each neighbor count

    #print(pred)
    return pred
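A short usage sketch for the function above, with made-up toy data (everything below is an assumption for illustration):

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier as KNC

X = np.random.rand(100, 4)
y = (X[:, 0] > 0.5).astype(int)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
pred = KNN(X_train, y_train, X_test, nbs=[1, 5, 11])  # one prediction list per k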
Example no. 10
 def SOmodelexp1():
     modelExperiment(
         SOInsampleData, SOOutsampleData, 'stackoverflowdata/', fullFV,
         [LR(),
          DT(),
          KNC(),
          RF(n_estimators=200),
          ABC(),
          GNB(),
          QDA()], [
              'LogisticRegression', 'DTree', 'KNN', 'RandomForest',
              'AdaBoosted', 'GaussianNB', 'QuadraticDiscriminantAnalysis'
          ], 'SOmodelExperiment1.csv', 'SOclassifier_plot1.png', True)
Example no. 11
def recluster_hungmatch_aware(data,
                              Y,
                              problemclusters,
                              n_clust=2,
                              rnlog=None,
                              debug=False,
                              showset={},
                              roind=None,
                              coind=None,
                              target_cls=None,
                              Y2=None,
                              algo='knn'):

    indices = [y in problemclusters for y in Y]
    data2 = data[indices]

    #  OLD CRAB
    # yh = predictgmm(n_clust, data2)

    # Nu start
    roco = dict(zip(roind, coind))
    indices_int_y1 = np.nonzero(indices)[0]
    indices_int_y2 = np.array([roco.get(a, -1) for a in indices_int_y1])
    target_ok_mask = [(Y2[i] in target_cls) if i >= 0 else False
                      for i in indices_int_y2]
    indices_int_y1 = indices_int_y1[target_ok_mask]
    indices_int_y2 = indices_int_y2[target_ok_mask]
    if algo == 'copkmeans':
        grps = [
            indices_int_y1[[Y2[i] == targetcls for i in indices_int_y2]]
            for targetcls in target_cls
        ]
        mustlink = [(a, b) for grp in grps for a in grp
                    for b in grp]  # i hope this produces all the constraints
        yh = CKM.cop_kmeans(data2, ml=mustlink, k=n_clust)[0]
    else:
        model = KNC(weights='distance')  # might also try uniform
        train_y = Y2[indices_int_y2]
        s = spacemap(np.unique(train_y))
        model.fit(data[indices_int_y1], [s.getint[y] for y in train_y])
        yh = model.predict(data2)

    ##### Nu end
    maxy = np.max(Y)
    Y[indices] = yh + maxy + 1

    rnlog.log(problemclusters, np.unique(yh) + maxy + 1)
    if debug or 'renaming' in showset:
        print('renaming: set%s' % rnlog.dataset, problemclusters,
              np.unique(yh) + maxy + 1)
    return Y
Example no. 12
def make_classifier(group):
    # rescale so height and weight contribute equally
    # (raw heights are roughly 30 times smaller than raw weights)
    x_data = group['samples']
    heights = [x[0] for x in x_data]
    weights = [x[1] for x in x_data]
    max_h = max(heights) + 0.003
    max_w = max(weights) + 1
    scaled_heights = [h/max_h for h in heights]
    scaled_weights = [w/max_w for w in weights]

    # train and create classifier
    classifier = KNC(n_neighbors=2)
    classifier.fit(list(zip(scaled_heights, scaled_weights)), group['classes'])

    return classifier
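One caveat the function leaves implicit: the same max_h and max_w used during training must also scale any new sample at prediction time. A hedged helper sketch, assuming the scaling constants are kept alongside the classifier (in the original they are local variables, so they would need to be returned or stored):

# hypothetical companion to make_classifier
def classify_sample(classifier, max_h, max_w, height, weight):
    # scale the new sample with the training-time constants, then predict
    return classifier.predict([[height / max_h, weight / max_w]])[0]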
Example no. 13
def predict():
    iris = pd.read_csv("iris.csv")
    neigh = KNC(n_neighbors=3)
    neigh.fit(iris.iloc[:, 0:4], iris.iloc[:, 4])

    my_pred = None  # avoid a NameError on non-POST requests
    if request.method == 'POST':
        sl = request.form['sl']
        sw = request.form['sw']
        pl = request.form['pl']
        pw = request.form['pw']
        pred = pd.DataFrame([[float(sl), float(sw), float(pl), float(pw)]])  # form fields arrive as strings
        pred.columns = [
            'Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width'
        ]
        my_pred = neigh.predict(pred)
    return render_template('result.html', prediction=my_pred)
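The view above assumes a Flask app with a route bound to it. A minimal wiring sketch, assuming the view is defined in the same module (the app name, endpoint path and template are assumptions):

# hypothetical wiring for the view above
from flask import Flask, request, render_template
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier as KNC

app = Flask(__name__)
app.add_url_rule('/predict', view_func=predict, methods=['GET', 'POST'])

if __name__ == '__main__':
    app.run(debug=True)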
Example no. 14
    def training(self):

        self.lr = KNC(n_neighbors=int(self.neighbours.text()),
                      weights=self.weights.currentText(),
                      algorithm=self.algorithm.currentText())
        self.lr.fit(self.x_train, self.y_train)

        self.pre = self.lr.predict(self.x_test)
        self.mae.setText(
            str(metrics.mean_absolute_error(self.y_test, self.pre)))
        self.mse.setText(str(metrics.mean_squared_error(self.y_test,
                                                        self.pre)))
        self.rmse.setText(
            str(np.sqrt(metrics.mean_squared_error(self.y_test, self.pre))))
        self.accuracy.setText(str(accuracy_score(self.pre, self.y_test)))

        text = steps.classification_(self.y_test, self.pre)
        self.report.setPlainText(text)
Example no. 15
 def log_fn(args):
     print(args)
     if isinstance(args[0], VAE):
         return
     loss_file.write(str(args[2]) + "\n")
     if args[1] % 400 == 0:
         transformed_train = args[0].transform(mnist_train_images)
         transformed_test = args[0].transform(mnist_test_images)
         trustworthiness_file.write(
             str(
                 trustworthiness(mnist_test_images,
                                 transformed_test,
                                 n_neighbors=12)) + "\n")
         knn_file.write(
             str(
                 KNC(n_neighbors=1).fit(
                     transformed_train, mnist_train_labels).score(
                         transformed_test, mnist_test_labels)) + "\n")
Example no. 16
def RunSkMethod(s='ppn'):
    isTree = False
    if s == 'ppn':
        method = Perceptron(max_iter=40, eta0=0.1, random_state=0, shuffle=True)  # n_iter was renamed to max_iter in newer scikit-learn
    elif s == 'lr':
        method = LogisticRegression(C=100.0, random_state=0)
    elif s == 'svc':
        method = SVC(kernel='linear', C=1.0, random_state=0)
    elif s == 'svm':
        method = SVC(kernel='rbf',
                     random_state=0,
                     gamma=float(args[2]),
                     C=float(args[3]))
    elif s == 'tree':
        method = DTC(criterion='entropy', max_depth=3, random_state=0)
        isTree = True
    elif s == 'forest':
        method = RFC(criterion='entropy',
                     n_estimators=10,
                     random_state=1,
                     n_jobs=2)
    elif s == 'knn':
        method = KNC(n_neighbors=5, p=2, metric='minkowski')
    elif s == 'pca':
        method = PCA(n_components=2)
        return  # PCA is a transformer, not a classifier; nothing to fit or plot here

    dd = ir.IrisDataSets()
    dd.useFit(method)
    pdr.plot_decision_regions(X=dd.X_combined_std,
                              y=dd.y_combined,
                              classifier=method,
                              test_idx=range(105, 150))
    dd.drawGraph()

    if s == 'lr':
        print(method.predict_proba(dd.X_test_std[0, :].reshape(1, -1)))

    # after this function, execute following command on terminal
    # dot -Tpng tree.dot -o tree.png
    if isTree:
        export_graphviz(method,
                        out_file='tree.dot',
                        feature_names=['petal length', 'petal width'])
Example no. 17
    def __init__(self, **kwargs):
        r"""Initialize KNeighbors instance.
        """
        warnings.filterwarnings(action='ignore',
                                category=ChangedBehaviorWarning)
        warnings.filterwarnings(action='ignore', category=ConvergenceWarning)
        warnings.filterwarnings(action='ignore',
                                category=DataConversionWarning)
        warnings.filterwarnings(action='ignore',
                                category=DataDimensionalityWarning)
        warnings.filterwarnings(action='ignore', category=EfficiencyWarning)
        warnings.filterwarnings(action='ignore', category=FitFailedWarning)
        warnings.filterwarnings(action='ignore', category=NonBLASDotWarning)
        warnings.filterwarnings(action='ignore',
                                category=UndefinedMetricWarning)

        self._params = dict(weights=ParameterDefinition(
            ['uniform', 'distance']),
                            algorithm=ParameterDefinition(
                                ['auto', 'ball_tree', 'kd_tree', 'brute']))
        self.__kn_classifier = KNC()
Example no. 18
    def kncScores(self, Xn, y, cv=5, param_name='n_neighbors', paramRange=(1, 10, 1), trainW=1, testW=2, title='KNC', clfArg={}, plot=False):
        """
        Run validation_curve with a K-neighbors classifier (KNC) and pick the
        best parameter value by the highest test_score.
        cv is the number of cross-validation folds. param_name defaults to n_neighbors.
        paramRange=(a, b, c) is the range of values to evaluate: start a, stop b, step c.
        Once the best value is found, its test_score and train_score are combined into
        weighted_score = (test_score*testW + train_score*trainW) / (testW + trainW),
        where trainW and testW are the weights.
        clfArg is a dictionary of extra parameters passed to the KNC.
        Set plot=True to see how the best score is selected.
        """
        clf = KNC(**clfArg)
        model_scores = list()
        param_range = np.arange(paramRange[0], paramRange[1], paramRange[2])
        train_sc, test_sc = validation_curve(clf, Xn, y, param_name=param_name, param_range=param_range, cv=cv)
        param_score = self.plotTrainTest(train_sc, test_sc, param_range, t=title, xlabel=param_name, plot=plot)
        scoreDic = {'model': title, 'param_name': param_name}
        scoreDic.update(param_score)
        model_scores.append(scoreDic.copy())
        return self.scoreModelListDf(model_scores, trainW=trainW, testW=testW)
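A hedged usage sketch for the method above; "selector" stands for an instance of the containing class (a placeholder name), and Xn/y are an assumed feature matrix and label vector:

# hypothetical usage: sweep n_neighbors over the odd values 1..29
scores_df = selector.kncScores(Xn, y, cv=5, paramRange=(1, 30, 2),
                               clfArg={'weights': 'distance'}, plot=True)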
Example no. 19
vae = VAE([n_input_dimensions],
          get_gaussian_network_builder(vae_encoder_layers,
                                       n_latent_dimensions),
          gaussian_prior_supplier, gaussian_supplier,
          get_bernoulli_network_builder(vae_decoder_layers,
                                        n_input_dimensions),
          bernoulli_supplier)

vae.fit(mnist.train._images, n_iters=14000, batch_size=1000, hook_fn=print)

from sklearn.neighbors import KNeighborsClassifier as KNC
print(
    "1-NN, test set",
    KNC(n_neighbors=1).fit(vae.transform(mnist.train._images),
                           mnist.train._labels).score(
                               vae.transform(mnist.test._images),
                               mnist.test._labels))


def tt():

    vptsne_layers = LayerDefinition.from_array([(200, tf.nn.relu),
                                                (200, tf.nn.relu),
                                                (2000, tf.nn.relu), (2, None)])

    from vptsne import PTSNE
    vptsne = VPTSNE(
        #    [n_input_dimensions],
        vae,
        get_feed_forward_network_builder(vptsne_layers),
        perplexity=30)
Example no. 20
km_cluster = KM(n_clusters=10, random_state=37).fit(X_train_mnist)
X_train_mnist_km = km_cluster.transform(X_train_mnist)
X_test_mnist_km = km_cluster.transform(X_test_mnist)
print('km\n')
"""
##############################Classification##############################
"""
from sklearn.neighbors import KNeighborsClassifier as KNC
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier as MLPC
#People DONE#
###DONE###
knc_people = KNC(n_neighbors=10, weights='distance',
                 p=1).fit(X_train_people_nmf, y_train_people)
print("Test set score of kNN people: {:.3f}".format(
    knc_people.score(X_test_people_nmf, y_test_people)))

###DONE###
dtc_people = DTC(random_state=37).fit(X_train_people_stand, y_train_people)
print("Test set score of DTC people: {:.3f}".format(
    dtc_people.score(X_test_people_stand, y_test_people)))

###DONE###
rfc_people = RFC(n_estimators=100,
                 max_depth=25,
                 bootstrap=False,
                 random_state=37).fit(X_train_people_norm, y_train_people)
print("Test set score of RFC people: {:.3f}".format(
    rfc_people.score(X_test_people_norm, y_test_people)))
Example no. 21
            print "Failed saving predicted results."

    return predict_df


if __name__ == "__main__":
    # Prepare train data and test data
    train_df, train_target_df, test_df, predict_df = pre_proc_all()

    # Train and evaluate the model
    n_neighbors_set = range(1, 30)
    cols_for_res = ["clf", "score_mean", "score_std"]  #######
    results_set = pd.DataFrame(columns=cols_for_res)  #######
    for i, n_neighbors in enumerate(n_neighbors_set):
        # Define the classifyer model
        clf = KNC(n_neighbors=n_neighbors)

        # Assess the model with cross validation data
        tgt_arr = train_target_df.to_numpy().reshape(-1)  # as_matrix() was removed in newer pandas
        scores = cross_val_score(clf, train_df, tgt_arr, cv=4)

        temp = pd.DataFrame(
            [[clf, scores.mean(), scores.std()]],
            columns=cols_for_res,
            index=[i])
        results_set = pd.concat([results_set, temp])  #######
        print("{0:4d}/{1:4d} Param n_neighbors: {2:2d}, ".format(
            i, len(n_neighbors_set), n_neighbors), end="")
        print("Score mean: {0:0.3f}, std: {1:0.3f}".format(
            scores.mean(), scores.std()))
Example no. 22
data['catsize'] = pd.factorize(data['catsize'])[0]
data['type'] = pd.factorize(data['type'])[0]




# Training and Test data using 
from sklearn.model_selection import train_test_split
train,test = train_test_split(data,test_size = 0.2) # 0.2 => 20 percent of entire data 

# KNN using sklearn 
# Importing Knn algorithm from sklearn.neighbors
from sklearn.neighbors import KNeighborsClassifier as KNC

# for 3 nearest neighbours 
neigh = KNC(n_neighbors= 3)

# Fitting with training data
# (columns 0-15 are the features; column 16 holds the factorized 'type' label,
#  so it must be excluded from the feature slice)
neigh.fit(train.iloc[:, 0:16], train.iloc[:, 16])

# train accuracy
train_acc = np.mean(neigh.predict(train.iloc[:, 0:16]) == train.iloc[:, 16])

# test accuracy
test_acc = np.mean(neigh.predict(test.iloc[:, 0:16]) == test.iloc[:, 16])


# for 5 nearest neighbours
neigh = KNC(n_neighbors=5)

# fitting with training data
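The snippet breaks off here; mirroring the k=3 block above, a plausible completion (an assumption, not the author's original lines) is:

neigh.fit(train.iloc[:, 0:16], train.iloc[:, 16])
train_acc5 = np.mean(neigh.predict(train.iloc[:, 0:16]) == train.iloc[:, 16])
test_acc5 = np.mean(neigh.predict(test.iloc[:, 0:16]) == test.iloc[:, 16])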
Example no. 23
@author: Rohith
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.neighbors import KNeighborsClassifier as KNC
from sklearn.model_selection import train_test_split

iris = pd.read_csv("E:\\Data Science\\Assignments\\Python code\\KNN\\iris.csv")
# help(train_test_split)  # interactive help call, commented out

train, test = train_test_split(iris, test_size=0.2)
#for 3 neighbors
neigh = KNC(n_neighbors=3)
#Fitting with training data
neigh.fit(train.iloc[:, 0:4], train.iloc[:, 4])

#to find train accuracy

train_acc = np.mean(neigh.predict(train.iloc[:, 0:4]) == train.iloc[:, 4])
#to find test accuracy

test_acc = np.mean(neigh.predict(test.iloc[:, 0:4]) == test.iloc[:, 4])

#for 5 neighbors

neigh1 = KNC(n_neighbors=5)
neigh1.fit(train.iloc[:, 0:4], train.iloc[:, 4])
Example no. 24
def tt():

    vptsne_layers = LayerDefinition.from_array([(200, tf.nn.relu),
                                                (200, tf.nn.relu),
                                                (2000, tf.nn.relu), (2, None)])

    from vptsne import PTSNE
    vptsne = VPTSNE(
        #    [n_input_dimensions],
        vae,
        get_feed_forward_network_builder(vptsne_layers),
        perplexity=30)

    fit_params = {
        "hook_fn": print,
        "n_iters": 2000,
        "batch_size": 200,
        "deterministic": False,
        "fit_vae": False,
        "n_vae_iters": 14000,
        "vae_batch_size": 1000
    }

    #vptsne.load_weights("models/mnist_vptsne.ckpt", "models/mnist_vae.ckpt")
    vptsne.fit(mnist.train._images, **fit_params)
    #vptsne.save_weights("models/mnist_vptsne.ckpt", "models/mnist_vae.ckpt")
    #display_reconstructions(10)

    #from sklearn.decomposition import PCA
    #p = PCA(n_components=3).fit(mnist.train._images)
    #train = p.transform(mnist.train._images)
    #vptsne.fit(train, **fit_params)

    #transformed = vptsne.transform(train)
    transformed = vptsne.transform(mnist.train._images, reconstruct=True)
    #transformed = vae.transform(mnist.train._images)

    transformed_test = vptsne.transform(mnist.test._images, reconstruct=True)

    print(
        "Trustworthiness, test set",
        trustworthiness(mnist.test._images, transformed_test, n_neighbors=12))

    #print(
    #  "Trustworthiness, first 10k",
    #  trustworthiness(
    #    mnist.train._images[:10000],
    #    vptsne.transform(mnist.train._images[:10000]),
    #    n_neighbors=12))

    from sklearn.neighbors import KNeighborsClassifier as KNC
    print(
        "1-NN, test set",
        KNC(n_neighbors=1).fit(transformed, mnist.train._labels).score(
            transformed_test, mnist.test._labels))

    plt.clf()
    color_palette = np.random.rand(100, 3)
    for label in np.unique(mnist.train._labels):
        tmp = transformed[mnist.train._labels == label]
        plt.scatter(tmp[:, 0], tmp[:, 1], s=0.2, c=color_palette[label])
    plt.show()
Example no. 25
confusion_matrix(Y_train,Y_pred)
accuracy_score(Y_train, Y_pred)

# Predicting test set results
Y_pred = clf.predict(X_test)
confusion_matrix(Y_test,Y_pred)
accuracy_score(Y_test, Y_pred)

# Another method without Hyperparameter tunning

# running the KNN algorithm for 3 to 49 nearest neighbours (odd numbers) and
# storing the accuracy values
accuracy = []
from sklearn.neighbors import KNeighborsClassifier as KNC
for i in range(3,50,2):
    neigh = KNC(n_neighbors=i)
    neigh.fit(X_train,Y_train)
    train_acc = np.mean(neigh.predict(X_train)==Y_train)
    test_acc = np.mean(neigh.predict(X_test)==Y_test)
    accuracy.append([train_acc,test_acc])

import matplotlib.pyplot as plt # library to do visualizations 

# train accuracy plot 
plt.plot(np.arange(3,50,2),[i[0] for i in accuracy],"bo-")
# test accuracy plot
plt.plot(np.arange(3,50,2),[i[1] for i in accuracy],"ro-")
plt.legend(["train","test"])

print(accuracy)
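From the collected pairs, the k with the best test accuracy can be read off directly; a small follow-up sketch (variable names taken from the loop above):

ks = np.arange(3, 50, 2)
best_k = ks[np.argmax([a[1] for a in accuracy])]  # a[1] is the test accuracy
print("best k:", best_k)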
#Feature scaling

from sklearn.preprocessing import RobustScaler as RS
scaler = RS()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# In[44]:

#Training

from sklearn.neighbors import KNeighborsClassifier as KNC

classifier = KNC(n_neighbors=21)
classifier.fit(X_train, Y_train)

# In[45]:

import pickle

# In[46]:

#Saving

filename = 'model_loc_time_knn.pkl'

pickle.dump(classifier, open(filename, 'wb'))

# In[47]:
Example no. 27
                 #"GNB",
                 "QDA"]
model_types = [LR,
               RFC,
               #ABC,
               MLPC,
               KNC,
               SVC,
               #DTC,
               #GNB,
               QDA]
models = [LR(),
          RFC(n_estimators=30),
          #ABC(),
          MLPC(),
          KNC(),
          SVC(probability=True),
          #DTC(),
          #GNB(),
          QDA()]
models2 = copy.deepcopy(models)


### experiment bright students math finance
N = 10000  # total size across the two groups (S and T)

minority_percent = 0.3
MIN = int(minority_percent * N)
MAJ = int((1 - minority_percent) * N)
# print(MIN, MAJ)
# p_S_brightmath = 0.9
Example no. 28
# all variables are binary in nature, stored as integers, but 'type' should
# be a factor, so convert it to a categorical dtype
zoo['type'] = zoo['type'].astype('category')

zoo.type.unique()
zoo.type.value_counts()
# 1, 4, 2, 7, 6, 5, 3      there are 7 types

# splitting the data into train:test 70:30
random.seed(123)
train, test = train_test_split(zoo, test_size=0.3, random_state=123)  # random_state makes the split reproducible; random.seed alone does not affect it

##################### Step 2. KNN Classification 

###### Building model using k=3
knn = KNC(n_neighbors = 3)

x = train.iloc[:,1:17]
y = train.iloc[:,17]

knn.fit(x, y)
# train accuracy
train_acc = np.mean(knn.predict(train.iloc[:,1:17]) ==train.iloc[:,17]) # 98.57
# test accuracy
test_acc = np.mean(knn.predict(test.iloc[:,1:17]) == test.iloc[:,17]) # 87.10

####### Running the model for various values of k
accuracy = []

for i in range(1,50,1):
    knn = KNC(n_neighbors =i)
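The loop body is truncated here; a plausible completion, mirroring the accuracy-collection pattern of Example no. 25 (an assumption, not the author's original lines):

    knn.fit(x, y)
    train_acc = np.mean(knn.predict(train.iloc[:, 1:17]) == train.iloc[:, 17])
    test_acc = np.mean(knn.predict(test.iloc[:, 1:17]) == test.iloc[:, 17])
    accuracy.append([train_acc, test_acc])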
Example no. 29
grr = pdplt.scatter_matrix(iris_dataframe,
                           c=y_train,
                           figsize=(15, 15),
                           marker='o',
                           hist_kwds={'bins': 20},
                           s=60,
                           alpha=.8,
                           cmap=mglearn.cm3)

# ### Building the model: K-Nearest neighbours

# In[67]:

# importing the classifier
from sklearn.neighbors import KNeighborsClassifier as KNC
knn = KNC(n_neighbors=1)

# In[96]:

knn.fit(X_train, y_train)

# ### Making Predictions
# We can take the measurements of an iris found in the wild, put them in a
# NumPy array, and ask the model to predict its species.

# In[97]:

X_new1 = np.array([[5, 2.9, 1, 0.2]])

# In[98]:

# To make a prediction, we call the predict method of the knn object
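The snippet is cut off before the call itself; following the comment above, it presumably looks like this (a sketch, not the original cell):

prediction = knn.predict(X_new1)
print("Prediction:", prediction)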
Example no. 30
# PLOTTING
# sns.pairplot(df)
# plt.show()

# SCALING
scaler = SS()
scaler.fit(df.drop('TARGET CLASS',axis=1))
scaled = scaler.transform(df.drop('TARGET CLASS',axis=1))
df_scale = pd.DataFrame(scaled,columns=df.columns[:-1])
print(df_scale.head())

# SPLIT DATA INTO TRAINING AND TESTING
X_train,X_test,y_train,y_test = TTS(df_scale,df['TARGET CLASS'],test_size=0.3,random_state=101)

# KNN
model = KNC(n_neighbors=1)
model.fit(X_train,y_train)
pred = model.predict(X_test)

print(CR(y_test,pred))
print(CM(y_test,pred))

# CHOOSE K VALUE (ELBOW METHOD)
error_rate = []

for i in range(1,40):
	model = KNC(n_neighbors=i)
	model.fit(X_train,y_train)
	pred_i = model.predict(X_test)
	error_rate.append(np.mean(y_test != pred_i))
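The error rates collected above are typically plotted to pick k at the "elbow"; a short follow-up sketch (the plotting import is an addition, consistent with the matplotlib usage elsewhere in these examples):

# hypothetical follow-up: visualize the elbow to choose k
import matplotlib.pyplot as plt
plt.plot(range(1, 40), error_rate, 'bo--')
plt.xlabel('K')
plt.ylabel('Error Rate')
plt.title('Error Rate vs. K Value')
plt.show()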