Пример #1
0
def main(RUNS=10, numH=2):
    """
    Cancer

    Cross-validated coevolution on the dataset returned by
    ``proben1().breast_cancer()``.

    The training partition is split into ``RUNS`` folds with ``kfold.kfold``.
    For every fold a fresh ``ndmCoevoOptim`` optimiser is built from that
    fold's train/validation pair plus the dataset's test partition, its
    populations are initialised and it is evolved.

    RUNS -- number of folds, and therefore of optimiser runs
    numH -- value stored under 'numH' in the network configuration
            (presumably the hidden-node count -- confirm in ndmCoevoOptim)
    """
    print(">>STARTING...")
    loader = proben1()
    data = loader.breast_cancer()

    folds = kfold.kfold(D=data['train'], numFolds=RUNS)

    # Input/output sizes come from the metadata shipped with the training set.
    info = data['train']['INFO']
    net_config = dict(numI=info['num_inputs'],
                      numO=info['num_outputs'],
                      numH=numH)

    for run in range(RUNS):
        print(">>>> RUN {0} of {1}".format(run, RUNS))
        print("ON : %s" % (data['name'],))
        optimiser = ndmCoevoOptim.ndmCoevoOptim(
            dataset_name=data['name'],
            train_set=folds[run][0],
            valid_set=folds[run][1],
            test_set=data['test'],
            netConfig=net_config,
        )
        optimiser.init_populations()
        optimiser.coevolve()
Пример #2
0
def main(RUNS=10, numH=5):
    """
    Parkinsons

    Cross-validated coevolution on the dataset returned by
    ``lab_bencmark().parkinsons()``.

    The training partition is split into ``RUNS`` folds with ``kfold.kfold``.
    For every fold a fresh ``ndmCoevoOptim`` optimiser is built from that
    fold's train/validation pair plus the dataset's test partition, its
    populations are initialised and it is evolved.

    RUNS -- number of folds, and therefore of optimiser runs
    numH -- value stored under 'numH' in the network configuration
            (presumably the hidden-node count -- confirm in ndmCoevoOptim)
    """
    print(">>STARTING...")
    lb_bench = lab_bencmark()
    D = lb_bench.parkinsons()

    # The dataset is labelled here under 'name'. The same key is read back
    # below; the previous code read 'NAME', which this function never sets.
    D['name'] = 'Parkinsons'

    DCrossVal = kfold.kfold(D=D['train'], numFolds=RUNS)

    # Network sizes are fixed by hand for this dataset.
    netConfig = {'numI': 22,
                 'numO': 1,
                 'numH': numH}

    for ri in range(RUNS):
        print(">>>> RUN {0} of {1}".format(ri, RUNS))
        print("ON : %s" % (D['name'],))
        coevo = ndmCoevoOptim.ndmCoevoOptim(dataset_name=D['name'],
                                            train_set=DCrossVal[ri][0],
                                            valid_set=DCrossVal[ri][1],
                                            test_set=D['test'],
                                            netConfig=netConfig)
        coevo.init_populations()
        coevo.coevolve()
Пример #3
0
def main(RUNS=10, numH=2):
    """
    Lung Cancer

    Repeated 2-fold cross-validated coevolution on the dataset returned by
    ``lab_bencmark().lung_cancer()``.

    For each of ``RUNS`` repetitions the dataset is loaded again and its
    training partition is split into ``FOLDS`` folds with ``kfold.kfold``.
    For every fold a fresh ``ndmCoevoOptim`` optimiser is built, its
    populations are initialised and it is evolved.

    RUNS -- number of repetitions of the whole cross-validation
    numH -- value stored under 'numH' in the network configuration
            (presumably the hidden-node count -- confirm in ndmCoevoOptim)
    """
    FOLDS = 2
    print(">>STARTING...")
    for i in range(RUNS):
        # Loading and splitting are repeated per run, as in the original
        # flow (presumably so every run gets a different split -- confirm
        # against kfold.kfold).
        lb_bench = lab_bencmark()
        D = lb_bench.lung_cancer()

        # The dataset is labelled here under "name". The same key is read
        # back below; the previous code read "NAME", which is never set here.
        D["name"] = "Lung_cancer"

        DCrossVal = kfold.kfold(D=D["train"], numFolds=FOLDS)

        # Network sizes are fixed by hand for this dataset.
        netConfig = {"numI": 56, "numO": 1, "numH": numH}

        for ri in range(FOLDS):
            # Report the outer run and the inner fold; the old message
            # printed the fold index as if it were the run number.
            print(">>>> RUN {0} of {1}, FOLD {2} of {3}".format(i, RUNS, ri, FOLDS))
            print("ON : %s" % (D["name"],))
            coevo = ndmCoevoOptim.ndmCoevoOptim(
                dataset_name=D["name"],
                train_set=DCrossVal[ri][0],
                valid_set=DCrossVal[ri][1],
                test_set=D["test"],
                netConfig=netConfig,
            )
            coevo.init_populations()
            coevo.coevolve()
Пример #4
0
def test_iris(RUNS=10):
    """
    Cross-validated coevolution run on the iris dataset.

    The training partition from ``lab_bencmark().iris()`` is split into
    ``RUNS`` folds. One ``ndmCoevoOptim`` optimiser is built and evolved per
    fold, with the 'randomNodesInject' parameter switched off.

    RUNS -- number of folds, and therefore of optimiser runs
    """
    loader = lab_bencmark()

    print(">>>IRIS")

    data = loader.iris()
    folds = kfold.kfold(D=data['train'], numFolds=RUNS)

    # Network sizes are fixed by hand for this dataset.
    net_config = dict(numI=4, numO=1, numH=2)

    for run in range(RUNS):
        optimiser = ndmCoevoOptim.ndmCoevoOptim(
            dataset_name='IRIS',
            train_set=folds[run][0],
            valid_set=folds[run][1],
            test_set=data['test'],
            netConfig=net_config,
        )
        # disable random inject
        optimiser.params['randomNodesInject'] = False
        optimiser.init_populations()
        optimiser.coevolve()

    # Drop the local reference to the loader before returning.
    del loader
Пример #5
0
def test_glass(RUNS=10):
    """
    GLASS

    Cross-validated coevolution run on the dataset returned by
    ``proben1().glass()``.

    The training partition is split into ``RUNS`` folds. One
    ``ndmCoevoOptim`` optimiser is built and evolved per fold, with the
    'randomNodesInject' parameter switched off.

    RUNS -- number of folds, and therefore of optimiser runs
    """
    loader = proben1()
    data = loader.glass()
    folds = kfold.kfold(D=data['train'], numFolds=RUNS)

    # Input/output sizes are read from the metadata of the *test* partition.
    info = data['test']['INFO']
    net_config = dict(numI=info['num_inputs'],
                      numO=info['num_outputs'],
                      numH=2)

    for run in range(RUNS):
        optimiser = ndmCoevoOptim.ndmCoevoOptim(
            dataset_name=data['name'],
            train_set=folds[run][0],
            valid_set=folds[run][1],
            test_set=data['test'],
            netConfig=net_config,
        )

        # disable random inject
        optimiser.params['randomNodesInject'] = False

        optimiser.init_populations()
        # The value returned by coevolve() is not used by this driver.
        optimiser.coevolve()

    # Drop the local reference to the loader before returning.
    del loader
Пример #6
0
def main(RUNS=10, numH=2):
    """
    Card

    Cross-validated coevolution on the dataset returned by
    ``proben1().australian_cc()``.

    The training partition is split into ``RUNS`` folds with ``kfold.kfold``.
    For every fold a fresh ``ndmCoevoOptim`` optimiser is built, its
    populations are initialised and it is evolved.

    The driver is best-effort: any ordinary exception is reported on stdout
    ("ERROR") together with its traceback on stderr, and the function then
    returns normally. KeyboardInterrupt and SystemExit are not intercepted.

    RUNS -- number of folds, and therefore of optimiser runs
    numH -- value stored under 'numH' in the network configuration
            (presumably the hidden-node count -- confirm in ndmCoevoOptim)
    """
    import traceback

    try:
        print(">>STARTING...")
        proben = proben1()
        D = proben.australian_cc()

        DCrossVal = kfold.kfold(D=D['train'], numFolds=RUNS)

        # Input/output sizes come from the training-set metadata.
        netConfig = {'numI': D['train']['INFO']['num_inputs'],
                     'numO': D['train']['INFO']['num_outputs'],
                     'numH': numH}

        for ri in range(RUNS):
            print(">>>> RUN {0} of {1}".format(ri, RUNS))
            print("ON : %s" % (D['name'],))
            coevo = ndmCoevoOptim.ndmCoevoOptim(dataset_name=D['name'],
                                                train_set=DCrossVal[ri][0],
                                                valid_set=DCrossVal[ri][1],
                                                test_set=D['test'],
                                                netConfig=netConfig)
            coevo.init_populations()
            coevo.coevolve()

        #send notification
        #notify.noticeEMail(D['name']+' DONE');
    except Exception:
        # Keep the original "report and carry on" behaviour, but say what
        # went wrong instead of hiding the cause behind a bare "ERROR".
        print("ERROR")
        traceback.print_exc()
Пример #7
0
def decision_tree(frame):
    """Fit and evaluate a depth-5 entropy decision tree that predicts 'Action'.

    The function works on a copy of ``frame`` and performs two evaluations:

    1. A 70/30 train/test split (``random_state=1``): fit, print the
       accuracy, display the tree and the confusion matrix.
    2. A second fit/predict cycle on the four sets returned by
       ``kfold.kfold``: display the tree again and print the accuracy.

    Args:
        frame: table holding the columns 'Source Port', 'Destination Port',
            'Packets', 'pkts_received', 'Bytes', 'Bytes Received' and
            'Action'. It is used through ``.copy()``, ``.replace()`` and
            column selection, so presumably a pandas DataFrame -- confirm
            against the caller. The input itself is not modified.

    Returns:
        None. All results are printed or displayed.
    """
    # Work on a copy so the in-place replacement below never touches the
    # caller's data.
    data = frame.copy()

    # Map the categorical action labels onto integer codes.
    action_codes = {'Action': {'allow': 0, 'deny': 1, 'drop': 2, 'reset-both': 3}}
    data.replace(action_codes, inplace=True)

    # Independent features and target.
    feature = ['Source Port', 'Destination Port', 'Packets', 'pkts_received', 'Bytes', 'Bytes Received']
    X = data[feature]
    Y = data['Action']

    # Hold out 30% of the rows for testing; the seed is fixed for repeatability.
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1)

    classifier = DecisionTreeClassifier(criterion="entropy", max_depth=5)
    classifier = classifier.fit(X_train, Y_train)

    split_predictions = classifier.predict(X_test)
    print("Accuracy of Split Test Model: ", metrics.accuracy_score(Y_test, split_predictions))

    # display the tree
    print_tree(classifier, feature)

    # create and display confusion matrix
    confusion.confusionMatrix(Y_test, split_predictions)

    # Resample to evaluate the model.
    # NOTE(review): this assumes kfold.kfold returns a single
    # (x_train, x_test, y_train, y_test) split built from the same feature
    # columns as `feature` -- confirm against kfold.kfold.
    x_train, x_test, y_train, y_test = kfold.kfold(data)

    # Refit the same classifier object on the resampled training set.
    classifier.fit(x_train, y_train)
    kfold_predictions = classifier.predict(x_test)

    # print the decision tree generated by the kfold sets
    print_tree(classifier, feature)

    print("Accuracy of KFold Test Model: ", metrics.accuracy_score(y_test, kfold_predictions))
Пример #8
0
def grid_search(model, params, folds):
  '''Exhaustive search over gamma and C for an MSVM, scored by k-fold accuracy.

  For every (gamma, C) pair an ``MSVM`` is trained and evaluated on each
  fold of the module-level dataframe ``df``, and the mean accuracy over the
  folds is printed. For every gamma a "C vs accuracy" curve is plotted.
  The best combination found is printed at the end.

  Arguments:
    model  -- multi-class strategy passed to MSVM, 'ovr' or 'ovo'
    params -- dictionary with the keys 'gamma' (iterable of gammas),
              'C' (iterable of C values) and 'kernel' (kernel type)
    folds  -- number of folds to split ``df`` into

  Returns nothing; results are printed and plotted.

  NOTE(review): ``df`` is read from module scope and must contain a 'label'
  column. The meaning of the third ``kfold`` argument (True) is not visible
  here -- presumably "shuffle"; confirm against kfold.
  '''

  gammas = params['gamma']
  C = params['C']
  kernel = params['kernel']

  # Honour the requested number of folds. The previous code discarded the
  # argument and always split into 5.
  fold_indices = kfold(df, folds, True)

  max_acc = 0
  best_C = None
  best_gamma = None
  for gama in gammas:
      C_accuracies = []
      for cs in C:
        accuracies = []
        for fold_no, fold in enumerate(fold_indices, start=1):
          # `fold` holds row positions: those rows are the test set ...
          test_fold_df = df.iloc[fold, :]
          # ... and every other row is the training set. The positions are
          # translated to index labels first, so the complement is also
          # correct when df does not have a default 0..n-1 index.
          train_fold_df = df.drop(df.index[fold], axis=0)
          #GET TRAINING X AND y
          X_train = train_fold_df.drop(['label'], axis=1)
          y_train = train_fold_df.filter(['label']).to_numpy()
          #GET TESTING X AND y
          X_test = test_fold_df.drop(['label'], axis=1)
          y_test = test_fold_df.filter(['label']).to_numpy()
          sv = MSVM(model, cs, gama, kernel)
          sv.fit(X_train, y_train)
          pred = sv.predict(X_test)
          acc = measure_accuracy(y_test, pred)
          accuracies.append(acc)
          print("Accuracy for Gamma:",gama," and C:",cs," and Fold: ",fold_no," is:",acc)
        # Mean accuracy over all folds for this (gamma, C) pair.
        mean_acc = np.mean(np.array(accuracies))
        print("MEAN ACCURACY FOR GAMMA:",gama," and C:",cs," is ",mean_acc)
        if max_acc < mean_acc:
          max_acc = mean_acc
          best_C = cs
          best_gamma = gama
        C_accuracies.append(mean_acc)
      plt.plot(C, C_accuracies)
      plt.title("C vs Accuracy for gamma:"+str(gama))
      plt.xlabel("C")
      plt.ylabel("Accuracy")
      plt.show()
  print("BEST ACCURACY: ",max_acc," FOR C:",best_C," AND GAMMA:",best_gamma)
Пример #9
0
def main(RUNS=10, numH=2):
    """
    Horse

    Cross-validated coevolution on the dataset returned by
    ``proben1().horse()``.

    The training partition is split into ``RUNS`` folds with ``kfold.kfold``.
    For every fold a fresh ``ndmCoevoOptim`` optimiser is built from that
    fold's train/validation pair plus the dataset's test partition, its
    populations are initialised and it is evolved.

    RUNS -- number of folds, and therefore of optimiser runs
    numH -- value stored under 'numH' in the network configuration
            (presumably the hidden-node count -- confirm in ndmCoevoOptim)
    """
    print(">>STARTING...")
    loader = proben1()
    data = loader.horse()

    folds = kfold.kfold(D=data["train"], numFolds=RUNS)

    # Input/output sizes come from the metadata shipped with the training set.
    info = data["train"]["INFO"]
    net_config = dict(numI=info["num_inputs"], numO=info["num_outputs"], numH=numH)

    for run in range(RUNS):
        print(">>>> RUN {0} of {1}".format(run, RUNS))
        print("ON : %s" % (data["name"],))
        optimiser = ndmCoevoOptim.ndmCoevoOptim(
            dataset_name=data["name"],
            train_set=folds[run][0],
            valid_set=folds[run][1],
            test_set=data["test"],
            netConfig=net_config,
        )
        optimiser.init_populations()
        optimiser.coevolve()
Пример #10
0
import kfold;
import profile;
import visualisation.visualiseOutputs2D as vis2d;
from PyQt4 import QtCore, QtGui
from visualiseNDMNet import *;

# Module-level driver: builds one optimiser with default arguments and
# profiles a single coevolution run.
# NOTE: `coevo` must keep this exact module-level name -- profile.run()
# below refers to it inside a statement string.
coevo = ndmCoevoOptim.ndmCoevoOptim();
# These two lists are not referenced again in the visible part of the script.
errors_train =[];
errors_test = [];

# Only used by the commented-out mushroom split below.
benchmark = proben1();
# NOTE(review): this rebinds the name `lab_bencmark` from the callable to the
# object it returns, so it cannot be called a second time afterwards.
lab_bencmark = lab_bencmark();
K  = 10;  # number of cross-validation folds

# D = kfold.kfold(D = benchmark.mushroom()['train'],numFolds = K);
# K-fold split of the iris training partition. With the two assignments in
# the loop commented out, D2 is computed but never handed to the optimiser.
D2 = kfold.kfold(D = lab_bencmark.iris()['train'],numFolds = K);
# Single iteration; the loop form is kept so more runs can be enabled easily.
for i in xrange(1):

    print ">>>", i;

    coevo.init_populations();
    # coevo.train_set = D2[i][0];
    # coevo.validation_set = D2[i][1];
    # Run the coevolution under the stdlib profiler, which prints its report
    # when the statement finishes.
    profile.run("coevo.coevolve()");






Пример #11
0
# coding: utf-8
get_ipython().magic(u'cd rrna/src')
import numpy as np
import kfold

# Cleaned-up version of an interactive session: the failed attempts
# (adding an array to an int when sizing `labels`, calling np.hstack with
# two positional arrays, a loop over an empty range) and the bare `.shape`
# echo lines have been removed so the log can be re-run top to bottom.

# Pair-probability matrices for the two classes; one sample per row.
mean_pair_probs = np.load(
    "../data/rnafold_results/rnafold_mean_pair_probs.npy")
rrna_pair_probs = np.load(
    "../data/rnafold_results/rnafold_rrna_pair_probs.npy")

n_mean = mean_pair_probs.shape[0]
n_rrna = rrna_pair_probs.shape[0]

# Stack the samples row-wise: the "mean" rows first, then the rRNA rows.
pair_probs = np.vstack([mean_pair_probs, rrna_pair_probs])

# One label per stacked row: 0 for the "mean" rows, 1 for the rRNA rows.
# The original loops ended up setting rows n_mean-1 .. total-2 to 1, i.e.
# the right number of ones but shifted one row early relative to the
# stacking order above.
# NOTE(review): this assumes kfold.kfold pairs labels[i] with pair_probs[i]
# -- confirm against kfold.kfold.
labels = np.zeros(n_mean + n_rrna)
labels[n_mean:] = 1

kfold.kfold(labels, pair_probs)
get_ipython().magic(u'save')
Пример #12
0
# coding: utf-8
get_ipython().magic(u'cd rrna/src')
import kfold
import numpy as np

# Pair-probability arrays for the rRNA and non-rRNA classes.
rrna_pair_probs = np.load("/projects/bio/rrna/data/rnafold_results/rrna_by1_pair_probs.npy")  # these will change for mouse
not_rrna_pair_probs = np.load("/projects/bio/rrna/data/rnafold_results/not_rrna_mean_pair_probs.npy")

# Build the 10-partition split twice, once per sampling mode, writing both
# results into the same folder.
results_folder = "/projects/bio/rrna/data/rnafold_results/"
for sampling_mode in ("under", "over"):
    kfold.kfold(
        rrna_pair_probs,
        not_rrna_pair_probs,
        save_folder=results_folder,
        n_partitions=10,
        sampling=sampling_mode,
    )
Пример #13
0
def do_training_testing(clf, X, y, filename, show=False, n_splits=10):
    """
    Train and test every classifier / feature-set combination with K-fold
    cross-validation and collect the best model of each combination.

    A fresh CSV report is started at ``filename`` (any existing file is
    removed first) with the header row
    ``Clf-Fitur, Fold 1 .. Fold n_splits, Avg``. ``filename`` is then passed
    on to ``get_best_model`` for every combination (presumably so it can
    append its result row -- confirm against get_best_model).

    Classifier selection rules:
      * the 'gauss_nb' entry of ``clf`` is never run on its own;
      * for the 'tfidf' feature set, 'multi_nb' is replaced by 'gauss_nb',
        and the result is stored under the key ('gauss_nb', 'tfidf').

    parameters:
    clf = classifiers, indexed by classifier name
    X = data per feature type, indexed by feature name
    y = labels of the data
    filename = path of the CSV report
    show = boolean, print the progress of the best-model search
    n_splits = number of cross-validation folds (default 10)

    return per_clf: dict mapping (classifier name, feature name) to the
    value returned by get_best_model for that combination
    """
    # Start from an empty report; a missing file is not an error.
    try:
        os.remove(filename)
    except OSError:
        pass

    header = ['Clf-Fitur']
    header.extend('Fold ' + str(fold_no) for fold_no in range(1, n_splits + 1))
    header.append('Avg')
    with open(filename, 'a', newline='') as report:
        csv.writer(report).writerow(header)

    train_indices_all, test_indices_all = kfold(y, n_splits=n_splits)

    # One (train indices, test indices) pair per fold, stored in an explicit
    # object array. Train and test index arrays have different lengths, and
    # np.array() on such ragged input is rejected by recent NumPy versions.
    fold_pairs = list(zip(train_indices_all, test_indices_all))
    kf = np.empty((len(fold_pairs), 2), dtype=object)
    for row, (train_idx, test_idx) in enumerate(fold_pairs):
        kf[row, 0] = train_idx
        kf[row, 1] = test_idx

    # Only pass `show` through when it was requested, so get_best_model keeps
    # its own default otherwise.
    extra_kwargs = {'show': True} if show else {}

    per_clf = {}
    for c in clf:  # for each classifier type
        if c == 'gauss_nb':
            continue

        for fitur in X:  # for each feature type
            # multi_nb is swapped for gauss_nb on the tfidf features only.
            name = 'gauss_nb' if (c == 'multi_nb' and fitur == 'tfidf') else c

            if show:  # show process
                print('\t', name, fitur)

            per_clf[(name, fitur)] = get_best_model(X[fitur], y, clf[name],
                                                    kf, name, fitur, filename,
                                                    **extra_kwargs)

    return per_clf