test_data_file = pd.read_csv('test.csv')

features_train, labels_train = train_data(train_data_file)
features_valid, labels_valid = valid_data(train_data_file)

features_test = test_data(test_data_file)

nn = Classifier(layers=[
    Layer("Sigmoid", units=429),
    Layer("Sigmoid", units=300),
    Layer("Sigmoid", units=150),
    Layer("Softmax", units=92)
],
                n_iter=1,
                n_stable=40,
                batch_size=25,
                learning_rate=0.003,
                learning_rule="momentum",
                valid_size=0.25,
                regularize="L2",
                normalize="weights",
                weight_decay=0.0001,
                loss_type="mcc",
                verbose=1)

nn.fit(features_train, labels_train)

predicts1 = nn.predict(features_valid)
correctness = correct_rate(predicts1, labels_valid)
print(correctness)
Example #2
logging.info('%s place names and %s other words' % (len(places), len(non_places)))

place_vectors = repvecs(places, nlp)
non_place_vectors = repvecs(non_places, nlp)

logging.info('%s place name vectors and %s other word vectors' % (len(place_vectors), len(non_place_vectors)))

place_outputs = [1] * len(place_vectors)
non_place_outputs = [0] * len(non_place_vectors)

x = place_vectors + non_place_vectors
y = place_outputs + non_place_outputs

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=984)

logging.info('training classifier')

clf = Classifier(
  layers=[
    Layer("Rectifier", units=100),
    Layer("Softmax")
  ],
  regularize='dropout',
  dropout_rate=0.5,
  learning_rate=0.02,
  n_iter=20)
clf.fit(np.asarray(x_train), np.asarray(y_train))

print(classification_report(np.asarray(y_test), clf.predict(np.asarray(x_test))))
Example #3
        test_size=1.0/7.0, random_state=1234)


classifiers = []

if 'sknn' in sys.argv:
    from sknn.platform import gpu32
    from sknn.mlp import Classifier, Layer, Convolution

    clf = Classifier(
        layers=[
            # Convolution("Rectifier", channels=10, pool_shape=(2,2), kernel_shape=(3, 3)),
            Layer('Rectifier', units=200),
            Layer('Softmax')],
        learning_rate=0.01,
        learning_rule='nesterov',
        learning_momentum=0.9,
        batch_size=300,
        valid_size=0.0,
        n_stable=10,
        n_iter=10,
        verbose=True)
    classifiers.append(('sknn.mlp', clf))

if 'nolearn' in sys.argv:
    from sknn.platform import gpu32
    from nolearn.lasagne import NeuralNet, BatchIterator
    from lasagne.layers import InputLayer, DenseLayer
    from lasagne.nonlinearities import softmax
    from lasagne.updates import nesterov_momentum
Example #4
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=0)

# normalize each sample of X_train to unit L2 norm
X_trainn = preprocessing.normalize(X_train, norm='l2')
# normalize each sample of X_test to unit L2 norm
X_testn = preprocessing.normalize(X_test, norm='l2')

# standardize the X_trainn dataset around its mean
X_trainn = preprocessing.scale(X_trainn)
# standardize the X_testn dataset around its mean
X_testn = preprocessing.scale(X_testn)

# use a classification neural network to create a predictive model
clsfr = Classifier(
            layers=[
            # Rectifier is used for both nonlinear hidden layers, each with 13 units
            Layer("Rectifier", units=13),
            Layer("Rectifier", units=13),
            # Softmax is used as the output activation layer (a form of multinomial
            # logistic regression) for mutually exclusive multi-class responses
            Layer("Softmax")],
            # learning rate parameter set at 0.001
            learning_rate=0.001,
            # learning rule using the stochastic gradient descent to minimize the objective function
            learning_rule='sgd',
            # random seed set for classification model
            random_state=201,
            # max number of iterations used to develop model (n_iter = epoch)
            n_iter=200)

# fit the predictive model on the scaled training features and the unscaled labels
model1=clsfr.fit(X_trainn, y_train)
Example #5
ipt_closed = f.read()
f.close()
ipt_closed = ipt_closed.split("\n")

for i in range(0, len(ipt_closed) - 1):
    ipt_closed[i] = ipt_closed[i].strip("[]").split(",")
    ipt_closed[i][0] = int(ipt_closed[i][0])
    ipt_closed[i][1] = int(ipt_closed[i][1])
    opt.append(0)

ipt = ipt_open[:-1] + ipt_closed[:-1]
ipt = np.asarray(ipt)
opt = np.asarray(opt)
print ":" + str(len(ipt))
print len(opt)

nn = Classifier(layers=[
    Layer("Softmax", units=2),
    Layer("Softmax", units=2),
    Layer("Softmax", units=2)
],
                learning_rate=0.05,
                n_iter=15)

nn.fit(ipt, opt)

a = np.asarray([[4, 30], [2, 30], [6, 300], [4, 300]])
# a =a.reshape(2,-1)
op = nn.predict(a)

print(op)
Example #6
File: main.py Project: saav/eth-lis-s16
print "Creating data\n"
# Grab the correct indices from the training data
X = train.ix[:, 1:129].as_matrix()
y = train.ix[:, 0:1].as_matrix()

A = train_unlabeled.ix[:, 0:128].as_matrix()

from sknn.mlp import Classifier, Layer

# This is the important stuff to adjust
print "Creating classifier\n"
nn = Classifier(layers=[
    Layer('Tanh', units=128),
    Layer('Sigmoid', units=128),
    Layer('Softmax', units=10)
],
                learning_rate=.04,
                n_iter=85,
                batch_size=10)
"""
Uncomment to actually train whole data and write file
"""
outfile = open('output.csv', 'w')  # change the file name
writer = csv.writer(outfile)
writer.writerow(['Id', 'y'])
print "About to fit\n"
nn.fit(X, y)
print "About to predict"
b = nn.predict(A)
nn.fit(A, b)
prediction = nn.predict(test.as_matrix())
Example #7
import numpy
from sknn.mlp import Classifier, Layer
X = numpy.array([[0, 1], [0, 0], [1, 0]])
print(X.shape)
y = numpy.array([[1], [0], [2]])
print(y.shape)
nn = Classifier(layers=[Layer("Sigmoid", units=2),
                        Layer("Sigmoid", units=3)],
                n_iter=10)
nn.fit(X, y)
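
# A minimal follow-up sketch (assumes the fit above succeeded): predict the
# class for each training row; sknn returns one label per input row.
print(nn.predict(X))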
Example #8
with open('label_val_sparse_mat.dat', 'rb') as infile:
    labels_val = pickle.load(infile)

labels = labels.transpose()
labels_val = labels_val.transpose()

labels = labels.toarray()
labels_val = labels_val.toarray()

################################# classifier 1######################
nn = Classifier(layers=[Layer("Tanh", units=100),
                        Layer("Softmax")],
                learning_rate=0.02,
                n_iter=50,
                batch_size=100,
                n_stable=20,
                debug=True,
                valid_set=(train_val, labels_val),
                verbose=True)

nn.fit(train, labels)

################################# classifier 2######################
nn = Classifier(layers=[Layer("Tanh", units=200),
                        Layer("Softmax")],
                learning_rate=0.02,
                n_iter=50,
                batch_size=100,
                n_stable=20,
                debug=True,
Example #9
eta = 0.001
iters = 45

rootdir = os.getcwd()
if not os.path.exists('sklearnTry'):
    os.makedirs('sklearnTry')
newdir = os.path.join(rootdir, 'sklearnTry')
fout = open(os.path.join(newdir, 'NeuralNetsOut.txt'), 'w+')

train_features, train_labels, test_features, test_labels, test_keys = GetData()

model = Classifier(
    layers=[Layer("Sigmoid", units=50),
            Layer("Softmax")],
    learning_rate=eta,
    n_iter=iters,
    weight_decay=0.00001,
    warning=None
)  # MLPClassifier(alpha=1e-05, hidden_layer_sizes=(15,), epsilon=1e-08)

gs = GridSearchCV(model,
                  param_grid={
                      'learning_rate': [0.005, 0.001, 0.0002],
                      'hidden0__units': [8, 25, 40, 45, 50],
                      'hidden0__type':
                      ["Rectifier", "Sigmoid", "Tanh", "ExpLin"],
                      'weight_decay': [0.00001, 0.001, 0.0001],
                      'output__type': ["Sigmoid", "Softmax"]
                  })
gs.fit(train_features, train_labels)
pred = gs.predict(test_features)
Example #10
def model_fitting(train_set, train_labels, classifier_name,
                  n_jobs=cpu_count()):
    """
    The fitting process with sklearn algorithms.
    :param train_set: numpy array, required
    :param train_labels: list, required
    :param classifier_name: string, required
    :param n_jobs: integer, optional (defaults to cpu_count())
    :return: object
        - Fit classifier model according to the given training data
    """
    classifier_list = {
        "svm_linear":
        SVC(probability=True, kernel='linear', C=1.0),
        "svm_poly":
        SVC(probability=True, kernel='poly', C=1.0),
        "svm_rbf":
        SVC(probability=True, kernel='rbf', C=1.0, gamma=0.01),
        "linear_svc":
        LinearSVC(penalty='l2',
                  loss='squared_hinge',
                  dual=True,
                  tol=0.1,
                  C=1.0,
                  multi_class='ovr',
                  fit_intercept=True,
                  intercept_scaling=1,
                  random_state=None,
                  max_iter=3000),
        "knn":
        KNeighborsClassifier(n_neighbors=100,
                             weights='distance',
                             leaf_size=30,
                             n_jobs=n_jobs),
        "random_forests":
        RandomForestClassifier(n_estimators=350,
                               criterion='entropy',
                               min_samples_split=2,
                               min_samples_leaf=1,
                               max_leaf_nodes=600,
                               n_jobs=n_jobs),
        "logistic_regression":
        LogisticRegression(penalty='l2',
                           dual=False,
                           tol=0.0001,
                           C=2.4,
                           fit_intercept=True,
                           intercept_scaling=1,
                           random_state=None,
                           solver='liblinear',
                           max_iter=1000,
                           multi_class='ovr',
                           warm_start=False,
                           n_jobs=n_jobs),
        "decision_trees":
        DecisionTreeClassifier(criterion='gini',
                               splitter='best',
                               max_depth=None,
                               min_samples_split=2,
                               min_samples_leaf=100,
                               min_weight_fraction_leaf=0.0,
                               max_features=None,
                               random_state=None,
                               max_leaf_nodes=None,
                               presort=False),
        "sgd":
        SGDClassifier(alpha=.0001,
                      n_iter=500,
                      penalty="elasticnet",
                      n_jobs=n_jobs),
        "neural_network":
        Classifier(layers=[
            Layer("Sigmoid", units=14),
            Layer("Sigmoid", units=13),
            Layer("Sigmoid", units=12),
            Layer("Sigmoid", units=10),
            Layer("Softmax")
        ],
                   learning_rate=0.01,
                   n_iter=200,
                   batch_size=10,
                   regularize='L1',
                   n_stable=50,
                   dropout_rate=0,
                   verbose=True),
        "GBC":
        GradientBoostingClassifier(max_depth=10,
                                   max_leaf_nodes=850,
                                   min_samples_leaf=15,
                                   learning_rate=0.1),
        "XGB":
        XGBClassifier(base_score=0.5,
                      colsample_bylevel=1,
                      colsample_bytree=1,
                      gamma=0,
                      learning_rate=0.1,
                      max_delta_step=0,
                      max_depth=10,
                      min_child_weight=2,
                      missing=None,
                      n_estimators=100,
                      nthread=n_jobs,
                      reg_alpha=0,
                      objective='binary:logistic',
                      reg_lambda=1,
                      scale_pos_weight=1,
                      seed=0,
                      silent=True,
                      subsample=1)
    }
    return classifier_list[classifier_name].fit(train_set, train_labels)
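
# A minimal usage sketch (hypothetical toy data; assumes the imports used by
# classifier_list, e.g. sklearn's SVC, are in scope):
import numpy as np
toy_X = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
toy_y = [0, 1, 1, 0]
model = model_fitting(toy_X, toy_y, "svm_rbf")
print(model.predict(toy_X))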
Example #11
        for hu in hidden_units:
            for ni in n_iters:
                for ii in range(1):
                    print('learning_rate = ', lrt, 'learning_rule = ', lr,
                          'hidden_units = ', hu, 'n_iters = ', ni, '## = ', ii)

                    # Train & Test
                    X_train, X_test, y_train, y_test = train_test_split(
                        X, y, test_size=0.3)

                    # ====================================================

                    clf = Classifier(layers=[
                        Layer('Sigmoid', units=hu),
                        Layer('Softmax', units=2)
                    ],
                                     learning_rule=lr,
                                     learning_rate=lrt,
                                     n_iter=ni)

                    startTime = datetime.now()

                    clf.fit(X_train, y_train)

                    endTime = datetime.now()

                    y_score = clf.predict_proba(X_test)
                    y_hat = clf.predict(X_test)
                    ys = [y_s[y_h] for y_s, y_h in zip(y_score, y_hat)]

                    tmp = np.append(X_test,
Example #12
def get_nn_pck(X_train,
               X_test,
               y_train,
               y_test,
               c1=16,
               k1=9,
               p1=2,
               c2=14,
               k2=7,
               p2=2,
               c3=10,
               k3=3,
               p3=2):
    currentTime = str(
        time.strftime('%Y%m%d %H%M%S', time.localtime(time.time())))
    dirPath = currentPath + currentTime + 'canshu' + '%d-%d-%d-%d-%d-%d-%d-%d-%d' % (
        c1, k1, p1, c2, k2, p2, c3, k3, p3) + "/"
    excelPath = dirPath + "resLog.xlsx"
    os.mkdir(dirPath)
    # create an Excel file named with the current time
    workbook = xlsxwriter.Workbook(excelPath)
    # create a worksheet object
    worksheet = workbook.add_worksheet()
    worksheet.write("A1", "epochs")
    worksheet.write("B1", "Train-Score")
    worksheet.write("C1", "Test-Score")
    result = []
    for i in range(1, 10):
        nn = Classifier(
            layers=[
                Convolution('Rectifier',
                            channels=c1,
                            kernel_shape=(k1, k1),
                            border_mode='full',
                            pool_shape=(p1, p1)),
                # border_mode = 'full', no stride
                Convolution('Rectifier',
                            channels=c2,
                            kernel_shape=(k2, k2),
                            border_mode='full',
                            pool_shape=(p2, p2)),
                # border_mode = 'full', no stride
                Convolution('Rectifier',
                            channels=c3,
                            kernel_shape=(k3, k3),
                            border_mode='full',
                            pool_shape=(p3, p3)),
                Layer(
                    'Rectifier',
                    units=32,
                ),
                Layer(
                    'Rectifier',
                    units=32,
                ),
                Layer('Softmax', units=2)
            ],
            learning_rule="sgd",
            learning_rate=0.015,
            learning_momentum=0.9,
            weight_decay=0.001,
            n_iter=i,
            n_stable=10,
            f_stable=0.001,
            valid_size=0.1,
            verbose=True)
        nn.fit(X_train, y_train)

        pickle.dump(nn, open(dirPath + "nn" + str(i) + ".pkl", 'wb'))

        worksheet.write("A" + str(i + 1), i)
        worksheet.write("B" + str(i + 1), nn.score(X_train, y_train))
        worksheet.write("C" + str(i + 1), nn.score(X_test, y_test))
        result.append(nn.score(X_test, y_test))
    workbook.close()
    return max(result)
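
# A hypothetical call (X_train/X_test must be image arrays shaped for sknn's
# Convolution layers, e.g. (n_samples, height, width, channels)):
# best_test_score = get_nn_pck(X_train, X_test, y_train, y_test, c1=16, k1=9, p1=2)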
Example #13
# Load the data and split it into subsets for training and testing.
digits = datasets.load_digits()
X = digits.images
y = digits.target

X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.2)


# Create a neural network that uses convolution to scan the input images.
nn = Classifier(
    layers=[
        Convolution('Rectifier', channels=12, kernel_shape=(3, 3), border_mode='full'),
        Convolution('Rectifier', channels=10, kernel_shape=(3, 3), border_mode='valid'),
        Convolution('Rectifier', channels=4, kernel_shape=(3, 3), border_mode='valid'),
        Layer('Rectifier', units=64),
        Layer('Softmax')],
    learning_rate=0.002,
    valid_size=0.2,
    n_stable=10,
    verbose=True)

nn.fit(X_train, y_train)


# Determine how well it does on training data and unseen test data.
print('\nTRAIN SCORE', nn.score(X_train, y_train))
print('TEST SCORE', nn.score(X_test, y_test))

y_pred = nn.predict(X_test)
Example #14
def neuralCombo(data):
    pipeline = Pipeline([('min/max scaler',
                          MinMaxScaler(feature_range=(0.0, 1.0))),
                         ('nn',
                          Classifier(layers=[
                              Layer("Rectifier", units=100),
                              Layer("Sigmoid", units=100),
                              Layer("Softmax")
                          ],
                                     n_iter=25))])

    learningRate = [0.05, 0.005, 0.001, 0.0001, 0.00001]
    units = [5, 50, 100, 200]
    type = [
        'Rectifier', 'Sigmoid', 'Sigmoid', 'Tanh', 'Linear', 'Softmax',
        'Gaussian'
    ]
    #type = ['Rectifier', 'Linear', 'Gaussian']
    iterations = [25, 50, 100, 200]

    best = {}
    best['learningRate'] = 0.05
    best['units'] = 4
    best['type'] = 'Rectifier'
    best['iterations'] = 5
    best['trainingAccuracy'] = 0.0

    for l in learningRate:
        for i in iterations:
            for type0 in type:
                for u0 in units:

                    pipeline = Pipeline([
                        ('min/max scaler',
                         MinMaxScaler(feature_range=(0.0, 1.0))),
                        ('nn',
                         Classifier(
                             layers=[Layer(type0, units=u0),
                                     Layer("Softmax")],
                             n_iter=i))
                    ])

                    best = testModel(data, pipeline, best, l, u0, type0, i)

                    for type1 in type:
                        for u1 in units:

                            pipeline = Pipeline([
                                ('min/max scaler',
                                 MinMaxScaler(feature_range=(0.0, 1.0))),
                                ('nn',
                                 Classifier(layers=[
                                     Layer(type0, units=u0),
                                     Layer(type1, units=u1),
                                     Layer("Softmax")
                                 ],
                                            n_iter=i))
                            ])

                            best = testModel(data, pipeline, best, l,
                                             str(u0) + "," + str(u1),
                                             type0 + "," + type1, i)

                            for type2 in type:
                                for u2 in units:

                                    pipeline = Pipeline([
                                        ('min/max scaler',
                                         MinMaxScaler(feature_range=(0.0,
                                                                     1.0))),
                                        ('nn',
                                         Classifier(layers=[
                                             Layer(type0, units=u0),
                                             Layer(type1, units=u1),
                                             Layer(type2, units=u2),
                                             Layer("Softmax")
                                         ],
                                                    n_iter=i))
                                    ])

                                    best = testModel(
                                        data, pipeline, best, l,
                                        str(u0) + "," + str(u1) + "," +
                                        str(u2),
                                        type0 + "," + type1 + "," + type2, i)

    print "bestOverall===================================="
    print "trainingAccuracy" + " = " + str(best['trainingAccuracy'])
    print "learningRate" + " = " + str(best['units'])
    print "units" + " = " + str(best['type'])
    print "type" + " = " + str(best['iterations'])
    print "iterations" + " = " + str(best['learningRate'])
Example #15
File: sent_mr.py Project: PCJohn/Walrus
    X = np.vstack((pX, nX))
    Y = nn.predict(X)
    print(Y[:3])
    print(Y[-3:])


def clean(x):
    return ''.join([t for t in x.strip() if ord(t) < 128])
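
# Illustrative check of clean() on a hypothetical input: characters with
# code points >= 128 (e.g. the accented 'é') are dropped.
assert clean('café test') == 'caf test'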


#_________MAIN___________#
g = load_glove(glove_path)
print('GloVe loaded...')
X, Y = load_ds(g, senti_path)

nn = Classifier(layers=[
    Convolution('Rectifier', channels=1, kernel_shape=(1, embed_dim)),
    Layer('Rectifier', units=400),
    Layer('Tanh', units=600),
    Layer('Softmax')
],
                learning_rate=0.001,
                verbose=True)

train(nn, X, Y, model_path, 100, 25)

#___TEST___#
nn = pickle.load(open(model_path, 'rb'))  # pickle files must be opened in binary mode
print('Model loaded...')
test_ds(g, nn, senti_path)
Example #16
File: scikitNN.py Project: jzpang/PyCorn
y_train, y_test = y[:trainCount], y[trainCount:]

print(x_train)
print(x_test)
print(y_train)
print(y_test)

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

pipeline = Pipeline([
    ('min/max scaler', MinMaxScaler(feature_range=(0.0, 1.0))),
    ('neural network',
     Classifier(layers=[Layer("Tanh", units=128),
                        Layer("Softmax", units=2)],
                n_iter=25,
                learning_rate=0.001,
                verbose=True))
])
pipeline.fit(x_train, y_train)

y_test2 = pipeline.predict(x_test)

tpr = [0.0]
fpr = [0.0]

positives = float(np.count_nonzero(y_test))
negatives = float(len(y_test) - positives)

tpCount = 0
fpCount = 0
Example #17
    elif f == 1:
        single_inside = matrix

    elif f == 2:
        double_outside = matrix

    elif f == 3:
        double_inside = matrix

frames = [single_outside, single_inside, double_outside, double_inside]
results = pd.concat(frames)
x = np.array(results.drop(['label'], 1))
x = preprocessing.scale(x)
y = np.array(results['label'])
# machine learning
x_train, x_test, y_train, y_test = cross_validation.train_test_split(
    x, y, test_size=0.2)
# clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
clf = Classifier(
    layers=[Layer("Maxout", units=100, pieces=2),
            Layer("Softmax")],
    learning_rate=0.001,
    n_iter=25)
clf.fit(x_train, y_train)
# accuracy = clf.score(x_test, y_test)
print(clf.predict(x_test))
print(y_test)
print(clf.score(x_test, y_test))
with open('gesture_recognizeSVM_NN.pickle', 'wb') as f:
    pickle.dump(clf, f)
Example #18
print(X.shape)
X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(
    X, Y, test_size=0.2, random_state=0)

# Bernoulli RBM with 400 hidden units (compressed features from the 786 input features)
rbm = BernoulliRBM(n_components=400,
                   learning_rate=0.01,
                   batch_size=10,
                   n_iter=10,
                   verbose=True,
                   random_state=None)

# Create a neural network that uses convolution to scan the input images, followed by two fully connected layers
nn = Classifier(layers=[
    Convolution('Tanh', channels=20, kernel_shape=(5, 5), border_mode='valid'),
    Layer('Sigmoid', units=100),
    Layer('Softmax')
],
                learning_rate=0.002,
                valid_size=0.2,
                n_stable=5,
                verbose=True)

classifier = Pipeline(steps=[('rbm', rbm), ('cnn', nn)])

###############################################################################
# Training RBM-CNN Pipeline
classifier.fit(X_train, Y_train)
Y_pred = classifier.predict(X_test)
print('Score:  ', (metrics.classification_report(Y_test, Y_pred)))
Example #19
def mlp_kernel_constructor(kernel_option):
    return lambda: Classifier(**kernel_option)
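
# A minimal usage sketch (assumes sknn.mlp's Classifier and Layer are imported;
# the configuration shown is hypothetical): each call yields a fresh, unfitted
# Classifier built from the captured keyword arguments.
make_clf = mlp_kernel_constructor({
    'layers': [Layer("Rectifier", units=50), Layer("Softmax")],
    'learning_rate': 0.01,
    'n_iter': 10,
})
clf = make_clf()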
Example #20
    testingSetLabels,
    probNB[:, 0])  #true positive rate, false positive rate (ROC curve)

print "Time = ", time.time() - startTime, "seconds"

startTime = time.time()

print()
print()

#------------------------NN---------------------------
print "Neural Network Classifer"

nn = Classifier(
    layers=[Layer("Sigmoid", units=100),
            Layer("Softmax")],
    learning_rate=0.00018,  #valid_set = ((X_valid, y_valid))
    n_iter=1000)
print "Neural network specifications:"
print nn

nn.fit(trainingSet, trainingSetLabels)

score1 = nn.score(trainingSet, trainingSetLabels)

score3 = nn.score(testingSet, testingSetLabels)

print "Training accuracy = ", score1

print "Testing accuracy = ", score3
Example #21
    learningRateAE = set[2]
    learningRateCL = set[3]
    nCyclesAE = set[4]
    nCyclesCL = set[5]
    outputFileNameAE = 'trained_fine/ae_' + str(hiddenUnitsAE) + '_' + str(
        learningRateAE) + '_' + str(nCyclesAE) + '.pkl'
    outputFileNameCL = 'trained_fine/cl_' + str(hiddenUnitsCL) + '_' + str(
        learningRateCL) + '_' + str(nCyclesCL) + '.pkl'

    # AUTOENCODER
    if runAE:
        nn = Regressor(
            layers=[Layer("Rectifier", units=hiddenUnitsAE),
                    Layer("Linear")],
            learning_rate=learningRateAE,
            n_iter=nCyclesAE)
        nn.fit(X_train_background, Y_autoencoder)
        pickle.dump(nn, open(outputFileNameAE, 'wb'))

    # CLASSIFIER
    if runCL:
        nn = Classifier(
            layers=[Layer("Rectifier", units=hiddenUnitsCL),
                    Layer("Softmax")],
            learning_rate=learningRateCL,
            n_iter=nCyclesCL)
        nn.fit(X_train, Y)
        pickle.dump(nn, open(outputFileNameCL, 'wb'))

    counter = counter + 1
Example #22
    print(X.shape)
    print(Y.shape)
    print('______')
    train(nn, X, Y, './senti_model', 20, 5)


#_________MAIN___________#
g = load_ds(glove_path)
print('GloVe loaded...')

nn = Classifier(
    layers=[
        #Convolution('Rectifier',channels=1,kernel_shape=(3,embed_dim)),
        Layer('Rectifier', units=96),
        Layer('Rectifier', units=128),
        Layer('Rectifier', units=256),
        Layer('Rectifier', units=128),
        Layer('Rectifier', units=96),
        Layer('Softmax')
    ],
    learning_rate=0.001,
    verbose=True)

train_csv(g, nn, '/mnt/share/Senti_csv/Sentiment Analysis Dataset.csv', 100000)

#_______TESTING_________#
nn = pickle.load(open('./senti_model', 'rb'))  # binary mode for pickle
while True:
    sent = input('Enter text:')
    if len(sent) > 0:
        if sent == 'quit':
            break
Example #23
def plotation(clf_list):
    nlines = len(clf_list)
    plt.figure(figsize=(20, 10 * nlines))
    cm = plt.cm.RdBu
    cm_bright = ListedColormap(['#FF0000', '#0000FF'])
    X_train_plot = np.transpose([np.transpose(X_train)[i] for i in (4, 5)])
    Nlvl = 5
    c = 1
    mlp_Reg_type = type(
        Regressor(layers=[Layer("Rectifier", name="hiddenN")],
                  learning_rate=0.02,
                  n_iter=10))
    mlp_Cla_type = type(
        Classifier(layers=[Layer("Rectifier", name="hiddenN")],
                   learning_rate=0.02,
                   n_iter=10))
    robust_scaler = False
    for _, clf in enumerate(clf_list):
        if hasattr(clf, "predict_proba"):
            print("Classifieur")
            if type(clf) == mlp_Cla_type:
                robust_scaler = sklearn.preprocessing.RobustScaler()
                X_train_plot_scaled = robust_scaler.fit_transform(X_train_plot)
                clfY = clf.fit(X_train_plot_scaled, into_levels(Y_train, Nlvl))
                clfZ = clf.fit(X_train_plot_scaled, into_levels(Z_train, Nlvl))
            else:
                clfY = clf.fit(X_train_plot, into_levels(Y_train, Nlvl))
                clfZ = clf.fit(X_train_plot, into_levels(Z_train, Nlvl))
        else:
            print("Regresseur")
            if type(clf) == mlp_Reg_type:
                robust_scaler = sklearn.preprocessing.RobustScaler()
                X_train_plot_scaled = robust_scaler.fit_transform(X_train_plot)
                clfY = clf.fit(X_train_plot_scaled, Y_train)
                clfZ = clf.fit(X_train_plot_scaled, Z_train)
            else:
                clfY = clf.fit(X_train_plot, Y_train)
                clfZ = clf.fit(X_train_plot, Z_train)
        for _, clfdata in enumerate([clfY, clfZ]):
            axes = plt.subplot(nlines, 2, c)
            m = Basemap(llcrnrlon=x_min,
                        llcrnrlat=y_min,
                        urcrnrlon=x_max,
                        urcrnrlat=y_max,
                        resolution='i',
                        projection='cass',
                        lon_0=-74.00597,
                        lat_0=40.71427,
                        ax=axes)
            m.drawcoastlines()
            lons, lats = m.makegrid(100, 100)
            x, y = m(lons, lats)
            Z = np.zeros((100, 100))
            for l in range(100):
                for p in range(100):
                    LP = np.array([lons[l][p], lats[l][p]])
                    LP = np.array([LP])
                    if robust_scaler != False:
                        LP = robust_scaler.transform(LP)
                    Z[l][p] = clfdata.predict(LP)
            diff = np.max(Z) - np.min(Z)
            cs = m.contourf(
                x,
                y,
                Z, [np.min(Z) + diff * i / Nlvl for i in range(0, Nlvl + 1)],
                cmap=cm,
                alpha=.8)
            m.colorbar(cs, location='bottom', pad="5%")
            c += 1
        robust_scaler = False
Example #24
    percent = score / len(actual)
    print("Accuracy is: ", percent)


if __name__ == "__main__":
    print(
        "========================================================================"
    )
    trainingData = "G:/hackPrinceton/CHAI/data/chai/training/7.xlsx"
    data = nlpFile.getFeatures(trainingData, "Laughter")
    j = refineData(data)
    # print(j.xtrain)
    # print(j.ytrain)
    pipeline = Pipeline([
        ('min/max scaler', MinMaxScaler(feature_range=(0.0, 1.0))),
        ('neural network', Classifier(layers=[Layer("Softmax")], n_iter=25))
    ])
    pipeline.fit(np.asarray(j.get('0')), np.asarray(j.get('1')))
    nn = Classifier(
        layers=[Layer("Maxout", units=100, pieces=2),
                Layer("Softmax")],
        learning_rate=0.001,
        n_iter=25)
    nn.fit(j.get('0'), j.get('1'))
    test = "G:/hackPrinceton/CHAI/data/chai/training/7.xlsx"
    testData = nlpFile.getFeatures(test, "Laughter")
    t = refineData(testData)
    y_actual = t.get('1')
    x_test = t.get('0')
    y_calculated = nn.predict(x_test)
    # print(y_actual)
Example #25
    for line in lines:
        int_line = [float(x) for x in line]
        TrainData.append(int_line)

TrainData = np.array(TrainData)

with open(test_file_name) as textFile:
    lines = [line.split() for line in textFile]
    for line in lines:
        int_line = [float(x) for x in line]
        TestData.append(int_line)

TestData = np.array(TestData)

nn = Classifier(layers=[Layer("Sigmoid", units=100),
                        Layer("Softmax")],
                learning_rate=0.02,
                n_iter=10)

X_train = TrainData[:, 1:]
y_train = TrainData[:, 0]

new_y_train = np.zeros((len(X_train), len(Classes)))

for i in range(0, len(TrainData)):
    new_y_train[i, int(y_train[i])] = 1

# fit on the integer labels directly (the one-hot new_y_train above is unused)
nn.fit(X_train, y_train)

#y_valid = nn.predict(TestData[:,1:])

X_test = TestData[:, 1:]
Example #26
if sample:
    classifiers = [
        KNeighborsClassifier(n_neighbors=100, weights='uniform', algorithm='auto', leaf_size=100, p=10, metric='minkowski'),
        RandomForestClassifier(n_estimators=100,verbose=True),
        GradientBoostingClassifier(n_estimators=10, learning_rate=1.0,max_depth=5, random_state=0),
        AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=20),
                         algorithm="SAMME.R",
                         n_estimators=10),
        Classifier(
            layers=[
                Layer("Tanh", units=200),
                Layer("Sigmoid", units=200),
                Layer('Rectifier', units=200),
                Layer('Softmax')],
            learning_rate=0.01,
            learning_rule='momentum',
            learning_momentum=0.9,
            batch_size=1000,
            valid_size=0.01,
            n_stable=100,
            n_iter=100,
            verbose=True)
    ]
else:
    classifiers = [  # Other methods underperform and take very long to train on this data set
        AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=20),
                     algorithm="SAMME.R",
                     n_estimators=10),
        Classifier(
            layers=[
                Layer("Tanh", units=200),
Example #27
		dropout_rate =("real", [0,0.5],    0)
		)

# We create the optimizer object
opt = pysmac.SMAC_optimizer(working_directory='./results/dataset5/smac/', persistent_files=True, debug=False)

# First we try an MLP with a default configuration, so we can see if SMAC can improve its performance
scores = []
for i in np.arange(n_validations):

	X_train, X_test, Y_train, Y_test = sklearn.cross_validation.train_test_split(X,Y, test_size=0.3, random_state=1)

	predictor = Classifier(
	    layers=[
	    Layer("Sigmoid", units=100, dropout = 0),
	    Layer("Sigmoid", units=100, dropout = 0),
	    Layer("Softmax",  units=2)],
	    learning_rate=0.001,
	    n_iter=25)
	predictor.fit(X_train, Y_train)
	scores.append(metrics.accuracy_score(Y_test, predictor.predict(X_test)))

print('The default accuracy is %f' % median(scores))

# We set some parameters for the optimizer
value, parameters = opt.minimize(mlp,
					n_iter, parameter_definition,	# number of evaluations
					num_runs = 2,					# number of independent SMAC runs
					seed = 2,						# random seed
					num_procs = 2,					# two cores
					mem_limit_function_mb=1000,		# Memory limit
Example #28
    19000:28000]
mini_dev_data, mini_dev_labels = X_final[49000:60000], y_final[49000:60000]

param_grid = {
    'learning_rate': [0.05, 0.01, 0.005, 0.001],
    'n_iter': [25, 50, 100, 200],
    'hidden0__units': [4, 8, 12, 16, 20],
    'hidden0__type': ["Rectifier", "Sigmoid", "Tanh"],
    'hidden0__dropout': [0.2, 0.3, 0.4],
    'hidden1__units': [4, 8, 12, 16, 20],
    'hidden1__type': ["Rectifier", "Sigmoid", "Tanh"],
    'hidden1__dropout': [0.2, 0.3, 0.4],
    'hidden2__units': [4, 8, 12, 16, 20],
    'hidden2__type': ["Rectifier", "Sigmoid", "Tanh"],
    'hidden2__dropout': [0.2, 0.3, 0.4]
}

nn = Classifier(layers=[
    Layer("Sigmoid", units=20),
    Layer("Sigmoid", units=20),
    Layer("Sigmoid", units=20),
    Layer("Softmax")
])

gs = GridSearchCV(nn, param_grid)
start = time()
gs.fit(mini_train_data, mini_train_labels)
print("GridSearchCV took {:.2f} seconds for {:d} candidate settings.".format(
    time() - start, len(gs.grid_scores_)))
report(gs.grid_scores_)

Example #29
from sknn.mlp import Classifier, Layer

valid_errors = []
train_errors = []
def store_stats(avg_valid_error, avg_train_error, **_):
    valid_errors.append(avg_valid_error)
    train_errors.append(avg_train_error)

from sklearn.model_selection import GridSearchCV


nn = Classifier(
    layers=[
        Layer('Sigmoid', dropout=0.20),
        Layer("Softmax")],
    valid_size=0.2,
    callback={'on_epoch_finish': store_stats})

gs = GridSearchCV(nn, param_grid={
    'n_iter': [100,500,1000],
    'learning_rate': [0.01, 0.001],
    'hidden0__units': [10, 20, 5],
    'hidden0__type': ["Rectifier", "Sigmoid", "Tanh"]},refit=True)

gs.fit(X_train,y_train)
print(gs.best_estimator_)

plt.figure()
plt.plot(range(len(train_errors)),train_errors,color="b",label="training scores")
plt.plot(range(len(valid_errors)),valid_errors,color="r",label="validation scores")
Example #30
# data conversion and normalization
mydata = mydata.replace(['yes', 'no'], [1, 0])


# taking the class variable in another column
y = mydata['y']
del mydata['y']
mynewdata = preprocessing.normalize(mydata)

# creating a model and splitting data set into training and testing
DefaultTrain, DefaultValidation, y_train, y_test = train_test_split(mynewdata, y, test_size=0.2, random_state=42)

nn = Classifier(layers=[
        Layer("Rectifier", units=100),
        Layer("Softmax")],
    learning_rate=0.003,
    n_iter=25)
nn.fit(DefaultTrain, y_train)
y_valid = nn.predict(DefaultValidation)
print('Accuracy: ', nn.score(DefaultValidation, y_test))
print(confusion_matrix(y_test, y_valid))
fpr, tpr, thresholds = metrics.roc_curve(y_test, y_valid, pos_label=1)
roc_auc = auc(fpr, tpr)
plt.figure()
plt.plot(fpr, tpr, color='darkorange',
         lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')