示例#1
0
def SVM(X_train, y_train, X_test, y_test):
    """Fit an RBF-kernel SVM on min-max scaled features and evaluate it.

    The scaler is fitted on the training features only and the *same*
    fitted scaling is then applied to the test features, so both sets live
    in the feature space the model was trained on.

    Prints the training time (which includes the scaling step, since the
    timer starts before it), the prediction time, the accuracy and the
    recall on the test set.

    Args:
        X_train: training feature matrix.
        y_train: training labels.
        X_test: test feature matrix.
        y_test: test labels.

    Returns:
        A ``(model, score)`` tuple: the fitted ``SVC`` and its accuracy on
        the scaled test set.

    Note:
        The fitted scaler is not returned; callers that reuse ``model`` on
        new data must apply an equivalent scaling themselves.
    """
    t0 = time()
    # normalize: learn min/max from the training data only ...
    min_max_scaler = MinMaxScaler()
    X_train = min_max_scaler.fit_transform(X_train)
    # ... and reuse those statistics for the test data. Re-fitting on the
    # test set would scale it differently from what the model was trained on.
    X_test = min_max_scaler.transform(X_test)

    model = SVC(kernel="rbf")
    model.fit(X_train, y_train)
    print("training time:", round(time() - t0, 3), "s")

    # make predictions
    t0 = time()
    expected = y_test
    predicted = model.predict(X_test)
    print("predicting time:", round(time() - t0, 3), "s")

    # summarize the fit of the model
    score = metrics.accuracy_score(expected, predicted)
    print(score)
    # NOTE(review): recall_score is called with its default averaging --
    # confirm the labels are binary, otherwise pass an explicit `average`.
    print(metrics.recall_score(expected, predicted))

    return model, score
示例#2
0
def DTree(X_train, y_train, X_test, y_test):
    """Fit a decision tree classifier and report how it does on the test set.

    The tree is built with ``min_samples_split=40``. Training time,
    prediction time, accuracy and recall are printed, in that order.

    Args:
        X_train: training feature matrix.
        y_train: training labels.
        X_test: test feature matrix.
        y_test: test labels.

    Returns:
        A ``(model, score)`` tuple: the fitted classifier and its accuracy
        on the test set.
    """
    classifier = tree.DecisionTreeClassifier(min_samples_split=40)

    # Time the fit on its own (classifier construction is excluded).
    fit_started = time()
    classifier.fit(X_train, y_train)
    fit_elapsed = round(time() - fit_started, 3)
    print("training time:", fit_elapsed, "s")

    # Time the prediction pass over the test features.
    predict_started = time()
    y_hat = classifier.predict(X_test)
    predict_elapsed = round(time() - predict_started, 3)
    print("predicting time:", predict_elapsed, "s")

    # Report accuracy first, then recall, comparing truth against predictions.
    accuracy = metrics.accuracy_score(y_test, y_hat)
    print(accuracy)
    recall = metrics.recall_score(y_test, y_hat)
    print(recall)

    return classifier, accuracy
def SVM(X_train, y_train, X_test, y_test):
    """Fit a linear-kernel SVM and report how it does on the test set.

    Features are used as given (no scaling is applied here). Training time,
    prediction time, accuracy and recall are printed, in that order.

    Args:
        X_train: training feature matrix.
        y_train: training labels.
        X_test: test feature matrix.
        y_test: test labels.

    Returns:
        A ``(model, score)`` tuple: the fitted ``SVC`` and its accuracy on
        the test set.
    """
    # The training timer covers both construction and fitting of the model.
    fit_started = time()
    classifier = SVC(kernel="linear")
    classifier.fit(X_train, y_train)
    fit_elapsed = round(time() - fit_started, 3)
    print("training time:", fit_elapsed, "s")

    # Time the prediction pass over the test features.
    predict_started = time()
    y_hat = classifier.predict(X_test)
    predict_elapsed = round(time() - predict_started, 3)
    print("predicting time:", predict_elapsed, "s")

    # Report accuracy first, then recall, comparing truth against predictions.
    accuracy = metrics.accuracy_score(y_test, y_hat)
    print(accuracy)
    recall = metrics.recall_score(y_test, y_hat)
    print(recall)

    return classifier, accuracy
示例#4
0
        for j, word in enumerate(w for w in re.split(split_re, tweet) if w in word_num):
            if j >= maxlen:
                break
            X[i, j] = word_num[word]
    return X, df.pol

# Input: one row of `maxlen` int32 word indices per tweet.
tweet = layers.Input(shape=(maxlen,), dtype='int32')

# Embedding table initialised from `word_matrix` (one row per vocabulary
# entry, 200 output dimensions) and frozen via trainable=False.
embedding_layer = layers.Embedding(
    input_dim=word_matrix.shape[0],
    output_dim=200,
    input_length=maxlen,
    weights=[word_matrix],
    trainable=False
)
embedded = embedding_layer(tweet)
embedded_normalized = layers.BatchNormalization()(embedded)

# Bidirectional LSTM with 150 units per direction and dropout on both the
# inputs and the recurrent connections.
recurrent_layer = layers.LSTM(150, dropout=.2, recurrent_dropout=.2)
lstm = layers.Bidirectional(recurrent_layer)(embedded_normalized)

# Batch-normalise the LSTM output, then apply 50% dropout.
lstm_normalized = layers.BatchNormalization()(lstm)
lstm_dropout = layers.Dropout(.5)(lstm_normalized)

# Single sigmoid unit with an L2 penalty on its kernel.
output_layer = layers.Dense(
    1,
    activation='sigmoid',
    kernel_regularizer=regularizers.l2(1e-3)
)
result = output_layer(lstm_dropout)

model = models.Model(tweet, result)
model.compile(
    optimizer=optimizers.Adam(lr=1e-5),
    loss=losses.binary_crossentropy,
    metrics=['accuracy']
)

# Script entry: either train the model on `args.train` (optionally saving the
# weights to `args.save`) or restore previously saved weights from
# `args.model`, then evaluate on `args.test`.
# NOTE(review): `args` is presumably an argparse namespace defined earlier in
# the file -- confirm that `args.model` is required when `args.train` is unset.
#
# print(...) with a single argument is used throughout: it emits exactly the
# same text under Python 2 and is also valid Python 3 syntax.
if args.train:
    print('processing data')
    X_train, y_train = df_to_matrix(prepare_dataframe(args.train))
    print('data processed')
    model.fit(X_train, y_train, batch_size=256)
    if args.save is not None:
        model.save_weights(args.save)
else:
    model.load_weights(args.model)
print('received a model')

# Evaluate on the held-out set.
X_test, y_test = df_to_matrix(prepare_dataframe(args.test))
y_pred = model.predict(X_test, batch_size=256)
# Accuracy is computed on predictions thresholded at 0.5; ROC-AUC uses the
# raw model outputs.
print('accuracy {}'.format(metrics.accuracy_score(y_test, y_pred > .5)))
print('ROC-AUC {}'.format(metrics.roc_auc_score(y_test, y_pred)))