def SVM(X_train,y_train,X_test,y_test):
    """Fit an RBF-kernel SVM on min-max-scaled features and evaluate it.

    The scaler is fitted on the training features only and then applied
    unchanged to the test features, so both sets share the same scale.

    Prints the training time (which includes the scaling step), the
    prediction time, the accuracy and the recall on the test set.

    NOTE(review): another function named ``SVM`` (linear kernel, no
    scaling) appears later in this file and would rebind this name --
    confirm which one callers are meant to get.

    Args:
        X_train: training feature matrix.
        y_train: training labels.
        X_test: test feature matrix.
        y_test: test labels.

    Returns:
        A ``(model, score)`` tuple: the fitted ``SVC`` and its accuracy
        on the test set.
    """
    t0 = time()

    # normalize: learn the min/max from the training data only ...
    min_max_scaler = MinMaxScaler()
    X_train = min_max_scaler.fit_transform(X_train)
    # ... and reuse those statistics for the test data. Re-fitting here
    # (fit_transform) would scale the test set by its own min/max and feed
    # the model features on a different scale than it was trained on.
    X_test = min_max_scaler.transform(X_test)

    model = SVC(kernel = "rbf")
    model.fit(X_train, y_train)
    print ("training time:", round(time()-t0, 3), "s")

    # make predictions
    t0 = time()
    expected = y_test
    predicted = model.predict(X_test)
    print ("predicting time:", round(time()-t0, 3), "s")

    # summarize the fit of the model
    score = metrics.accuracy_score(expected, predicted)
    print(score)
    # recall_score is called with its default arguments; presumably the
    # labels are binary -- TODO confirm.
    print(metrics.recall_score(expected,predicted))
    return model, score
def DTree(X_train, y_train, X_test, y_test):
    """Train a decision tree classifier and report its test-set metrics.

    The tree is built with ``min_samples_split=40``. Training time,
    prediction time, accuracy and recall are printed in that order.

    Args:
        X_train: training feature matrix.
        y_train: training labels.
        X_test: test feature matrix.
        y_test: test labels.

    Returns:
        A ``(model, score)`` tuple: the fitted classifier and its accuracy
        on the test set.
    """
    model = tree.DecisionTreeClassifier(min_samples_split=40)

    # Training phase, timed.
    fit_started = time()
    model.fit(X_train, y_train)
    fit_elapsed = round(time() - fit_started, 3)
    print("training time:", fit_elapsed, "s")

    # Prediction phase, timed.
    predict_started = time()
    y_pred = model.predict(X_test)
    predict_elapsed = round(time() - predict_started, 3)
    print("predicting time:", predict_elapsed, "s")

    # Report accuracy first, then recall.
    score = metrics.accuracy_score(y_test, y_pred)
    print(score)
    recall = metrics.recall_score(y_test, y_pred)
    print(recall)

    return model, score
def SVM(X_train, y_train, X_test, y_test):
    """Train a linear-kernel SVM and report its test-set metrics.

    The features are used as given (no scaling is applied here).
    Training time, prediction time, accuracy and recall are printed in
    that order.

    Args:
        X_train: training feature matrix.
        y_train: training labels.
        X_test: test feature matrix.
        y_test: test labels.

    Returns:
        A ``(model, score)`` tuple: the fitted ``SVC`` and its accuracy
        on the test set.
    """
    # Training phase; the timer starts before the estimator is created.
    fit_started = time()
    model = SVC(kernel="linear")
    model.fit(X_train, y_train)
    fit_elapsed = round(time() - fit_started, 3)
    print("training time:", fit_elapsed, "s")

    # Prediction phase, timed.
    predict_started = time()
    y_pred = model.predict(X_test)
    predict_elapsed = round(time() - predict_started, 3)
    print("predicting time:", predict_elapsed, "s")

    # Report accuracy first, then recall.
    score = metrics.accuracy_score(y_test, y_pred)
    print(score)
    recall = metrics.recall_score(y_test, y_pred)
    print(recall)

    return model, score
for j, word in enumerate(w for w in re.split(split_re, tweet) if w in word_num): if j >= maxlen: break X[i, j] = word_num[word] return X, df.pol tweet = layers.Input((maxlen,), dtype='int32') embedded = layers.Embedding(word_matrix.shape[0], 200, input_length=maxlen, weights=[word_matrix], trainable=False)(tweet) embedded_normalized = layers.BatchNormalization()(embedded) lstm = layers.Bidirectional(layers.LSTM(150, dropout=.2, recurrent_dropout=.2))(embedded_normalized) lstm_dropout = layers.Dropout(.5)(layers.BatchNormalization()(lstm)) result = layers.Dense(1, activation='sigmoid', kernel_regularizer=regularizers.l2(1e-3))(lstm_dropout) model = models.Model(tweet, result) model.compile(optimizer=optimizers.Adam(lr=1e-5), loss=losses.binary_crossentropy, metrics=['accuracy']) if args.train: print 'processing data' X_train, y_train = df_to_matrix(prepare_dataframe(args.train)) print 'data processed' model.fit(X_train, y_train, batch_size=256) if args.save is not None: model.save_weights(args.save) else: model.load_weights(args.model) print 'received a model' X_test, y_test = df_to_matrix(prepare_dataframe(args.test)) y_pred = model.predict(X_test, batch_size=256) print 'accuracy {}'.format(metrics.accuracy_score(y_test, y_pred > .5)) print 'ROC-AUC {}'.format(metrics.roc_auc_score(y_test, y_pred))