# fit_transform does two things: first, it fits the model
# and learns the vocabulary; second, it transforms our training data
# into feature vectors. The input to fit_transform should be a list of
# strings.
train_data_features = vectorizer.fit_transform(clean_train_LAPD)
test_data_features = vectorizer.transform(clean_test_LAPD)

# NOTE(review): the original called np.asarray(train_data_features) /
# np.asarray(test_data_features) and discarded the results — no-ops removed.
# The dense conversion actually used is the explicit .toarray() below.

###################
# TRAIN THE MODEL #
###################
# The DBN classifier expects a dense array, not the vectorizer's sparse matrix.
classifier.fit(train_data_features.toarray(), train["Problematic"])

########################################################################################################
# EVALUATE THE MODEL
########################################################################################################
# BUG FIX: the original predicted on `testDataVecs`, which is never defined in
# this section (NameError); the test features built above are
# `test_data_features`. Convert to dense once and reuse.
test_features_dense = test_data_features.toarray()
Y_pred = classifier.predict(test_features_dense)          # hard class labels
Y_p = classifier.predict_proba(test_features_dense)       # class probabilities (array)
Y_n = classifier.predict_proba_dict(test_features_dense)  # class probabilities (dicts)

print(Y_n)
print(Y_p)  # original printed Y_p twice — deduplicated
print(Y_pred)
print(test["Problematic"])
# print('Done.\nAccuracy: %f' % accuracy_score(test["Problematic"], Y_pred))
# res = [[Y_p[0, 0], Y_p[0, 1], Y_pred, test["Problematic"]]]
# writer.writerows(res)
# NOTE(review): the original called np.asarray(train_data_features) /
# np.asarray(test_data_features) and discarded the results — no-ops removed.
# The dense conversion actually used is the explicit .toarray() below.

# Training: a deep belief network — three stacked RBMs (500/250/100 units)
# pre-trained layer-wise, then fine-tuned with backprop.
classifier = SupervisedDBNClassification(
    hidden_layers_structure=[500, 250, 100],
    learning_rate_rbm=0.1,
    learning_rate=0.0001,
    n_epochs_rbm=50,
    n_iter_backprop=500,
    batch_size=16,
    activation_function='sigmoid',
    dropout_p=0)
classifier.fit(train_data_features.toarray(), train["Problematic"])

# Test: convert the sparse test features to dense ONCE and reuse — the
# original rebuilt the dense array for each of the three calls below.
test_features_dense = test_data_features.toarray()
Y_pred = classifier.predict(test_features_dense)          # hard class labels
Y_p = classifier.predict_proba(test_features_dense)       # class probabilities (array)
Y_n = classifier.predict_proba_dict(test_features_dense)  # class probabilities (dicts)

print(Y_n)
print(Y_p)  # original printed Y_p twice — deduplicated
print(Y_pred)
print(test["Problematic"])
print('Done.\nAccuracy: %f' % accuracy_score(test["Problematic"], Y_pred))
# res = [[Y_p[0, 0], Y_p[0, 1], Y_pred, test["Problematic"]]]
# writer.writerows(res)
# Only evaluate when the input file is non-empty.
if os.stat(filename).st_size != 0:
    # Map each tokenized sentence (minus its final token) to vocabulary indices.
    # NOTE(review): word_to_index[w] raises KeyError for out-of-vocabulary
    # words — assumes the tokenizer already replaced unknowns; confirm.
    X_Test = np.asarray([[word_to_index[w] for w in sent[:-1]]
                         for sent in tokenized_sentences])
    # Every test example carries label 1. The original wrapped this in
    # np.transpose(), which is a no-op on a 1-D array — removed.
    y_Test = np.asarray([1] * len(list(sentences)))

    # Truncate and pad input sequences to a uniform length.
    X_Train = sequence.pad_sequences(X_Train, maxlen=max_review_length)
    X_Test = sequence.pad_sequences(X_Test, maxlen=max_review_length)

    # Training: a deep belief network — three stacked RBMs (500/250/100 units)
    # pre-trained layer-wise, then fine-tuned with backprop.
    classifier = SupervisedDBNClassification(
        hidden_layers_structure=[500, 250, 100],
        learning_rate_rbm=0.1,
        learning_rate=0.0001,
        n_epochs_rbm=50,
        n_iter_backprop=500,
        batch_size=16,
        activation_function='sigmoid',
        dropout_p=0.25)
    classifier.fit(X_Train, y_Train)

    # Test: predict labels and class probabilities on the padded test set.
    Y_pred = classifier.predict(X_Test)          # hard class labels
    Y_p = classifier.predict_proba(X_Test)       # class probabilities (array)
    Y_n = classifier.predict_proba_dict(X_Test)  # class probabilities (dicts)

    print(Y_n)
    print(Y_p)  # original printed Y_p twice — deduplicated
    print(Y_pred)
    print(y_Test)
    print('Done.\nAccuracy: %f' % accuracy_score(y_Test, Y_pred))

    # Persist the first example's class probabilities alongside the
    # predictions and ground truth.
    res = [[Y_p[0, 0], Y_p[0, 1], Y_pred, y_Test]]
    writer.writerows(res)