def main():
    """Train an SVM on precomputed feature matrices and report validation metrics.

    Feature extraction (extractFeatures over the train/dev CSVs) was run once
    and its outputs cached as .npy files; this entry point only consumes the
    cache, trains, and prints validation accuracy plus per-class metrics.
    """
    # Load the cached feature/label arrays produced by the one-off
    # extraction step (np.save of extractFeatures results).
    train_x = np.load('./bin/exp2/train_x.npz.npy')
    train_y = np.load('./bin/exp2/train_y.npz.npy')
    valid_x = np.load('./bin/exp2/val_x.npz.npy')
    valid_y = np.load('./bin/exp2/val_y.npz.npy')

    # L2-regularized squared-hinge SVM; hyperparameters kept from the
    # original experiment (C=0.85, 2000 iterations).
    model = SVM(penalty='l2', loss='squared_hinge', C=0.85, maxIter=2000)

    print("SVM: Training get started.")
    model.train(train_x, train_y)

    print("SVM: Validation get started.")
    accuracy, metrics = model.valid(valid_x, valid_y, classNum=3)
    print(accuracy)
    print(metrics)
def main(arguments):
    """Train a binary SVM on the breast-cancer dataset and print test metrics.

    Args:
        arguments: parsed CLI namespace providing svm_c, num_epochs,
            log_path, and result_path.
    """
    dataset = datasets.load_breast_cancer()

    # Standardize features to zero mean / unit variance.
    features = StandardScaler().fit_transform(dataset.data)
    num_features = features.shape[1]

    # Recode labels from {0, 1} to {-1, +1} as the hinge-loss SVM expects.
    labels = dataset.target
    labels[labels == 0] = -1

    # Stratified 70/30 train/test split.
    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels, test_size=0.3, stratify=labels)

    def _trim(array, size):
        # Drop the ragged tail so the partition is a whole number of batches.
        return array[:size - (size % BATCH_SIZE)]

    train_size = train_features.shape[0]
    test_size = test_features.shape[0]
    train_features = _trim(train_features, train_size)
    train_labels = _trim(train_labels, train_size)
    test_features = _trim(test_features, test_size)
    test_labels = _trim(test_labels, test_size)

    # Build and fit the model; the test partition doubles as validation data.
    model = SVM(
        alpha=LEARNING_RATE,
        batch_size=BATCH_SIZE,
        svm_c=arguments.svm_c,
        num_classes=NUM_CLASSES,
        num_features=num_features,
    )
    model.train(
        epochs=arguments.num_epochs,
        log_path=arguments.log_path,
        train_data=[train_features, train_labels],
        train_size=train_features.shape[0],
        validation_data=[test_features, test_labels],
        validation_size=test_features.shape[0],
        result_path=arguments.result_path,
    )

    # Summarize test performance from the saved confusion matrix.
    test_conf, test_accuracy = utils.plot_confusion_matrix(
        phase="testing", path=arguments.result_path,
        class_names=["benign", "malignant"])
    print("True negatives : {}".format(test_conf[0][0]))
    print("False negatives : {}".format(test_conf[1][0]))
    print("True positives : {}".format(test_conf[1][1]))
    print("False positives : {}".format(test_conf[0][1]))
    print("Testing accuracy : {}".format(test_accuracy))
def train_svm():
    """Build an SVM from the corpus splits, then print verification and test accuracy."""
    helper = DataHelper()

    # Train / verification / test texts and labels come from one call.
    (train_text, train_labels,
     ver_text, ver_labels,
     test_text, test_labels) = helper.get_data_and_labels()

    classifier = SVM(train_text, train_labels, ver_text, ver_labels,
                     test_text, test_labels, helper.get_stopwords())
    classifier.train()

    classifier.verification()
    print('ver_acc: {:.3}'.format(classifier.ver_acc))

    classifier.test()
    print('test_acc: {:.3}'.format(classifier.test_acc))
def main(arguments):
    """Train a binary SVM classifier on the Wisconsin breast-cancer data.

    Args:
        arguments: parsed CLI namespace providing svm_c, num_epochs,
            log_path, and result_path.
    """
    # Standardized feature matrix (zero mean, unit variance per feature).
    features = StandardScaler().fit_transform(datasets.load_breast_cancer().data)
    num_features = features.shape[1]

    # Labels recoded from {0, 1} to {-1, +1} for hinge-loss training.
    labels = datasets.load_breast_cancer().target
    labels[labels == 0] = -1

    # 70% train / 30% test, stratified on the label distribution.
    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels, test_size=0.3, stratify=labels)

    # Truncate each partition to a multiple of BATCH_SIZE.
    n_train = train_features.shape[0] - train_features.shape[0] % BATCH_SIZE
    n_test = test_features.shape[0] - test_features.shape[0] % BATCH_SIZE
    train_features, train_labels = train_features[:n_train], train_labels[:n_train]
    test_features, test_labels = test_features[:n_test], test_labels[:n_test]

    # Fit the model; the held-out test partition is also used for validation.
    model = SVM(alpha=LEARNING_RATE, batch_size=BATCH_SIZE,
                svm_c=arguments.svm_c, num_classes=NUM_CLASSES,
                num_features=num_features)
    model.train(epochs=arguments.num_epochs,
                log_path=arguments.log_path,
                train_data=[train_features, train_labels],
                train_size=train_features.shape[0],
                validation_data=[test_features, test_labels],
                validation_size=test_features.shape[0],
                result_path=arguments.result_path)

    # Report test performance from the saved confusion matrix.
    test_conf, test_accuracy = utils.plot_confusion_matrix(
        phase='testing', path=arguments.result_path,
        class_names=['benign', 'malignant'])
    print('True negatives : {}'.format(test_conf[0][0]))
    print('False negatives : {}'.format(test_conf[1][0]))
    print('True positives : {}'.format(test_conf[1][1]))
    print('False positives : {}'.format(test_conf[0][1]))
    print('Testing accuracy : {}'.format(test_accuracy))
# NOTE(review): `learning_rates` and `hm` are defined earlier in the file
# (outside this chunk). `hm` appears to be a shared report/evaluate harness
# for the Part II models — confirm against the preceding code.

# Perceptron with a decaying learning-rate schedule; reports on itself.
dp = DecayingPerceptron()
dp.train(learning_rates)
dp.report()
dp.evaluate()

# Averaged perceptron, same protocol as above.
ap = AveragedPerceptron()
ap.train(learning_rates)
ap.report()
ap.evaluate()

############################################
######              Part II      ###########
############################################

# Linear SVM trained for 20 epochs; unlike the perceptrons above,
# reporting/evaluation go through the `hm` harness.
svm = SVM(verbose=True)
svm.train(epochs=20)
hm.report(svm)
hm.evaluate(svm)

# Logistic regression with the same 20-epoch budget.
lr = LogisticRegression(verbose=True)
lr.train(epochs=20)
hm.report(lr)
hm.evaluate(lr)

# Naive Bayes: a single pass over the data suffices.
nb = NaiveBayes()
nb.train(epochs=1)
hm.report(nb)
hm.evaluate(nb)

# Logistic regression using sklearn
# NOTE(review): mid-file import — presumably used by code that follows
# this chunk; PEP 8 would put it at the top of the file.
import data as dt