def main():
    """Train a linear SVM on cached mango features and print validation results.

    Loads pre-extracted feature/label arrays from ``./bin/exp2`` (the
    extraction step that produced them was run once and then retired),
    fits the model, and prints validation accuracy plus per-class metrics.
    """
    # Features were cached to disk by an earlier extraction run; the paths
    # are fixed by that run's output layout.
    trainX = np.load('./bin/exp2/train_x.npz.npy')
    trainY = np.load('./bin/exp2/train_y.npz.npy')
    validX = np.load('./bin/exp2/val_x.npz.npy')
    validY = np.load('./bin/exp2/val_y.npz.npy')

    # L2-regularized squared-hinge SVM; C and maxIter were tuned by hand.
    model = SVM(penalty='l2', loss='squared_hinge', C=0.85, maxIter=2000)
    print("SVM: Training get started.")
    model.train(trainX, trainY)

    print("SVM: Validation get started.")
    # classNum=3: three mango grade classes in this dataset — TODO confirm
    acc, metrics = model.valid(validX, validY, classNum=3)
    print(acc)
    print(metrics)
def main(arguments):
    """Train an SVM classifier on the breast-cancer dataset and report test metrics.

    Args:
        arguments: parsed CLI namespace providing ``svm_c``, ``num_epochs``,
            ``log_path``, and ``result_path``.
    """
    dataset = datasets.load_breast_cancer()

    # Standardize the feature matrix and note its width.
    features = StandardScaler().fit_transform(dataset.data)
    num_features = features.shape[1]

    # Remap labels from {0, 1} to {-1, +1} for the hinge-loss formulation.
    labels = dataset.target
    labels[labels == 0] = -1

    # Stratified 70/30 train/test split.
    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels, test_size=0.3, stratify=labels)

    # Drop the ragged tail of each partition so every batch holds exactly
    # BATCH_SIZE rows.
    usable_train = train_features.shape[0] - (train_features.shape[0] % BATCH_SIZE)
    usable_test = test_features.shape[0] - (test_features.shape[0] % BATCH_SIZE)
    train_features = train_features[:usable_train]
    train_labels = train_labels[:usable_train]
    test_features = test_features[:usable_test]
    test_labels = test_labels[:usable_test]

    # Build and fit the model.
    model = SVM(
        alpha=LEARNING_RATE,
        batch_size=BATCH_SIZE,
        svm_c=arguments.svm_c,
        num_classes=NUM_CLASSES,
        num_features=num_features,
    )
    model.train(
        epochs=arguments.num_epochs,
        log_path=arguments.log_path,
        train_data=[train_features, train_labels],
        train_size=train_features.shape[0],
        validation_data=[test_features, test_labels],
        validation_size=test_features.shape[0],
        result_path=arguments.result_path,
    )

    # Summarize test performance from the saved confusion matrix.
    test_conf, test_accuracy = utils.plot_confusion_matrix(
        phase="testing",
        path=arguments.result_path,
        class_names=["benign", "malignant"])

    # Confusion-matrix layout: rows are actual classes, columns predicted.
    print("True negatives : {}".format(test_conf[0][0]))
    print("False negatives : {}".format(test_conf[1][0]))
    print("True positives : {}".format(test_conf[1][1]))
    print("False positives : {}".format(test_conf[0][1]))
    print("Testing accuracy : {}".format(test_accuracy))
# --- Example 3 (scrape artifact: original marker "示例#3", score 0) ---
def train_svm():
    """Train an SVM over DataHelper splits, then print verification and test accuracy."""
    helper = DataHelper()
    (train_text, train_labels,
     ver_text, ver_labels,
     test_text, test_labels) = helper.get_data_and_labels()

    classifier = SVM(train_text, train_labels, ver_text, ver_labels,
                     test_text, test_labels, helper.get_stopwords())
    classifier.train()

    # Accuracy attributes are populated as side effects of the calls below.
    classifier.verification()
    print('ver_acc: {:.3}'.format(classifier.ver_acc))
    classifier.test()
    print('test_acc: {:.3}'.format(classifier.test_acc))
def main(arguments):
    # load the features of the dataset
    features = datasets.load_breast_cancer().data

    # standardize the features
    features = StandardScaler().fit_transform(features)

    # get the number of features
    num_features = features.shape[1]

    # load the corresponding labels for the features
    labels = datasets.load_breast_cancer().target

    # transform the labels to {-1, +1}
    labels[labels == 0] = -1

    # split the dataset to 70/30 partition: 70% train, 30% test
    train_features, test_features, train_labels, test_labels = train_test_split(features, labels,
                                                                                test_size=0.3, stratify=labels)

    train_size = train_features.shape[0]
    test_size = test_features.shape[0]

    # slice the dataset as per the batch size
    train_features = train_features[:train_size - (train_size % BATCH_SIZE)]
    train_labels = train_labels[:train_size - (train_size % BATCH_SIZE)]
    test_features = test_features[:test_size - (test_size % BATCH_SIZE)]
    test_labels = test_labels[:test_size - (test_size % BATCH_SIZE)]

    # instantiate the SVM class
    model = SVM(alpha=LEARNING_RATE, batch_size=BATCH_SIZE, svm_c=arguments.svm_c, num_classes=NUM_CLASSES,
                num_features=num_features)

    # train the instantiated model
    model.train(epochs=arguments.num_epochs, log_path=arguments.log_path, train_data=[train_features, train_labels],
                train_size=train_features.shape[0], validation_data=[test_features, test_labels],
                validation_size=test_features.shape[0], result_path=arguments.result_path)

    test_conf, test_accuracy = utils.plot_confusion_matrix(phase='testing', path=arguments.result_path,
                                                           class_names=['benign', 'malignant'])

    print('True negatives : {}'.format(test_conf[0][0]))
    print('False negatives : {}'.format(test_conf[1][0]))
    print('True positives : {}'.format(test_conf[1][1]))
    print('False positives : {}'.format(test_conf[0][1]))
    print('Testing accuracy : {}'.format(test_accuracy))
    dp = DecayingPerceptron()
    dp.train(learning_rates)
    dp.report()
    dp.evaluate()

    ap = AveragedPerceptron()
    ap.train(learning_rates)
    ap.report()
    ap.evaluate()

    ############################################
    ###### Part II                   ###########
    ############################################

    svm = SVM(verbose=True)
    svm.train(epochs=20)
    hm.report(svm)
    hm.evaluate(svm)

    lr = LogisticRegression(verbose=True)
    lr.train(epochs=20)
    hm.report(lr)
    hm.evaluate(lr)

    nb = NaiveBayes()
    nb.train(epochs=1)
    hm.report(nb)
    hm.evaluate(nb)

    # Logistic regression using sklearn
    import data as dt