Exemplo n.º 1
0
def run_test(features, labels, train_classes, test_classes, train_index, test_index):
    """Fit the model's EVT threshold and write seen/unseen predictions to disk.

    Rows whose label is in ``train_classes`` form the "seen" pool, which is
    split by ``train_index`` / ``test_index``; rows whose label is in
    ``test_classes`` form the "unseen" pool. Output file names are built from
    ``sys.argv[1]``.

    Args:
        features: 2-D array-like, one row per sample.
        labels: 1-D array-like of class labels aligned with ``features``.
        train_classes: labels that make up the seen pool.
        test_classes: labels that make up the unseen pool.
        train_index: indices (into the seen pool) used to fit the threshold.
        test_index: indices (into the seen pool) used for seen predictions.

    Side effects:
        Reads ``test_results/trained_model_<argv1>_joint.m`` and writes
        ``test_results/<argv1>_joint_seen.pred`` plus one
        ``test_results/<argv1>_joint_unseen_<k>.pred`` per test class.
    """
    features = np.array(features)
    labels = np.array(labels)

    # Build each membership mask once and reuse it for rows and labels.
    seen_mask = np.isin(labels, train_classes)
    unseen_mask = np.isin(labels, test_classes)
    xtrain, ytrain = features[seen_mask, :], labels[seen_mask]
    xtest, ytest = features[unseen_mask, :], labels[unseen_mask]

    X_train, X_test = xtrain[train_index], xtrain[test_index]
    y_train = ytrain[train_index]

    run_id = sys.argv[1]

    model = sor.HierarchicalClassifierModel(input_size=X_train[0].size,
                                            num_classes=len(risk_class_files),
                                            learning_rate=1e-3,
                                            num_epochs=1000,
                                            batch_size=100,
                                            l1=0,
                                            l2=0,
                                            train_classes=train_classes)

    # NOTE(review): the unpickled model is bound to ``model_s`` but never used;
    # every prediction below comes from the freshly constructed ``model``.
    # This looks unintended (``model = pickle.load(...)``?) -- confirm with the
    # author before changing it. The load is kept so a missing model file
    # still fails here, as before.
    # NOTE: pickle can execute arbitrary code on load; only open model files
    # produced by a trusted training run.
    with open('test_results/trained_model_' + run_id + '_joint.m', 'rb') as model_file:
        model_s = pickle.load(model_file)  # noqa: F841 (see note above)

    model.evt_fit_threshold(X_train, y_train)

    y_pred = model.predict(X_test, 0)
    np.savetxt('test_results/' + run_id + '_joint_seen.pred', y_pred)

    # One prediction file per unseen class, numbered by position in test_classes.
    for classk, test_class in enumerate(test_classes):
        print('test class', test_class)
        xtest_ri = xtest[ytest == test_class]
        y_pred_ri = model.predict(xtest_ri, 0)
        np.savetxt('test_results/' + run_id + '_joint_unseen_' + str(classk) + '.pred', y_pred_ri)
Exemplo n.º 2
0
def run_test(features, labels, train_classes, test_classes, train_index,
             test_index):
    """Grid-search l1/l2, train the final model, and report seen/unseen counts.

    Rows whose label is in ``train_classes`` form the "seen" pool, which is
    split by ``train_index`` / ``test_index``; rows whose label is in
    ``test_classes`` form the "unseen" pool. Output file names are built from
    ``sys.argv[1]``.

    Args:
        features: 2-D array-like, one row per sample.
        labels: 1-D array-like of class labels aligned with ``features``.
        train_classes: labels that make up the seen pool.
        test_classes: labels that make up the unseen pool.
        train_index: indices (into the seen pool) used for training.
        test_index: indices (into the seen pool) used for evaluation.

    Side effects:
        Writes under ``test_results/``: ``<argv1>_loss.out``,
        ``<argv1>_grad_norm.out``, ``<argv1>_joint_seen.out``, one
        ``<argv1>_joint_unseen_<k>.out`` per test class, and the pickled
        model ``trained_model_<argv1>_joint.m``. Prints the best grid-search
        parameters and the final counters.
    """
    features = np.array(features)
    labels = np.array(labels)

    # Build each membership mask once and reuse it for rows and labels.
    seen_mask = np.isin(labels, train_classes)
    unseen_mask = np.isin(labels, test_classes)
    xtrain, ytrain = features[seen_mask, :], labels[seen_mask]
    xtest, ytest = features[unseen_mask, :], labels[unseen_mask]

    # Counters over the seen test split (label >= 1 vs. label < 1):
    #   RR   label >= 1 and predicted == 1
    #   R2Ri subset of RR where predict(sample, true_label) == 1
    #   RNR  label >= 1 and predicted != 1
    #   NRNR label < 1 and predicted < 1
    #   NRR  label < 1 and predicted >= 1
    RR = 0
    R2Ri = 0
    RNR = 0
    NRNR = 0
    NRR = 0
    # Per unseen class k: samples predicted == 1 (RiR) and the rest (RiNR).
    RiR = np.zeros(len(test_classes))
    RiNR = np.zeros(len(test_classes))

    X_train, X_test = xtrain[train_index], xtrain[test_index]
    y_train, y_test = ytrain[train_index], ytrain[test_index]

    run_id = sys.argv[1]

    def build_model(l1, l2):
        # The search template and the final model differ only in l1/l2.
        return sor.HierarchicalClassifierModel(
            input_size=X_train[0].size,
            num_classes=len(risk_class_files),
            learning_rate=1e-3,
            num_epochs=1000,
            batch_size=100,
            l1=l1,
            l2=l2,
            train_classes=train_classes)

    parameters = {
        'l1': np.logspace(-2, 2, 5),
        'l2': np.append(np.logspace(-3, 1, 5), 0)
    }
    splitter = cv_split.UnseenTestSplit()
    cmodel = GridSearchCV(build_model(0, 0),
                          parameters,
                          cv=splitter,
                          verbose=5,
                          n_jobs=10)
    cmodel.fit(X_train, y_train.ravel())

    best_l1 = cmodel.best_params_['l1']
    best_l2 = cmodel.best_params_['l2']
    print('best params: l1=', best_l1, 'l2=', best_l2)

    # Train a fresh model on the full training split with the best penalties.
    model = build_model(best_l1, best_l2)
    model.fit(X_train, y_train)

    np.savetxt('test_results/' + run_id + '_loss.out', model.loss_trace)
    np.savetxt('test_results/' + run_id + '_grad_norm.out', model.grad_norm)

    y_pred = model.predict(X_test, 0)
    y_pred_score = model.predict_score(X_test, 0)
    np.savetxt('test_results/' + run_id + '_joint_seen.out', y_pred_score)

    for j, true_label in enumerate(y_test.tolist()):
        if true_label >= 1:
            if y_pred[j] == 1:
                RR += 1
                # Second call asks the model about the sample's own class.
                y_pred_class = model.predict(X_test[j, :], int(y_test[j]))
                if y_pred_class == 1:
                    R2Ri += 1
            else:
                RNR += 1
        elif y_pred[j] < 1:
            NRNR += 1
        else:
            NRR += 1

    for classk, test_class in enumerate(test_classes):
        xtest_ri = xtest[ytest == test_class]
        y_pred_ri = model.predict(xtest_ri, 0)
        y_pred_ri_score = model.predict_score(xtest_ri, 0)
        np.savetxt(
            'test_results/' + run_id + '_joint_unseen_' + str(classk) + '.out',
            y_pred_ri_score)

        for pred in y_pred_ri:
            if pred == 1:
                RiR[classk] += 1
            else:
                RiNR[classk] += 1

    print(RR, RNR, NRR, NRNR, RiR, RiNR, R2Ri)

    # Close the handle deterministically so the pickle is fully flushed.
    with open('test_results/trained_model_' + run_id + '_joint.m',
              'wb') as model_file:
        pickle.dump(model, model_file)
Exemplo n.º 3
0
def run_setup(features, labels, train_classes, test_classes, test_fold):
    """Create a random 80/20 split of the seen pool and pre-train three models.

    Rows whose label is in ``train_classes`` form the seen pool. The pool is
    shuffled and split 80/20; the split indices are saved so later runs can
    reuse them. Three ``wSOR`` models are then fitted with ``fit_wk`` and
    saved: one on the given ``features``, one on PCA-projected TF-IDF
    features, and one on FastICA-projected TF-IDF features.

    Args:
        features: 2-D array-like, one row per sample.
        labels: 1-D array-like of class labels aligned with ``features``.
        train_classes: labels that make up the seen pool.
        test_classes: labels stored alongside the split; not otherwise used.
        test_fold: identifier embedded in every output file name.

    Side effects:
        Writes under ``test_data/``: ``trained_model_<fold>_wk.m``,
        ``test_<fold>.npz``, ``trained_model_<fold>_pca_wk.m``,
        ``trained_model_<fold>_ica_wk.m`` and ``<fold>_ica.sav``.
        Fits the externally defined ``scaler`` and ``pca`` objects in place.
    """
    features = np.array(features)
    labels = np.array(labels)
    seen_mask = np.isin(labels, train_classes)
    xtrain = features[seen_mask, :]
    ytrain = labels[seen_mask]

    train_ids = np.arange(len(ytrain))
    np.random.shuffle(train_ids)

    # Split at a single cut point. The previous upper slice started at
    # int(n * 0.8 + 1), which dropped one sample from both partitions.
    split = int(len(ytrain) * 0.8)
    train_index = train_ids[:split]
    test_index = train_ids[split:]

    X_train = xtrain[train_index]
    y_train = ytrain[train_index]

    fold_tag = str(test_fold)

    def build_model(input_size):
        # All three models share hyper-parameters; only the input width varies.
        return sor.HierarchicalClassifierModel(
            input_size=input_size,
            num_classes=len(risk_class_files),
            learning_rate=1e-3,
            num_epochs=1000,
            batch_size=100,
            model_name='wSOR',
            l1=0.1,
            l2=0)

    # Model 1: the features exactly as passed in.
    model = build_model(X_train[0].size)
    model.fit_wk(X_train, y_train)
    model.save('test_data/trained_model_' + fold_tag + '_wk.m')

    np.savez('test_data/test_' + fold_tag + '.npz',
             train_classes=train_classes,
             test_classes=test_classes,
             train_index=train_index,
             test_index=test_index)

    # NOTE(review): ``X`` is not defined in this function; it is presumably a
    # module-level collection of raw documents aligned with ``labels``.
    # Confirm against the caller.
    tfidf = TfidfVectorizer(ngram_range=(1, 1),
                            stop_words='english',
                            token_pattern=u'(?ui)\\b\\w*[a-z]+\\w*\\b')
    features = tfidf.fit_transform(X).toarray()
    xtrain = features[seen_mask, :]

    # Reuse the same split indices on the TF-IDF representation.
    X_train = xtrain[train_index]

    # NOTE(review): ``scaler`` and ``pca`` also come from the enclosing scope.
    scaler.fit(X_train)
    X_train_std = scaler.transform(X_train)
    pca.fit(X_train_std)

    # Model 2: PCA projection, computed once and reused for size and fitting.
    X_train_pca = pca.transform(X_train_std)
    model = build_model(X_train_pca[0].size)
    model.fit_wk(X_train_pca, y_train)
    model.save('test_data/trained_model_' + fold_tag + '_pca_wk.m')

    # Model 3: FastICA with as many components as PCA kept.
    transformer = FastICA(n_components=pca.n_components_,
                          random_state=0,
                          max_iter=500)
    transformer.fit(X_train_std)
    X_train_ica = transformer.transform(X_train_std)

    model = build_model(X_train_ica[0].size)
    model.fit_wk(X_train_ica, y_train)
    model.save('test_data/trained_model_' + fold_tag + '_ica_wk.m')

    # Close the handle deterministically so the pickle is fully flushed.
    with open('test_data/' + fold_tag + '_ica.sav', 'wb') as ica_file:
        pickle.dump(transformer, ica_file, protocol=4)