Пример #1
0
def multiclass(train_feature, train_label, test_feature, clftype, method_name,
               paras):
    """ The multi classifier method
    clftype: 'multiclass', 'onevsrest', 'onevsone'
    method_name: the classifier name
    paras: list form of parameters
    """
    Classifier, kwargs = get_classifier_by_name(method_name, paras)

    print 'Method: ', method_name
    from sklearn.cross_validation import KFold
    kf = KFold(len(train_label), n_folds, indices=True)
    index = 0
    avg_f1_score_list = [0] * n_folds
    for train_index, test_index in kf:
        print 'Prepare cv dataset: %d' % index
        model_train_feature = train_feature[train_index, :]
        model_test_feature = train_feature[test_index, :]
        model_train_label = train_label[train_index]
        model_test_label = train_label[test_index]

        #print 'Over sampling...'
        #model_train_feature, model_train_label = over_sampling(model_train_feature, model_train_label)
        #ipdb.set_trace()

        print 'SMOTE over sampling...'
        model_train_feature, model_train_label = smote_sampling(
            model_train_feature, model_train_label)

        clf = get_classifier_by_type(clftype, model_train_feature,
                                     model_train_label, Classifier, kwargs)
        model_test_pred = clf.predict(model_test_feature)

        print 'Model testing acc:'
        print classification_report(model_test_label, model_test_pred)

        #f1_score_list = f1_score(model_test_label, model_test_pred, average=None)
        #avg_f1_score_list[index] = sum(f1_score_list) / len(f1_score_list)
        #print 'F1 score:', f1_score_list, 'Avg:', avg_f1_score_list[index]

        avg_f1_score_list[index] = f1_score_dict(
            predition2dict(model_test_pred), predition2dict(model_test_label))
        print 'Avg: ', avg_f1_score_list[index]

        index += 1

    print 'Method:', method_name
    avg_avg_f1_score = sum(avg_f1_score_list) / len(avg_f1_score_list)
    print 'Avg avg_f1_score:', avg_avg_f1_score, '\n'

    #print 'Oversampling...'
    #train_feature, train_label = over_sampling(train_feature, train_label)

    print 'SMOTE over sampling...'
    train_feature, train_label = smote_sampling(train_feature, train_label)

    print 'Train the whole multi-class classifiers...'
    clf = get_classifier_by_type(clftype, train_feature, train_label,
                                 Classifier, kwargs)
    train_pred = clf.predict(train_feature)
    test_pred = clf.predict(test_feature)

    print 'Model train acc:'
    print classification_report(train_label, train_pred)

    #f1_score_list = f1_score(train_label, train_pred, average=None)
    #avg_f1_score = sum(f1_score_list) / len(f1_score_list)
    #print 'F1 score:', f1_score_list, 'Avg:', avg_f1_score

    # training F1 score
    print 'Training avg F1 score:', f1_score_dict(predition2dict(train_pred),
                                                  predition2dict(train_label))

    return method_name, test_pred, avg_avg_f1_score
Пример #2
0
def main(n_components, n_folds, method_name):
    print 'Load dataset...'
    import pickle
    f = open('task2-dataset/task2-dataset.pickle', 'r')
    train_feature, train_label, test_feature = pickle.load(f)
    f.close()

    train_feature, test_feature = PCA_transform(
        train_feature, test_feature, 'task2-dataset/task2-PCA-decomp.mat')
    train_feature = train_feature[:, :n_components]
    test_feature = test_feature[:, :n_components]

    kwargs = {}
    #from sklearn.naive_bayes import GaussianNB as Classifier
    #method_name = 'Twostep+NB'

    #from sklearn.svm import LinearSVC as Classifier
    #method_name = 'Twostep+SVC'
    #kwargs = {'random_state':0, 'C':10}

    from QDF import QDF as Classifier
    method_name = 'Twostep+QDF'

    #from LDF import LDF as Classifier
    #method_name = 'Twostep+LDF'

    print 'Method: ', method_name
    from sklearn.cross_validation import KFold
    kf = KFold(len(train_label), n_folds, indices=True)
    index = 0
    avg_f1_score_list = [0] * n_folds
    for train_index, test_index in kf:
        print 'Prepare cv dataset: %d' % index
        model_train_feature = train_feature[train_index, :]
        model_test_feature = train_feature[test_index, :]
        model_train_label = train_label[train_index]
        model_test_label = train_label[test_index]

        model_train_pred, model_test_pred = twostep(model_train_feature,
                                                    model_train_label,
                                                    model_test_feature,
                                                    Classifier, kwargs)

        #print 'Model testing acc:'
        #print classification_report(model_test_label, model_test_pred)

        #f1_score_list = f1_score(model_test_label, model_test_pred, average=None)
        #avg_f1_score_list[index] = sum(f1_score_list) / len(f1_score_list)
        #print 'F1 score:', f1_score_list, 'Avg:', avg_f1_score_list[index]

        avg_f1_score_list[index] = f1_score_dict(
            predition2dict(model_test_pred), predition2dict(model_test_label))
        print 'Avg: ', avg_f1_score_list[index]

        index += 1

    print 'Method:', method_name
    avg_avg_f1_score = sum(avg_f1_score_list) / len(avg_f1_score_list)
    print 'Avg avg_f1_score:', avg_avg_f1_score, '\n'

    print 'Train the whole multi-class classifiers...'
    train_pred, test_pred = twostep(train_feature, train_label, test_feature,
                                    Classifier, kwargs)
    # training F1 score
    print 'Training avg F1 score:', f1_score_dict(predition2dict(train_pred),
                                                  predition2dict(train_label))

    # save the final prediction
    index = 0
    f = open('twostep_output.csv', 'w')
    for y in test_pred:
        f.write('%d,%d\n' % (index + 1, test_pred[index]))
        index += 1

    f.close()
Пример #3
0
def main(n_components, n_folds, method_name):
    print 'Load dataset...'
    import pickle
    f = open('task2-dataset/task2-dataset.pickle', 'r')
    train_feature, train_label, test_feature = pickle.load(f)
    f.close()
    
    train_feature, test_feature = PCA_transform(train_feature, test_feature, 'task2-dataset/task2-PCA-decomp.mat')
    train_feature = train_feature[:, :n_components]
    test_feature = test_feature[:, :n_components]
    
    kwargs = {}
    #from sklearn.naive_bayes import GaussianNB as Classifier
    #method_name = 'Twostep+NB'
        
    #from sklearn.svm import LinearSVC as Classifier
    #method_name = 'Twostep+SVC'
    #kwargs = {'random_state':0, 'C':10}
    
    from QDF import QDF as Classifier
    method_name = 'Twostep+QDF'
    
    #from LDF import LDF as Classifier
    #method_name = 'Twostep+LDF'
    
    print 'Method: ', method_name
    from sklearn.cross_validation import KFold
    kf = KFold(len(train_label), n_folds, indices=True)
    index = 0
    avg_f1_score_list = [0] * n_folds
    for train_index, test_index in kf:
        print 'Prepare cv dataset: %d' % index
        model_train_feature = train_feature[train_index, :]
        model_test_feature = train_feature[test_index, :]
        model_train_label = train_label[train_index]
        model_test_label = train_label[test_index]
        
        model_train_pred, model_test_pred = twostep(model_train_feature, model_train_label, model_test_feature, Classifier, kwargs)

        #print 'Model testing acc:'
        #print classification_report(model_test_label, model_test_pred)
        
        #f1_score_list = f1_score(model_test_label, model_test_pred, average=None)
        #avg_f1_score_list[index] = sum(f1_score_list) / len(f1_score_list)
        #print 'F1 score:', f1_score_list, 'Avg:', avg_f1_score_list[index]
        
        avg_f1_score_list[index] = f1_score_dict(predition2dict(model_test_pred), predition2dict(model_test_label))
        print 'Avg: ', avg_f1_score_list[index]
        
        index += 1
        
    print 'Method:', method_name
    avg_avg_f1_score = sum(avg_f1_score_list) / len(avg_f1_score_list)
    print 'Avg avg_f1_score:', avg_avg_f1_score, '\n'
    
    print 'Train the whole multi-class classifiers...'
    train_pred, test_pred = twostep(train_feature, train_label, test_feature, Classifier, kwargs)
    # training F1 score
    print 'Training avg F1 score:', f1_score_dict(predition2dict(train_pred), predition2dict(train_label))
    
    # save the final prediction
    index = 0
    f = open('twostep_output.csv', 'w')
    for y in test_pred:
        f.write('%d,%d\n' % (index+1, test_pred[index]))
        index += 1
        
    f.close()
Пример #4
0
def multiclass(train_feature, train_label, test_feature, clftype, method_name, paras):
    """ The multi classifier method
    clftype: 'multiclass', 'onevsrest', 'onevsone'
    method_name: the classifier name
    paras: list form of parameters
    """
    Classifier, kwargs = get_classifier_by_name(method_name, paras)
    
    print 'Method: ', method_name
    from sklearn.cross_validation import KFold
    kf = KFold(len(train_label), n_folds, indices=True)
    index = 0
    avg_f1_score_list = [0] * n_folds
    for train_index, test_index in kf:
        print 'Prepare cv dataset: %d' % index
        model_train_feature = train_feature[train_index, :]
        model_test_feature = train_feature[test_index, :]
        model_train_label = train_label[train_index]
        model_test_label = train_label[test_index]
        
        #print 'Over sampling...'
        #model_train_feature, model_train_label = over_sampling(model_train_feature, model_train_label)
        #ipdb.set_trace()
        
        print 'SMOTE over sampling...'
        model_train_feature, model_train_label = smote_sampling(model_train_feature, model_train_label)
        
        clf = get_classifier_by_type(clftype, model_train_feature, model_train_label, Classifier, kwargs)
        model_test_pred = clf.predict(model_test_feature)
        
        print 'Model testing acc:'
        print classification_report(model_test_label, model_test_pred)
        
        #f1_score_list = f1_score(model_test_label, model_test_pred, average=None)
        #avg_f1_score_list[index] = sum(f1_score_list) / len(f1_score_list)
        #print 'F1 score:', f1_score_list, 'Avg:', avg_f1_score_list[index]
        
        avg_f1_score_list[index] = f1_score_dict(predition2dict(model_test_pred), predition2dict(model_test_label))
        print 'Avg: ', avg_f1_score_list[index]
        
        index += 1
    
    print 'Method:', method_name
    avg_avg_f1_score = sum(avg_f1_score_list) / len(avg_f1_score_list)
    print 'Avg avg_f1_score:', avg_avg_f1_score, '\n'
    
    #print 'Oversampling...'
    #train_feature, train_label = over_sampling(train_feature, train_label)
    
    print 'SMOTE over sampling...'
    train_feature, train_label = smote_sampling(train_feature, train_label)
    
    print 'Train the whole multi-class classifiers...'
    clf = get_classifier_by_type(clftype, train_feature, train_label, Classifier, kwargs)
    train_pred = clf.predict(train_feature)
    test_pred = clf.predict(test_feature)
    
    print 'Model train acc:'
    print classification_report(train_label, train_pred)
    
    #f1_score_list = f1_score(train_label, train_pred, average=None)
    #avg_f1_score = sum(f1_score_list) / len(f1_score_list)
    #print 'F1 score:', f1_score_list, 'Avg:', avg_f1_score
    
    # training F1 score
    print 'Training avg F1 score:', f1_score_dict(predition2dict(train_pred), predition2dict(train_label))
    
    return method_name, test_pred, avg_avg_f1_score