def go_by_category(category):
    """Fit an RBF-kernel SVC for one category and print a hold-out comparison.

    Args:
        category: Forwarded unchanged to
            ``TrainingFactory.build_target_vector_by_category``.

    Returns:
        None. Prints the predicted labels for the 10% hold-out split, then
        the true labels for that split, then a blank line.
    """
    # NOTE(review): features are taken from the *input* matrix, matching the
    # sibling ``go``. This previously called ``build_sparse_matrix_target``,
    # which would train the model on the label matrix itself -- confirm.
    features = TrainingFactory.build_sparse_matrix_input(limit=10000)
    labels = TrainingFactory.build_target_vector_by_category(category,
                                                             limit=10000)

    features_train, features_test, labels_train, labels_test = \
        train_test_split(features, labels, test_size=0.1)

    classif = SVC(kernel='rbf', tol=0.001, probability=True)
    classif.fit(features_train, labels_train)

    predicted_labels = classif.predict(features_test)
    # Single-argument parenthesised prints behave the same on Python 2 and 3.
    print(predicted_labels)
    print(labels_test)
    print("")
示例#2
0
    def go_by_category(category):
        """Fit an RBF-kernel SVC for one category and print a hold-out comparison.

        Args:
            category: Forwarded unchanged to
                ``TrainingFactory.build_target_vector_by_category``.

        Returns:
            None. Prints the predicted labels for the 10% hold-out split,
            then the true labels for that split, then a blank line.
        """
        # NOTE(review): features are taken from the *input* matrix, matching
        # the sibling ``go``. This previously called
        # ``build_sparse_matrix_target``, which would train the model on the
        # label matrix itself -- confirm.
        features = TrainingFactory.build_sparse_matrix_input(limit=10000)
        labels = TrainingFactory.build_target_vector_by_category(category,
                                                                 limit=10000)

        features_train, features_test, labels_train, labels_test = \
            train_test_split(features, labels, test_size=0.1)

        classif = SVC(kernel='rbf', tol=0.001, probability=True)
        classif.fit(features_train, labels_train)

        predicted_labels = classif.predict(features_test)
        # Single-argument parenthesised prints behave the same on Python 2
        # and 3.
        print(predicted_labels)
        print(labels_test)
        print("")
    def go_by_category_2(category):
        """Train an RBF SVC for ``category``, print metrics, score test data.

        The training data is split 90/10; the model is fitted on the 90% part
        and evaluated (log loss, accuracy, confusion matrix) on the rest. The
        fitted model then scores the data from ``TestingFactory``.

        Args:
            category: Forwarded to
                ``TrainingFactory.get_training_data_by_category``.

        Returns:
            A tuple ``(accuracy, output_targets, testing_output_proba)``:
            the hold-out accuracy, the hard predictions for the hold-out
            split, and a list holding column 1 of ``predict_proba`` for each
            test row (presumably the positive-class probability -- confirm
            against the label encoding).
        """
        features, labels, scaler = \
            TrainingFactory.get_training_data_by_category(category, 10000)
        features_train, features_test, target_train, target_test = \
            train_test_split(features, labels, test_size=0.1)

        # Test data goes through the scaler returned with the training data.
        test_data_sparse = TestingFactory.get_test_data(limit=1000)
        test_data_scaled = scaler.transform(test_data_sparse)
        test_data = csr_matrix(test_data_scaled)

        classif = SVC(kernel='rbf', C=0.1, tol=0.001, probability=True)
        classif.fit(features_train, target_train)

        holdout_proba = classif.predict_proba(features_test)
        output_targets = classif.predict(features_test)

        # log_loss is defined on predicted probabilities; feeding it the hard
        # labels from predict() only measures a clipped 0/1 error.
        print(log_loss(target_test, holdout_proba))
        accuracy = accuracy_score(target_test, output_targets)
        print(accuracy)
        print(confusion_matrix(target_test, output_targets))

        testing_output = classif.predict_proba(test_data)
        testing_output_proba = [row[1] for row in testing_output]
        print(testing_output_proba)

        return accuracy, output_targets, testing_output_proba
    def go():
        """Fit a one-vs-rest RBF SVC on the full label matrix and print results.

        Builds the input and target matrices from ``TrainingFactory``, holds
        out 10% of the rows, fits the classifier on the remainder, and prints:
        the formatted predicted probabilities, the formatted densified true
        targets, the log loss, and a blank line. Returns nothing.
        """
        features = TrainingFactory.build_sparse_matrix_input(limit=10000)
        label_matrix = TrainingFactory.build_sparse_matrix_target(limit=10000)

        split = train_test_split(features, label_matrix, test_size=0.1)
        features_train, features_test, labels_train, labels_test = split

        base_estimator = SVC(kernel='rbf', tol=0.001, probability=True)
        model = OneVsRestClassifier(base_estimator)
        model.fit(features_train, labels_train)

        predicted_proba = model.predict_proba(features_test)
        print(ClassifierFactory.output_function(predicted_proba))
        # The held-out targets are densified before formatting.
        dense_truth = labels_test.todense()
        print(ClassifierFactory.output_function(dense_truth))

        print(log_loss(labels_test, predicted_proba))
        print("")
示例#5
0
    def go_by_category_2(category):
        """Train an RBF SVC for ``category``, print metrics, score test data.

        The training data is split 90/10; the model is fitted on the 90% part
        and evaluated (log loss, accuracy, confusion matrix) on the rest. The
        fitted model then scores the data from ``TestingFactory``.

        Args:
            category: Forwarded to
                ``TrainingFactory.get_training_data_by_category``.

        Returns:
            A tuple ``(accuracy, output_targets, testing_output_proba)``:
            the hold-out accuracy, the hard predictions for the hold-out
            split, and a list holding column 1 of ``predict_proba`` for each
            test row (presumably the positive-class probability -- confirm
            against the label encoding).
        """
        features, labels, scaler = \
            TrainingFactory.get_training_data_by_category(category, 10000)
        features_train, features_test, target_train, target_test = \
            train_test_split(features, labels, test_size=0.1)

        # Test data goes through the scaler returned with the training data.
        test_data_sparse = TestingFactory.get_test_data(limit=1000)
        test_data_scaled = scaler.transform(test_data_sparse)
        test_data = csr_matrix(test_data_scaled)

        classif = SVC(kernel='rbf', C=0.1, tol=0.001, probability=True)
        classif.fit(features_train, target_train)

        holdout_proba = classif.predict_proba(features_test)
        output_targets = classif.predict(features_test)

        # log_loss is defined on predicted probabilities; feeding it the hard
        # labels from predict() only measures a clipped 0/1 error.
        print(log_loss(target_test, holdout_proba))
        accuracy = accuracy_score(target_test, output_targets)
        print(accuracy)
        print(confusion_matrix(target_test, output_targets))

        testing_output = classif.predict_proba(test_data)
        testing_output_proba = [row[1] for row in testing_output]
        print(testing_output_proba)

        return accuracy, output_targets, testing_output_proba
示例#6
0
    def go():
        """Fit a one-vs-rest RBF SVC on the full label matrix and print results.

        Builds the input and target matrices from ``TrainingFactory``, holds
        out 10% of the rows, fits the classifier on the remainder, and prints:
        the formatted predicted probabilities, the formatted densified true
        targets, the log loss, and a blank line. Returns nothing.
        """
        features = TrainingFactory.build_sparse_matrix_input(limit=10000)
        label_matrix = TrainingFactory.build_sparse_matrix_target(limit=10000)

        split = train_test_split(features, label_matrix, test_size=0.1)
        features_train, features_test, labels_train, labels_test = split

        base_estimator = SVC(kernel='rbf', tol=0.001, probability=True)
        model = OneVsRestClassifier(base_estimator)
        model.fit(features_train, labels_train)

        predicted_proba = model.predict_proba(features_test)
        print(ClassifierFactory.output_function(predicted_proba))
        # The held-out targets are densified before formatting.
        dense_truth = labels_test.todense()
        print(ClassifierFactory.output_function(dense_truth))

        print(log_loss(labels_test, predicted_proba))
        print("")
示例#7
0
    def go_by_category_2(category, num):
        """Train the classifier selected by ``num`` and evaluate it.

        The training data is split 90/10 at random; the chosen model is
        fitted on the 90% part and evaluated (log loss, accuracy, confusion
        matrix) on the rest. The fitted model then scores the data from
        ``TestingFactory``.

        Args:
            category: Forwarded to
                ``TrainingFactory.get_training_data_by_category``.
            num: Model selector. ``0`` -> ``rfc`` with 500 estimators,
                ``1`` -> RBF-kernel SVC, ``2`` -> ``MultinomialNB``,
                ``3`` -> decision tree (max depth 1000); any other value
                falls back to a linear-kernel SVC.

        Returns:
            A tuple ``(accuracy, output_targets, testing_output_proba)``:
            the hold-out accuracy, the hard predictions for the hold-out
            split, and a list holding column 1 of ``predict_proba`` for each
            test row (presumably the positive-class probability -- confirm
            against the label encoding).
        """
        features, labels, scaler = \
            TrainingFactory.get_training_data_by_category(category, 10000)

        # Split arrays into random train and test subsets.
        features_train, features_test, target_train, target_test = \
            train_test_split(features, labels, test_size=0.1)

        # Test data goes through the scaler returned with the training data.
        test_data_sparse = TestingFactory.get_test_data(limit=10000)
        test_data_scaled = scaler.transform(test_data_sparse)
        test_data = csr_matrix(test_data_scaled)

        if num == 1:
            classif = SVC(kernel='rbf', C=0.1, tol=0.001, probability=True)
        elif num == 0:
            classif = rfc(n_estimators=500, oob_score=True)
        elif num == 2:
            classif = MultinomialNB()
        elif num == 3:
            classif = tree.DecisionTreeClassifier(max_depth=1000)
        else:
            classif = SVC(kernel='linear', C=0.1, tol=0.001, probability=True)
        classif.fit(features_train, target_train)

        output_targets = classif.predict(features_test)
        # log_loss is defined on predicted probabilities; feeding it the hard
        # labels from predict() only measures a clipped 0/1 error.
        holdout_proba = classif.predict_proba(features_test)

        # Each message is pre-formatted into one string so the output matches
        # the former multi-item print statements on both Python 2 and 3.
        print("log loss:  %s" % (log_loss(target_test, holdout_proba),))
        accuracy = accuracy_score(target_test, output_targets)
        print("accuracy:  %s" % (accuracy,))

        cm = confusion_matrix(target_test, output_targets)
        print("Confusion matrix : %s \n" % (cm,))

        testing_output = classif.predict_proba(test_data)
        testing_output_proba = [row[1] for row in testing_output]

        return accuracy, output_targets, testing_output_proba