def go_by_category(category):
    """Train an RBF-kernel SVC for one category and print held-out results.

    Loads the feature matrix and the per-category target vector from
    ``TrainingFactory`` (both with ``limit=10000``), holds out 10% of the
    samples, fits the classifier on the remainder, then prints the predicted
    labels, the true labels and a blank line.

    Args:
        category: Forwarded to
            ``TrainingFactory.build_target_vector_by_category`` to select
            which target vector is learned.
    """
    # Features must come from the *input* matrix; feeding the target matrix
    # in as features would leak the labels into the model.
    features = TrainingFactory.build_sparse_matrix_input(limit=10000)
    targets = TrainingFactory.build_target_vector_by_category(
        category, limit=10000)

    features_train, features_test, target_train, target_test = \
        train_test_split(features, targets, test_size=0.1)

    classif = SVC(kernel='rbf', tol=0.001, probability=True)
    classif.fit(features_train, target_train)
    output_targets = classif.predict(features_test)

    # Single-argument print(...) behaves the same as the print statement.
    print(output_targets)
    print(target_test)
    print("")
def go_by_category(category):
    """Train an RBF-kernel SVC for one category and print held-out results.

    Loads the feature matrix and the per-category target vector from
    ``TrainingFactory`` (both with ``limit=10000``), holds out 10% of the
    samples, fits the classifier on the remainder, then prints the predicted
    labels, the true labels and a blank line.

    Args:
        category: Forwarded to
            ``TrainingFactory.build_target_vector_by_category`` to select
            which target vector is learned.
    """
    # Features must come from the *input* matrix; feeding the target matrix
    # in as features would leak the labels into the model.
    features = TrainingFactory.build_sparse_matrix_input(limit=10000)
    targets = TrainingFactory.build_target_vector_by_category(
        category, limit=10000)

    features_train, features_test, target_train, target_test = \
        train_test_split(features, targets, test_size=0.1)

    classif = SVC(kernel='rbf', tol=0.001, probability=True)
    classif.fit(features_train, target_train)
    output_targets = classif.predict(features_test)

    # Single-argument print(...) behaves the same as the print statement.
    print(output_targets)
    print(target_test)
    print("")
def go_by_category_2(category):
    """Fit an RBF SVC for ``category``, report metrics and score the test set.

    Training data and its fitted scaler come from
    ``TrainingFactory.get_training_data_by_category(category, 10000)``; 10%
    of it is held out for evaluation. The unlabeled data from
    ``TestingFactory.get_test_data(limit=1000)`` is run through the same
    scaler before being scored.

    Prints, in order: log loss, accuracy and confusion matrix on the held-out
    split, then the list of test-set probabilities.

    Args:
        category: Category identifier forwarded to ``TrainingFactory``.

    Returns:
        A tuple ``(accuracy, output_targets, testing_output_proba)`` where
        ``output_targets`` are the predicted labels for the held-out split
        and ``testing_output_proba`` is a list holding column 1 of
        ``predict_proba`` for every test row (the second entry of
        ``classif.classes_`` -- presumably the positive class; confirm
        against the label encoding).
    """
    features, targets, scaler = TrainingFactory.get_training_data_by_category(
        category, 10000)
    features_train, features_test, target_train, target_test = \
        train_test_split(features, targets, test_size=0.1)

    # Apply the scaler fitted on the training data, then wrap the result as
    # a CSR matrix, before predicting on the test set.
    test_data_sparse = TestingFactory.get_test_data(limit=1000)
    test_data = csr_matrix(scaler.transform(test_data_sparse))

    classif = SVC(kernel='rbf', C=0.1, tol=0.001, probability=True)
    classif.fit(features_train, target_train)

    output_targets_proba = classif.predict_proba(features_test)
    output_targets = classif.predict(features_test)

    # Log loss is defined on predicted probabilities; scoring hard labels
    # would only yield a clipped, rescaled error rate.
    print(log_loss(target_test, output_targets_proba))
    accuracy = accuracy_score(target_test, output_targets)
    print(accuracy)
    print(confusion_matrix(target_test, output_targets))

    testing_output = classif.predict_proba(test_data)
    testing_output_proba = [row[1] for row in testing_output]
    print(testing_output_proba)

    return accuracy, output_targets, testing_output_proba
def go():
    """Fit a one-vs-rest RBF SVC on the full target matrix and print a report.

    Features and targets are loaded from ``TrainingFactory`` with
    ``limit=10000`` and split 90/10. After fitting, the function prints the
    formatted predicted probabilities, the formatted true targets (densified
    first), the log loss, and finally a blank line. Nothing is returned.
    """
    features = TrainingFactory.build_sparse_matrix_input(limit=10000)
    labels = TrainingFactory.build_sparse_matrix_target(limit=10000)

    split = train_test_split(features, labels, test_size=0.1)
    train_x, test_x, train_y, test_y = split

    base_svc = SVC(kernel='rbf', tol=0.001, probability=True)
    model = OneVsRestClassifier(base_svc)
    model.fit(train_x, train_y)
    predicted = model.predict_proba(test_x)

    # Single-argument print(...) behaves the same as the print statement.
    print(ClassifierFactory.output_function(predicted))
    print(ClassifierFactory.output_function(test_y.todense()))
    print(log_loss(test_y, predicted))
    print("")
def go_by_category_2(category):
    """Fit an RBF SVC for ``category``, report metrics and score the test set.

    Training data and its fitted scaler come from
    ``TrainingFactory.get_training_data_by_category(category, 10000)``; 10%
    of it is held out for evaluation. The unlabeled data from
    ``TestingFactory.get_test_data(limit=1000)`` is run through the same
    scaler before being scored.

    Prints, in order: log loss, accuracy and confusion matrix on the held-out
    split, then the list of test-set probabilities.

    Args:
        category: Category identifier forwarded to ``TrainingFactory``.

    Returns:
        A tuple ``(accuracy, output_targets, testing_output_proba)`` where
        ``output_targets`` are the predicted labels for the held-out split
        and ``testing_output_proba`` is a list holding column 1 of
        ``predict_proba`` for every test row (the second entry of
        ``classif.classes_`` -- presumably the positive class; confirm
        against the label encoding).
    """
    features, targets, scaler = TrainingFactory.get_training_data_by_category(
        category, 10000)
    features_train, features_test, target_train, target_test = \
        train_test_split(features, targets, test_size=0.1)

    # Apply the scaler fitted on the training data, then wrap the result as
    # a CSR matrix, before predicting on the test set.
    test_data_sparse = TestingFactory.get_test_data(limit=1000)
    test_data = csr_matrix(scaler.transform(test_data_sparse))

    classif = SVC(kernel='rbf', C=0.1, tol=0.001, probability=True)
    classif.fit(features_train, target_train)

    output_targets_proba = classif.predict_proba(features_test)
    output_targets = classif.predict(features_test)

    # Log loss is defined on predicted probabilities; scoring hard labels
    # would only yield a clipped, rescaled error rate.
    print(log_loss(target_test, output_targets_proba))
    accuracy = accuracy_score(target_test, output_targets)
    print(accuracy)
    print(confusion_matrix(target_test, output_targets))

    testing_output = classif.predict_proba(test_data)
    testing_output_proba = [row[1] for row in testing_output]
    print(testing_output_proba)

    return accuracy, output_targets, testing_output_proba
def go():
    """Fit a one-vs-rest RBF SVC on the full target matrix and print a report.

    Features and targets are loaded from ``TrainingFactory`` with
    ``limit=10000`` and split 90/10. After fitting, the function prints the
    formatted predicted probabilities, the formatted true targets (densified
    first), the log loss, and finally a blank line. Nothing is returned.
    """
    features = TrainingFactory.build_sparse_matrix_input(limit=10000)
    labels = TrainingFactory.build_sparse_matrix_target(limit=10000)

    split = train_test_split(features, labels, test_size=0.1)
    train_x, test_x, train_y, test_y = split

    base_svc = SVC(kernel='rbf', tol=0.001, probability=True)
    model = OneVsRestClassifier(base_svc)
    model.fit(train_x, train_y)
    predicted = model.predict_proba(test_x)

    # Single-argument print(...) behaves the same as the print statement.
    print(ClassifierFactory.output_function(predicted))
    print(ClassifierFactory.output_function(test_y.todense()))
    print(log_loss(test_y, predicted))
    print("")
def go_by_category_2(category, num):
    """Fit the classifier selected by ``num`` and score the test set.

    Training data and its fitted scaler come from
    ``TrainingFactory.get_training_data_by_category(category, 10000)``; 10%
    of it is held out for evaluation. The unlabeled data from
    ``TestingFactory.get_test_data(limit=10000)`` is run through the same
    scaler before being scored.

    Prints the log loss, accuracy and confusion matrix measured on the
    held-out split.

    Args:
        category: Category identifier forwarded to ``TrainingFactory``.
        num: Classifier selector:
            ``0`` -> ``rfc(n_estimators=500, oob_score=True)``
            (``rfc`` is presumably an alias of RandomForestClassifier --
            confirm against the file's imports),
            ``1`` -> ``SVC`` with RBF kernel,
            ``2`` -> ``MultinomialNB``,
            ``3`` -> ``DecisionTreeClassifier(max_depth=1000)``,
            any other value -> ``SVC`` with linear kernel.

    Returns:
        A tuple ``(accuracy, output_targets, testing_output_proba)`` where
        ``output_targets`` are the predicted labels for the held-out split
        and ``testing_output_proba`` is a list holding column 1 of
        ``predict_proba`` for every test row (the second entry of
        ``classif.classes_`` -- presumably the positive class; confirm
        against the label encoding).
    """
    features, targets, scaler = TrainingFactory.get_training_data_by_category(
        category, 10000)

    # Split arrays into random train and test subsets.
    features_train, features_test, target_train, target_test = \
        train_test_split(features, targets, test_size=0.1)

    # Apply the scaler fitted on the training data, then wrap the result as
    # a CSR matrix, before predicting on the test set.
    test_data_sparse = TestingFactory.get_test_data(limit=10000)
    test_data = csr_matrix(scaler.transform(test_data_sparse))

    if num == 1:
        classif = SVC(kernel='rbf', C=0.1, tol=0.001, probability=True)
    elif num == 0:
        classif = rfc(n_estimators=500, oob_score=True)
    elif num == 2:
        classif = MultinomialNB()
    elif num == 3:
        classif = tree.DecisionTreeClassifier(max_depth=1000)
    else:
        classif = SVC(kernel='linear', C=0.1, tol=0.001, probability=True)

    classif.fit(features_train, target_train)

    output_targets_proba = classif.predict_proba(features_test)
    output_targets = classif.predict(features_test)

    # Log loss is defined on predicted probabilities; scoring hard labels
    # would only yield a clipped, rescaled error rate.
    loss = log_loss(target_test, output_targets_proba)
    accuracy = accuracy_score(target_test, output_targets)
    cm = confusion_matrix(target_test, output_targets)

    # Each message is formatted into one string so the output layout matches
    # the comma-separated print statement on Python 2.
    print("log loss:  %s" % (loss,))
    print("accuracy:  %s" % (accuracy,))
    print("Confusion matrix : %s \n" % (cm,))

    testing_output = classif.predict_proba(test_data)
    testing_output_proba = [row[1] for row in testing_output]

    return accuracy, output_targets, testing_output_proba