Example #1
def init_model(workflow, client):
    global train_start_time
    global train_end_time
    initReq = workflow + "#" + client

    name, _, __ = requestHandler.parseReq(initReq, "fwf")
    print("whrkflow name : " + name)
    if name not in model:
        _model = support_vector_machine()
        model[name] = _model

    if name not in preprocessor:
        _preprocessor = DataPreprocessor()
        preprocessor[name] = _preprocessor
    train_start_time = time.time()
    dataX, datay = preprocessor[name].getData(workflow + "_" + client)
    trainX, testX, trainy, testy = train_test_split(dataX,
                                                    datay,
                                                    test_size=0.2)

    model[name].train(trainX, trainy)
    train_end_time = time.time()
    score = model[name].score(testX, testy)

    print("test score : " + str(score))
Example #2
def build_model():
    '''
    request body{
        name: model name,
        path: path to model
    }
    '''
    jsondata = request.get_json()
    name = jsondata["name"]
    if name not in model:
        _model = support_vector_machine()
        model[name] = _model

    if name not in preprocessor:
        _preprocessor = DataPreprocessor()
        preprocessor[name] = _preprocessor

    dataX, datay = preprocessor[name].getData()  # reuse the registered preprocessor
    trainX, testX, trainy, testy = train_test_split(dataX,
                                                    datay,
                                                    test_size=0.2)

    model[name].train(trainX, trainy)

    score = model[name].score(testX, testy)

    # payload = {"test R square": str(score),
    #            "result": "success",
    #            }
    return str(score)
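The request.get_json() call implies this runs as a Flask view. A minimal, hypothetical wiring for the endpoint (the route path, port, and app object are assumptions, not shown in the extract):

from flask import Flask, request

app = Flask(__name__)

# Hypothetical route; the extract does not show how build_model is mounted.
app.add_url_rule("/build_model", view_func=build_model, methods=["POST"])

if __name__ == "__main__":
    app.run(port=5000)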
Example #3
def build_model():
    '''
    request body{
        name: model name,
        path: path to model
    }
    '''
    global model
    global preprocessor
    global requestHandler
    global train_start_time
    global train_end_time
    jsondata = request.get_json()
    app.logger.info("json data : " + str(jsondata))
    name, _, __ = requestHandler.parseReq(jsondata, "nwf")
    # name, _, __ = requestHandler.parseReq(initReq, "fwf")
    # name = jsondata["workflow"]+"#"+jsondata["client_name"]
    _client = jsondata["client_name"]
    _workflow = jsondata["workflow"]
    client = _workflow + "_" + _client
    if name not in model:
        _model = support_vector_machine()
        model[name] = _model
        app.logger.info("registered new model for " + name)
    if name not in preprocessor:
        _preprocessor = DataPreprocessor()
        preprocessor[name] = _preprocessor
        app.logger.info("registered new preprocessor for " + name)
    train_start_time = time.time()
    app.logger.info("model start training")
    dataX, datay = preprocessor[name].getData(client)  # reuse the registered preprocessor
    trainX, testX, trainy, testy = train_test_split(dataX,
                                                    datay,
                                                    test_size=0.2)

    model[name].train(trainX, trainy)
    app.logger.info("training success")
    score = model[name].score(testX, testy)
    train_end_time = time.time()
    # payload = {"test R square": str(score),
    #            "result": "success",
    #            }
    res = {"start_time": train_start_time, "end_time": train_end_time}
    return jsonify(res)
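Since the view returns the training window as JSON, a client can time a training run remotely. A sketch of such a call, reusing the hypothetical /build_model route from the previous note (the URL and field values are placeholders):

import requests

resp = requests.post(
    "http://localhost:5000/build_model",
    json={"workflow": "demo_workflow", "client_name": "client_a"},
)
timing = resp.json()
print("training took", timing["end_time"] - timing["start_time"], "seconds")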
Example #4
# NOTE: this extract begins mid-statement; the call that produced y_test_soft
# and usps_y_soft was truncated, leaving only its trailing arguments:
#     mnist_training, mnist_validation, mnist_testing, USPS_mat, USPS_target)
softmax_score_mnist = accuracy_score(mnist_testing[1], y_test_soft)
cm = confusion_matrix(mnist_testing[1], y_test_soft)
print("Accuracy for MNIST Softmax: {}".format(softmax_score_mnist))
print("Confusion Matrix Softmax: \n{}".format(cm))

softmax_score_usps = accuracy_score(USPS_target, usps_y_soft)
cm = confusion_matrix(USPS_target, usps_y_soft)
print("Accuracy for USPS Softmax: {}".format(softmax_score_usps))
print("Confusion Matrix Softmax: \n{}".format(cm))

print("Done with Logistic")

print("Starting SVM")
y_mnist_svm, y_usps_svm = svm.support_vector_machine(mnist_training,
                                                     mnist_testing, USPS_mat,
                                                     USPS_target)

# ##Uncomment to run SVM with Gamma = 1
# print("\n\tSVM with Gamma value set to 1")
# y_mnist_svm_1, y_usps_svm_1 = svm.support_vector_machine(mnist_training, mnist_testing, USPS_mat, USPS_target, gamma=1)
print("Done with SVM")

print("Starting Random Forest")
y_mnist_rf, y_usps_rf = random_forest.random_forest_implementation(
    mnist_training, mnist_testing, USPS_mat, USPS_target)
print("Done with Random Forest")

print("Starting Neural Network Classifier")
y_pred_mnist_nn, y_pred_usps_nn = neural_network_classifier.neural_net_implementation(
    mnist_training, mnist_testing, USPS_mat, USPS_target)
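The wrapped implementations (svm, random_forest, neural_network_classifier) are not part of the extract. A plausible sketch of svm.support_vector_machine, assuming scikit-learn's SVC and the (data, labels) tuple layout implied by mnist_testing[1] above; the signature is inferred from the call sites, including the gamma keyword in the commented-out variant:

from sklearn.svm import SVC


def support_vector_machine(mnist_training, mnist_testing, usps_mat,
                           usps_target, gamma="scale"):
    # Fit one classifier on MNIST, then predict on both test sets.
    # usps_target is accepted to mirror the call site; scoring happens
    # in the caller via accuracy_score/confusion_matrix.
    train_x, train_y = mnist_training
    test_x, _ = mnist_testing
    clf = SVC(gamma=gamma)
    clf.fit(train_x, train_y)
    return clf.predict(test_x), clf.predict(usps_mat)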
Example #5
def main():
    directory = "../Data/training_data"
    test_dir = "../Data/testing_data"

    test_features = []
    test_uni = []
    test_bi = []
    test_labels = []
    feature_vectors = []
    labels = []
    temp_features = []
    temp_labels = []
    unigrams = []
    bigrams = []

    feature_index = 0

    for filename in os.listdir(directory):
        if filename.endswith(".txt"):
            f = fileinput.input(os.path.join(directory, filename))

            for line in f:
                """ 
                    Stripping new line
                """
                line = line.rstrip('\n')

                line = line.replace(string.punctuation.replace('#', ''), '')

                line = " ".join(line.split())
                modified_line = line.replace('#L', '')
                tokens = modified_line.split()

                tag = nltk.pos_tag(tokens)

                for i in range(len(tag)):

                    if tag[i][0] not in pos_tag_dict:
                        modified_tag = pos_tag_label(tag[i][1])
                        pos_tag_dict[tag[i][0]] = modified_tag
                """ 
                Splitting on delimiter. Better to include stopwords
                
                """
                for word in line.split():
                    if word == "" or word == "#L":
                        continue
                    unigrams.append(word)

    for filename in os.listdir(test_dir):
        if filename.endswith(".txt"):
            f = fileinput.input(os.path.join(test_dir, filename))

            for line in f:
                """ 
                 Stripping new line
                 
                """
                line = line.rstrip('\n')
                line = line.replace(string.punctuation.replace('#', ''), '')
                modified_line = line.replace('#L', '')
                tokens = modified_line.split()

                tag = nltk.pos_tag(tokens)

                for i in range(len(tag)):

                    if tag[i][0] not in pos_tag_dict:
                        modified_tag = pos_tag_label(tag[i][1])
                        pos_tag_dict[tag[i][0]] = modified_tag
                """ 
                Splitting on delimiter. Better to include stopwords

                """
                for word in line.split():
                    if word == "" or word == "#L":
                        continue
                    test_uni.append(word)
    """
    Only form bigrams with unigrams that are not punctuations
    
    """

    for i in range(len(unigrams) - 1):
        bigrams.append(unigrams[i] + " " + unigrams[i + 1])
        mod_bigram = bigrams[-1].replace("#L", "")
        pos_tag_dict[mod_bigram] = 0

    all_words = unigrams + bigrams
    """
    doing nltk pos_tagging
    
    """
    labels = gen_labels(all_words)
    # print("Training")
    # print(collections.Counter(labels))

    features = generating_features(all_words, feature_index, feature_vectors,
                                   labels, prepositions, stop_words,
                                   location_tenders, substr, followed_by,
                                   nonlocation_nouns)
    """
    Only form bigrams with unigrams that are not punctuations
    
    """

    for i in range(len(test_uni) - 1):
        test_bi.append(test_uni[i] + " " + test_uni[i + 1])
        mod_bigram = test_bi[-1].replace("#L", "")
        pos_tag_dict[mod_bigram] = 0

    test_words = test_uni + test_bi

    test_labels = gen_labels(test_words)
    # print("Testing")
    # print(collections.Counter(test_labels))

    test_features = generating_features(test_words, feature_index,
                                        temp_features, test_labels,
                                        prepositions, stop_words,
                                        location_tenders, substr, followed_by,
                                        nonlocation_nouns)

    decision_tree_classifier(features, labels, test_features, test_labels)
    random_forest(features, labels, test_features, test_labels)
    support_vector_machine(features, labels, test_features, test_labels)
    logistic_regression(features, labels, test_features, test_labels)
    linear_regression(features, labels, test_features, test_labels)
    if lca_flag:
        print("Doing LCA")
        train = lca(tf_idf_combine, tf_idf_training_matrix)
        test = lca(tf_idf_combine, tf_idf_testing_matrix)
    elif pca_flag:
        print("Doing PCA")
        train = pca_preprocess(tf_idf_combine, tf_idf_training_matrix)
        test = pca_preprocess(tf_idf_combine, tf_idf_testing_matrix)
    else:
        print("Doing No Reduction")
        train = tf_idf_training_matrix
        test = tf_idf_testing_matrix

    if svm_flag:
        print("Doing SVM")
        ans = svm.support_vector_machine(train, training_label, test, 0.01)
    elif adaboo_flag:
        print("Doing Adaboost")
        ans = adaboost.adaboo(train, training_label, test)
    elif rf_flag:
        print("Doing RF")
        ans = randomForest.r_forest(train, training_label, test)
    elif nb_flag:
        print("Doing Naive Bayes")
        ans = naiveBayes.nb(train, training_label, test)
    elif bagging_flag:
        print("Doing Bagging")
        ans = baggingClassifier.bagging(train, training_label, test)
    else:
        print("Invalid argument specified")