示例#1
0
def test_predictions(dataset, params):
    with open('test.pred', 'w') as file:
        for index, (label, features) in enumerate(dataset):
            x = feats_to_vec(features)
            pred = ll.predict(x, params)
            if not index == 0:
                file.write('\n')
            file.write(ut.I2L[pred])
def accuracy_on_dataset(dataset, params):
    """Return the fraction of examples in ``dataset`` that are predicted correctly.

    dataset: iterable of (label, features) pairs; each label is looked up in
        ``L2I`` and compared with the value returned by ``ll.predict``.
    params: parameters forwarded to ``ll.predict``.

    Returns a float; an empty dataset yields 0.0 instead of raising
    ZeroDivisionError.
    """
    correct = total = 0.0
    for label, features in dataset:
        x = feats_to_vec(features)
        # One comparison per example is enough: anything that is not
        # correct counts as wrong.
        if ll.predict(x, params) == L2I[label]:
            correct += 1
        total += 1
    if not total:
        return 0.0
    return correct / total
示例#3
0
def accuracy_on_dataset(dataset, params):
    """Compute the share of examples whose prediction equals their label.

    dataset: iterable of (label, features) pairs. ``features`` is handed to
        ``ll.predict`` unchanged and ``label`` is compared with the
        prediction directly.
    params: parameters forwarded to ``ll.predict``.

    Returns a float; an empty dataset yields 0.0 instead of raising
    ZeroDivisionError.
    """
    good = bad = 0.0
    for label, features in dataset:
        y_prediction = ll.predict(features, params)
        if y_prediction == label:
            good += 1
        else:
            bad += 1
    total = good + bad
    # Guard the division: nothing was evaluated when the dataset is empty.
    return good / total if total else 0.0
示例#4
0
def predict(trained_params, corpus, text_to_ngram, symbol_dict, label_dict):
    """Lazily yield one predicted label per text in ``corpus``.

    trained_params: parameters forwarded to ``ll.predict``.
    corpus: iterable of texts.
    text_to_ngram: callable turning a text into an iterable of n-grams.
    symbol_dict: mapping passed to ``vectorize_utils.generate_vector``; its
        length is used as the vector size argument.
    label_dict: mapping from label to the value ``ll.predict`` returns; it is
        inverted to translate predictions back to labels.
    """
    vocab_size = len(symbol_dict)
    index_to_label = dict((index, name) for name, index in label_dict.items())
    for text in corpus:
        # n-gram counts, most frequent first
        ngram_counts = Counter(text_to_ngram(text)).most_common()
        vector = vectorize_utils.generate_vector(
            vocab_size, ngram_counts, symbol_dict)
        yield index_to_label[ll.predict(vector, trained_params)]
示例#5
0
def accuracy_on_dataset(dataset, params):
    """Return the fraction of examples in ``dataset`` that are predicted correctly.

    dataset: iterable of (label, features) pairs; each label is looked up in
        ``L2I`` and compared with the value returned by ``ll.predict``.
    params: parameters forwarded to ``ll.predict``.

    Returns a float; an empty dataset yields 0.0 instead of raising
    ZeroDivisionError.
    """
    good = total = 0
    for label, features in dataset:
        feature_vec = feats_to_vec(features)
        y_tag = ll.predict(feature_vec, params)
        if y_tag == L2I[label]:
            good += 1
        total += 1
    if total == 0:
        return 0.0
    # float() keeps this a true division even on interpreters where ``/``
    # between two ints truncates (Python 2).
    return float(good) / total
示例#6
0
def accuracy_on_dataset(dataset, params):
    """Compute the share of examples whose prediction equals their label.

    dataset: iterable of (label, features) pairs. ``features`` is handed to
        ``ll.predict`` unchanged and ``label`` is compared with the
        prediction directly.
    params: parameters forwarded to ``ll.predict``.

    Returns a float; an empty dataset yields 0.0 instead of raising
    ZeroDivisionError.
    """
    good = bad = 0.0
    for label, features in dataset:
        prediction = ll.predict(features, params)
        if label == prediction:
            good += 1
        else:
            bad += 1
    seen = good + bad
    if not seen:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    return good / seen
示例#7
0
def get_tag(dataset, params):
    """
    Tag the dataset with the given params and write the tags to 'test.pred.ll'.

    dataset: an iterable of (label, feature) pairs; the label is ignored.
    params: parameters forwarded to ll.predict.

    One predicted label (looked up in utils.I2L) is written per line.
    """
    # ``with`` closes the file even if a prediction or lookup raises.
    with open('test.pred.ll', 'w') as f:
        for label, features in dataset:
            predicted_label = ll.predict(feats_to_vec(features), params)
            f.write("%s\n" % utils.I2L[predicted_label])
示例#8
0
def accuracy_on_dataset(dataset, params):
    """Return correct_predictions / all_predictions for ``dataset``.

    dataset: iterable of (label, features) pairs. ``features`` is handed to
        ``ll.predict`` unchanged and the result is compared with ``label``
        directly.
    params: parameters forwarded to ``ll.predict``.

    Returns a float; an empty dataset yields 0.0 instead of raising
    ZeroDivisionError.
    """
    good = total = 0.0
    for label, features in dataset:
        if ll.predict(features, params) == label:
            good += 1
        total += 1
    # With no examples there is no ratio to compute.
    return good / total if total else 0.0
示例#9
0
def accuracy_on_dataset(dataset, params):
    """Return the fraction of examples in ``dataset`` that are predicted correctly.

    dataset: iterable of (label, features) pairs; each label is looked up in
        ``ut.L2I`` and compared with the value returned by ``ll.predict``.
    params: parameters forwarded to ``ll.predict``.

    Returns a float; an empty dataset yields 0.0 instead of raising
    ZeroDivisionError.
    """
    good = bad = 0.0
    for label, features in dataset:
        x = feats_to_vec(features)
        y = ut.L2I[label]
        pred = ll.predict(x, params)
        if pred == y:
            good += 1
        else:
            bad += 1
    total = good + bad
    if total == 0:
        return 0.0
    return good / total
示例#10
0
def test(test_data, params):
    prediction_file= open("test.pred", 'w')
    for label, features in test_data:
        x = feats_to_vec(features)  # convert features to a vector.
        pred = ll.predict(x, params)
        for key, val in ut.L2I.items():
            if val == pred:
                label = key
                break
        prediction_file.write(str(label) + "\n")
    prediction_file.close()
示例#11
0
def accuracy_on_dataset(dataset, params):
    """Return correct_predictions / all_predictions for ``dataset``.

    dataset: iterable of (label, features) pairs. Labels are converted with
        ``L2I.get``, so a label missing from ``L2I`` becomes ``None`` and is
        compared as such.
    params: parameters forwarded to ``ll.predict``.

    Returns a float; an empty dataset yields 0.0 instead of raising
    ZeroDivisionError.
    """
    good = total = 0.0
    for label, features in dataset:
        x = feats_to_vec(features)  # convert features to a vector.
        y = L2I.get(label)  # convert the label to its number.
        if ll.predict(x, params) == y:
            good += 1
        total += 1
    if not total:
        # No predictions were made, so there is nothing to divide by.
        return 0.0
    return good / total
def pred(pred_data, params):
    """Predict a label for every item of ``pred_data`` and write it to test.pred.

    pred_data: iterable of feature collections accepted by ``feats_to_vec``.
    params: parameters forwarded to ``ll.predict``.

    One label is written per line.
    """
    # Invert utils.L2I so predictions can be translated back to labels.
    index_to_label = dict(
        (index, name) for name, index in utils.L2I.items())

    with open("test.pred", "w+") as out:
        for features in pred_data:
            vector = feats_to_vec(features)
            predicted = ll.predict(vector, params)
            out.write(index_to_label[predicted] + "\n")
示例#13
0
def accuracy_on_dataset(dataset, params):
    """Return correct_predictions / all_predictions for ``dataset``.

    dataset: iterable of (label, features) pairs; each label is looked up in
        ``utils.L2I`` and compared with the value returned by ``ll.predict``.
    params: parameters forwarded to ``ll.predict``.

    Returns a float; an empty dataset yields 0.0 instead of raising
    ZeroDivisionError.
    """
    good = total = 0.0
    for label, features in dataset:
        if utils.L2I[label] == ll.predict(feats_to_vec(features), params):
            good += 1
        total += 1
    return good / total if total else 0.0
示例#14
0
def run_test(test_data, params):
    """Write one predicted label per line to ``test.pred``.

    test_data: iterable of (label, features) pairs.
    params: parameters forwarded to ``ll.predict``.
    """
    # The context manager guarantees the file is closed on errors too.
    with open("test.pred", 'w') as pred_file:
        for label, features in test_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y_hat = ll.predict(x, params)
            # Take the first key of ut.L2I mapped to y_hat; if there is none,
            # the label that came with the example is written unchanged.
            name = next(
                (key for key, val in ut.L2I.items() if val == y_hat), label)
            pred_file.write(str(name) + "\n")
示例#15
0
def accuracy_on_dataset(dataset, params):
    """Return correct_predictions / all_predictions for ``dataset``.

    dataset: iterable of (label, features) pairs; each label is looked up in
        ``ut.L2I`` and compared with the value returned by ``ll.predict``.
    params: parameters forwarded to ``ll.predict``.

    Returns a float; an empty dataset yields 0.0 instead of raising
    ZeroDivisionError.
    """
    good = bad = 0.0
    for label, features in dataset:
        pred = ll.predict(feats_to_vec(features), params)
        if pred == ut.L2I[label]:
            good += 1
        else:
            bad += 1
    evaluated = good + bad
    if not evaluated:
        return 0.0
    return good / evaluated
示例#16
0
def accuracy_on_dataset(dataset, params):
    """
    Calculate accuracy using the loglinear predict function.

    dataset: an iterable of (label, feature) pairs; each label is looked up
        in utils.L2I and compared with the value returned by ll.predict.
    params: parameters forwarded to ll.predict.

    Returns a float; an empty dataset yields 0.0 instead of raising
    ZeroDivisionError.
    """
    total = good = 0.0
    for label, features in dataset:
        total += 1
        predicted_label = ll.predict(feats_to_vec(features), params)
        if predicted_label == utils.L2I[label]:
            good += 1
    if total == 0:
        return 0.0
    return good / total
示例#17
0
def accuracy_on_dataset(dataset, params):
    """Return correct_predictions / all_predictions for ``dataset``.

    dataset: iterable of (label, features) pairs; each label is looked up in
        ``ut.L2I`` and compared with the value returned by ``ll.predict``.
    params: parameters forwarded to ``ll.predict``.

    Returns a float; an empty dataset yields 0.0 instead of raising
    ZeroDivisionError.
    """
    good = bad = 0.0
    for label, features in dataset:
        x = feats_to_vec(features)  # convert features to a vector.
        y_hat = ll.predict(x, params)
        if y_hat == ut.L2I[label]:
            good += 1
        else:
            bad += 1
    total = good + bad
    # An empty dataset has no accuracy to speak of; report 0.0.
    return good / total if total else 0.0
示例#18
0
def read_test_file():
    """Predict a label for every line of the test file and write it out.

    Each input line is split on tabs; the second field is converted with
    ``text_to_bigrams`` and ``feats_to_vec``, and the label found in ``I2L``
    for the resulting prediction is written on its own line of the output
    file.
    """
    # NOTE(review): I2L is called here but subscripted below; confirm the
    # object supports both uses.
    I2L()
    # NOTE(review): both paths are absolute and specific to one machine.
    with open(
            r"C:\Users\bitro\OneDrive\שולחן העבודה\university\deep learning\test",
            "r") as rf:
        with open(
                r"C:\Users\bitro\OneDrive\שולחן העבודה\university\deep learning\out_test",
                "w") as wf:
            # Iterating the file object fetches a fresh line on every pass
            # and stops at end of file.
            for text in rf:
                data = text.split('\t')[1]
                # NOTE(review): ll.predict is called without parameters
                # here - confirm its signature.
                prediction_num = ll.predict(feats_to_vec(
                    text_to_bigrams(data)))
                wf.write(I2L[prediction_num] + '\n')
示例#19
0
def create_predictions_file(data, parameters):
    """Write one predicted language per line to ``test.pred``.

    data: iterable of (tag, features) pairs.
    parameters: parameters forwarded to ``ll.predict``.
    """
    # (language, index) pairs used to translate predictions back to names
    languages_list = utils.L2I.items()
    # ``with`` closes the file even if a prediction raises part-way through.
    with open("test.pred", 'w') as file_predictions:
        for tag, features in data:
            x = feats_to_vec(features)  # convert features to a vector.
            predicted_language = ll.predict(x, parameters)
            # First language whose index equals the prediction; when none
            # matches, the tag that came with the example is written.
            name = next(
                (language for language, index in languages_list
                 if predicted_language == index),
                tag)
            file_predictions.write(str(name) + "\n")
示例#20
0
def accuracy_on_dataset(dataset, params):
    """Return correct_predictions / all_predictions for ``dataset``.

    dataset: iterable of (label, features) pairs; each label is looked up in
        ``ut.L2I`` and compared with the value returned by ``ll.predict``.
    params: parameters forwarded to ``ll.predict``.

    Returns a float; an empty dataset yields 0.0 instead of raising
    ZeroDivisionError.
    """
    good = bad = 0.0
    for label, features in dataset:
        x = feats_to_vec(features)  # convert features to a vector.
        y = ut.L2I[label]  # convert the label to its number.
        pred = ll.predict(x, params)
        if y == pred:
            good += 1
        else:
            bad += 1
    total = good + bad
    if not total:
        return 0.0
    return good / total
示例#21
0
def accuracy_on_dataset(dataset, params):
    """Return correct_predictions / all_predictions for ``dataset``.

    dataset: iterable of (label, features) pairs. Features are converted with
        ``feats_to_vec``; the label is compared with the prediction directly.
    params: parameters forwarded to ``ll.predict``.

    Returns a float; an empty dataset yields 0.0 instead of raising
    ZeroDivisionError.
    """
    good = total = 0.0
    for label, features in dataset:
        feat_vec = feats_to_vec(features)
        y_hat = ll.predict(feat_vec, params)
        if label == y_hat:
            good += 1
        total += 1
    # Avoid 0/0 when no example was seen.
    return good / total if total else 0.0
示例#22
0
def test(parameters):
    """
    Print a prediction for every example returned by ut.read_data('test').

    parameters - the trained params, forwarded to ll.predict
    """
    test_ans = ''
    test_data = ut.read_data('test')
    for counter, (label, feature) in enumerate(test_data, 1):
        pred = ll.predict(feats_to_vec(feature), parameters)
        # The last key of ut.L2I mapped to pred wins; when nothing matches,
        # test_ans keeps the value from the previous example.
        for l, i in ut.L2I.items():
            if i == pred:
                test_ans = l
        # One pre-formatted argument prints the same text with the Python 2
        # print statement and the Python 3 print function.
        print('line:  %s prediction:  %s' % (counter, test_ans))
示例#23
0
def accuracy_on_dataset(dataset, params):
    """
    Calculate the accuracy of the predictions on a given data set.
    :param dataset: iterable of (label, features) pairs; each label is looked
        up in ut.L2I
    :param params: parameters forwarded to ll.predict
    :return: fraction of correct predictions as a float (0.0 for an empty
        data set, instead of raising ZeroDivisionError)
    """
    good = bad = 0.0
    for label, features in dataset:
        x = feats_to_vec(features)
        y = ut.L2I[label]
        # the prediction matches the expected label
        if ll.predict(x, params) == y:
            good += 1
        else:
            bad += 1
    total = good + bad
    if total == 0:
        return 0.0
    return good / total
def create_test_pred_file(test_data, params):
    """
    Create a 'test.pred' file with one predicted label per line.
    :param test_data: iterable of (label, features) pairs to be predicted
    :param params: trained params, forwarded to ll.predict
    :return: None
    """
    # The context manager closes the file even if a prediction raises.
    with open("test.pred", 'w') as f:
        for label, features in test_data:
            x = feats_to_vec(features)
            y_hat = ll.predict(x, params)
            # First key of utils.L2I mapped to y_hat; when none matches, the
            # label that came with the example is written.
            predicted = next(
                (l for l, i in utils.L2I.items() if y_hat == i), label)
            f.write(predicted + "\n")
示例#25
0
def create_test_file(data_set, params):
    """
    Create the file 'test.pred' with one predicted language per line.
    :param data_set: iterable of (label, features) pairs
    :param params: parameters forwarded to ll.predict
    :return: None
    """
    # ``with`` closes the file even if a prediction raises part-way through.
    with open("test.pred", 'w') as test_file:
        for given_label, features in data_set:
            x = feats_to_vec(features)
            index = ll.predict(x, params)
            # First key of ut.L2I mapped to the predicted index; when none
            # matches, the label that came with the example is written.
            language = next(
                (key for key, value in ut.L2I.items() if value == index),
                given_label)
            test_file.write(language + "\n")
def accuracy_on_dataset(dataset, params):
    """Return correct_predictions / all_predictions for ``dataset``.

    dataset: iterable of (label, features) pairs; each label is looked up in
        ``ut.L2I`` and compared with the value returned by ``ll.predict``.
    params: parameters forwarded to ``ll.predict``.

    Returns a float; an empty dataset yields 0.0 instead of raising
    ZeroDivisionError.
    """
    good = bad = 0.0
    local_l2i = ut.L2I  # bound once so the loop skips the attribute lookup
    for label, features in dataset:
        feat_vec = feats_to_vec(features)
        y_hat = ll.predict(feat_vec, params)

        if local_l2i[label] == y_hat:
            good += 1
        else:
            bad += 1

    total = good + bad
    if not total:
        return 0.0
    return good / total
示例#27
0
def test(parameters):
    """
    Write a prediction for every example returned by ut.read_data('test')
    to the file 'test.pred.ll', one per line.

    parameters - the trained params, forwarded to ll.predict
    """
    # ``with`` closes the file even if reading the data or predicting raises.
    with open("test.pred.ll", 'w') as fd:
        test_ans = ''
        test_data = ut.read_data('test')
        for label, feature in test_data:
            pred = ll.predict(feats_to_vec(feature), parameters)
            # The last key of ut.L2I mapped to pred wins; when nothing
            # matches, test_ans keeps the value from the previous example.
            for l, i in ut.L2I.items():
                if i == pred:
                    test_ans = l
            fd.write(test_ans + "\n")
def accuracy_on_dataset(dataset, params):
    """Return correct_predictions / all_predictions for ``dataset``.

    dataset: iterable of (label, features) pairs. Features are converted with
        ``feats_to_vec``; the label is compared with the prediction as-is.
    params: parameters forwarded to ``ll.predict``.

    Returns a float; an empty dataset yields 0.0 instead of raising
    ZeroDivisionError.
    """
    good = bad = 0.0

    for label, features in dataset:
        x = feats_to_vec(features)  # convert features to a vector.
        # NOTE(review): the label is used without conversion - confirm the
        # dataset labels are already in the form ll.predict returns.
        y = label

        if ll.predict(x, params) == y:
            good += 1
        else:
            bad += 1

    total = good + bad
    if total == 0:
        return 0.0
    return good / total
def accuracy_on_dataset(dataset, params):
    """Return correct_predictions / all_predictions for ``dataset``.

    dataset: iterable of (label, features) pairs; each label is looked up in
        ``L2I`` and compared with the value returned by ``ll.predict``.
    params: parameters forwarded to ``ll.predict``.

    Returns a float; an empty dataset yields 0.0 instead of raising
    ZeroDivisionError.
    """
    good = bad = 0.0
    for label, features in dataset:
        # Separate names keep the loop variables intact.
        gold = L2I[label]
        vector = feats_to_vec(features)

        if ll.predict(vector, params) == gold:
            good += 1
        else:
            bad += 1

    total = good + bad
    return good / total if total else 0.0
def test_predictions(test_data, params):
    import os

    # Clearing the content of the file if it already exists; Otherwise, creating the file.
    if os.path.exists("./test.pred"):
        os.remove("./test.pred")
    f = open("./test.pred", "a+")

    # For each example we find calculate the model prediction
    for label, features in test_data:
        x = feats_to_vec(features)

        # Get the index of the max log-probability
        prediction = ll.predict(x, params)

        # Write to the file
        f.write("{0}\n".format(index_to_language(prediction)))

    # Close the file.
    f.close()