Exemplo n.º 1
0
def test_accuracy():
    """Check whole_sequence_accuracy on five concatenated label sequences.

    The 2nd, 4th and 5th predicted sequences equal their references and the
    other two differ, so the expected score is 3/5 = 0.6.
    """
    reference = ["0111001", "1001", "00011111", "010101011", "1110"]
    predicted = ["0010010", "1001", "00011110", "010101011", "1110"]

    # The metric is called with the sequences concatenated into one string
    # each, plus the individual sequence lengths.
    flat_reference = ''.join(reference)
    flat_predicted = ''.join(predicted)
    seq_lengths = [len(seq) for seq in reference]

    score = whole_sequence_accuracy(flat_reference, flat_predicted,
                                    seq_lengths)
    assert_equal(.6, score)
Exemplo n.º 2
0
def test_accuracy():
    """Check whole_sequence_accuracy on five concatenated label sequences.

    Three of the five predicted sequences (the 2nd, 4th and 5th) equal their
    references, so the expected score is 3/5 = 0.6.
    """
    y_true = ["0111001", "1001", "00011111", "010101011", "1110"]
    y_pred = ["0010010", "1001", "00011110", "010101011", "1110"]
    # Pass the lengths as a concrete list: on Python 3 ``map`` returns a
    # one-shot iterator without a length, whereas a list behaves the same on
    # both Python 2 and Python 3.
    lengths = [len(y) for y in y_true]
    assert_equal(
        .6,
        whole_sequence_accuracy(''.join(y_true), ''.join(y_pred), lengths))
Exemplo n.º 3
0
def testHMM(clf, X_test, y_test):
    """Predict labels for one test sequence and print the result and score.

    The whole of ``X_test`` is treated as a single sequence: its length,
    ``len(y_test)``, is the only entry in the lengths list handed to both
    ``clf.predict`` and ``whole_sequence_accuracy``.

    Args:
        clf: fitted model exposing ``predict(X, lengths)``.
        X_test: feature rows of the test sequence.
        y_test: reference labels for ``X_test``.
    """
    lengths = [len(y_test)]

    # Validation after training
    y_pred = clf.predict(X_test, lengths)

    # Function-call form of print: valid on Python 3 and prints the same
    # thing on Python 2 for a single argument.
    print(y_pred)

    # Final score
    print(whole_sequence_accuracy(y_test, y_pred, lengths))
Exemplo n.º 4
0
def testHMM(clf, data):
    """Load a data set, predict its labels and print them with the score.

    Args:
        clf: fitted model exposing ``predict(X, lengths)``.
        data: input handed unchanged to ``load_conll`` together with the
            module-level ``features`` callable.
    """
    # Validation after training
    X_test, y_test, lengths_test = load_conll(data, features)
    y_pred = clf.predict(X_test, lengths_test)

    # Function-call form of print: valid on Python 3 and prints the same
    # thing on Python 2 for a single argument.
    print(y_pred)

    # Final score
    print(whole_sequence_accuracy(y_test, y_pred, lengths_test))
    open("finer-data/data/digitoday.2014.train.csv", "r"), features)

# Structured perceptron configured with "bestfirst" decoding.
clf = StructuredPerceptron(decode="bestfirst", verbose=1, random_state=0)

# Train on X_train / y_train / lengths_train, which must already be defined
# before this point.
print("Fitting model " + str(clf))
clf.fit(X_train, y_train, lengths_train)

print("\nPredictions on dev set")

# Read the development set
X_dev, y_dev, lengths_dev = load_conll(
    open("finer-data/data/digitoday.2014.dev.csv", "r"), features)
y_pred = clf.predict(X_dev, lengths_dev)

# Dev-set report: sequence-level accuracy, element-wise accuracy,
# macro-averaged F1 and the per-class classification report.
print("Whole seq accuracy    ",
      whole_sequence_accuracy(y_dev, y_pred, lengths_dev))
print("Element-wise accuracy ", accuracy_score(y_dev, y_pred))
print("Mean F1-score macro   ", f1_score(y_dev, y_pred, average="macro"))
print(classification_report(y_dev, y_pred))

# How often each label was predicted on the dev set.
print(pd.Series(y_pred).value_counts())

print("\nPredictions on test set")

# Read the test set
X_test, y_test, lengths_test = load_conll(
    open("finer-data/data/digitoday-fixed.2015.test.csv", "r"), features)
# y_pred is rebound here: from now on it holds the test-set predictions.
y_pred = clf.predict(X_test, lengths_test)
print("Whole seq accuracy    ",
      whole_sequence_accuracy(y_test, y_pred, lengths_test))
print("Element-wise accuracy ", accuracy_score(y_test, y_pred))
# Read the training set
X_train, y_train, lengths_train = load_conll(open("../resources/train.data", "r"), features)

# Structured perceptron configured with Viterbi decoding.
clf = StructuredPerceptron(decode="viterbi", lr_exponent=.05, max_iter=30)

print("Fitting model " + str(clf))
clf.fit(X_train, y_train, lengths_train)

print("\nPredictions on dev set")

# Read the development set
X_dev, y_dev, lengths_dev = load_conll(open("../resources/dev.data", "r"), features)
y_pred = clf.predict(X_dev, lengths_dev)

# Dev-set report: sequence-level accuracy, element-wise accuracy and
# macro-averaged F1.
print("Whole seq accuracy    ", whole_sequence_accuracy(y_dev, y_pred, lengths_dev))
print("Element-wise accuracy ", accuracy_score(y_dev, y_pred))
print("Mean F1-score macro   ", f1_score(y_dev, y_pred, average="macro"))

print("\nPredictions on test set")

# Read the test set; the labels returned by load_conll are discarded here,
# so no test-set score is computed.
X_test, _, lengths_test = load_conll(open("../resources/test.data", "r"), features)
# y_pred is rebound here: from now on it holds the test-set predictions.
y_pred = clf.predict(X_test, lengths_test)

# How often each label was predicted on the test set.
print(pd.Series(y_pred).value_counts())

print("Saving predicted as a submission")

# Only the CSV header is written in the visible code.
# NOTE(review): the code that writes the prediction rows is not visible in
# this excerpt - confirm it follows.
with open("submission.csv", "w") as wf:
    wf.write("id,tag\n")
Exemplo n.º 7
0
def test_accuracy():
    """Verify that whole_sequence_accuracy yields 0.6 for the fixture below.

    Of the five sequence pairs, the 1st and 3rd differ and the remaining
    three are identical, giving 3/5 = 0.6.
    """
    gold = ["0111001", "1001", "00011111", "010101011", "1110"]
    guess = ["0010010", "1001", "00011110", "010101011", "1110"]

    # Arguments for the metric: both label streams joined into one string
    # each, followed by the length of every individual sequence.
    joined_gold = ''.join(gold)
    joined_guess = ''.join(guess)
    lengths = [len(item) for item in gold]

    actual = whole_sequence_accuracy(joined_gold, joined_guess, lengths)
    assert_equal(.6, actual)
Exemplo n.º 8
0
        # next word's length
        yield "next_len=" + str(get_word_len(next_))

        # last letters of the next word
        yield "next_last_letters=" + (next_[-4:] if len(next_) > 4 else next_)
        yield "next_word_shape=" + get_word_shape(next_)
        yield "next_short_word_shape=" + get_short_word_shape(next_)


# Read the training set
X_train, y_train, lengths_train = load_conll(
    open("resources/talbanken-stanford-1.2/talbanken-stanford-train.tsv", "r"), features)

# Structured perceptron configured with Viterbi decoding.
clf = StructuredPerceptron(decode="viterbi", verbose=1, random_state=0)

print("Fitting model " + str(clf))
clf.fit(X_train, y_train, lengths_train)

print("\nPredictions on test set")

# Read the test set
X_test, y_test, lengths_test = load_conll(
    open("resources/talbanken-stanford-1.2/talbanken-stanford-test.tsv", "r"), features)
y_pred = clf.predict(X_test, lengths_test)
# Test-set report: sequence-level accuracy, element-wise accuracy,
# macro-averaged F1 and the per-class classification report.
print("Whole seq accuracy    ", whole_sequence_accuracy(y_test, y_pred, lengths_test))
print("Element-wise accuracy ", accuracy_score(y_test, y_pred))
print("Mean F1-score macro   ", f1_score(y_test, y_pred, average="macro"))
print(classification_report(y_test, y_pred))

# How often each label was predicted on the test set.
print(pd.Series(y_pred).value_counts())
				
				# Body of one cross-validation fold; the enclosing loops that
				# define cv, set, x, tx, ty and the length lists are not
				# visible in this excerpt.
				print("Running fold %d for set %d" % (cv, set))
				# NOTE(review): presumably hmmlearn's GMMHMM with 2 hidden states
				# and 4 mixture components per state - confirm against imports.
				clf=hmm.GMMHMM(n_components=2,n_mix=4,n_iter=100)
				clf.fit(x, train_lengths)
				pred = [row for row in clf.predict(tx, test_lengths)]
				pred_last = []
				ty_last = []
				# Running end offset of the current sequence inside the
				# concatenated pred / ty arrays.
				length_count = 0
				for i in range(0, len(test_lengths)):
					length_count += test_lengths[i]
					# Keep only the final element of each sequence.
					pred_last.append(pred[length_count - 1])
					ty_last.append(ty[length_count-1])
				hmm_pred.append(pred)
				
				
				# Scores against the labels exactly as given.
				acc_ws_0 = whole_sequence_accuracy(ty, pred, test_lengths)
				acc_last_0 = accuracy_score(ty_last, pred_last)
				mcc_ws_0 = matthews_corrcoef(ty, pred)
				mcc_last_0 = matthews_corrcoef(ty_last, pred_last)

				
				# Same scores against the labels with 0 and 1 swapped.
				# NOTE(review): presumably because the two HMM state indices
				# are assigned arbitrarily - confirm.
				acc_ws_1 = whole_sequence_accuracy([(z + 1)%2 for z in ty], pred, test_lengths)
				acc_last_1 = accuracy_score([(z + 1)%2 for z in ty_last], pred_last)
				mcc_ws_1 = matthews_corrcoef([(z + 1)%2 for z in ty], pred)
				mcc_last_1 = matthews_corrcoef([(z + 1)%2 for z in ty_last], pred_last)	
				
				# Keep the un-swapped scores when they give the higher
				# last-element accuracy; the alternative branch is not visible
				# in this excerpt.
				if acc_last_0 > acc_last_1:
					acc_ws = acc_ws_0
					acc_last = acc_last_0
					mcc_ws = mcc_ws_0
					mcc_last = mcc_last_0
Exemplo n.º 10
0
def test_accuracy():
    """Check whole_sequence_accuracy on five concatenated label sequences.

    Three of the five predicted sequences (the 2nd, 4th and 5th) equal their
    references, so the expected score is 3/5 = 0.6.
    """
    y_true = ["0111001", "1001", "00011111", "010101011", "1110"]
    y_pred = ["0010010", "1001", "00011110", "010101011", "1110"]
    # Pass the lengths as a concrete list: on Python 3 ``map`` returns a
    # one-shot iterator without a length, whereas a list behaves the same on
    # both Python 2 and Python 3.
    lengths = [len(y) for y in y_true]
    assert_equal(.6, whole_sequence_accuracy(''.join(y_true), ''.join(y_pred),
                                             lengths))