Пример #1
0
def train(filename):
    """Train a decision-tree transition classifier and pickle it to disk.

    Runs the oracle over ``filename`` to collect per-step features and
    transitions, one-hot encodes the transitions, integer-encodes every
    feature column, fits a ``DecisionTreeClassifier`` and writes two files
    into the current working directory: ``vocs.p`` (the per-column
    vocabularies) and ``model.p`` (the fitted classifier).

    Args:
        filename: Path handed to ``Oracle``.

    Raises:
        ValueError: If the oracle yields a transition other than
            ``'shift'``, ``'right'`` or ``'left'``. Skipping such a step
            would leave the targets shorter than the feature matrix.
    """
    # Column order of the one-hot target: shift:0, right:1, left:2
    one_hot = {
        'shift': [1, 0, 0],
        'right': [0, 1, 0],
        'left': [0, 0, 1],
    }

    oracle = Oracle(filename)
    print('oracle transitions parsing...')
    oracle.search_transitions()

    x = np.array(oracle.features)

    targets = []
    for position, step in enumerate(oracle.transitions):
        label = step['transition']
        try:
            targets.append(one_hot[label])
        except KeyError:
            # Fail here with a precise message instead of letting the
            # feature/target row counts drift apart.
            raise ValueError(
                f"unknown transition {label!r} at step {position}; "
                f"expected one of {sorted(one_hot)}"
            ) from None
    y = np.array(targets)

    print('sparse encoding of features...')
    # For each feature column, np.unique returns the distinct values (the
    # vocabulary) and, per row, the index of that row's value in it.
    vocs, inverses = zip(*(np.unique(feature, return_inverse=True)
                           for feature in x.T))
    x_new = np.squeeze(np.vstack(inverses).T)

    clf = DecisionTreeClassifier(random_state=0)
    clf.fit(x_new, y)

    # Context managers guarantee both pickles are flushed and closed.
    with open("vocs.p", "wb") as vocs_file:
        pickle.dump(vocs, vocs_file)
    with open("model.p", "wb") as model_file:
        pickle.dump(clf, model_file)
Пример #2
0
import numpy as np
import time
from multiprocessing import Pool

from Configuration import Configuration
from Oracle import Oracle
#from Model import TBP_AS_model

# Input file for the oracle; the commented-out path is an alternative input.
filename = "../UD_French-GSD/UD_French-GSD/fr_gsd-ud-train.conllu"
# filename = "../UD_French-GSD/UD_French-GSD/test.conllu"

oracle = Oracle(filename)
oracle.search_transitions()

x = np.array(oracle.features)

# One-hot row per transition name; column order is shift:0, right:1, left:2.
ONE_HOT = {
    'shift': (1, 0, 0),
    'right': (0, 1, 0),
    'left': (0, 0, 1),
}

labels = np.array(
    [step['transition'] for step in np.array(oracle.transitions)]
)

# Labels outside the table are skipped, so y can end up shorter than x.
new_y = [list(ONE_HOT[label]) for label in labels if label in ONE_HOT]

y = np.array(new_y)
x.shape, y.shape