def train(self, dataset, dev_dataset, step_size=0.1, max_epochs=50):

        model_acc = None
        n_updates = 0
        self.Y = list(set([y for (y, x) in dataset]))

        # for now the development corpus is not yet used to improve
        # the model (model selection / early stopping) -> to be added

        for e in range(max_epochs):

            loss = 0.0
            for y, x in dataset:
                ypred = self.tag(x)
                if y != ypred:
                    loss += 1.0
                    delta_ref = SparseWeightVector.code_phi(x, y)
                    delta_pred = SparseWeightVector.code_phi(x, ypred)
                    self.model += step_size * (delta_ref - delta_pred)
                    # running average of the accumulated weight vectors
                    if model_acc is None:
                        model_acc = self.model
                        n_updates = 1
                    else:
                        model_acc += self.model
                        n_updates += 1
                        self.model = model_acc * (1 / float(n_updates))
            print("Loss (#errors) = ", loss)
            if loss == 0.0:
                return
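The loop above never consults dev_dataset. A minimal sketch of how it could be used, assuming a test(dataset) method like the ones defined on the classifiers further down this page (run_one_epoch is a hypothetical helper wrapping one pass of the loop above):

    def train_with_dev(self, dataset, dev_dataset, step_size=0.1, max_epochs=50):
        best_dev_acc, best_model = -1.0, self.model
        for e in range(max_epochs):
            self.run_one_epoch(dataset, step_size)  # hypothetical helper: one perceptron pass
            dev_acc = self.test(dev_dataset)        # accuracy on the held-out corpus
            if dev_acc > best_dev_acc:              # keep the best weights seen so far
                best_dev_acc, best_model = dev_acc, self.model
        self.model = best_model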
Example #2
    def train(self, dataset, step_size=0.1, max_epochs=50):

        model_acc = None
        n_updates = 0
        self.Y = list(set([y for (y, x) in dataset]))

        for e in range(max_epochs):

            loss = 0.0
            for y, x in dataset:
                ypred = self.tag(x)
                if y != ypred:
                    loss += 1.0
                    delta_ref = SparseWeightVector.code_phi(x, y)
                    delta_pred = SparseWeightVector.code_phi(x, ypred)
                    self.model += step_size * (delta_ref - delta_pred)
                    # running average of the accumulated weight vectors
                    if model_acc is None:
                        model_acc = self.model
                        n_updates = 1
                    else:
                        model_acc += self.model
                        n_updates += 1
                        self.model = model_acc * (1 / float(n_updates))
            print("Loss (#errors) = ", loss)
            if loss == 0.0:
                return
Example #3
    def train(self, dataset, step_size=1.0, max_epochs=100):
        """
        @param dataset: a list of couples (y_tags,x_words)
        """
        self.Y = list(set([y for (ytags, xwords) in dataset for y in ytags]))

        N = len(dataset)
        for e in range(max_epochs):

            loss = 0.0
            for ytags, xwords in dataset:

                ypreds = self.tag(xwords)

                if ypreds != ytags:
                    loss += 1.0

                    ytags_bigrams = list(zip([self.source_tag] + ytags, ytags))
                    ypreds_bigrams = list(zip([self.source_tag] + ypreds, ypreds))

                    delta_pred = SparseWeightVector()
                    for y, x in zip(ypreds_bigrams, xwords):
                        delta_pred += SparseWeightVector.code_phi(x, y)

                    delta_ref = SparseWeightVector()
                    for y, x in zip(ytags_bigrams, xwords):
                        delta_ref += SparseWeightVector.code_phi(x, y)

                    self.model += step_size * (delta_ref - delta_pred)

            print('Loss = ', loss, "Sequence accuracy = ", (N - loss) / N)
            if loss == 0:
                return
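The update pairs every word with a (previous tag, current tag) bigram; the bigram construction is just a shifted zip:

    source_tag = "@@@"
    ytags = ["D", "N", "V"]
    print(list(zip([source_tag] + ytags, ytags)))
    # [('@@@', 'D'), ('D', 'N'), ('N', 'V')]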
Example #4
    def train(self, dataset, step_size=0.1, max_epochs=100):
        #Maximizes log likelihood

        self.Y = list(set([y for (y, x) in dataset]))

        for e in range(max_epochs):  #Batch gradient ascent

            delta_ref = SparseWeightVector()
            delta_pred = SparseWeightVector()

            loss = 0.0
            for y, x in dataset:

                delta_ref += SparseWeightVector.code_phi(x, y)

                preds = self.predict(x)

                for idx, c in enumerate(self.Y):
                    delta_pred += SparseWeightVector.code_phi(x, c) * preds[idx]

                loss += log(preds[self.Y.index(y)])

            self.model += step_size * (delta_ref - delta_pred)
            print('Loss (log likelihood) = ', loss)
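For reference, the batch update above is gradient ascent on the conditional log-likelihood: each example contributes phi(x,y) - sum_c P(c|x) * phi(x,c) to the gradient (observed minus expected feature counts), which is exactly what delta_ref - delta_pred accumulates over the dataset.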
Example #5
    def train(self, dataset, step_size=1.0, max_epochs=100):
        """
        @param dataset : a list of dependency trees
        """
        N = len(dataset)
        for e in range(max_epochs):
            loss = 0.0
            for ref_tree in dataset:
                tokens = ref_tree.tokens
                pred_tree = self.parse_one(tokens)

                if ref_tree.accurracy(pred_tree) != 1.0:
                    loss += 1.0

                    delta_ref = SparseWeightVector()
                    for gov_idx, dep_idx in ref_tree.edges:
                        x_repr = self.__make_arc_representation(
                            gov_idx, dep_idx, tokens)
                        ylabel = ArcFactoredParser.RIGHTARC if gov_idx < dep_idx else ArcFactoredParser.LEFTARC
                        delta_ref += SparseWeightVector.code_phi(
                            x_repr, ylabel)

                    delta_pred = SparseWeightVector()
                    for gov_idx, dep_idx in pred_tree.edges:
                        x_repr = self.__make_arc_representation(
                            gov_idx, dep_idx, tokens)
                        ylabel = ArcFactoredParser.RIGHTARC if gov_idx < dep_idx else ArcFactoredParser.LEFTARC
                        delta_pred += SparseWeightVector.code_phi(
                            x_repr, ylabel)

                    self.model += step_size * (delta_ref - delta_pred)
            print('Loss = ', loss, "%Exact match = ", (N - loss) / N)
            if loss == 0.0:
                return
Example #6
    def static_train(self, treebank, step_size=1.0, max_epochs=100):
        """
        Trains a model with a static oracle
        @param treebank : a list of dependency trees
        """
        dataset = []
        for dtree in treebank:
            dataset.extend(self.static_oracle_derivation(dtree))
        N = len(dataset)
        for e in range(max_epochs):
            loss = 0.0
            for ref_config, ref_action, tokens in dataset:
                pred_config, pred_action = self.predict_local(
                    ref_config, tokens)
                if ref_action != pred_action:
                    loss += 1.0
                    S, B, A, score = ref_config
                    # both actions are scored on the same configuration,
                    # so the feature representation is computed only once
                    x_repr = self.__make_config_representation(S, B, tokens)
                    delta_ref = SparseWeightVector.code_phi(x_repr, ref_action)
                    delta_pred = SparseWeightVector.code_phi(x_repr, pred_action)

                    self.model += step_size * (delta_ref - delta_pred)
            print('Loss = ', loss, "%Local accuracy = ", (N - loss) / N)
            if loss == 0.0:
                return
Example #7
    def dynamic_train(self, treebank, step_size=1.0, max_epochs=100):

        ACTIONS = [ArcEagerTransitionParser.LEFTARC,ArcEagerTransitionParser.RIGHTARC,\
                   ArcEagerTransitionParser.SHIFT,ArcEagerTransitionParser.REDUCE,\
                   ArcEagerTransitionParser.TERMINATE]

        N = len(treebank)
        for e in range(max_epochs):
            loss, total = 0, 0
            for dtree in treebank:
                ref_arcs = set(dtree.edges)
                n = len(dtree.tokens)
                C = ((0,), tuple(range(1, n)), tuple(), 0.0)  # a config is a hashable quadruple with a score
                action = None
                while action != ArcEagerTransitionParser.TERMINATE:
                    pred_config, pred_action = self.predict_local(
                        C, dtree.tokens)
                    optimal_actions = list([
                        a for a in ACTIONS
                        if self.dynamic_oracle(C, a, ref_arcs)
                    ])
                    total += 1
                    if pred_action not in optimal_actions:
                        loss += 1
                        optimal_config, optimal_action = self.predict_local(
                            C, dtree.tokens, allowed=optimal_actions)
                        S, B, A, score = C
                        x_repr = self.__make_config_representation(
                            S, B, dtree.tokens)
                        # both actions are scored on the same configuration C
                        delta_ref = SparseWeightVector.code_phi(
                            x_repr, optimal_action)
                        delta_pred = SparseWeightVector.code_phi(
                            x_repr, pred_action)

                        self.model += step_size * (delta_ref - delta_pred)

                    action = self.choose(pred_action, optimal_actions)

                    if action == ArcEagerTransitionParser.SHIFT:
                        C = self.shift(C, dtree.tokens)
                    elif action == ArcEagerTransitionParser.LEFTARC:
                        C = self.leftarc(C, dtree.tokens)
                    elif action == ArcEagerTransitionParser.RIGHTARC:
                        C = self.rightarc(C, dtree.tokens)
                    elif action == ArcEagerTransitionParser.REDUCE:
                        C = self.reduce_config(C, dtree.tokens)
                    elif action == ArcEagerTransitionParser.TERMINATE:
                        C = self.terminate(C, dtree.tokens)
            print('Loss = ', loss, "%Local accurracy = ",
                  (total - loss) / total)
            if loss == 0.0:
                return
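The choose call above is the exploration policy of the dynamic oracle and is not shown on this page. A minimal sketch of a common policy, assuming the usual "mostly follow the model's own prediction" strategy (p_explore is a hypothetical parameter):

    import random

    def choose(self, pred_action, optimal_actions, p_explore=0.9):
        # follow the prediction so training visits configurations the parser
        # will actually reach at test time; otherwise back off to an action
        # the dynamic oracle considers optimal
        if pred_action in optimal_actions or random.random() < p_explore:
            return pred_action
        return random.choice(optimal_actions)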
Example #8
    def __init__(self):
        """
        model: a SparseWeightVector
        nonterminals_decode / nonterminals_code: mappings between integer
        codes and nonterminal symbols
        """
        self.model = SparseWeightVector()
        self.nonterminals_decode = []  # maps integers to symbols
        self.nonterminals_code = {}    # maps symbols to integers
    def train(self,
              treebank,
              step_size=1.0,
              max_epochs=100,
              beam_size=4,
              left_markov=True):
        """         
        @param treebank a list of ConsTrees
        @param left_markov: if true -> left markovization else right markovization
        """
        self.transform(treebank, left_markov)
        dataset = list([(tree.tokens(), self.reference_derivation(tree))
                        for tree in treebank])
        N = len(dataset)
        for e in range(max_epochs):
            loss = 0.0
            for sentence, ref_derivation in dataset:
                pred_beam = self.parse_one(sentence, beam_size, get_beam=True)
                (update, ref_prefix,
                 pred_prefix) = self.early_prefix(ref_derivation, pred_beam)
                if update:
                    loss += 1.0
                    delta_ref = SparseWeightVector()
                    current_config = ref_prefix[0][1]
                    for action, config in ref_prefix[1:]:
                        S, B, score = current_config
                        x_repr = self.__make_config_representation(
                            S, B, sentence)
                        delta_ref += SparseWeightVector.code_phi(
                            x_repr, action)
                        current_config = config

                    delta_pred = SparseWeightVector()
                    current_config = pred_prefix[0][1]
                    for action, config in pred_prefix[1:]:
                        S, B, score = current_config
                        x_repr = self.__make_config_representation(
                            S, B, sentence)
                        delta_pred += SparseWeightVector.code_phi(
                            x_repr, action)
                        current_config = config

                    self.model += step_size * (delta_ref - delta_pred)

            print('Loss = ', loss, "%Exact match = ", (N - loss) / N)
            if loss == 0.0:
                return
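This is an early update in the style of Collins and Roark: early_prefix finds the first beam column where the reference derivation falls off the beam and truncates both derivations there, so the perceptron is only updated on decisions the beam search actually made.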
Example #10
    def train(self, dataset, step_size=1.0, max_epochs=100, beam_size=4):
        """
        @param dataset : a list of dependency trees
        """
        N = len(dataset)
        sequences = list([(dtree.tokens, self.oracle_derivation(dtree))
                          for dtree in dataset])

        for e in range(max_epochs):
            loss = 0.0
            for tokens, ref_derivation in sequences:
                pred_beam = self.parse_one(tokens, beam_size, get_beam=True)
                (update, ref_prefix,
                 pred_prefix) = self.early_prefix(ref_derivation, pred_beam)
                #print('R',ref_derivation)
                #print('P',pred_prefix)
                #self.test(dataset,beam_size)

                if update:
                    #print (pred_prefix)
                    loss += 1.0
                    delta_ref = SparseWeightVector()
                    current_config = ref_prefix[0][1]
                    for action, config in ref_prefix[1:]:  # skip the initial dummy action
                        S, B, A, score = current_config
                        x_repr = self.__make_config_representation(
                            S, B, tokens)
                        delta_ref += SparseWeightVector.code_phi(
                            x_repr, action)
                        current_config = config

                    delta_pred = SparseWeightVector()
                    current_config = pred_prefix[0][1]
                    for action, config in pred_prefix[1:]:  # skip the initial dummy action
                        S, B, A, score = current_config
                        x_repr = self.__make_config_representation(
                            S, B, tokens)
                        delta_pred += SparseWeightVector.code_phi(
                            x_repr, action)
                        current_config = config

                    self.model += step_size * (delta_ref - delta_pred)
            print('Loss = ', loss, "%Exact match = ", (N - loss) / N)
            if loss == 0.0:
                return
Example #11
    def train(self, dataset, step_size=0.1, max_epochs=100):
        """
        @param dataset: a list of couples (y_tags,x_words)
        """
        self.Y = list(set([y for (ytags, xwords) in dataset for y in ytags]))

        #pre-computes delta_ref (first term of the gradient is constant)
        delta_ref = SparseWeightVector()
        for ytags, xwords in dataset:
            ytags_bigrams = list(zip([self.source_tag] + ytags, ytags))
            for x, y in zip(xwords, ytags_bigrams):
                delta_ref += SparseWeightVector.code_phi(x, y)

        for e in range(max_epochs):

            loss = 0.0
            delta_pred = SparseWeightVector()

            for ytags, xwords in dataset:
                N = len(xwords)
                K = len(self.Y)
                alphas, Z = self.forward(xwords)
                betas, _ = self.backward(xwords)

                #forward-backward at init
                for ytag in range(K):
                    prob = (self.score(self.source_tag, self.Y[ytag],
                                       xwords[0]) * betas[0, ytag]) / Z
                    delta_pred += prob * SparseWeightVector.code_phi(
                        xwords[0], (self.source_tag, self.Y[ytag]))
                #forward-backward loop
                for i in range(1, N):
                    for yprev in range(K):
                        for ytag in range(K):
                            prob = (alphas[i - 1, yprev] * self.score(
                                self.Y[yprev], self.Y[ytag], xwords[i]) *
                                    betas[i, ytag]) / Z
                            delta_pred += prob * SparseWeightVector.code_phi(
                                xwords[i], (self.Y[yprev], self.Y[ytag]))

                loss += log(self.sequence_score(ytags, xwords) / Z)

            print('Log likelihood(D) = ', loss)
            self.model += step_size * (delta_ref - delta_pred)
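As in the multiclass case, delta_ref - delta_pred is observed minus expected feature counts, but here the expectations require forward-backward: the probability of the tag bigram (y', y) at position i is alphas[i-1, y'] * score(y', y, x_i) * betas[i, y] / Z, which is exactly the quantity weighting each code_phi term above.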
Example #12
    def train(self, dataset, dev, step_size=0.1, max_epochs=20):

        self.Y = list(set([y for (y, x) in dataset]))
        dev_accs = []
        train_accs = []
        train_losses = []
        T = 1.
        avg_cumul = SparseWeightVector()
        for e in range(max_epochs):
            loss = 0.0
            random.shuffle(dataset)
            for y, x in dataset:
                ypred = self.tag(x)
                if y != ypred:
                    loss += 1.0
                    delta_ref = SparseWeightVector.code_phi(x, y)
                    delta_pred = SparseWeightVector.code_phi(x, ypred)
                    update = step_size * (delta_ref - delta_pred)
                    self.model += update
                    avg_cumul += T * update
                    T += 1
            # Calculate accuracy
            if len(dataset) != 0:
                acc = (len(dataset) - loss) / len(dataset)
            else:
                acc = (len(dataset) - loss)
            train_accs.append(acc)
            # Store the loss
            train_losses.append(loss)
            # Update Avg Perceptron
            self.model_avg = self.model - avg_cumul / T
            # Calculate acc for dev corpus
            dev_acc = self.test(dev, True)
            dev_accs.append(dev_acc)
            # Print loss and accuracy
            print("Epoch = " + str(e) + ", Loss (#errors) = " + str(loss) +
                  ", Accuracy = " + str(acc * 100) + ", Dev acc = " +
                  str(dev_acc * 100))
            # Stop if the loss is zero
            if loss == 0.0:
                return train_losses, train_accs, dev_accs
        return train_losses, train_accs, dev_accs
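The model_avg formula is the usual lazy computation of the averaged perceptron. With updates u_t applied at times t = 1..T, the average of the successive weight vectors is (1/T) * sum_t w_t, and since w_t = sum_{s<=t} u_s this equals, up to boundary terms of order 1/T, w_T - (1/T) * sum_t t*u_t, i.e. model - avg_cumul / T, so the running average never has to be stored explicitly during the epoch.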
class AvgPerceptron:
    def __init__(self):

        self.model = SparseWeightVector()
        self.Y = []  #classes

    def train(self, dataset, dev_dataset, step_size=0.1, max_epochs=50):

        model_acc = None
        n_updates = 0
        self.Y = list(set([y for (y, x) in dataset]))

        # for now the development corpus is not yet used to improve
        # the model (model selection / early stopping) -> to be added

        for e in range(max_epochs):

            loss = 0.0
            for y, x in dataset:
                ypred = self.tag(x)
                if y != ypred:
                    loss += 1.0
                    delta_ref = SparseWeightVector.code_phi(x, y)
                    delta_pred = SparseWeightVector.code_phi(x, ypred)
                    self.model += step_size * (delta_ref - delta_pred)
                    # running average of the accumulated weight vectors
                    if model_acc is None:
                        model_acc = self.model
                        n_updates = 1
                    else:
                        model_acc += self.model
                        n_updates += 1
                        self.model = model_acc * (1 / float(n_updates))
            print("Loss (#errors) = ", loss)
            if loss == 0.0:
                return

    def predict(self, dataline):
        return list([self.model.dot(dataline, c) for c in self.Y])

    def tag(self, dataline):

        scores = self.predict(dataline)
        imax = scores.index(max(scores))
        return self.Y[imax]

    def test(self, dataset):

        result = list([(y == self.tag(x)) for y, x in dataset])
        return sum(result) / len(result)
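A minimal usage sketch for this classifier (hypothetical toy data; each datapoint is assumed to be a list of hashable symbols, which is what code_phi consumes):

    train_set = [("POS", ["good", "fun"]), ("NEG", ["bad", "dull"])]
    dev_set = [("POS", ["fun"]), ("NEG", ["bad"])]

    clf = AvgPerceptron()
    clf.train(train_set, dev_set)
    print(clf.test(dev_set))  # accuracy on the dev couples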
Example #14
from math import exp, log


class MultinomialLogistic:
    def __init__(self):

        self.model = SparseWeightVector()
        self.Y = []  #classes

    def train(self, dataset, step_size=0.1, max_epochs=100):
        #Maximizes log likelihood

        self.Y = list(set([y for (y, x) in dataset]))

        for e in range(max_epochs):  #Batch gradient ascent

            delta_ref = SparseWeightVector()
            delta_pred = SparseWeightVector()

            loss = 0.0
            for y, x in dataset:

                delta_ref += SparseWeightVector.code_phi(x, y)

                preds = self.predict(x)

                for idx, c in enumerate(self.Y):
                    delta_pred += SparseWeightVector.code_phi(x, c) * preds[idx]

                loss += log(preds[self.Y.index(y)])

            self.model += step_size * (delta_ref - delta_pred)
            print('Loss (log likelihood) = ', loss)

    def predict(self, dataline):

        probs = list([exp(self.model.dot(dataline, c)) for c in self.Y])
        Z = sum(probs)
        probs = list([p / Z for p in probs])
        return probs

    def tag(self, dataline):

        probs = self.predict(dataline)
        imax = probs.index(max(probs))
        return self.Y[imax]

    def test(self, dataset):

        result = list([(y == self.tag(x)) for y, x in dataset])
        return sum(result) / len(result)
Example #15
class MultiClassPerceptron:
    def __init__(self):

        self.model = SparseWeightVector()
        self.Y = []  #classes

    def train(self, dataset, step_size=0.1, max_epochs=50):

        model_acc = None
        n_updates = 0
        self.Y = list(set([y for (y, x) in dataset]))

        for e in range(max_epochs):

            loss = 0.0
            for y, x in dataset:
                ypred = self.tag(x)
                if y != ypred:
                    loss += 1.0
                    delta_ref = SparseWeightVector.code_phi(x, y)
                    delta_pred = SparseWeightVector.code_phi(x, ypred)
                    self.model += step_size * (delta_ref - delta_pred)
                    # running average of the accumulated weight vectors
                    if model_acc is None:
                        model_acc = self.model
                        n_updates = 1
                    else:
                        model_acc += self.model
                        n_updates += 1
                        self.model = model_acc * (1 / float(n_updates))
            print("Loss (#errors) = ", loss)
            if loss == 0.0:
                return

    def predict(self, dataline):
        return list([self.model.dot(dataline, c) for c in self.Y])

    def tag(self, dataline):

        scores = self.predict(dataline)
        imax = scores.index(max(scores))
        return self.Y[imax]

    def test(self, dataset):

        result = list([(y == self.tag(x)) for y, x in dataset])
        return sum(result) / len(result)
Example #16
    def train(self, treebank, step_size=1.0, max_epochs=100, left_markov=True):
        """
        Trains the parser with a structured perceptron
        @param treebank: a list of ConsTrees
        @param left_markov: uses left markovization if true, right markovization otherwise
        """
        self.transform(treebank, left_markov)  # binarizes the treebank
        # makes an (x,y) pattern for the data set
        dataset = list([(ViterbiCKY.ngram_tokens(tree.tokens()), self.tree_as_edges(tree))
                        for tree in treebank])

        N = len(dataset)

        for e in range(max_epochs):
            loss = 0.0
            for sentence, ref_edges in dataset:
                pred_edges = set(self.parse_one(sentence, edges=True))
                ref_edges = set(ref_edges)
                if pred_edges != ref_edges:  # update
                    loss += 1.0
                    delta_ref = SparseWeightVector()
                    for r_edge in ref_edges:
                        if len(r_edge) == 3:
                            (i, j, Nroot), (i, k, Nl), (k, j, Nr) = r_edge
                            x_repr = self.__make_edge_representation(i, j, k, sentence)
                            delta_ref += SparseWeightVector.code_phi(x_repr, (Nroot, Nl, Nr))
                        elif len(r_edge) == 2:
                            (i, j, pos), widx = r_edge
                            x_repr = self.__make_unary_representation(widx, sentence)
                            delta_ref += SparseWeightVector.code_phi(x_repr, pos)

                    delta_pred = SparseWeightVector()
                    for p_edge in pred_edges:
                        if len(p_edge) == 3:
                            (i, j, Nroot), (i, k, Nl), (k, j, Nr) = p_edge
                            x_repr = self.__make_edge_representation(i, j, k, sentence)
                            delta_pred += SparseWeightVector.code_phi(x_repr, (Nroot, Nl, Nr))
                        elif len(p_edge) == 2:
                            (i, j, pos), widx = p_edge
                            x_repr = self.__make_unary_representation(widx, sentence)
                            delta_pred += SparseWeightVector.code_phi(x_repr, pos)

                    self.model += step_size * (delta_ref - delta_pred)
            print('Loss = ', loss, "%Exact match = ", (N - loss) / N)
            if loss == 0.0:
                return
Example #17
from math import inf

import numpy as np


class ArcFactoredParser:

    LEFTARC = "L"
    RIGHTARC = "R"

    def __init__(self):

        self.model = SparseWeightVector()

    def _argmax(self, x, y, argmax, argvalue):
        """
        computes max(x,y) and replaces the current argmax by argvalue when y is the new maximum
        @param x: number (current max)
        @param y: number
        @param argmax: the current argmax
        @param argvalue: the potential argmax
        @return (max,argmax) a couple with the current max and argmax
        """
        if y > x:
            return (y, argvalue)
        return (x, argmax)

    def parse_one(self, sentence):
        """
        @param sentence: a list of tokens as encoded in a dependency
        tree (first token is a dummy root token)
        @return : a DependencyTree Object 
        """
        COMPLETE, INCOMPLETE = 1, 0
        LEFTARROW, RIGHTARROW = 0, 1

        N = len(sentence)
        chart = np.zeros((N, N, 2, 2))
        history = {}

        #recurrence
        for span_length in range(1, N):
            for i in range(N - span_length):
                j = i + span_length
                #incomplete
                max_left, max_right = -inf, -inf
                amax_left, amax_right = i, i
                for k in range(i, j):
                    tmp_score = chart[i][k][RIGHTARROW][COMPLETE] \
                              + chart[k+1][j][LEFTARROW][COMPLETE] \
                              + self.score(j,i,sentence)
                    max_left, amax_left = self._argmax(max_left, tmp_score,
                                                       amax_left, k)
                    tmp_score = chart[i][k][RIGHTARROW][COMPLETE] \
                              + chart[k+1][j][LEFTARROW][COMPLETE] \
                              + self.score(i,j,sentence)
                    max_right, amax_right = self._argmax(
                        max_right, tmp_score, amax_right, k)
                chart[i][j][LEFTARROW][INCOMPLETE] = max_left
                chart[i][j][RIGHTARROW][INCOMPLETE] = max_right
                history[(i, j, LEFTARROW, INCOMPLETE)] = [
                    (i, amax_left, RIGHTARROW, COMPLETE),
                    (amax_left + 1, j, LEFTARROW, COMPLETE)
                ]
                history[(i, j, RIGHTARROW, INCOMPLETE)] = [
                    (i, amax_right, RIGHTARROW, COMPLETE),
                    (amax_right + 1, j, LEFTARROW, COMPLETE)
                ]

                #complete
                max_left, max_right = -inf, -inf
                amax_left, amax_right = i, i
                for k in range(i, j):
                    max_left, amax_left = self._argmax(
                        max_left, chart[i][k][LEFTARROW][COMPLETE] +
                        chart[k][j][LEFTARROW][INCOMPLETE], amax_left, k)
                for k in range(i + 1, j + 1):
                    max_right, amax_right = self._argmax(
                        max_right, chart[i][k][RIGHTARROW][INCOMPLETE] +
                        chart[k][j][RIGHTARROW][COMPLETE], amax_right, k)
                chart[i][j][LEFTARROW][COMPLETE] = max_left
                chart[i][j][RIGHTARROW][COMPLETE] = max_right
                history[(i, j, LEFTARROW,
                         COMPLETE)] = [(i, amax_left, LEFTARROW, COMPLETE),
                                       (amax_left, j, LEFTARROW, INCOMPLETE)]
                history[(i, j, RIGHTARROW,
                         COMPLETE)] = [(i, amax_right, RIGHTARROW, INCOMPLETE),
                                       (amax_right, j, RIGHTARROW, COMPLETE)]

        #backtrace (collects edges of the dependency tree)
        edges = []
        agenda = [(0, N - 1, RIGHTARROW, COMPLETE)]
        while agenda:
            current_item = agenda.pop()
            (i, j, direction, c) = current_item
            if c == INCOMPLETE and i != j:
                if direction == LEFTARROW:
                    edges.append((j, i))
                elif direction == RIGHTARROW:
                    edges.append((i, j))
            if current_item in history:
                agenda.extend(history[current_item])

        return DependencyTree(tokens=sentence, edges=edges)

    def score(self, gov_idx, dep_idx, toklist):
        """
        @param gov_idx, dep_idx: the indexes of the governor and
        dependent in the sentence
        @param toklist: the list of tokens of the sentence
        @return: a float (score)
        """
        dep_repr = self.__make_arc_representation(gov_idx, dep_idx, toklist)
        ylabel = ArcFactoredParser.RIGHTARC if gov_idx < dep_idx else ArcFactoredParser.LEFTARC
        return self.model.dot(dep_repr, ylabel)

    def __make_arc_representation(self, gov_idx, dep_idx, toklist):
        """
        Creates a list of values from which to code a dependency arc as binary features.
        The representation includes pairwise interactions between the governor and dependent tokens.

        @param gov_idx, dep_idx: the indexes of the governor and dependent in the sentence
        @param toklist: the list of tokens of the sentence
        @return: a list of tuples ready to be binarized and scored.
        """
        interaction1 = (
            toklist[gov_idx][1],
            toklist[dep_idx][1],
        )
        interaction2 = (
            toklist[gov_idx][0],
            toklist[dep_idx][0],
        )
        #add more interactions here to improve the parser

        return toklist[gov_idx] + toklist[dep_idx] + (interaction1, ) + (
            interaction2, )

    def train(self, dataset, step_size=1.0, max_epochs=100):
        """
        @param dataset : a list of dependency trees
        """
        N = len(dataset)
        for e in range(max_epochs):
            loss = 0.0
            for ref_tree in dataset:
                tokens = ref_tree.tokens
                pred_tree = self.parse_one(tokens)

                if ref_tree.accurracy(pred_tree) != 1.0:
                    loss += 1.0

                    delta_ref = SparseWeightVector()
                    for gov_idx, dep_idx in ref_tree.edges:
                        x_repr = self.__make_arc_representation(
                            gov_idx, dep_idx, tokens)
                        ylabel = ArcFactoredParser.RIGHTARC if gov_idx < dep_idx else ArcFactoredParser.LEFTARC
                        delta_ref += SparseWeightVector.code_phi(
                            x_repr, ylabel)

                    delta_pred = SparseWeightVector()
                    for gov_idx, dep_idx in pred_tree.edges:
                        x_repr = self.__make_arc_representation(
                            gov_idx, dep_idx, tokens)
                        ylabel = ArcFactoredParser.RIGHTARC if gov_idx < dep_idx else ArcFactoredParser.LEFTARC
                        delta_pred += SparseWeightVector.code_phi(
                            x_repr, ylabel)

                    self.model += step_size * (delta_ref - delta_pred)
            print('Loss = ', loss, "%Exact match = ", (N - loss) / N)
            if loss == 0.0:
                return

    def test(self, dataset):
        N = len(dataset)
        sum_acc = 0.0
        for ref_tree in dataset:
            tokens = ref_tree.tokens
            pred_tree = self.parse_one(tokens)
            print(pred_tree)
            print()
            sum_acc += ref_tree.accurracy(pred_tree)
        return sum_acc / N
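parse_one above is Eisner-style O(n^3) dynamic programming for projective dependency parsing: the chart stores the best score of every span in two directions (left/right arrows) and two states (complete/incomplete), history records the argmax split points, and the backtrace converts the winning items into dependency edges.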
Example #18
    def __init__(self):

        self.model = SparseWeightVector()
        self.Y = []  #classes
Example #19
    def __init__(self):
        self.model = SparseWeightVector()
Example #20
from math import inf

import numpy as np


class ViterbiCKY:
    """
    This implements a CKY parser with perceptron scoring.
    """
    def __init__(self):
        """
        @param weights: a SparseWeightVector
        @param non_terminals: an ordered list of non terminals
        """
        self.model = SparseWeightVector()
        self.nonterminals_decode = [] #maps integers to symbols
        self.nonterminals_code   = {} #maps symbols to integers

    def transform(self,dataset,left_markov = True):
        """
        In place (destructive) conversion of a treebank to Chomsky Normal Form.
        Builds the list of the parser nonterminals as a side effect
        and indexes the reference trees.
         
        @param dataset a list of ConsTrees
        @param left_markov: if true -> left markovization else right markovization
        """
        all_nonterminals = set()
        for tree in dataset:
            tree.close_unaries()
            if left_markov:
                tree.left_markovize()
            else:
                tree.right_markovize()
            all_nonterminals.update(tree.collect_nonterminals()) 
        self.nonterminals_decode = list(all_nonterminals)
        self.nonterminals_code = dict(zip(self.nonterminals_decode,range(len(self.nonterminals_decode))))
        for tree in dataset:
            tree.index_leaves()

    @staticmethod
    def ngram_tokens(toklist):
        """
        Returns the input with additional fields for ngram-based features
        """
        BOL = '@@@'
        EOL = '$$$'
        wordlist = [BOL] + toklist + [EOL]
        word_trigrams = list(zip(wordlist,wordlist[1:],wordlist[2:]))
        return list(zip(wordlist[1:-1],word_trigrams))

    def score_edge(self,Nroot,Nleft,Nright,i,j,k,sentence):
        """
        Scores an edge.
        @param Nroot,Nleft,Nright : the root node, the left node and
        the right node
        @param i,j,k: the i,j,k indexes of the clique to be scored
        @param sentence: the list of words to be parsed
        @return a perceptron score
        """
        edge_repr = self.__make_edge_representation(i,j,k,sentence)
        return self.model.dot(edge_repr,(Nroot,Nleft,Nright))
        
    def score_unary(self,Nroot,word_idx,sentence):
        """
        Scores a unary edge
        @param Nroot: the pos tag of the word
        @param word_idx: the index of the word in the sentence
        @param sentence: the sentence
        """
        unary_repr = self.__make_unary_representation(word_idx,sentence)
        return self.model.dot(unary_repr,Nroot)

    def __make_edge_representation(self,i,j,k,sentence):
        """
        Builds features for an edge.
        @param i,j,k: the i,j,k indexes of the clique to be scored
        """
        return [sentence[i],sentence[k-1],sentence[k],sentence[j-1]]

    def __make_unary_representation(self,word_idx,sentence):
        """
        Builds features for a unary reduction.
        """
        return [sentence[word_idx]]

    def __build_tree(self,root_vertex,tree_root,history,sentence):
        """
        Builds a parse tree from a chart history
        @param root_vertex: the tuple (i,j,label) encoding a vertex
        @param tree_root: the current ConsTree root
        @param history: the parse forest
        @param sentence: the list of words to be parsed
        @return the root of the tree
        """
        (i,k,labelL),(k,j,labelR) = history[root_vertex]
        left  = ConsTree(self.nonterminals_decode[labelL])
        right = ConsTree(self.nonterminals_decode[labelR])
        tree_root.children = [left,right]
        if k-i > 1: 
            self.__build_tree((i,k,labelL),left,history,sentence)
        else:
            left.add_child(ConsTree(sentence[i][0]))
        if j-k > 1:
            self.__build_tree((k,j,labelR),right,history,sentence)
        else:
            right.add_child(ConsTree(sentence[k][0]))
        return tree_root

    def __build_edges(self,root_vertex,history):
        """
        Extracts the hyperedges of the best tree from the chart and history
        (method useful for running the update while training)
        """
        left,right = history[root_vertex]
        result = [(root_vertex,left,right)]
        
        i,k,lblA = left
        k,j,lblB = right
        if k-i == 1:
            result.append((left,i))
        else:
            result.extend(self.__build_edges(left,history))
        if j-k == 1:
            result.append((right,k))
        else:
            result.extend(self.__build_edges(right,history))
        return result

    def parse_one(self,sentence,untransform=True,edges=False):
        """
        Parses a sentence with the cky viterbi algorithm
        @param sentence: a list of word strings
        @param untransform: if true, untransforms the result
        @param edges: if true returns hyperedges instead of a parse tree
        @return a ConsTree
        """
        N = len(sentence)
        G = len(self.nonterminals_decode) #num nonterminal symbols
        chart = np.empty([N,N+1,G])
        chart.fill(-inf)  # -inf marks not-yet-derived items (raw perceptron scores can be negative)
        history = {}
        
        for i in range(N):#init (= part of speech tagging)
            for Nt in range(G):
                chart[i,i+1,Nt] = self.score_unary(Nt,i,sentence)
                                
        for span in range(2,N+1):#recurrence
            for i in range(N+1-span):
                j = i+span
                for Nt in range(G):
                    for k in range(i+1,j):
                        for Nt1 in range(G):
                            for Nt2 in range(G):
                                score = chart[i,k,Nt1]+chart[k,j,Nt2]+self.score_edge(Nt,Nt1,Nt2,i,j,k,sentence)
                                if score > chart[i,j,Nt]:
                                    chart[i,j,Nt] = score
                                    history[(i,j,Nt)] = ((i,k,Nt1),(k,j,Nt2))
                                    
        #Finds the max
        max_succ,argmax_succ = chart[0,N,0],(0,N,0)
        for Nt in range(1,G):
            if chart[0,N,Nt] > max_succ:
                max_succ,argmax_succ = chart[0,N,Nt],(0,N,Nt)
                
        i,j,label = argmax_succ
        if edges:#returns a list of hyperedges, useful for updates
            return self.__build_edges(argmax_succ,history)
        else:#builds a ConsTree
            result =  self.__build_tree(argmax_succ,ConsTree(self.nonterminals_decode[label]),history,sentence)
            if untransform:
                result.unbinarize()
                result.expand_unaries()
            return result
    
    def tree_as_edges(self,tree_root):
        """
        Returns a list of hyperedges from a *binary* Constree
        Assumes that leaves are indexed by a field 'idx'
        @param tree_root: a constree
        @return : a list of hyperedges
        @see ConsTree.index_leaves(...) 
        """
        assert(len(tree_root.children) <= 2)
        if len(tree_root.children) == 1: #unary edges
            idx = tree_root.get_child().idx
            jdx = idx + 1
            return [((idx,jdx,self.nonterminals_code[tree_root.label]),idx)]

        elif len(tree_root.children) == 2:#binary edges
            left_edges = self.tree_as_edges(tree_root.children[0])
            right_edges = self.tree_as_edges(tree_root.children[1])
            i,k,lblA = left_edges[0][0]
            k,j,lblB = right_edges[0][0]
            result = [((i,j,self.nonterminals_code[tree_root.label]),(i,k,lblA),(k,j,lblB))]
            result.extend(left_edges)
            result.extend(right_edges)
            return result

    def test(self,treebank):
        """
        Tests a model against a treebank
        and returns the average F-score.
        """
        Fscore = []
        for ref_tree in treebank:
            xinput = ViterbiCKY.ngram_tokens(ref_tree.tokens())
            pred_tree = self.parse_one(xinput)
            print(pred_tree)
            P,R,F = ref_tree.compare(pred_tree)
            Fscore.append(F)
        return sum(Fscore)/len(Fscore)

    
    def train(self, treebank, step_size=1.0, max_epochs=100, left_markov=True):
        """
        Trains the parser with a structured perceptron
        @param treebank: a list of ConsTrees
        @param left_markov: uses left markovization if true, right markovization otherwise
        """
        self.transform(treebank, left_markov)  # binarizes the treebank
        # makes an (x,y) pattern for the data set
        dataset = list([(ViterbiCKY.ngram_tokens(tree.tokens()), self.tree_as_edges(tree))
                        for tree in treebank])

        N = len(dataset)

        for e in range(max_epochs):
            loss = 0.0
            for sentence, ref_edges in dataset:
                pred_edges = set(self.parse_one(sentence, edges=True))
                ref_edges = set(ref_edges)
                if pred_edges != ref_edges:  # update
                    loss += 1.0
                    delta_ref = SparseWeightVector()
                    for r_edge in ref_edges:
                        if len(r_edge) == 3:
                            (i, j, Nroot), (i, k, Nl), (k, j, Nr) = r_edge
                            x_repr = self.__make_edge_representation(i, j, k, sentence)
                            delta_ref += SparseWeightVector.code_phi(x_repr, (Nroot, Nl, Nr))
                        elif len(r_edge) == 2:
                            (i, j, pos), widx = r_edge
                            x_repr = self.__make_unary_representation(widx, sentence)
                            delta_ref += SparseWeightVector.code_phi(x_repr, pos)

                    delta_pred = SparseWeightVector()
                    for p_edge in pred_edges:
                        if len(p_edge) == 3:
                            (i, j, Nroot), (i, k, Nl), (k, j, Nr) = p_edge
                            x_repr = self.__make_edge_representation(i, j, k, sentence)
                            delta_pred += SparseWeightVector.code_phi(x_repr, (Nroot, Nl, Nr))
                        elif len(p_edge) == 2:
                            (i, j, pos), widx = p_edge
                            x_repr = self.__make_unary_representation(widx, sentence)
                            delta_pred += SparseWeightVector.code_phi(x_repr, pos)

                    self.model += step_size * (delta_ref - delta_pred)
            print('Loss = ', loss, "%Exact match = ", (N - loss) / N)
            if loss == 0.0:
                return
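Note the cost of the recurrence in parse_one: the loops over i, k and the three nonterminal symbols make it O(N^3 * G^3) for a sentence of length N and G nonterminals, which is why practical CKY implementations iterate over the grammar's binary rules rather than over all nonterminal triples.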
Example #21
from math import inf

import numpy as np


class StructuredPerceptron:

    def __init__(self):
        self.model      = SparseWeightVector()
        self.Y          = []    #classes
        self.source_tag = "@@@"

    def tag(self,sentence):
        """
        Viterbi + backtrace
        """
        N = len(sentence)
        K = len(self.Y)
        
        viterbi = np.zeros((N,K))
        history = np.zeros((N,K))
        
        #init
        for j in range(K):
            viterbi[0,j] = self.score(self.source_tag,self.Y[j],sentence[0])

        #Recurrence
        for i in range(1,N):
            for j in range(K):
                smax,amax = -inf,-inf
                for pred in range(K):
                    score =  viterbi[i-1,pred] + self.score(self.Y[pred],self.Y[j],sentence[i])
                    if score > smax:
                        smax,amax = score,pred
                viterbi[i,j],history[i,j] = smax,amax
    
        #End state
        smax,amax = -inf,-inf
        for pred in range(K):
            score = viterbi[N-1,pred]
            if score > smax:
                smax,amax = score,pred
                
        #Backtrace
        rev_tag_sequence = [] 
        for i in range(N-1,-1,-1):
            rev_tag_sequence.append(self.Y[ amax ])
            amax = int(history[i,amax])
            
        return list(reversed(rev_tag_sequence))

    def score(self,y_pred,y,word_repr):
        """
        Scores a structured perceptron clique
        @param y_pred: the previous tag
        @param y: the current tag
        @param word_repr: a word data representation (a list of hashable symbols)
        @return a real value
        """
        return self.model.dot(word_repr,(y_pred,y))

    
    def train(self, dataset, step_size=1.0, max_epochs=100):
        """
        @param dataset: a list of couples (y_tags,x_words)
        """
        self.Y = list(set([y for (ytags, xwords) in dataset for y in ytags]))

        N = len(dataset)
        for e in range(max_epochs):

            loss = 0.0
            for ytags, xwords in dataset:

                ypreds = self.tag(xwords)

                if ypreds != ytags:
                    loss += 1.0

                    ytags_bigrams = list(zip([self.source_tag] + ytags, ytags))
                    ypreds_bigrams = list(zip([self.source_tag] + ypreds, ypreds))

                    delta_pred = SparseWeightVector()
                    for y, x in zip(ypreds_bigrams, xwords):
                        delta_pred += SparseWeightVector.code_phi(x, y)

                    delta_ref = SparseWeightVector()
                    for y, x in zip(ytags_bigrams, xwords):
                        delta_ref += SparseWeightVector.code_phi(x, y)

                    self.model += step_size * (delta_ref - delta_pred)

            print('Loss = ', loss, "Sequence accuracy = ", (N - loss) / N)
            if loss == 0:
                return
            
    def test(self,dataset):
        N       = 0.0
        correct = 0.0
        for ytags,xwords in dataset:
            N += len(ytags)
            ypreds = self.tag(xwords)
            correct += sum([ref == pred for ref,pred in zip(ytags,ypreds)])
        return correct / N
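A minimal usage sketch (hypothetical toy data; each word is represented by a one-element list of symbols, which is enough for code_phi):

    data = [(["D", "N"], [["the"], ["cat"]]),
            (["D", "N"], [["a"], ["dog"]])]
    tagger = StructuredPerceptron()
    tagger.train(data, max_epochs=5)
    print(tagger.tag([["the"], ["dog"]]))  # should converge to ['D', 'N']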
Example #22
import random


class AvgPerceptron:
    """
    Averaged Perceptron
    """
    def __init__(self):

        self.model = SparseWeightVector()
        self.Y = []  # classes
        self.model_avg = SparseWeightVector()

    def train(self, dataset, dev, step_size=0.1, max_epochs=20):

        self.Y = list(set([y for (y, x) in dataset]))
        dev_accs = []
        train_accs = []
        train_losses = []
        T = 1.
        avg_cumul = SparseWeightVector()
        for e in range(max_epochs):
            loss = 0.0
            random.shuffle(dataset)
            for y, x in dataset:
                ypred = self.tag(x)
                if y != ypred:
                    loss += 1.0
                    delta_ref = SparseWeightVector.code_phi(x, y)
                    delta_pred = SparseWeightVector.code_phi(x, ypred)
                    update = step_size * (delta_ref - delta_pred)
                    self.model += update
                    avg_cumul += T * update
                    T += 1
            # Calculate accuracy
            if len(dataset) != 0:
                acc = (len(dataset) - loss) / len(dataset)
            else:
                acc = (len(dataset) - loss)
            train_accs.append(acc)
            # Store the loss
            train_losses.append(loss)
            # Update Avg Perceptron
            self.model_avg = self.model - avg_cumul / T
            # Calculate acc for dev corpus
            dev_acc = self.test(dev, True)
            dev_accs.append(dev_acc)
            # Print loss and accuracy
            print("Epoch = " + str(e) + ", Loss (#errors) = " + str(loss) +
                  ", Accuracy = " + str(acc * 100) + ", Dev acc = " +
                  str(dev_acc * 100))
            # Stop if the loss is zero
            if loss == 0.0:
                return train_losses, train_accs, dev_accs
        return train_losses, train_accs, dev_accs

    def predict(self, dataline, avg=False):
        if avg:
            return list([self.model_avg.dot(dataline, c) for c in self.Y])
        else:
            return list([self.model.dot(dataline, c) for c in self.Y])

    def tag(self, dataline, avg=False):
        scores = self.predict(dataline, avg)
        imax = scores.index(max(scores))
        return self.Y[imax]

    def test(self, dataset, avg=False):

        result = list([(y == self.tag(x, avg)) for y, x in dataset])
        return sum(result) / len(result)
class ConstituentTransitionParser:

    SHIFT = "S"
    REDUCE = "R"
    STOP = "!"

    def __init__(self):
        self.model = SparseWeightVector()
        self.nonterminals = []

    def static_oracle(self, stack, buffer, ref_triples):
        """
        Returns the action to do given a configuration and a ref parse tree
        @param ref_triples : the triples from the reference tree
        @param stack: the config stack
        @param buffer: a list of integers
        @return a couple (parse action, action param)
        """
        if len(stack) >= 2:
            (i, k, X1), (k, j, X2) = stack[-2], stack[-1]
            for X in self.nonterminals:
                if (i, j, X) in ref_triples:
                    return (ConstituentTransitionParser.REDUCE, X)
        if buffer:
            idx = buffer[0]
            for tag in self.nonterminals:
                if (idx, idx + 1, tag) in ref_triples:
                    return (ConstituentTransitionParser.SHIFT, tag)
        return (ConstituentTransitionParser.STOP,
                ConstituentTransitionParser.STOP)

    def reference_derivation(self, ref_tree):
        """
        Returns a reference derivation given a reference tree
        @param ref_tree: a ConsTree
        """
        ref_tree.index_leaves()
        ref_triples = set(ref_tree.triples())
        sentence = ref_tree.tokens()
        N = len(sentence)

        action = (None, None)
        c = (tuple(), tuple(range(N)), 0.0)
        derivation = [(action, c)]

        for t in range(2 * N):  # 2N-1 shift/reduce actions + one terminate
            S, B, score = c
            action, param = self.static_oracle(S, B, ref_triples)
            if action == ConstituentTransitionParser.REDUCE:
                c = self.reduce(c, param, sentence)
            elif action == ConstituentTransitionParser.SHIFT:
                c = self.shift(c, param, sentence)
            else:
                c = self.terminate(c, sentence)
            derivation.append(((action, param), c))
        return derivation

    def build_tree(self, derivation, sentence):
        """
        Builds a ConsTree from a parse derivation
        @param derivation: a parse derivation
        @param sentence: a list of tokens
        @return a ConsTree
        """
        tree_stack = []
        for (action, param), C in derivation:
            S, B, score = C
            if action == ConstituentTransitionParser.SHIFT:
                i, j, lbl = S[-1]
                tag_node = ConsTree(param)
                leaf_node = ConsTree(sentence[i])
                tag_node.add_child(leaf_node)
                tree_stack.append(tag_node)
            elif action == ConstituentTransitionParser.REDUCE:
                root_node = ConsTree(param)
                rnode = tree_stack.pop()
                lnode = tree_stack.pop()
                root_node.children = [lnode, rnode]
                tree_stack.append(root_node)
        return tree_stack[-1]

    def reduce(self, C, param, sentence):
        """
        Performs a reduction from the current configuration and returns the result
        @param S: a stack
        @param B: a buffer
        @param param: the category for reduction
        @return a configuration
        """
        S, B, score = C
        i, k, _ = S[-2]
        k, j, _ = S[-1]
        return (
            S[:-2] + ((i, j, param), ), B, score +
            self.score(C,
                       (ConstituentTransitionParser.REDUCE, param), sentence))

    def shift(self, C, param, sentence):
        """
        Performs a reduction from the current configuration and returns the result
        @param S: a stack
        @param B: a buffer
        @param param: the category for reduction
        @return a configuration
        """
        S, B, score = C
        idx = S[-1][1] if S else 0
        return (
            S + ((idx, idx + 1, param), ), B[1:], score +
            self.score(C,
                       (ConstituentTransitionParser.SHIFT, param), sentence))

    def terminate(self, C, sentence):
        """
        Performs a stop action and returns the result
        """
        S, B, score = C
        return (S, B, score +
                self.score(C, (ConstituentTransitionParser.STOP,
                               ConstituentTransitionParser.STOP), sentence))

    def score(self, configuration, action, tokens):
        """
        Computes the prefix score of a derivation
        @param configuration : a triple (S,B,score)
        @param action: an action label 
        @param tokens: the x-sequence of tokens to be parsed
        @return a prefix score
        """
        S, B, old_score = configuration
        config_repr = self.__make_config_representation(S, B, tokens)
        return old_score + self.model.dot(config_repr, action)

    def __make_config_representation(self, S, B, tokens):
        """
        This gathers the information for coding the configuration as a feature vector.
        @param S: a configuration stack
        @param B  a configuration buffer
        @return an ordered list of tuples 
        """
        #default values for inaccessible positions
        s0cat = s1cat = s0l = s0r = s1l = s1r = b0 = b1 = b2 = "_UNDEF_"

        if len(S) > 0:
            i, j, lbl = S[-1]
            s0l, s0r, s0cat = tokens[i], tokens[j - 1], lbl
        if len(S) > 1:
            i, j, lbl = S[-2]
            s1l, s1r, s1cat = tokens[i], tokens[j - 1], lbl
        if len(B) > 0:
            b0 = tokens[B[0]]
        if len(B) > 1:
            b1 = tokens[B[1]]
        if len(B) > 2:
            b2 = tokens[B[2]]

        wordlist = [s0l, s0r, s1l, s1r, b0, b1, b2]
        catlist = [s0cat, s1cat, b0]
        word_bigrams = list(zip(wordlist, wordlist[1:]))
        word_trigrams = list(zip(wordlist, wordlist[1:], wordlist[2:]))
        cat_bigrams = list(zip(catlist, catlist[1:]))

        return word_bigrams + word_trigrams + cat_bigrams

    def transform(self, dataset, left_markov=True):
        """
        In place (destructive) conversion of a treebank to Chomsky Normal Form.
        Builds the list of the parser nonterminals as a side effect.
         
        @param dataset a list of ConsTrees
        @param left_markov: if true -> left markovization else right markovization
        """
        all_nonterminals = set()
        for tree in dataset:
            tree.close_unaries()
            if left_markov:
                tree.left_markovize()
            else:
                tree.right_markovize()
            all_nonterminals.update(tree.collect_nonterminals())
        self.nonterminals = list(all_nonterminals)

    def parse_one(self,
                  sentence,
                  beam_size=4,
                  get_beam=False,
                  deriv=False,
                  untransform=True):
        """
        @param sentence: a list of strings
        @param beam_size: size of the beam
        @param get_beam : returns the beam instead of tree like structures
        @param deriv: returns the derivation instead of the parse tree
        @param untransform: bool if true unbinarizes the resulting tree.
        """

        actions = [ConstituentTransitionParser.SHIFT,\
                   ConstituentTransitionParser.REDUCE,\
                   ConstituentTransitionParser.STOP]
        all_actions = list([(a, p) for a in actions
                            for p in self.nonterminals])

        N = len(sentence)
        init = (tuple(), tuple(range(N)), 0.0)  # a config is a hashable triple with a score
        current_beam = [(-1, (None, None), init)]
        beam = [current_beam]

        for i in range(2 * N):  # 2N-1 shift/reduce actions + one terminate
            next_beam = []
            for idx, (_, action, config) in enumerate(current_beam):
                S, B, score = config
                for (a, p) in all_actions:
                    if a == ConstituentTransitionParser.SHIFT:
                        if B:
                            newconfig = self.shift(config, p, sentence)
                            next_beam.append((idx, (a, p), newconfig))
                    elif a == ConstituentTransitionParser.REDUCE:
                        if len(S) >= 2:
                            newconfig = self.reduce(config, p, sentence)
                            next_beam.append((idx, (a, p), newconfig))
                    elif a == ConstituentTransitionParser.STOP:
                        if len(S) < 2 and not B:
                            newconfig = self.terminate(config, sentence)
                            next_beam.append((idx, (a, a), newconfig))
            next_beam.sort(key=lambda x: x[2][2], reverse=True)
            next_beam = next_beam[:beam_size]
            beam.append(next_beam)
            current_beam = next_beam

        if get_beam:
            return beam
        else:
            #Backtrace for derivation
            idx = 1
            prev_jdx = 0
            derivation = []
            while prev_jdx != -1:
                current = beam[-idx][prev_jdx]
                prev_jdx, prev_action, C = current
                derivation.append((prev_action, C))
                idx += 1
            derivation.reverse()
            if deriv:
                return derivation
            else:
                result = self.build_tree(derivation, sentence)
                if untransform:
                    result.unbinarize()
                    result.expand_unaries()
                return result
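        # Usage sketch (hypothetical input): parse_one('the cat sleeps'.split())
        # returns a tree structure; parse_one(words, deriv=True) returns the
        # (action, configuration) derivation instead.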

    def early_prefix(self, ref_parse, beam):
        """
        Finds the prefix for early update, that is, the prefix where the reference parse falls off the beam.
        @param ref_parse: a reference derivation
        @param beam: a beam output by the parse_one function
        @return (bool, ref parse prefix, best-in-beam prefix);
                the bool is True if an update is required, False otherwise
        """
        idx = 0
        for (actionR, configR), beamCol in zip(ref_parse, beam):
            found = False
            for source_idx, action, configTarget in beamCol:
                if action == actionR and configTarget[:-1] == configR[:-1]:  #-1 -> does not test score equality
                    found = True
                    break
            if not found:
                #backtrace
                jdx = idx
                source_idx = 0
                early_prefix = []
                while jdx >= 0:
                    new_source_idx, action, config = beam[jdx][source_idx]
                    early_prefix.append((action, config))
                    source_idx = new_source_idx
                    jdx -= 1
                early_prefix.reverse()
                return (True, ref_parse[:idx + 1], early_prefix)
            idx += 1
        #if no error found check that the best in beam is the ref parse
        last_ref_action, last_ref_config = ref_parse[-1]
        _, last_pred_action, last_pred_config = beam[-1][0]
        if last_pred_config[:-1] == last_ref_config[:-1]:
            return (False, None, None)  #returns a no update message
        else:  #backtrace
            jdx = len(beam) - 1
            source_idx = 0
            early_prefix = []
            while jdx >= 0:
                new_source_idx, action, config = beam[jdx][source_idx]
                early_prefix.append((action, config))
                source_idx = new_source_idx
                jdx -= 1
            early_prefix.reverse()
            return (True, ref_parse, early_prefix)
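
    # Early update (Collins & Roark, 2004): as soon as the reference derivation
    # falls off the beam, the perceptron updates on the two prefixes up to that
    # point only, since later beam scores are not comparable to the reference.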

    def test(self, treebank, beam_size=4):
        """         
        @param treebank a list of ConsTrees
        @param left_markov: if true -> left markovization else right markovization
        @return the avg f-score
        """
        Fscores = []
        for tree in treebank:
            result = self.parse_one(tree.tokens(), beam_size)
            print(result)
            P, R, F = tree.compare(result)
            Fscores.append(F)
        return sum(Fscores) / len(Fscores)

    def train(self,
              treebank,
              step_size=1.0,
              max_epochs=100,
              beam_size=4,
              left_markov=True):
        """         
        @param treebank a list of ConsTrees
        @param left_markov: if true -> left markovization else right markovization
        """
        self.transform(treebank, left_markov)
        dataset = list([(tree.tokens(), self.reference_derivation(tree))
                        for tree in treebank])
        N = len(dataset)
        for e in range(max_epochs):
            loss = 0.0
            for sentence, ref_derivation in dataset:
                pred_beam = (self.parse_one(sentence, get_beam=True))
                (update, ref_prefix,
                 pred_prefix) = self.early_prefix(ref_derivation, pred_beam)
                if update:
                    loss += 1.0
                    delta_ref = SparseWeightVector()
                    current_config = ref_prefix[0][1]
                    for action, config in ref_prefix[1:]:
                        S, B, score = current_config
                        x_repr = self.__make_config_representation(
                            S, B, sentence)
                        delta_ref += SparseWeightVector.code_phi(
                            x_repr, action)
                        current_config = config

                    delta_pred = SparseWeightVector()
                    current_config = pred_prefix[0][1]
                    for action, config in pred_prefix[1:]:
                        S, B, score = current_config
                        x_repr = self.__make_config_representation(
                            S, B, sentence)
                        delta_pred += SparseWeightVector.code_phi(
                            x_repr, action)
                        current_config = config

                    self.model += step_size * (delta_ref - delta_pred)

            print('Loss = ', loss, "%Exact match = ", (N - loss) / N)
            if loss == 0.0:
                return
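
A minimal end-to-end sketch (assuming `treebank` is a list of ConsTree objects, as train() expects; the names here are illustrative, not part of the original file):

    parser = ConstituentTransitionParser()
    parser.train(treebank, max_epochs=20, beam_size=4)
    print(parser.test(treebank, beam_size=4))  # average F-score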
Example #24
class LinearChainCRF:
    def __init__(self):
        self.model = SparseWeightVector()
        self.Y = []  #classes
        self.source_tag = "@@@"

    def tag(self, sentence):
        """
        Viterbi + backtrace
        """
        N = len(sentence)
        K = len(self.Y)

        viterbi = np.zeros((N, K))
        history = np.zeros((N, K))

        #init
        for j in range(K):
            viterbi[0, j] = self.score(self.source_tag, self.Y[j], sentence[0])

        #Recurrence
        for i in range(1, N):
            for j in range(K):
                smax, amax = 0, 0
                for pred in range(K):
                    score = viterbi[i - 1, pred] * self.score(
                        self.Y[pred], self.Y[j], sentence[i])
                    if score > smax:
                        smax, amax = score, pred
                viterbi[i, j], history[i, j] = smax, amax

        #End state
        smax, amax = 0, 0
        for pred in range(K):
            score = viterbi[N - 1, pred]
            if score > smax:
                smax, amax = score, pred

        #Backtrace
        rev_tag_sequence = []
        for i in range(N - 1, -1, -1):
            rev_tag_sequence.append(self.Y[amax])
            amax = int(history[i, amax])

        return list(reversed(rev_tag_sequence))

    def sequence_score(self, ytags, xwords):
        """
        Returns the unnormalized exp(dot product) score of a tag
        sequence given words and model parameters.
        @param ytags  : a tag sequence
        @param xwords:  a sequence of word representations
        """
        ytags_bigrams = list(zip([self.source_tag] + ytags, ytags))
        score = 1
        for x, y in zip(xwords, ytags_bigrams):
            score *= self.score(y[0], y[1], x)
        return score

    def score(self, y_pred, y, word_repr):
        """
        Scores a CRF clique (psi value for y-1,y,x)
        @param y_pred : prev tag
        @param y  : current tag
        @param word_repr: a word data representation (a list of hashable symbols)
        @return a psi (potential) positive value
        """
        return exp(self.model.dot(word_repr, (y_pred, y)))
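        # i.e. the clique potential psi(y_prev, y, x) = exp(w . phi(x, (y_prev, y))),
        # which is strictly positive as required by the sum-product recursions below.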

    def forward(self, sentence):
        """
        @param sentence: a list of xwords
        @return a forward matrix and Z (norm constant)
        """
        N = len(sentence)
        K = len(self.Y)
        forward = np.zeros((N, K))
        #init
        for j in range(K):
            forward[0, j] = self.score(self.source_tag, self.Y[j], sentence[0])
        #recurrence
        for i in range(1, N):
            for j in range(K):
                for pred in range(K):
                    forward[i, j] += forward[i - 1, pred] * self.score(
                        self.Y[pred], self.Y[j], sentence[i])
        return (forward, forward[N - 1, :].sum())

    def backward(self, sentence):
        """
        @param sentence: a list of xwords
        @return a backward matrix and Z (norm constant)
        """
        N = len(sentence)
        K = len(self.Y)

        backward = np.zeros((N, K))
        backward[N - 1, :] = 1.0

        #recurrence
        for i in range(N - 2, -1, -1):
            for j in range(K):
                for succ in range(K):
                    backward[i, j] += backward[i + 1, succ] * self.score(
                        self.Y[j], self.Y[succ], sentence[i + 1])
        Z = sum([
            self.score(self.source_tag, self.Y[succ], sentence[0]) *
            backward[0, succ] for succ in range(K)
        ])
        return (backward, Z)
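        # Sanity check (sketch): forward and backward compute the same partition
        # function, so crf.forward(xwords)[1] and crf.backward(xwords)[1] should
        # agree up to floating-point rounding on any sentence.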

    def train(self, dataset, step_size=0.1, max_epochs=100):
        """
        @param dataset: a list of couples (y_tags,x_words)
        """
        self.Y = list(set([y for (ytags, xwords) in dataset for y in ytags]))

        #pre-computes delta_ref (first term of the gradient is constant)
        delta_ref = SparseWeightVector()
        for ytags, xwords in dataset:
            ytags_bigrams = list(zip([self.source_tag] + ytags, ytags))
            for x, y in zip(xwords, ytags_bigrams):
                delta_ref += SparseWeightVector.code_phi(x, y)
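
        # Gradient of the log-likelihood: sum over D of phi(x, y) minus E_model[phi].
        # The empirical term above depends only on the data, so it is computed
        # once; the model expectation is re-estimated each epoch by forward-backward.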

        for e in range(max_epochs):

            loss = 0.0
            delta_pred = SparseWeightVector()

            for ytags, xwords in dataset:
                N = len(xwords)
                K = len(self.Y)
                alphas, Z = self.forward(xwords)
                betas, _ = self.backward(xwords)

                #forward-backward at init
                for ytag in range(K):
                    prob = (self.score(self.source_tag, self.Y[ytag],
                                       xwords[0]) * betas[0, ytag]) / Z
                    delta_pred += prob * SparseWeightVector.code_phi(
                        xwords[0], (self.source_tag, self.Y[ytag]))
                #forward-backward loop
                for i in range(1, N):
                    for yprev in range(K):
                        for ytag in range(K):
                            prob = (alphas[i - 1, yprev] * self.score(
                                self.Y[yprev], self.Y[ytag], xwords[i]) *
                                    betas[i, ytag]) / Z
                            delta_pred += prob * SparseWeightVector.code_phi(
                                xwords[i], (self.Y[yprev], self.Y[ytag]))

                loss += log(self.sequence_score(ytags, xwords) / Z)

            print('Log likelihood(D) = ', loss)
            self.model += step_size * (delta_ref - delta_pred)

    def test(self, dataset):
        N = 0.0
        correct = 0.0

        for ytags, xwords in dataset:
            N += len(ytags)
            ypreds = self.tag(xwords)
            correct += sum([ref == pred for ref, pred in zip(ytags, ypreds)])
        return correct / N
Example #25
    def __init__(self):
        self.model = SparseWeightVector()
        self.nonterminals = []
Example #26
class ArcStandardTransitionParser:

    #actions
    LEFTARC = "L"
    RIGHTARC = "R"
    SHIFT = "S"
    TERMINATE = "T"

    def __init__(self):
        self.model = SparseWeightVector()

    @staticmethod
    def static_oracle(configuration, reference_arcs, N):
        """
        @param configuration: a parser configuration
        @param reference_arcs: a set of dependency arcs
        @param N: the length of the input sequence
        """
        S, B, A, score = configuration
        all_words = range(N)
        if len(S) >= 2:
            i, j = S[-2], S[-1]
            if j != 0 and (i, j) in reference_arcs and all(
                [(j, k) in A for k in all_words if (j, k) in reference_arcs]):
                return ArcStandardTransitionParser.RIGHTARC
            elif i != 0 and (j, i) in reference_arcs and all(
                [(i, k) in A for k in all_words if (i, k) in reference_arcs]):
                return ArcStandardTransitionParser.LEFTARC
        if B:
            return ArcStandardTransitionParser.SHIFT
        return ArcStandardTransitionParser.TERMINATE
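        # In words: an arc is built only once its dependent (j for RIGHTARC,
        # i for LEFTARC) has collected all of its own dependents in A, because
        # a word popped off the stack can no longer receive dependents.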

    def oracle_derivation(self, ref_parse):
        """
        This generates an oracle reference derivation from a sentence
        @param ref_parse: a DependencyTree object
        @return : the oracle derivation
        """
        sentence = ref_parse.tokens
        edges = set(ref_parse.edges)
        N = len(sentence)

        C = (tuple(), tuple(range(len(sentence))), tuple(), 0.0
             )  #A config is a hashable quadruple with score
        action = None
        derivation = [(action, C)]

        while action != ArcStandardTransitionParser.TERMINATE:

            action = ArcStandardTransitionParser.static_oracle(C, edges, N)

            if action == ArcStandardTransitionParser.SHIFT:
                C = self.shift(C, sentence)
            elif action == ArcStandardTransitionParser.LEFTARC:
                C = self.leftarc(C, sentence)
            elif action == ArcStandardTransitionParser.RIGHTARC:
                C = self.rightarc(C, sentence)
            elif action == ArcStandardTransitionParser.TERMINATE:
                C = self.terminate(C, sentence)

            derivation.append((action, C))

        return derivation
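        # The result has the form [(None, C0), (a1, C1), ..., ('T', Cn)]:
        # each entry pairs an action with the configuration it produced.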

    def shift(self, configuration, tokens):
        """
        Performs the shift action and returns a new configuration
        """
        S, B, A, score = configuration
        w0 = B[0]
        return (S + (w0, ), B[1:], A, score + self.score(
            configuration, ArcStandardTransitionParser.SHIFT, tokens))

    def leftarc(self, configuration, tokens):
        """
        Performs the left arc action and returns a new configuration
        """
        S, B, A, score = configuration
        i, j = S[-2], S[-1]
        return (S[:-2] + (j, ), B, A + ((j, i), ), score + self.score(
            configuration, ArcStandardTransitionParser.LEFTARC, tokens))

    def rightarc(self, configuration, tokens):
        S, B, A, score = configuration
        i, j = S[-2], S[-1]
        return (S[:-1], B, A + ((i, j), ), score + self.score(
            configuration, ArcStandardTransitionParser.RIGHTARC, tokens))

    def terminate(self, configuration, tokens):
        S, B, A, score = configuration
        return (S, B, A, score + self.score(
            configuration, ArcStandardTransitionParser.TERMINATE, tokens))

    def parse_one(self, sentence, beam_size=4, get_beam=False):

        actions = [ArcStandardTransitionParser.LEFTARC,\
                   ArcStandardTransitionParser.RIGHTARC,\
                   ArcStandardTransitionParser.SHIFT,\
                   ArcStandardTransitionParser.TERMINATE]

        N = len(sentence)
        init = (tuple(), tuple(range(N)), tuple(), 0.0
                )  #A config is a hashable quadruple with score
        current_beam = [(-1, None, init)]
        beam = [current_beam]

        for i in range(2 * N):  #because 2N-1+terminate
            next_beam = []
            for idx, (_, action, config) in enumerate(current_beam):
                S, B, A, score = config
                for a in actions:
                    if a == ArcStandardTransitionParser.SHIFT:
                        if B:
                            newconfig = self.shift(config, sentence)
                            next_beam.append((idx, a, newconfig))
                    elif a == ArcStandardTransitionParser.LEFTARC:
                        if len(S) >= 2 and S[-2] != 0:  #a word cannot dominate the dummy root
                            newconfig = self.leftarc(config, sentence)
                            next_beam.append((idx, a, newconfig))
                    elif a == ArcStandardTransitionParser.RIGHTARC:
                        if len(S) >= 2:
                            newconfig = self.rightarc(config, sentence)
                            next_beam.append((idx, a, newconfig))
                    elif a == ArcStandardTransitionParser.TERMINATE:
                        if len(S) < 2 and not B:
                            newconfig = self.terminate(config, sentence)
                            next_beam.append((idx, a, newconfig))
            next_beam.sort(key=lambda x: x[2][3], reverse=True)
            next_beam = next_beam[:beam_size]
            beam.append(next_beam)
            current_beam = next_beam

        if get_beam:
            return beam
        else:
            succ = beam[-1][0][2]  #config of the top item in the last beam
            print(beam[-1][0][1], succ)
            return DependencyTree(tokens=sentence, edges=succ[2])

    def early_prefix(self, ref_parse, beam):
        """
        Finds the prefix for early update, that is, the prefix where the reference parse falls off the beam.
        @param ref_parse: a reference derivation
        @param beam: a beam output by the parse_one function
        @return (bool, ref parse prefix, best-in-beam prefix);
                the bool is True if an update is required, False otherwise
        """
        idx = 0
        for (actionR, configR), beamCol in zip(ref_parse, beam):
            found = False
            #print("seeking",configR, "at index",idx)
            for source_idx, action, configTarget in beamCol:
                #print("  ",configTarget)
                if action == actionR and configTarget[:-1] == configR[:-1]:  #-1 -> does not test score equality
                    found = True
                    #print("   => found")
                    break
            if not found:
                #print("   => not found")
                #backtrace
                jdx = idx
                source_idx = 0
                early_prefix = []
                while jdx >= 0:
                    new_source_idx, action, config = beam[jdx][source_idx]
                    early_prefix.append((action, config))
                    source_idx = new_source_idx
                    jdx -= 1
                early_prefix.reverse()
                return (True, ref_parse[:idx + 1], early_prefix)
            idx += 1
        #if no error found check that the best in beam is the ref parse
        last_ref_action, last_ref_config = ref_parse[-1]
        _, last_pred_action, last_pred_config = beam[-1][0]
        if last_pred_config[:-1] == last_ref_config[:-1]:
            return (False, None, None)  #returns a no update message
        else:  #backtrace
            jdx = len(beam) - 1
            source_idx = 0
            early_prefix = []
            while jdx >= 0:
                new_source_idx, action, config = beam[jdx][source_idx]
                early_prefix.append((action, config))
                source_idx = new_source_idx
                jdx -= 1
            early_prefix.reverse()
            return (True, ref_parse, early_prefix)

    def score(self, configuration, action, tokens):
        """
        Computes the prefix score of a derivation.
        @param configuration: a quadruple (S,B,A,score)
        @param action: an action label in {LEFTARC,RIGHTARC,TERMINATE,SHIFT}
        @param tokens: the x-sequence of tokens to be parsed
        @return a prefix score
        """
        S, B, A, old_score = configuration
        config_repr = self.__make_config_representation(S, B, tokens)
        return old_score + self.model.dot(config_repr, action)

    def __make_config_representation(self, S, B, tokens):
        """
        Gathers the information for coding the configuration as a feature vector.
        @param S: a configuration stack
        @param B: a configuration buffer
        @param tokens: the sentence as a list of (word, tag) couples
        @return an ordered list of feature tuples
        """
        #default values for inaccessible positions
        s0w, s1w, s0t, s1t, b0w, b1w, b0t, b1t = "_UNDEF_", "_UNDEF_", "_UNDEF_", "_UNDEF_", "_UNDEF_", "_UNDEF_", "_UNDEF_", "_UNDEF_"

        if len(S) > 0:
            s0w, s0t = tokens[S[-1]][0], tokens[S[-1]][1]
        if len(S) > 1:
            s1w, s1t = tokens[S[-2]][0], tokens[S[-2]][1]
        if len(B) > 0:
            b0w, b0t = tokens[B[0]][0], tokens[B[0]][1]
        if len(B) > 1:
            b1w, b1t = tokens[B[1]][0], tokens[B[1]][1]

        wordlist = [s0w, s1w, b0w, b1w]
        taglist = [s0t, s1t, b0t, b1t]
        word_bigrams = list(zip(wordlist, wordlist[1:]))
        tag_bigrams = list(zip(taglist, taglist[1:]))
        word_trigrams = list(zip(wordlist, wordlist[1:], wordlist[2:]))
        tag_trigrams = list(zip(taglist, taglist[1:], taglist[2:]))
        return word_bigrams + tag_bigrams + word_trigrams + tag_trigrams
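        # Note: here `tokens` is a list of (word, tag) couples, unlike the
        # constituent parser above, so every position feeds both a word form
        # and a part-of-speech tag into the bigram/trigram templates.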

    def test(self, dataset, beam_size=4):
        """
        @param dataset: a list of DependencyTrees
        @param beam_size: size of the beam
        @return the average attachment accuracy
        """
        N = len(dataset)
        sum_acc = 0.0
        for ref_tree in dataset:
            tokens = ref_tree.tokens
            pred_tree = self.parse_one(tokens, beam_size)
            print(pred_tree)
            print()
            sum_acc += ref_tree.accurracy(pred_tree)
        return sum_acc / N

    def train(self, dataset, step_size=1.0, max_epochs=100, beam_size=4):
        """
        @param dataset : a list of dependency trees
        """
        N = len(dataset)
        sequences = list([(dtree.tokens, self.oracle_derivation(dtree))
                          for dtree in dataset])

        for e in range(max_epochs):
            loss = 0.0
            for tokens, ref_derivation in sequences:
                pred_beam = self.parse_one(tokens, beam_size, get_beam=True)
                (update, ref_prefix,
                 pred_prefix) = self.early_prefix(ref_derivation, pred_beam)
                #print('R',ref_derivation)
                #print('P',pred_prefix)
                #self.test(dataset,beam_size)

                if update:
                    #print (pred_prefix)
                    loss += 1.0
                    delta_ref = SparseWeightVector()
                    current_config = ref_prefix[0][1]
                    for action, config in ref_prefix[1:]:  #skip the initial (None, C0) pair
                        S, B, A, score = current_config
                        x_repr = self.__make_config_representation(
                            S, B, tokens)
                        delta_ref += SparseWeightVector.code_phi(
                            x_repr, action)
                        current_config = config

                    delta_pred = SparseWeightVector()
                    current_config = pred_prefix[0][1]
                    for action, config in pred_prefix[1:]:
                        S, B, A, score = current_config
                        x_repr = self.__make_config_representation(
                            S, B, tokens)
                        delta_pred += SparseWeightVector.code_phi(
                            x_repr, action)
                        current_config = config

                    self.model += step_size * (delta_ref - delta_pred)
            print('Loss = ', loss, "%Exact match = ", (N - loss) / N)
            if loss == 0.0:
                return
Example #27
    def __init__(self):
        self.model = SparseWeightVector()
        self.Y = []  #classes
        self.source_tag = "@@@"
Example #28
class ArcEagerTransitionParser:

    #actions
    LEFTARC = "LA"
    RIGHTARC = "RA"
    SHIFT = "S"
    REDUCE = "R"
    TERMINATE = "T"

    def __init__(self):
        self.model = SparseWeightVector()

    @staticmethod
    def static_oracle(configuration, reference_arcs, N):
        """
        @param configuration: a parser configuration
        @param reference_arcs: a set of dependency arcs
        @param N: the length of the input sequence
        @return the action to execute given config and reference arcs
        """
        S, B, A, score = configuration
        all_words = range(N)

        if S and B:
            i, j = S[-1], B[0]
            if i != 0 and (j, i) in reference_arcs:
                return ArcEagerTransitionParser.LEFTARC
            if (i, j) in reference_arcs:
                return ArcEagerTransitionParser.RIGHTARC
        if S and any([(k, S[-1]) in A for k in all_words])\
           and all([(S[-1], k) in A for k in all_words if (S[-1], k) in reference_arcs]):
            return ArcEagerTransitionParser.REDUCE
        if B:
            return ArcEagerTransitionParser.SHIFT
        return ArcEagerTransitionParser.TERMINATE
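        # Unlike arc-standard, arc-eager links the stack top and the buffer
        # front eagerly; REDUCE is licensed only once S[-1] has a head in A
        # and has collected all of its reference dependents.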

    @staticmethod
    def dynamic_oracle(configuration, action, reference_arcs):
        """
        Computes the cost of an action given a configuration and a reference tree.
        @param configuration: a parser configuration tuple
        @param action: the candidate action whose cost is assessed
        @param reference_arcs: a set of dependencies
        @return True if the action has cost 0, False otherwise (cost > 0 or impossible action)
        """
        S, B, A, score = configuration
        if S and B:
            i, j = S[-1], B[0]
            if action == ArcEagerTransitionParser.LEFTARC:
                if any([(k, i) in reference_arcs for k in B[1:]]):
                    return False
                if any([(i, k) in reference_arcs for k in B]):
                    return False
                return True
            elif action == ArcEagerTransitionParser.RIGHTARC:
                if any([(k, j) in reference_arcs for k in B]):
                    return False
                if any([(k, j) in reference_arcs for k in S[:-1]]):
                    return False
                if any([(j, k) in reference_arcs for k in S]):
                    return False
                return True
        if S:
            if action == ArcEagerTransitionParser.REDUCE:
                if any([(i, k) in reference_arcs for k in B]):
                    return False
                return True

        if B:
            if action == ArcEagerTransitionParser.SHIFT:
                if any([(j, k) in reference_arcs or (k, j) in reference_arcs
                        for k in S]):
                    return False
                return True
        if not B and action == ArcEagerTransitionParser.TERMINATE:
            return True

        return False
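        # Summary: an action is zero-cost iff it makes no reference arc
        # unreachable from the resulting configuration (Goldberg & Nivre, 2012).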

    def static_oracle_derivation(self, ref_parse):
        """
        This generates a static oracle reference derivation from a sentence
        @param ref_parse: a DependencyTree object
        @return : the oracle derivation as a list of (Configuration,action,toklist) triples
        """
        sentence = ref_parse.tokens
        edges = set(ref_parse.edges)
        N = len(sentence)

        C = ((0, ), tuple(range(1, len(sentence))), tuple(), 0.0
             )  #A config is a hashable quadruple with score
        action = ArcEagerTransitionParser.static_oracle(C, edges, N)
        derivation = [(C, action, sentence)]

        while C[1] and action != ArcEagerTransitionParser.TERMINATE:
            #print(C,action)
            if action == ArcEagerTransitionParser.SHIFT:
                C = self.shift(C, sentence)
            elif action == ArcEagerTransitionParser.LEFTARC:
                C = self.leftarc(C, sentence)
            elif action == ArcEagerTransitionParser.RIGHTARC:
                C = self.rightarc(C, sentence)
            elif action == ArcEagerTransitionParser.REDUCE:
                C = self.reduce_config(C, sentence)
            elif action == ArcEagerTransitionParser.TERMINATE:
                C = self.terminate(C, sentence)

            action = ArcEagerTransitionParser.static_oracle(C, edges, N)
            derivation.append((C, action, sentence))

        return derivation

    def shift(self, configuration, tokens):
        """
        Performs the shift action and returns a new configuration
        """
        S, B, A, score = configuration
        w0 = B[0]
        return (
            S + (w0, ), B[1:], A, score +
            self.score(configuration, ArcEagerTransitionParser.SHIFT, tokens))

    def leftarc(self, configuration, tokens):
        """
        Performs the left arc action and returns a new configuration
        """
        S, B, A, score = configuration
        i, j = S[-1], B[0]
        return (S[:-1], B, A + ((j, i), ), score + self.score(
            configuration, ArcEagerTransitionParser.LEFTARC, tokens))

    def rightarc(self, configuration, tokens):
        S, B, A, score = configuration
        i, j = S[-1], B[0]
        return (S + (j, ), B[1:], A + ((i, j), ), score + self.score(
            configuration, ArcEagerTransitionParser.RIGHTARC, tokens))

    def reduce_config(self, configuration, tokens):
        S, B, A, score = configuration
        return (
            S[:-1], B, A, score +
            self.score(configuration, ArcEagerTransitionParser.REDUCE, tokens))

    def terminate(self, configuration, tokens):
        S, B, A, score = configuration
        return (S, B, A, score + self.score(
            configuration, ArcEagerTransitionParser.TERMINATE, tokens))

    def predict_local(self, configuration, sentence, allowed=None):
        """
        Statistical prediction of an action given a configuration
        @param configuration: a tuple (S,B,A,score)
        @param sentence: a list of tokens
        @param allowed:  a list of allowed actions
        @return (new_config,action_performed)
        """
        action_set = set([ArcEagerTransitionParser.LEFTARC,ArcEagerTransitionParser.RIGHTARC,\
                          ArcEagerTransitionParser.SHIFT,ArcEagerTransitionParser.REDUCE,\
                          ArcEagerTransitionParser.TERMINATE])
        if allowed:
            action_set = set(allowed)

        N = len(sentence)
        S, B, A, score = configuration
        candidates = []
        if B and ArcEagerTransitionParser.SHIFT in action_set:
            candidates.append(
                (self.shift(configuration,
                            sentence), ArcEagerTransitionParser.SHIFT))

        if S and ArcEagerTransitionParser.REDUCE in action_set:
            i = S[-1]
            if any([(k, i) in A for k in range(N)]):
                candidates.append((self.reduce_config(configuration, sentence),
                                   ArcEagerTransitionParser.REDUCE))

        if S and B:
            if ArcEagerTransitionParser.LEFTARC in action_set:
                i = S[-1]
                if i != 0 and not any([(k, i) in A for k in range(N)]):
                    candidates.append((self.leftarc(configuration, sentence),
                                       ArcEagerTransitionParser.LEFTARC))
            if ArcEagerTransitionParser.RIGHTARC in action_set:
                j = B[0]
                if not any([(k, j) in A for k in range(N)]):
                    candidates.append((self.rightarc(configuration, sentence),
                                       ArcEagerTransitionParser.RIGHTARC))

        if not B and ArcEagerTransitionParser.TERMINATE in action_set:
            candidates.append(
                (self.terminate(configuration,
                                sentence), ArcEagerTransitionParser.TERMINATE))

        if candidates:
            candidates.sort(key=lambda x: x[0][3], reverse=True)
            return candidates[0]
        else:  #emergency exit when we have no candidate
            return (configuration, ArcEagerTransitionParser.TERMINATE)

    def parse_one(self, sentence):
        """
        Greedy parsing
        @param sentence: a list of tokens
        """
        N = len(sentence)
        C = ((0, ), tuple(range(1, N)), tuple(), 0.0
             )  #A config is a hashable quadruple with score
        action = None
        while action != ArcEagerTransitionParser.TERMINATE:
            C, action = self.predict_local(C, sentence)

        #Attach any word still lacking a head to the dummy root 0
        S, B, A, score = C
        Aset = set(A)
        for s in S:
            if s != 0 and not any([(k, s) in Aset for k in range(N)]):
                Aset.add((0, s))
        return DependencyTree(tokens=sentence, edges=list(Aset))

    def score(self, configuration, action, tokens):
        """
        Computes the prefix score of a derivation.
        @param configuration: a quadruple (S,B,A,score)
        @param action: an action label in {LEFTARC,RIGHTARC,REDUCE,TERMINATE,SHIFT}
        @param tokens: the x-sequence of tokens to be parsed
        @return a prefix score
        """
        S, B, A, old_score = configuration
        config_repr = self.__make_config_representation(S, B, tokens)
        return old_score + self.model.dot(config_repr, action)

    def __make_config_representation(self, S, B, tokens):
        """
        Gathers the information for coding the configuration as a feature vector.
        @param S: a configuration stack
        @param B: a configuration buffer
        @param tokens: the sentence as a list of (word, tag) couples
        @return an ordered list of feature tuples
        """
        #default values for inaccessible positions
        s0w, s1w, s0t, s1t, b0w, b1w, b0t, b1t = "_UNDEF_", "_UNDEF_", "_UNDEF_", "_UNDEF_", "_UNDEF_", "_UNDEF_", "_UNDEF_", "_UNDEF_"

        if len(S) > 0:
            s0w, s0t = tokens[S[-1]][0], tokens[S[-1]][1]
        if len(S) > 1:
            s1w, s1t = tokens[S[-2]][0], tokens[S[-2]][1]
        if len(B) > 0:
            b0w, b0t = tokens[B[0]][0], tokens[B[0]][1]
        if len(B) > 1:
            b1w, b1t = tokens[B[1]][0], tokens[B[1]][1]

        wordlist = [s0w, s1w, b0w, b1w]
        taglist = [s0t, s1t, b0t, b1t]
        word_bigrams = list(zip(wordlist, wordlist[1:]))
        tag_bigrams = list(zip(taglist, taglist[1:]))
        word_trigrams = list(zip(wordlist, wordlist[1:], wordlist[2:]))
        tag_trigrams = list(zip(taglist, taglist[1:], taglist[2:]))
        return word_bigrams + tag_bigrams + word_trigrams + tag_trigrams

    def test(self, dataset):
        """
        @param dataset: a list of DependencyTrees
        @return the average attachment accuracy
        """
        N = len(dataset)
        sum_acc = 0.0
        for ref_tree in dataset:
            tokens = ref_tree.tokens
            pred_tree = self.parse_one(tokens)
            print(pred_tree)
            print()
            sum_acc += ref_tree.accurracy(pred_tree)
        return sum_acc / N

    def choose(self, pred_action, optimal_actions):
        """
        Choice function for the dynamic oracle: returns the predicted action if
        it is optimal, otherwise a random optimal action (no exploration of errors).
        """
        if pred_action in optimal_actions:
            return pred_action
        else:
            return optimal_actions[randrange(0, len(optimal_actions))]

    def dynamic_train(self, treebank, step_size=1.0, max_epochs=100):

        ACTIONS = [ArcEagerTransitionParser.LEFTARC,ArcEagerTransitionParser.RIGHTARC,\
                   ArcEagerTransitionParser.SHIFT,ArcEagerTransitionParser.REDUCE,\
                   ArcEagerTransitionParser.TERMINATE]

        N = len(treebank)
        for e in range(max_epochs):
            loss, total = 0, 0
            for dtree in treebank:
                ref_arcs = set(dtree.edges)
                n = len(dtree.tokens)
                C = ((0, ), tuple(range(1, n)), tuple(), 0.0
                     )  #A config is a hashable quadruple with score
                action = None
                while action != ArcEagerTransitionParser.TERMINATE:
                    pred_config, pred_action = self.predict_local(
                        C, dtree.tokens)
                    optimal_actions = list([
                        a for a in ACTIONS
                        if self.dynamic_oracle(C, a, ref_arcs)
                    ])
                    total += 1
                    if pred_action not in optimal_actions:
                        loss += 1
                        optimal_config, optimal_action = self.predict_local(
                            C, dtree.tokens, allowed=optimal_actions)
                        delta_ref = SparseWeightVector()
                        S, B, A, score = C
                        x_repr = self.__make_config_representation(
                            S, B, dtree.tokens)
                        delta_ref += SparseWeightVector.code_phi(
                            x_repr, optimal_action)

                        delta_pred = SparseWeightVector()
                        S, B, A, score = C
                        x_repr = self.__make_config_representation(
                            S, B, dtree.tokens)
                        delta_pred += SparseWeightVector.code_phi(
                            x_repr, pred_action)

                        self.model += step_size * (delta_ref - delta_pred)

                    action = self.choose(pred_action, optimal_actions)

                    if action == ArcEagerTransitionParser.SHIFT:
                        C = self.shift(C, dtree.tokens)
                    elif action == ArcEagerTransitionParser.LEFTARC:
                        C = self.leftarc(C, dtree.tokens)
                    elif action == ArcEagerTransitionParser.RIGHTARC:
                        C = self.rightarc(C, dtree.tokens)
                    elif action == ArcEagerTransitionParser.REDUCE:
                        C = self.reduce_config(C, dtree.tokens)
                    elif action == ArcEagerTransitionParser.TERMINATE:
                        C = self.terminate(C, dtree.tokens)
            print('Loss = ', loss, "%Local accuracy = ",
                  (total - loss) / total)
            if loss == 0.0:
                return

    def static_train(self, treebank, step_size=1.0, max_epochs=100):
        """
        Trains a model with a static oracle
        @param treebank : a list of dependency trees
        """
        dataset = []
        for dtree in treebank:
            dataset.extend(self.static_oracle_derivation(dtree))
        N = len(dataset)
        for e in range(max_epochs):
            loss = 0.0
            for ref_config, ref_action, tokens in dataset:
                pred_config, pred_action = self.predict_local(
                    ref_config, tokens)
                if ref_action != pred_action:
                    loss += 1.0
                    delta_ref = SparseWeightVector()
                    S, B, A, score = ref_config
                    x_repr = self.__make_config_representation(S, B, tokens)
                    delta_ref += SparseWeightVector.code_phi(
                        x_repr, ref_action)

                    delta_pred = SparseWeightVector()
                    S, B, A, score = ref_config
                    x_repr = self.__make_config_representation(S, B, tokens)
                    delta_pred += SparseWeightVector.code_phi(
                        x_repr, pred_action)

                    self.model += step_size * (delta_ref - delta_pred)
            print('Loss = ', loss, "%Local accuracy = ", (N - loss) / N)
            if loss == 0.0:
                return
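
A minimal usage sketch (assuming `treebank` is a list of DependencyTree objects, as both trainers expect; the names here are illustrative): static_train fits fixed static-oracle derivations, while dynamic_train updates against the zero-cost actions of the dynamic oracle.

    parser = ArcEagerTransitionParser()
    parser.static_train(treebank, max_epochs=20)   # or: parser.dynamic_train(treebank)
    print(parser.test(treebank))                   # average attachment accuracy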