Exemplo n.º 1
0
def convert_prediction(dpack, triples):
    """Populate a datapack prediction array from a list
    of triples

    Parameters
    ----------
    dpack : DataPack
        Datapack whose pairings the triples refer to.

    triples : [(string, string, string)]
        List of (EDU id, EDU id, label) triples; pairs absent
        from this list are marked UNRELATED.

    Returns
    -------
    dpack : DataPack
        A copy of the original DataPack with predictions
        set
    """
    link_map = {(id1, id2): lab for id1, id2, lab in triples}

    def get_lbl(pair):
        'from edu pair to label number'
        edu1, edu2 = pair
        key = edu1.id, edu2.id
        lbl = link_map.get(key, UNRELATED)
        return dpack.label_number(lbl)

    prediction = np.fromiter((get_lbl(pair) for pair in dpack.pairings),
                             dtype=np.dtype(np.int16))
    graph = Graph(prediction=prediction,
                  attach=dpack.graph.attach,
                  label=dpack.graph.label)
    return dpack.set_graph(graph)
Exemplo n.º 2
0
    def _classify(self, dpack, X, W, nonfixed_pairs=None):
        """Decode a predicted tree from attachment scores.

        Parameters
        ----------
        dpack : DataPack
            Datapack under scrutiny.
        X : array-like
            Feature rows, one per candidate EDU pair.
        W : array-like
            Weights of the attachment model.
        nonfixed_pairs : array of int, optional
            Indices of the pairs whose scores get (re)computed;
            defaults to all pairs in `dpack`.

        Returns
        -------
        edge_list
            Predicted edges, as triples from `prediction_to_triples`.
        """
        n_items = len(dpack)
        if nonfixed_pairs is None:
            nonfixed_pairs = np.arange(n_items)

        graph = dpack.graph
        if graph is None:
            # no prior scores: start from blank arrays
            scores = np.zeros(n_items)
            label = np.zeros((n_items, len(dpack.labels)))
            prediction = np.empty(n_items)
        else:
            # preserve the fixed pairs' existing scores
            scores = np.copy(graph.attach)
            label = np.copy(graph.label)
            prediction = np.copy(graph.prediction)

        # attachment scores for the selected pairs; the dot product is
        # (k, 1) so reshape drops the trailing dimension
        # TODO should this be self.decision_function?
        raw_scores = X[nonfixed_pairs].dot(W.T)
        scores[nonfixed_pairs] = raw_scores.reshape(len(nonfixed_pairs))

        # dummy labelling for unlabelled parsing: all probability mass
        # goes to the UNKNOWN label
        unk = dpack.label_number(UNKNOWN)
        label[nonfixed_pairs] = 0.0
        label[nonfixed_pairs, unk] = 1.0
        # predicted label defaults to UNK; the decoder overwrites it
        prediction[nonfixed_pairs] = unk

        dpack = dpack.set_graph(
            Graph(prediction=prediction, attach=scores, label=label))
        # call decoder
        dpack_pred = self.decoder.transform(dpack)
        return prediction_to_triples(dpack_pred)
Exemplo n.º 3
0
    def _fix_intra_edges(self, dpack, spacks):
        """Fix intra-sentential edges for inter-sentential parsing.

        Scores are set to 1.0 for both attachment and labelling, for
        intra-sentential links.

        Parameters
        ----------
        dpack : DataPack
            Original datapack.

        spacks : list of DataPack
            List of intra-sentential datapacks, containing intra-sentential
            predictions.

        Returns
        -------
        dpack_copy : DataPack
            Copy of dpack with attachment and labelling scores updated.

        FIXME
        -----
        [ ] generalize to support non-probabilistic scores

        Notes
        -----
        A "legacy" variant moved from learning.oracle used to set all
        intra attachments to 1.0 via
        ``idxes_intra(dpack, include_fake_root=False)``; it looked
        simpler but it is unclear whether it was functionally
        equivalent in our pipelines, so it was dropped here.
        """
        # NB this code was moved here from SoftParser._recombine()
        # it probably leaves room for improvement, notably speedups
        unrelated_lbl = dpack.label_number(UNRELATED)
        sent_lbl = self._mk_get_lbl(dpack, spacks)

        # tweak intra-sentential attachment and labelling scores
        weights_a = np.copy(dpack.graph.attach)
        weights_l = np.copy(dpack.graph.label)
        for i, (edu1, _) in enumerate(dpack.pairings):
            if edu1.id == FAKE_ROOT_ID:
                # don't confuse the inter parser with sentence roots
                continue
            lbl = sent_lbl(i)
            if lbl is not None and lbl != unrelated_lbl:
                # force this intra-sentential decision: probability one
                # on both attachment and the predicted label
                weights_a[i] = 1.0
                weights_l[i] = np.zeros(len(dpack.labels))
                weights_l[i, lbl] = 1.0

        graph = Graph(prediction=dpack.graph.prediction,
                      attach=weights_a,
                      label=weights_l)
        dpack_copy = dpack.set_graph(graph)
        return dpack_copy
Exemplo n.º 4
0
 def _classify(self, dpack, X, W):
     """Decode and return the predicted tree as edge triples."""
     n_items = len(dpack)
     # attachment scores; the dot product is (n, 1), reshape drops
     # the trailing dimension
     # TODO: should this be self.decision_function?
     attach = X.dot(W.T).reshape(n_items)
     # unlabelled parsing: put all probability mass on UNKNOWN
     unk = dpack.label_number(UNKNOWN)
     label = np.zeros((n_items, len(dpack.labels)))
     label[:, unk] = 1.0
     prediction = np.empty(n_items)
     prediction[:] = unk
     dpack = dpack.set_graph(
         Graph(prediction=prediction, attach=attach, label=label))
     graph = self.decoder.transform(dpack)
     return prediction_to_triples(graph)
Exemplo n.º 5
0
    def _recombine(self, dpack, spacks):
        """Soft decoding: pass sentence-level edges on to the inter
        parser through the probability distribution."""
        unrelated_lbl = dpack.label_number(UNRELATED)
        sent_lbl = self._mk_get_lbl(dpack, spacks)

        attach = np.copy(dpack.graph.attach)
        label = np.copy(dpack.graph.label)
        for idx, (gov, _) in enumerate(dpack.pairings):
            if gov.id == FAKE_ROOT_ID:
                # don't confuse the inter parser with sentence roots
                continue
            lbl = sent_lbl(idx)
            if lbl is None or lbl == unrelated_lbl:
                continue
            # force the intra-sentential decision: probability one on
            # both attachment and the predicted label
            attach[idx] = 1.0
            label[idx] = np.zeros(len(dpack.labels))
            label[idx, lbl] = 1.0
        graph = Graph(prediction=dpack.graph.prediction,
                      attach=attach,
                      label=label)
        return self._parsers.inter.transform(dpack.set_graph(graph))
Exemplo n.º 6
0
    def multiply(dpack, attach=None, label=None):
        """Fold new scores into the datapack's probabilities.

        If the datapack already carries a weighted graph, multiply its
        existing probabilities elementwise by the given ones; otherwise
        install the given scores directly.

        Parameters
        ----------
        attach (array(float), optional)
            If unset will default to ones
        label (2D array(float), optional)
            If unset will default to ones

        Returns
        -------
        The modified datapack
        """
        n_items = len(dpack)
        if dpack.graph is None:
            # no prior scores: default to uniform ones, and mark every
            # pair with the UNKNOWN label
            if attach is None:
                attach = np.ones(n_items)
            if label is None:
                label = np.ones((n_items, len(dpack.labels)))
            prediction = np.empty(n_items)
            prediction[:] = dpack.label_number(UNKNOWN)
        else:
            gra = dpack.graph
            prediction = gra.prediction
            attach = (gra.attach if attach is None
                      else np.multiply(attach, gra.attach))
            label = (gra.label if label is None
                     else np.multiply(label, gra.label))
        return dpack.set_graph(Graph(prediction=prediction,
                                     attach=attach,
                                     label=label))