Пример #1
0
    def ff_hminghkm(self,
                    info,
                    fWord,
                    eWord,
                    fIndex,
                    eIndex,
                    links,
                    diagValues,
                    currentNode=None):
        """
        Return translation rules extracted at this node, encoded as features.

        Links whose second component falls inside ``currentNode``'s span are
        re-based (second component relative to the span start, first component
        relative to the smallest first component kept) and handed to
        ``minghkm.extract`` together with the matching slice of ``info['f']``.
        Every extracted rule whose root label equals the node's own ``"pos"``
        label fires one binary feature named ``<method name>___<rule>``, with
        spaces in the rule string replaced by underscores.

        Args:
            info: mapping; only ``info['f']`` (a sliceable sequence) is read.
            fWord, eWord, fIndex, eIndex, diagValues: unused here; presumably
                kept for a shared feature-function signature.
            links: iterable of pairs; ``link[0]`` is used as a position in
                ``info['f']`` and ``link[1]`` is compared with the node span.
            currentNode: tree node exposing ``data["pos"]``, ``span_start()``
                and ``span_end()``.

        Returns:
            A plain empty dict when there is no node or the node is the
            ``'_XXX_'`` placeholder; otherwise a ``defaultdict(int)`` mapping
            feature names to 1 (possibly empty).
        """
        # The parameter defaults to None, so guard before dereferencing it.
        if currentNode is None or currentNode.data["pos"] == '_XXX_':
            return {}
        # ``__name__`` exists on bound methods in both Python 2 and Python 3,
        # whereas ``func_name`` is Python 2 only.
        name = self.ff_hminghkm.__name__
        features = defaultdict(int)

        start_span = currentNode.span_start()
        # Use the real end of the span, as ff_nonlocal_hminghkm does; taking
        # span_start() here would shrink the window to a single position.
        end_span = currentNode.span_end()

        # Keep only links inside the node's span, re-based to the span start.
        in_span = [(link[0], link[1] - start_span)
                   for link in links
                   if start_span <= link[1] <= end_span]
        if not in_span:
            return features

        # NOTE(review): assumes link[0] is a valid non-negative index into
        # info['f'] -- confirm against the caller.
        minf = min(f_pos for f_pos, _ in in_span)
        maxf = max(f_pos for f_pos, _ in in_span)

        fsubset = info['f'][minf:maxf + 1]
        links_subset = [(f_pos - minf, e_pos) for f_pos, e_pos in in_span]

        node_label = currentNode.data["pos"]
        for rule in minghkm.extract(fsubset,
                                    currentNode,
                                    links_subset,
                                    start_span,
                                    hierarchical=True):
            # We only care about rules with root(LHS) = currentNode
            try:
                ruleRoot = rule.e.data["pos"]
            except Exception:
                # Probably a blank line or a bad rule; skip it, but do not
                # swallow KeyboardInterrupt/SystemExit like a bare except.
                continue

            if ruleRoot != node_label:
                continue
            features[name + '___' + str(rule).replace(" ", "_")] = 1
        return features
Пример #2
0
  def ff_hminghkm(self, info, fWord, eWord, fIndex, eIndex, links, diagValues, currentNode = None):
    """
    Return translation rules extracted at this node, encoded as features.

    Links whose second component falls inside ``currentNode``'s span are
    re-based (second component relative to the span start, first component
    relative to the smallest first component kept) and handed to
    ``minghkm.extract`` together with the matching slice of ``info['f']``.
    Every extracted rule whose root ``data`` equals the node's own ``data``
    fires one binary feature named ``<method name>___<rule>``, with spaces in
    the rule string replaced by underscores.

    Args:
      info: mapping; only ``info['f']`` (a sliceable sequence) is read.
      fWord, eWord, fIndex, eIndex, diagValues: unused here; presumably kept
        for a shared feature-function signature.
      links: iterable of pairs; ``link[0]`` is used as a position in
        ``info['f']`` and ``link[1]`` is compared with the node span.
      currentNode: tree node exposing ``data``, ``span_start()`` and
        ``span_end()``.

    Returns:
      A plain empty dict when there is no node or the node is the ``'_XXX_'``
      placeholder; otherwise a ``defaultdict(int)`` mapping feature names to 1
      (possibly empty).
    """
    # The parameter defaults to None, so guard before dereferencing it.
    if currentNode is None or currentNode.data == '_XXX_':
      return {}
    # ``__name__`` exists on bound methods in both Python 2 and Python 3,
    # whereas ``func_name`` is Python 2 only.
    name = self.ff_hminghkm.__name__
    features = defaultdict(int)

    start_span = currentNode.span_start()
    # Use the real end of the span, as ff_nonlocal_hminghkm does; taking
    # span_start() here would shrink the window to a single position.
    end_span = currentNode.span_end()

    # Keep only links inside the node's span, re-based to the span start.
    in_span = [(link[0], link[1] - start_span)
               for link in links
               if start_span <= link[1] <= end_span]
    if not in_span:
      return features

    # NOTE(review): assumes link[0] is a valid non-negative index into
    # info['f'] -- confirm against the caller.
    minf = min(f_pos for f_pos, _ in in_span)
    maxf = max(f_pos for f_pos, _ in in_span)

    fsubset = info['f'][minf:maxf + 1]
    links_subset = [(f_pos - minf, e_pos) for f_pos, e_pos in in_span]

    node_label = currentNode.data
    for rule in minghkm.extract(fsubset,
                                currentNode,
                                links_subset,
                                start_span,
                                hierarchical=True):
      # We only care about rules with root(LHS) = currentNode
      try:
        ruleRoot = rule.e.data
      except Exception:
        # Probably a blank line or a bad rule; skip it, but do not swallow
        # KeyboardInterrupt/SystemExit like a bare except.
        continue

      if ruleRoot != node_label:
        continue
      features[name + '___' + str(rule).replace(" ", "_")] = 1
    return features
Пример #3
0
    def ff_nonlocal_hminghkm(self, info, treeNode, edge, links, srcSpan,
                             tgtSpan, linkedToWords, childEdges, diagValues,
                             treeDistValues):
        """
        Fire features for every translation rule extracted at the current node.

        Links whose second component falls inside ``treeNode``'s span are
        re-based (second component relative to the span start, first component
        relative to the smallest first component kept) and handed to
        ``minghkm.extract`` together with the matching slice of ``info['f']``.
        Each extracted rule whose root exposes ``e.data["pos"]`` fires one
        binary feature named ``<method name>___<rule>``, with spaces in the
        rule string replaced by underscores.

        Args:
            info: mapping; only ``info['f']`` (a sliceable sequence) is read.
            treeNode: tree node exposing ``span_start()`` and ``span_end()``.
            links: collection of pairs; ``link[0]`` is used as a position in
                ``info['f']`` and ``link[1]`` is compared with the node span.
            edge, srcSpan, tgtSpan, linkedToWords, childEdges, diagValues,
            treeDistValues: unused here; presumably kept for a shared
                feature-function signature.

        Returns:
            A plain empty dict when ``links`` is empty; otherwise a
            ``defaultdict(int)`` mapping feature names to 1 (possibly empty).
        """
        if not links:
            return {}
        # ``__name__`` exists on bound methods in both Python 2 and Python 3,
        # whereas ``func_name`` is Python 2 only.
        name = self.ff_nonlocal_hminghkm.__name__
        features = defaultdict(int)

        start_span = treeNode.span_start()
        end_span = treeNode.span_end()

        # Keep only links inside the node's span, re-based to the span start.
        in_span = [(link[0], link[1] - start_span)
                   for link in links
                   if start_span <= link[1] <= end_span]
        if not in_span:
            return features

        # NOTE(review): assumes link[0] is a valid non-negative index into
        # info['f'] -- confirm against the caller.
        minf = min(f_pos for f_pos, _ in in_span)
        maxf = max(f_pos for f_pos, _ in in_span)

        fsubset = info['f'][minf:maxf + 1]
        links_subset = [(f_pos - minf, e_pos) for f_pos, e_pos in in_span]

        for rule in minghkm.extract(fsubset,
                                    treeNode,
                                    links_subset,
                                    start_span,
                                    hierarchical=True):
            try:
                # Probe only: the value is not needed, but a rule whose root
                # label cannot be read is skipped.
                _ = rule.e.data["pos"]
            except Exception:
                # Probably a blank line or a bad rule; skip it, but do not
                # swallow KeyboardInterrupt/SystemExit like a bare except.
                continue

            features[name + '___' + str(rule).replace(" ", "_")] = 1

        return features
Пример #4
0
  def ff_nonlocal_hminghkm(self, info, treeNode, edge, links, srcSpan, tgtSpan, linkedToWords, childEdges, diagValues, treeDistValues):
    """
    Fire features for every translation rule extracted at the current node.

    Links whose second component falls inside ``treeNode``'s span are
    re-based (second component relative to the span start, first component
    relative to the smallest first component kept) and handed to
    ``minghkm.extract`` together with the matching slice of ``info['f']``.
    Each extracted rule whose root exposes ``e.data`` fires one binary
    feature named ``<method name>___<rule>``, with spaces in the rule string
    replaced by underscores.

    Args:
      info: mapping; only ``info['f']`` (a sliceable sequence) is read.
      treeNode: tree node exposing ``span_start()`` and ``span_end()``.
      links: collection of pairs; ``link[0]`` is used as a position in
        ``info['f']`` and ``link[1]`` is compared with the node span.
      edge, srcSpan, tgtSpan, linkedToWords, childEdges, diagValues,
      treeDistValues: unused here; presumably kept for a shared
        feature-function signature.

    Returns:
      A plain empty dict when ``links`` is empty; otherwise a
      ``defaultdict(int)`` mapping feature names to 1 (possibly empty).
    """
    if not links:
      return {}
    # ``__name__`` exists on bound methods in both Python 2 and Python 3,
    # whereas ``func_name`` is Python 2 only.
    name = self.ff_nonlocal_hminghkm.__name__
    features = defaultdict(int)

    start_span = treeNode.span_start()
    end_span = treeNode.span_end()

    # Keep only links inside the node's span, re-based to the span start.
    in_span = [(link[0], link[1] - start_span)
               for link in links
               if start_span <= link[1] <= end_span]
    if not in_span:
      return features

    # NOTE(review): assumes link[0] is a valid non-negative index into
    # info['f'] -- confirm against the caller.
    minf = min(f_pos for f_pos, _ in in_span)
    maxf = max(f_pos for f_pos, _ in in_span)

    fsubset = info['f'][minf:maxf + 1]
    links_subset = [(f_pos - minf, e_pos) for f_pos, e_pos in in_span]

    for rule in minghkm.extract(fsubset, treeNode, links_subset, start_span, hierarchical=True):
      try:
        # Probe only: the value is not needed, but a rule whose root data
        # cannot be read is skipped.
        _ = rule.e.data
      except Exception:
        # Probably a blank line or a bad rule; skip it, but do not swallow
        # KeyboardInterrupt/SystemExit like a bare except.
        continue

      features[name + '___' + str(rule).replace(" ", "_")] = 1

    return features