Exemplo n.º 1
0
 def _create_samples(self, trees):
     """Record one sentence and one training sample for every tree.

     For each tree, the words of its leaves are passed through
     ``utils.normalize`` and joined with single spaces to form the
     sentence, and each leaf's ``cat`` is converted to a string. The
     sentence is appended to ``self.sents``; the pair
     ``(sentence, [categories, dependencies])`` is appended to
     ``self.samples``.

     Args:
         trees: iterable of tree objects exposing ``leaves``; every leaf
             must expose ``word`` and ``cat``.
     """
     for tree in trees:
         leaves = tree.leaves
         normalized_words = [utils.normalize(leaf.word) for leaf in leaves]
         categories = [str(leaf.cat) for leaf in leaves]
         # The dependency structure is delegated to a sibling helper, which
         # receives the tree together with its number of leaves.
         dependencies = self._get_dependencies(tree, len(leaves))
         sentence = ' '.join(normalized_words)
         self.sents.append(sentence)
         sample = (sentence, [categories, dependencies])
         self.samples.append(sample)
Exemplo n.º 2
0
 def _traverse(self, tree):
     """Recursively walk ``tree`` and update the counters held on ``self``.

     Internal nodes:
         * exactly one child -- count the pair (node category, child
           category) in ``self.unary_rules`` and descend into the child;
         * otherwise -- count the pair (first child category, second child
           category) in ``self.seen_rules`` and descend into the first two
           children, in order.

     Leaves: count the category in ``self.cats``, the lower-cased
     normalized word in ``self.words``, and every item yielded by
     ``get_suffix`` / ``get_prefix`` for the normalized word in
     ``self.suffixes`` / ``self.prefixes``.

     NOTE(review): the counters are incremented with ``+= 1`` without prior
     initialization, so they presumably default missing keys to zero
     (e.g. ``Counter`` / ``defaultdict(int)``) -- confirm in ``__init__``.
     """
     if not tree.is_leaf:
         kids = tree.children
         if len(kids) == 1:
             parent_cat = str(tree.cat)
             only_child = kids[0]
             self.unary_rules[(parent_cat, str(only_child.cat))] += 1
             self._traverse(only_child)
         else:
             # Only the first two children take part in the rule and in
             # the recursion.
             pair = (str(kids[0].cat), str(kids[1].cat))
             self.seen_rules[pair] += 1
             for position in (0, 1):
                 self._traverse(kids[position])
         return

     # Leaf node: update the lexical statistics.
     self.cats[str(tree.cat)] += 1
     normalized = utils.normalize(tree.word)
     self.words[normalized.lower()] += 1
     # Affixes come from the normalized word before lower-casing.
     for suffix in get_suffix(normalized):
         self.suffixes[suffix] += 1
     for prefix in get_prefix(normalized):
         self.prefixes[prefix] += 1
Exemplo n.º 3
0
 def text_to_instance(self,
                      sentence: str,
                      tags: List[str] = None,
                      deps: List[int] = None,
                      weight: float = 1.0) -> Instance:  # type: ignore
     # pylint: disable=arguments-differ
     """Build an ``Instance`` from a sentence whose tokens are space-separated.

     Args:
         sentence: text that is split on single spaces; each piece is
             passed through ``utils.normalize`` and wrapped in a ``Token``.
         tags: optional per-token labels, stored under ``'head_tags'``.
         deps: optional per-token labels, stored under ``'head_indices'``.
         weight: scalar stored as a one-element array (numpy type code
             ``'f'``) under ``'weight'``.

     Returns:
         An ``Instance`` holding ``'words'``, ``'metadata'`` (the raw
         sentence under the key ``'words'``) and ``'weight'``. The two
         label fields are added only when both ``tags`` and ``deps`` are
         given.
     """
     words_field = TextField(
         [Token(utils.normalize(piece)) for piece in sentence.split(' ')],
         self._token_indexers)
     fields = {
         'words': words_field,
         'metadata': MetadataField({'words': sentence}),
         'weight': ArrayField(numpy.array([weight], 'f')),
     }
     # Labels are all-or-nothing: if either sequence is missing, neither
     # label field is attached.
     if tags is None or deps is None:
         return Instance(fields)

     fields['head_tags'] = SequenceLabelField(
         tags, words_field, label_namespace='head_tags')
     fields['head_indices'] = SequenceLabelField(
         deps, words_field, label_namespace='head_indices')
     return Instance(fields)