def ids_to_tree(self, emb, postprocess=True):
    """Create a fake (flat) t-tree from token embeddings (IDs).

    The IDs are converted to strings via `self.ids_to_strings`; the special
    tokens `<GO>`, `<STOP>` and `<VOID>` are skipped and every remaining token
    is added to the tree with `create_child(0, len(tree), ...)`.

    @param emb: source embeddings (token IDs)
    @param postprocess: postprocess the sentence (capitalize sentence start, merge plural
        markers)? True by default. Capitalization is only applied if `self.lowercase` is set.
    @return: the corresponding tree
    """
    tree = TreeData()
    tokens = self.ids_to_strings(emb)
    for token in tokens:
        if token in ('<GO>', '<STOP>', '<VOID>'):
            continue

        if postprocess:
            # lemma of the most recently added node
            # (presumably None for the technical root -- TODO confirm against TreeData)
            prev_lemma = tree.nodes[-1].t_lemma

            # casing (only if set to lowercase): capitalize the first token of the tree
            # and any token following sentence-final punctuation. The parentheses are
            # required -- `and` binds tighter than `or`, so without them the punctuation
            # branch would fire even when `self.lowercase` is off.
            if self.lowercase and (len(tree) == 1 or prev_lemma in ('.', '?', '!')):
                # slicing instead of token[0] so that an empty token does not crash
                token = token[:1].upper() + token[1:]

            # plural merging (if plural tokens come up)
            if token == '<-s>':
                if prev_lemma is None:
                    # nothing to pluralize, drop the marker
                    continue
                # replace the previous node by its plural form
                token = self._singular_to_plural(prev_lemma)
                tree.remove_node(len(tree) - 1)

        tree.create_child(0, len(tree), NodeData(token, 'x'))

    return tree
def ids_to_tree(self, emb, postprocess=True):
    """Build a fake (flat) t-tree out of token embeddings (IDs).

    The IDs are turned into strings using `self.ids_to_strings`. The special
    tokens `<GO>`, `<STOP>` and `<VOID>` are left out; each remaining token is
    added to the tree via `create_child(0, len(tree), ...)`.

    @param emb: source embeddings (token IDs)
    @param postprocess: not read by this implementation (no postprocessing is done here); \
        presumably kept so that the signature matches other variants -- TODO confirm.
    @return: the corresponding tree
    """
    skipped = ('<GO>', '<STOP>', '<VOID>')
    flat_tree = TreeData()
    for word in self.ids_to_strings(emb):
        if word not in skipped:
            flat_tree.create_child(0, len(flat_tree), NodeData(word, 'x'))
    return flat_tree
from tgen.planner import CandidateList
from tgen.tree import TreeData, NodeData
import random
import zlib

# Reproducibility check for CandidateList (Python 2 script, uses xrange):
# fill the list with randomly built trees and random integer values using a
# fixed random seed, pop everything, and print a CRC32 of the popped sequence.

random.seed(1206)

candidates = CandidateList()
for _sample in xrange(10000):
    # Earlier variants of this check (formerly kept as commented-out code) used
    # plain strings, pairs of strings and single-node trees as keys; the active
    # variant uses trees with 1-10 randomly attached nodes.
    tree = TreeData()
    nodes_to_add = random.randint(1, 10)
    for _node in xrange(nodes_to_add):
        # NOTE: the order of the random draws below must stay as it is,
        # the printed checksum depends on it.
        parent_idx = random.randint(0, len(tree) - 1)
        second_arg = random.randint(0, 1) == 1
        first_label = str(random.randint(0, 1000))
        second_label = str(random.randint(0, 1000))
        tree.create_child(parent_idx, second_arg, NodeData(first_label, second_label))
    candidates[tree] = random.randint(0, 100)

# empty the list, remembering the order in which the items come out
popped = []
while candidates:
    popped.append(candidates.pop())

print(zlib.crc32(str(popped)))
from tgen.planner import CandidateList
from tgen.tree import TreeData, NodeData
import random
import zlib

# Python 2 script (uses xrange). With a fixed random seed, it stores random
# trees with random integer values in a CandidateList, pops all items and
# prints the CRC32 of the string form of the popped sequence.

random.seed(1206)

cand_list = CandidateList()
for _round in xrange(10000):
    # Key variants tried previously (formerly commented-out code): plain string
    # keys, tuples of two strings, and trees with a single created child.
    # Active variant: a tree to which 1-10 children are added at random places.
    rand_tree = TreeData()
    for _step in xrange(random.randint(1, 10)):
        # Draw the random values in exactly this order -- the final checksum
        # is a function of the whole sequence of draws.
        attach_to = random.randint(0, len(rand_tree) - 1)
        coin = random.randint(0, 1) == 1
        node = NodeData(str(random.randint(0, 1000)),
                        str(random.randint(0, 1000)))
        rand_tree.create_child(attach_to, coin, node)
    cand_list[rand_tree] = random.randint(0, 100)

# pop until the list evaluates as empty, keeping the pop order
pop_order = []
while cand_list:
    pop_order.append(cand_list.pop())

print(zlib.crc32(str(pop_order)))