def buildHypothesisesForArea(self, sense, hypStacks, area, areaTags, phraseBorders, phraseDerivations, intrinsicCoverageMap, phraseClosedTokens, intrinsicCoverageSourceMap):
    """
    Build hypothesis stacks for every sub-area of the given area, CYK style.

    @param sense: sense tree of the current sentence
    @param hypStacks: map area -> list of hypotheses; updated in place
    @param area: (low token id, high token id)
    @param areaTags: map area -> tag; updated in place
    @param phraseBorders: map node -> (left border, right border)
    @param phraseDerivations: list (child node, yielded node)
    @param intrinsicCoverageMap: coverage info consumed by getSubTreeDistance
        (exact structure not visible here — see getSubTreeDistance)
    @param phraseClosedTokens: closed-token info consumed by generateSources
    @param intrinsicCoverageSourceMap: map area -> intrinsic source for that area
    @return: None
    """
    basePhrases = self.enumerateBasePhrasesForArea(area, phraseBorders, phraseDerivations)
    # Assume all base phrases fulfill the area.
    # Decode in phrase CYK: widen the phrase-group span step by step.
    for phraseCount in range(1, len(basePhrases) + 1):
        for beginPosition in range(0, len(basePhrases) - phraseCount + 1):
            phraseGroup = basePhrases[beginPosition : beginPosition + phraseCount]
            # NOTE: `area` is rebound here to the span of the current phrase group.
            area = (phraseBorders[phraseGroup[0]][0], phraseBorders[phraseGroup[-1]][1])
            generatedSources = self.generateSources(phraseGroup, phraseClosedTokens, phraseBorders)
            finalHyps = []
            intrinsicSource = None
            intrinsicSourceString = None
            triedIntrinsicSource = False
            if area in intrinsicCoverageSourceMap:
                # This span has an intrinsic source from the parse tree:
                # make sure it is also tried as a candidate source.
                intrinsicSource = intrinsicCoverageSourceMap[area]
                intrinsicSourceString = self.buildSourceString(sense, areaTags, intrinsicSource)
                generatedSources.append(intrinsicSource)
            # Fetch rules and decode: for all sources, try exact matching first.
            for source in generatedSources:
                dependentAreas = [p for p in source if isinstance(p, tuple)]
                # Skip sources whose dependent areas have no hypotheses yet.
                missingSupport = False
                for dependentArea in dependentAreas:
                    if dependentArea not in hypStacks:
                        missingSupport = True
                        break
                if missingSupport:
                    continue
                sourceString = self.buildSourceString(sense, areaTags, source)
                if not sourceString:
                    continue
                # A generated source may coincide with the intrinsic one;
                # process that string only once.
                if sourceString == intrinsicSourceString:
                    if triedIntrinsicSource:
                        continue
                    else:
                        triedIntrinsicSource = True
                # Fetch exactly matched rules.
                exactlyMatchedRules = self.rulefetcher.findRulesBySourceString(sourceString, dependentAreas)
                hyps = None
                # Non-intrinsic sources without exact rules are simply dropped;
                # only the intrinsic source falls back to reconstruction.
                if not exactlyMatchedRules and sourceString != intrinsicSourceString:
                    continue
                subTreeDistance = self.getSubTreeDistance(intrinsicCoverageMap, area, dependentAreas)
                if not exactlyMatchedRules:
                    # An intrinsic rule covers the same area in the parse tree.
                    # It must not be kicked off, so use reconstruction or a
                    # depraved glue rule (for overly long sources).
                    depravedReconstruction = len(source) > 12
                    reconstructor = Reconstructor(self.ruletable, self.model, sense, area, sourceString, subTreeDistance, hypStacks, source, areaTags, dependentAreas, depravedReconstruction)
                    hyps = reconstructor.parse()
                else:
                    # Rules found: run normal cube pruning to get hypotheses.
                    pruner = CubePruner(self.model, area, sourceString, subTreeDistance, exactlyMatchedRules, dependentAreas, hypStacks)
                    hyps = pruner.prune()
                finalHyps.extend(hyps)
            # (A leftover `pdb.set_trace()` debug trap for the case
            # `not finalHyps and area in intrinsicCoverageSourceMap` was removed here.)
            if finalHyps:
                # Keep only the beam-size best hypotheses for this area.
                hypStacks[area] = finalHyps[:setting.size_beam]
                areaTags[area] = self.taggingFunction(sense, phraseGroup)
def translateNBestOLD(self,data_tree,data_dep): """ Translate and return a N-best list @type data_tag: string @type data_dep: string @rtype: list of GentileHypothesis """ # first, we need get the tree of input self.model.cacheMode = False setting.load(["nbest", "head_phrases_limit"]) tree = SenseTree(data_tree,data_dep) tree.rebuildTopNode() tree.appendXToTree() tree.upMergeAllConjNodes() tree.rebuildCommaNodes() tree.convertTags() tree.separateContiniousNonTerminals() # tree.mergeContinuousNTs() fetcher = self.prepareRulesForTranslation(tree) # build lexical hypothesis stack # { id->[lexical hyp,] } # stack_lex = self.buildLexicalStack(fetcher) # { id->[lexical hyp,] } hypStacks = {} # for each fragment ( head node is not leaf ) at bottom-up style # use corresponding rules and basic hypothesis(lex or normal) to build normal hyp for this fragment tree.buildLevelMap() cur_level = tree.getMaxLevel() # A dirty trick: save current sense tree to cross-module global variable. __builtin__.currentSenseTree = tree # start pruning self.model.cacheMode = True while cur_level > 0: # [head id,] nodes_cur_level = tree.getNodesByLevel(cur_level) if cur_level == 1: self.model.smode = True else: self.model.smode = False for node in nodes_cur_level: if node not in fetcher.joints: # only prune for joint nodes continue # get rules rules, sitesInvolved = fetcher.mapJointRules[node] # okay available could in random order # we dont need sort it if not rules: # No rules found, force to use CYK. rc = Reconstructor(self.ruletable, self.model, tree, hypStacks, node) hyps = rc.parse() else: # Rules found then cube prunning. 
# sort rules rules = self.model.sortRules(rules) # now run the cube pruning and get normal hypothesises for current node hyps = separately_prune(self.model, node, rules, sitesInvolved, hypStacks) hypStacks[node] = hyps self.model.clearCache() # end of current node cur_level -= 1 rootNode = tree.getRootNode() if rootNode not in hypStacks or len(hypStacks[rootNode])==0: # failed print "[GentileDecoder]","Translation Failed!!!" return [] # end building normal hypothesis stack # hypStacks[rootNode][0].trace() return hypStacks[rootNode][:setting.nbest]
def buildHypothesisesForArea(self, sense, hypStacks, area, areaTags, phraseBorders, phraseDerivations, phraseCoverages, phraseClosedTokens, intrinsicRuleCoverages):
    """
    Build Hypothesis stacks in given area.

    NOTE(review): a second function with this same name (different
    parameters) exists in this file; the later definition shadows the
    earlier one at class-build time — confirm which is intended.

    @param hypStacks: hypothesis stack, map area -> [hypothesis, ...]
    @param area: (low token id, high token id)
    @param phraseBorders: map node -> (left border, right border)
    @param phraseDerivations: list (child node, yielded node)
    @param phraseCoverages: map node -> (left coverage end, right coverage end)
    @param intrinsicRuleCoverages: map source string -> coverage
    @return: None
    """
    basePhrases = self.enumerateBasePhrasesForArea(area, phraseBorders, phraseDerivations)
    # Assume all base phrases fullfill the area
    # print [phraseBorders[p] for p in basePhrases]
    # Decode in phrase CYK: widen the phrase-group span step by step.
    for phraseCount in range(1, len(basePhrases) + 1):
        for beginPosition in range(0, len(basePhrases) - phraseCount + 1):
            phraseGroup = basePhrases[beginPosition : beginPosition + phraseCount]
            # NOTE: `area` is rebound to the span of the current phrase group.
            area = (phraseBorders[phraseGroup[0]][0], phraseBorders[phraseGroup[-1]][1])
            sourcesWithPhrase = self.generateSourcesWithPhrase(phraseGroup, phraseClosedTokens, phraseBorders)
            # Fetch rules and decode
            # For all sources try exactly matching
            for source, phrases in sourcesWithPhrase:
                dependentAreas = [p for p in source if isinstance(p, tuple)]
                # Check dependent hypothesises: every dependent area must
                # already have a stack, otherwise this source is unusable.
                missingSupport = False
                for dependentArea in dependentAreas:
                    if dependentArea not in hypStacks:
                        missingSupport = True
                        break
                if missingSupport:
                    continue
                # Fetch rule
                sourceString = self.buildSourceString(sense, areaTags, source)
                if not sourceString:
                    continue
                # Fetch exactly matched rule
                exactlyMatchedRules = self.rulefetcher.findRulesBySourceString(sourceString, dependentAreas)
                hyps = None
                if not exactlyMatchedRules:
                    # If this source is an intrinsic source
                    # then try to reconstruct or build depraved rules
                    if sourceString not in intrinsicRuleCoverages or intrinsicRuleCoverages[sourceString] != area:
                        continue
                    # In this case, here we got a intrinsic rule covers same area in the parse tree.
                    # We should not allow this rule to be kicked off, so use reconstruction or depraved glue rule.
                    # NOTE(review): nesting below reconstructed from a
                    # whitespace-mangled source — confirm against history.
                    if len(source) > 12:
                        exactlyMatchedRules = self.rulefetcher.buildDepravedMatchingRules(sense, source)
                if not exactlyMatchedRules:
                    # Need reconstruction
                    reconstructor = Reconstructor(self.ruletable, self.model, sense, hypStacks, source, areaTags, dependentAreas)
                    hyps = reconstructor.parse()
                else:
                    # Got some rules, then using normal cube pruning to get hypothesis
                    hyps = separately_prune(self.model, exactlyMatchedRules, hypStacks)
                if hyps:
                    hypStacks[area] = hyps
                    # No tagging function here (unlike the sibling variant);
                    # the tag for this area is left empty.
                    areaTags[area] = None