def process_edge(self, edge, depth):
    """Emit coreference ops between an edge and its unidecoded form.

    The edge is passed through ``unidecode_edge``; when the result differs
    from the input and is already present in the hypergraph, the coref
    counter is bumped and everything produced by ``make_corefs_ops`` for the
    pair is yielded.

    Args:
        edge: The edge currently being visited.
        depth: Not used by this method.

    Yields:
        The items produced by ``make_corefs_ops(hg, edge, unidecoded_edge)``.
    """
    hypergraph = self.system.get_hg(self)
    plain_edge = unidecode_edge(edge)

    # Skip edges that are unchanged by unidecoding, or whose unidecoded
    # form does not exist in the hypergraph.
    differs = plain_edge != edge
    if not differs or not hypergraph.exists(plain_edge):
        return

    self.corefs += 1
    yield from make_corefs_ops(hypergraph, edge, plain_edge)
def process_edge(self, edge, depth):
    """Emit coreference ops between ``(the X)`` and ``X``.

    An edge qualifies when it is a non-atomic edge of exactly two elements,
    its first element is an atom whose root is ``'the'``, and
    ``has_proper_concept`` accepts its second element.  For such an edge the
    coref counter is bumped and the ops linking the whole edge to its second
    element are yielded.

    Args:
        edge: The edge currently being visited.
        depth: Not used by this method.

    Yields:
        The items produced by ``make_corefs_ops(hg, edge, edge[1])``.
    """
    hypergraph = self.system.get_hg(self)

    # Shape check: a two-element, non-atomic edge.
    if edge.is_atom() or len(edge) != 2:
        return

    # The first element must be the atom "the".
    determiner = edge[0]
    if not (determiner.is_atom() and determiner.root() == 'the'):
        return

    # The second element must be accepted by has_proper_concept.
    concept = edge[1]
    if not has_proper_concept(concept):
        return

    self.corefs += 1
    yield from make_corefs_ops(hypergraph, edge, concept)
def on_end(self):
    """Assign seeds to coreference sets and emit the resulting coref ops.

    For each seed in ``self.seeds`` the candidate coreference sets are
    obtained from ``self.corefs_from_seed``.  The summed deep degree of each
    set is normalised by the total over all sets.  If that total is smaller
    than the seed's own deep degree and the largest set holds at least 70%
    of the total, the seed is added to that set.  Afterwards every pair of
    edges inside each set is linked through ``make_corefs_ops``.

    Seeds whose sets have a total degree of zero produce no ops, but they
    still advance the progress bar.

    Yields:
        The items produced by ``make_corefs_ops`` for every pair of edges
        in every coreference set.
    """
    hg = self.system.get_hg(self)

    self.logger.info('processing seeds')
    with progressbar.ProgressBar(max_value=len(self.seeds)) as bar:
        # enumerate() ties the progress value to the seed's position, so
        # it cannot drift when a seed is skipped.
        for count, seed in enumerate(self.seeds, start=1):
            crefs = self.corefs_from_seed(seed)

            # check if the seed should be assigned to a synonym set
            if len(crefs) > 0:
                # find set with the highest degree and normalize set
                # degrees by total degree
                cref_degs = [hg.sum_deep_degree(cref) for cref in crefs]
                total_deg = sum(cref_degs)
                if total_deg == 0:
                    # Nothing to normalise and nothing is emitted for this
                    # seed, but it still counts as processed.
                    bar.update(count)
                    continue

                max_ratio = 0.
                best_pos = -1
                for pos, cref_deg in enumerate(cref_degs):
                    ratio = cref_deg / total_deg
                    if ratio > max_ratio:
                        max_ratio = ratio
                        best_pos = pos

                dd = hg.deep_degree(seed)

                # ensure that the seed is used by itself
                if total_deg < dd:
                    self.logger.debug('seed: {}'.format(seed))
                    self.logger.debug('crefs: {}'.format(crefs))
                    self.logger.debug('max_ratio: {}'.format(max_ratio))
                    self.logger.debug(
                        'total coref dd: {}'.format(total_deg))
                    self.logger.debug('seed dd: {}'.format(dd))

                    # add seed if coreference set is sufficiently dominant
                    if max_ratio >= .7:
                        crefs[best_pos].add(seed)
                        self.logger.debug('seed added to cref: {}'.format(
                            crefs[best_pos]))

            # every pair of edges within a set is declared coreferent
            for cref in crefs:
                for edge1, edge2 in itertools.combinations(cref, 2):
                    self.logger.debug('are corefs: {} | {}'.format(
                        edge1.to_str(), edge2.to_str()))
                    self.corefs += 1
                    yield from make_corefs_ops(hg, edge1, edge2)

            bar.update(count)
def process_edge(self, edge, depth):
    """Emit coreference ops between a concept and its dominant subtype.

    Only edges whose type starts with ``'C'`` and that are not yet in
    ``self.done`` are handled; each such edge is recorded in ``self.done``.
    The deep degree of every result of ``subtypes(hg, edge)`` is normalised
    by their total (a zero total is replaced by 1 to avoid dividing by
    zero).  If the edge's own deep degree exceeds that of the best subtype
    and the best subtype holds at least 70% of the total, the two are
    linked through ``make_corefs_ops``.

    Args:
        edge: The edge currently being visited.
        depth: Not used by this method.

    Yields:
        The items produced by ``make_corefs_ops(hg, edge, best_subtype)``.
    """
    hypergraph = self.system.get_hg(self)

    # Only concepts that have not been handled before are of interest.
    if edge.type()[0] != 'C' or edge in self.done:
        return
    self.done.add(edge)

    candidates = tuple(subtypes(hypergraph, edge))
    if not candidates:
        return

    # Degree of each subtype and the total they are normalised by.
    degrees = [hypergraph.deep_degree(candidate) for candidate in candidates]
    denominator = sum(degrees)
    if denominator == 0:
        denominator = 1

    # First subtype holding the strictly largest share; the index stays at
    # -1 (i.e. the last subtype) when no share is above zero.
    top_ratio = 0.
    top_index = -1
    for index, degree in enumerate(degrees):
        share = degree / denominator
        if share > top_ratio:
            top_ratio = share
            top_index = index

    # compute some degree-related metrics
    top_degree = hypergraph.deep_degree(candidates[top_index])
    own_degree = hypergraph.deep_degree(edge)
    if own_degree <= top_degree:
        return

    degree_ratio = float(top_degree) / float(own_degree)
    diagnostics = (
        ('concept: {}', edge.to_str()),
        ('subconcepts: {}', candidates),
        ('# subs: {}', len(candidates)),
        ('max_ratio: {}', top_ratio),
        ('sdd: {}', top_degree),
        ('dd: {}', own_degree),
        ('sdd_dd: {}', degree_ratio),
    )
    for template, value in diagnostics:
        self.logger.debug(template.format(value))

    # NOTE: an additional `sdd_dd < .5` condition is currently disabled.
    if top_ratio >= .7:
        best_subtype = candidates[top_index]
        self.logger.debug('are corefs: {} | {}'.format(
            edge.to_str(), best_subtype.to_str()))
        self.corefs += 1
        yield from make_corefs_ops(hypergraph, edge, best_subtype)
def _make_singular_plural_relation(self, singular, plural):
    """Emit singular/plural and coreference ops for a pair, recursively.

    First yields everything from ``make_singular_plural_ops`` and then from
    ``make_corefs_ops`` for the given pair, incrementing ``self.sng_pl`` and
    ``self.corefs`` after the respective ops have been yielded.  Then, for
    every result of ``subtypes(hg, singular)``, the subtype's main concept
    is replaced by ``plural``; if the resulting edge is truthy and exists in
    the hypergraph, the same procedure is applied to that
    (subtype, replaced edge) pair.

    Args:
        singular: Edge treated as the singular form.
        plural: Edge treated as the plural form.

    Yields:
        The items produced by ``make_singular_plural_ops`` and
        ``make_corefs_ops`` for this pair and for all qualifying subtype
        pairs.
    """
    hypergraph = self.system.get_hg(self)
    self.logger.debug('singular: {}; plural: {}'.format(singular, plural))

    # Counters are bumped only once the corresponding ops were all yielded.
    yield from make_singular_plural_ops(hypergraph, singular, plural)
    self.sng_pl += 1
    yield from make_corefs_ops(hypergraph, singular, plural)
    self.corefs += 1

    for narrower in subtypes(hypergraph, singular):
        pluralised = narrower.replace_main_concept(plural)
        # Skip subtypes with no usable replacement or whose replacement is
        # not present in the hypergraph.
        if not pluralised or not hypergraph.exists(pluralised):
            continue
        yield from self._make_singular_plural_relation(narrower, pluralised)