Пример #1
0
    def guess_parent_container(self):
        """Return the parent element whose paragraphs hold the most words.

        Picks the entry of ``self._get_statistical_map()`` with the largest
        ``cnt_words`` and walks ``self.tree`` looking for the parent whose
        ``uuid`` attribute equals that entry's key.

        Returns:
            The first matching parent yielded by ``iterparent(self.tree)``,
            or ``None`` when the statistical map is empty, when the selected
            entry has neither ``cnt_p`` nor ``cnt_words``, or when no parent
            carries the selected uuid.
        """
        stat_map = self._get_statistical_map()
        if not stat_map:
            # Nothing was counted, so there is no best entry to pick.
            return None

        # max() returns the first of several equal maxima, which matches
        # taking element [0] of a stable descending sort.
        frequent_parent_uuid, stat = max(
            stat_map.items(),
            key=lambda item: item[1].cnt_words,
        )
        if not stat.cnt_p and not stat.cnt_words:
            return None

        for parent, _child in iterparent(self.tree):
            if parent.attrib.get('uuid') == frequent_parent_uuid:
                return parent

        return None
Пример #2
0
    def _get_statistical_map(self):
        """Collect per-parent counters for the ``p`` elements of ``self.tree``.

        Iterates the ``(parent, child)`` pairs produced by
        ``iterparent(self.tree)``. Children whose tag is not ``'p'`` and
        children for which ``get_text_of`` returns ``None`` are skipped.

        Returns:
            A dict keyed by the parent's ``uuid`` attribute (``None`` when
            the attribute is absent). Each value is a ``StatCounter`` created
            on the first qualifying child of that parent and updated through
            ``inc_p()`` / ``inc_words(text)`` on every later one.
        """
        counters = {}
        for container, node in iterparent(self.tree):
            if node.tag != 'p':
                continue

            node_text = get_text_of(node)
            if node_text is None:
                continue

            key = container.attrib.get('uuid')
            try:
                counter = counters[key]
            except KeyError:
                # First qualifying paragraph seen under this parent.
                counters[key] = StatCounter(0, node_text, key)
            else:
                counter.inc_p()
                counter.inc_words(node_text)

        return counters