def __init__(self, name, tree, encoder=None):
   """Record the name and collect the weighted text blocks of a tree.

   Args:
     name: Stored unchanged as ``self.name``.
     tree: Passed to ``get_textblocks`` to obtain the blocks.
     encoder: Optional value forwarded to ``get_textblocks`` as its
       second argument.
   """
   self.name = name
   self.blocks = get_textblocks(tree, encoder)
   # The overall weight is the plain sum of every block's own weight.
   self.weight = sum(blk.weight for blk in self.blocks)
   # Disabled in the original source; kept here for reference only:
   #self.weight_noanchor = sum( b.weight_noanchor for b in self.blocks )
   self.anchor_strs = []  # starts out empty
 def __init__(self, name, tree, encoder=None):
     """Record the name and collect the weighted text blocks of a tree.

     Args:
         name: Stored unchanged as ``self.name``.
         tree: Passed to ``get_textblocks`` to obtain the blocks.
         encoder: Optional value forwarded to ``get_textblocks`` as its
             second argument.
     """
     self.name = name
     self.blocks = get_textblocks(tree, encoder)
     # The overall weight is the plain sum of every block's own weight.
     self.weight = sum(blk.weight for blk in self.blocks)
     # Disabled in the original source; kept here for reference only:
     #self.weight_noanchor = sum( b.weight_noanchor for b in self.blocks )
     self.anchor_strs = []  # starts out empty
Exemplo n.º 3
0
 def identify_layout(self, tree, pat_threshold, strict=True):
   """Pick the pattern in ``self.pats`` whose layout carries the most weight.

   The text blocks of ``tree`` are matched against every pattern. A match
   only counts when the summed weight of its sections is strictly greater
   than ``pat_threshold`` times the summed weight of all blocks, and
   strictly greater than any earlier accepted match (so the first of
   several equally heavy matches wins).

   Args:
     tree: Passed to ``get_textblocks``; also dumped via ``tree.dump()``
       when ``self.debug`` is 2 or more.
     pat_threshold: Multiplier applied to the total block weight to get
       the minimum score a layout has to exceed.
     strict: Forwarded to each pattern's ``match_blocks``.

   Returns:
     A ``(pattern, layout)`` tuple for the winning match, or
     ``(None, None)`` when no pattern clears the bar.
   """
   blocks = get_textblocks(tree, encoder=self.encoder)
   if self.debug >= 2:
     tree.dump()
   # Starts as the acceptance threshold, then tracks the best score so far.
   bar = sum(blk.weight for blk in blocks) * pat_threshold
   best_pat = best_layout = None
   for candidate in self.pats:
     matched = candidate.match_blocks(blocks, strict=strict)
     if not matched:
       continue
     score = sum(section.weight for section in matched)
     if not (bar < score):
       continue
     best_pat, best_layout = candidate, matched
     bar = score
   return (best_pat, best_layout)
 def identify_layout(self, tree, pat_threshold, strict=True):
     """Pick the pattern in ``self.pats`` whose layout carries the most weight.

     The text blocks of ``tree`` are matched against every pattern. A match
     only counts when the summed weight of its sections is strictly greater
     than ``pat_threshold`` times the summed weight of all blocks, and
     strictly greater than any earlier accepted match (so the first of
     several equally heavy matches wins).

     Args:
         tree: Passed to ``get_textblocks``; also dumped via
             ``tree.dump()`` when ``self.debug`` is 2 or more.
         pat_threshold: Multiplier applied to the total block weight to
             get the minimum score a layout has to exceed.
         strict: Forwarded to each pattern's ``match_blocks``.

     Returns:
         A ``(pattern, layout)`` tuple for the winning match, or
         ``(None, None)`` when no pattern clears the bar.
     """
     blocks = get_textblocks(tree, encoder=self.encoder)
     if self.debug >= 2:
         tree.dump()
     # Starts as the acceptance threshold, then tracks the best score so far.
     bar = sum(blk.weight for blk in blocks) * pat_threshold
     best_pat = best_layout = None
     for candidate in self.pats:
         matched = candidate.match_blocks(blocks, strict=strict)
         if not matched:
             continue
         score = sum(section.weight for section in matched)
         if not (bar < score):
             continue
         best_pat, best_layout = candidate, matched
         bar = score
     return (best_pat, best_layout)