Пример #1
0
 def is_line_height_hidden(self, element):
     '''
     Decide whether the effective ``line-height`` marks the element as hidden.

     The element's own ``line-height`` style value is used (lower-cased)
     when it has one; otherwise the first non-None value found among the
     elements returned by ``htmlParser.get_all_parent_element`` is used.

     The value is considered hidden when it is an unsigned integer whose
     trailing non-digit text is exactly one of:
       * ``px`` -- number <= settings 'LINE_HEIGHT'
       * ``em`` -- number * 10 <= settings 'LINE_HEIGHT'
       * ``%``  -- number <= settings 'LINE_HEIGHT_PERCENT'

     Returns True when hidden; False otherwise, including when no
     line-height is declared or the value does not parse.
     '''
     parser = self.htmlParser
     line_height = parser.get_element_style_attr_value(element, 'line-height')
     if line_height is not None:
         line_height = line_height.lower()
     else:
         # Fall back to the nearest ancestor that declares a line-height.
         for ancestor in parser.get_all_parent_element(element):
             inherited = parser.get_element_style_attr_value(ancestor, 'line-height')
             if inherited is not None:
                 line_height = inherited
                 break
     if line_height is None:
         return False

     parsed = re.search(r'([-]?\d+)\s*(\D+)', line_height, re.IGNORECASE)
     if parsed is None:
         return False
     number, unit = parsed.group(1), parsed.group(2)
     # isdigit() is False as soon as the captured number carries a minus sign.
     if not number.isdigit():
         return False

     unit_lc = unit.lower()
     if unit_lc == 'px':
         return float(number) <= settings.getfloat('LINE_HEIGHT')
     if unit_lc == 'em':
         return (float(number) * 10) <= settings.getfloat('LINE_HEIGHT')
     if unit == '%':
         return float(number) <= settings.getfloat('LINE_HEIGHT_PERCENT')
     return False
Пример #2
0
 def is_font_size_hidden(self, element):
     '''
     Decide whether the effective ``font-size`` marks the element as hidden.

     Uses the element's own ``font-size`` style value (lower-cased) when
     present, otherwise the first non-None value among the elements
     returned by ``htmlParser.get_all_parent_element``.

     An unsigned integer value is compared against a settings threshold
     chosen by its unit (the non-digit text after the number):
       * ``px`` -- number <= 'FONT_SIZE'
       * ``em`` -- number * 10 <= 'FONT_SIZE'
       * ``%``  -- number <= 'FONT_SIZE_PERCENT'

     Returns True when the threshold test passes, False in every other
     case (no value, unparsable value, signed number, unknown unit).
     '''
     css_value = self.htmlParser.get_element_style_attr_value(element, 'font-size')
     if css_value is None:
         ancestors = self.htmlParser.get_all_parent_element(element)
         for node in ancestors:
             candidate = self.htmlParser.get_element_style_attr_value(node, 'font-size')
             if candidate is not None:
                 css_value = candidate
                 break
     else:
         css_value = css_value.lower()

     found = None
     if css_value is not None:
         found = re.search(r'([-]?\d+)\s*(\D+)', css_value, re.IGNORECASE)
     if found is None:
         return False

     amount, suffix = found.groups()
     # unit -> (factor applied to the number, settings key holding the limit)
     thresholds = {
         'px': (1, 'FONT_SIZE'),
         'em': (10, 'FONT_SIZE'),
         '%': (1, 'FONT_SIZE_PERCENT'),
     }
     rule = thresholds.get(suffix.lower())
     if rule is None or not amount.isdigit():
         return False
     factor, limit_key = rule
     return (float(amount) * factor) <= settings.getfloat(limit_key)
Пример #3
0
 def is_overflow_height_hidden(self, element):
     '''
     Decide whether an ancestor clips the element via ``overflow`` + ``height``.

     Walks the elements returned by ``htmlParser.get_all_parent_element``
     and takes the first one that declares both an ``overflow`` and a
     ``height`` style value. The element is reported hidden when that
     overflow is ``hidden`` (case-insensitive) and the height is at most
     settings 'OVER_HEIGHT':
       * ``expression_r(N)`` / ``expression_r(N-M)`` -- N (minus M) is compared
       * ``<int>px`` -- the number is compared
       * ``<int>em`` -- the number * 10 is compared

     Returns True when hidden, False otherwise (no such ancestor, other
     overflow value, signed number, unknown unit, unparsable height).

     NOTE(review): the expression pattern matches the literal token
     ``expression_r(``; confirm that spelling is what the pages contain.
     '''
     parser = self.htmlParser
     overflow = height = None
     for ancestor in parser.get_all_parent_element(element):
         overflow = parser.get_element_style_attr_value(ancestor, 'overflow')
         height = parser.get_element_style_attr_value(ancestor, 'height')
         if overflow is not None and height is not None:
             break
     else:
         # No ancestor declares both properties.
         return False

     if overflow.lower() != 'hidden':
         return False

     computed = re.search(r'expression_r\(((\d+-)?\d+)\)', height, re.IGNORECASE)
     if computed is not None:
         # The pattern only admits "N" or "N-M". Do the subtraction with
         # base-10 int() instead of eval(): eval() on page-derived text
         # raises SyntaxError for operands such as "08" and reads "010"
         # as octal under Python 2.
         terms = [int(term) for term in computed.group(1).split('-')]
         value = terms[0] - sum(terms[1:])
         return value <= settings.getfloat('OVER_HEIGHT')

     literal = re.search(r'([-]?\d+)\s*(\D+)', height, re.IGNORECASE)
     if literal is None:
         return False
     number = literal.group(1)
     unit = literal.group(2).lower()
     # isdigit() rejects a number captured with a leading minus sign.
     if not number.isdigit():
         return False
     if unit == 'px':
         return float(number) <= settings.getfloat('OVER_HEIGHT')
     if unit == 'em':
         return (float(number) * 10) <= settings.getfloat('OVER_HEIGHT')
     return False
Пример #4
0
 def is_text_indent_hidden(self, element):
     '''
     Decide whether the effective ``text-indent`` marks the element as hidden.

     Uses the element's own ``text-indent`` style value when present,
     otherwise the first non-None value among the elements returned by
     ``htmlParser.get_all_parent_element``.

     The value is parsed as an optionally negative integer followed by a
     unit (the non-digit text after the number) and compared against a
     settings threshold:
       * ``px`` -- number <= 'TEXT_INDENT'
       * ``em`` -- number * 10 <= 'TEXT_INDENT'
       * ``%``  -- number <= 'TEXT_INDENT_PERCENT'

     Negative amounts take part in the comparison. The previous
     ``isdigit()`` guard rejected every number carrying the minus sign
     that the pattern deliberately captures, so a large negative indent
     (text pushed off-screen) could never be reported.

     Returns True when the threshold test passes, False otherwise.
     '''
     parser = self.htmlParser
     indent = parser.get_element_style_attr_value(element, 'text-indent')
     if indent is None:
         # Fall back to the nearest ancestor that declares a text-indent.
         for ancestor in parser.get_all_parent_element(element):
             indent = parser.get_element_style_attr_value(ancestor, 'text-indent')
             if indent is not None:
                 break
     if indent is None:
         return False

     match = re.search(r'([-]?\d+)\s*(\D+)', indent, re.IGNORECASE)
     if match is None:
         return False
     # Group 1 is always "-?<digits>", so float() cannot fail here.
     amount = float(match.group(1))
     unit = match.group(2).lower()
     if unit == 'px':
         return amount <= settings.getfloat('TEXT_INDENT')
     if unit == 'em':
         return (amount * 10) <= settings.getfloat('TEXT_INDENT')
     if unit == '%':
         return amount <= settings.getfloat('TEXT_INDENT_PERCENT')
     return False
Пример #5
0
 def is_marquee_value_hidden(self, element):
     '''
     Decide whether a ``marquee`` ancestor's attributes mark the element hidden.

     Each element returned by ``htmlParser.get_all_parent_element`` whose
     tag is ``marquee`` is inspected. It qualifies when its ``height``,
     ``width`` and ``scrollamount`` attributes are all present, all
     consist of digits, and satisfy:
       * 0 < height <= settings 'HEIGHT_MAX'
       * 0 < width  <= settings 'WIDTH_MAX'
       * scrollamount >= settings 'SCROLLAMOUNT_MIN'

     Returns True for the first qualifying ancestor, False if none does.
     '''
     for ancestor in self.htmlParser.get_all_parent_element(element):
         if ancestor.tag != 'marquee':
             continue
         values = [ancestor.get(name) for name in ('height', 'width', 'scrollamount')]
         if any(value is None for value in values):
             continue
         if not all(value.isdigit() for value in values):
             continue
         height, width, scrollamount = values
         if not (0 < float(height) <= settings.getfloat('HEIGHT_MAX')):
             continue
         if not (0 < float(width) <= settings.getfloat('WIDTH_MAX')):
             continue
         if float(scrollamount) >= settings.getfloat('SCROLLAMOUNT_MIN'):
             return True
     return False
Пример #6
0
 def is_position_left_hidden(self, element):
     '''
     Decide whether ``position`` + ``left`` mark the element as hidden.

     Uses the element's own ``position`` and ``left`` style values when
     both are present; otherwise the first element returned by
     ``htmlParser.get_all_parent_element`` that declares both.

     When the position is ``absolute`` or ``fixed`` (case-insensitive),
     the left offset is compared against a settings threshold:
       * ``expression_r(N)`` / ``expression_r(N-M)`` -- N (minus M) <= 'POSITION_LEFT'
       * ``<int>px`` -- number <= 'POSITION_LEFT'
       * ``<int>em`` -- number * 10 <= 'POSITION_LEFT'
       * ``<int>%``  -- number <= 'POSITION_LEFT_PERCENT'

     Negative offsets take part in the comparison. The previous
     ``isdigit()`` guard rejected every number carrying the minus sign
     that the pattern deliberately captures, so an element moved far to
     the left of the viewport could never be reported.

     Returns True when the threshold test passes, False otherwise.

     NOTE(review): the expression pattern matches the literal token
     ``expression_r(``; confirm that spelling is what the pages contain.
     '''
     parser = self.htmlParser
     position = parser.get_element_style_attr_value(element, 'position')
     left = parser.get_element_style_attr_value(element, 'left')
     if position is None or left is None:
         # Fall back to the nearest ancestor declaring both properties.
         for ancestor in parser.get_all_parent_element(element):
             position = parser.get_element_style_attr_value(ancestor, 'position')
             left = parser.get_element_style_attr_value(ancestor, 'left')
             if position is not None and left is not None:
                 break
         else:
             return False

     if position.lower() not in ('absolute', 'fixed'):
         return False

     computed = re.search(r'expression_r\(((\d+-)?\d+)\)', left, re.IGNORECASE)
     if computed is not None:
         # The pattern only admits "N" or "N-M". Do the subtraction with
         # base-10 int() instead of eval(): eval() on page-derived text
         # raises SyntaxError for operands such as "08" and reads "010"
         # as octal under Python 2.
         terms = [int(term) for term in computed.group(1).split('-')]
         value = terms[0] - sum(terms[1:])
         return value <= settings.getfloat('POSITION_LEFT')

     literal = re.search(r'([-]?\d+)\s*(\D+)', left, re.IGNORECASE)
     if literal is None:
         return False
     # Group 1 is always "-?<digits>", so float() cannot fail here.
     offset = float(literal.group(1))
     unit = literal.group(2).lower()
     if unit == 'px':
         return offset <= settings.getfloat('POSITION_LEFT')
     if unit == 'em':
         return (offset * 10) <= settings.getfloat('POSITION_LEFT')
     if unit == '%':
         return offset <= settings.getfloat('POSITION_LEFT_PERCENT')
     return False
Пример #7
0
 def get_detect_html(self):
     '''
     描述: 通过浏览器获取当前页面的HTML内容
     '''
     try:
         import socket
         timeout = settings.getfloat('HTML_TIMEOUT')
         socket.setdefaulttimeout(timeout)
     except Exception, e:
         raise DarkException, _('Failed to import socket to set timeout. Exception: %(exception)s.' % {'exception': str(e)})