def ignore_it(cls, subject):
    """
    Tell whether the given subject has to be ignored.

    The subject is stripped of its surrounding whitespace, then tested
    against each of the patterns listed in the body. The first pattern
    which matches settles the answer.

    :param str subject: The subject to work with.

    :return:
        :code:`True` if the subject has to be ignored,
        :code:`False` otherwise.
    :rtype: bool
    """

    # The patterns which flag a subject as "to be ignored".
    #
    # Note: In a more aggressive way, r"(\$|,)(image)" may be added.
    ignore_patterns = (r"(^!|^@@|^\/|^\[|^\.|^-|^_|^\?|^&)",)

    # The surrounding whitespace never takes part in the decision, so we
    # get rid of it once, before testing the patterns.
    candidate = subject.strip()

    # As soon as one pattern matches, the subject has to be ignored.
    # If none matches, it does not have to be ignored.
    return any(
        helpers.Regex(pattern).match(candidate, return_match=False)
        for pattern in ignore_patterns
    )
def __extract_it(self):
    """
    Try to extract the expiration date from the given data.

    Each pattern of :code:`self.expiration_patterns` is tried, in order,
    against :code:`self.data`. The first extracted candidate which is
    also matched by :code:`self.regex_numbers` is handed over to the
    formatter.

    :return:
        The output of the formatter for the first suitable candidate,
        :code:`None` if no pattern leads to a suitable candidate.
    """

    for pattern in self.expiration_patterns:
        found = helpers.Regex(pattern).match(
            self.data, return_match=True, rematch=True, group=0
        )

        if found:
            # Only the first extracted element is considered.
            candidate = found[0].strip()

            confirmed = helpers.Regex(self.regex_numbers).match(
                candidate, return_match=True
            )

            if confirmed:
                return self.__format_it(candidate)

    # None of the patterns gave a suitable candidate.
    return None
def __decode_v4(self, data):
    """
    Decodes the v4.

    The pattern targets what is located between the beginning of the
    given data and the first :code:`##` or :code:`#@#` marker. Whatever
    the regex helper extracts is handed over to
    :code:`self.format_decoded`.

    :param str data: A string to decode.

    :return: The decoded entries, an empty list if nothing was extracted.
    :rtype: list
    """

    extracted = helpers.Regex(r"^(.*?)(?:#{2}|#@#)").match(
        data, return_match=True, group=0, rematch=True
    )

    if not extracted:
        # Nothing to decode.
        return []

    return list(self.format_decoded(extracted))
def __decode_v3(self, data):
    """
    Decodes the v3.

    The pattern targets the quoted value of an attribute selector which
    uses the :code:`^=` or :code:`*=` operator (for example
    :code:`##a[href^="example.org"]`), as long as that value contains a
    dot. Whatever the regex helper extracts is handed over to
    :code:`self.format_decoded`.

    :param str data: A string to decode.

    :return: The decoded entries, an empty list if nothing was extracted.
    :rtype: list
    """

    pattern = (
        r"(?:#+(?:[a-z]+?)?\[[a-z]+(?:\^|\*)\=(?:\'|\"))(.*\..*)(?:(?:\'|\")\])"
    )

    extracted = helpers.Regex(pattern).match(
        data, return_match=True, group=0, rematch=True
    )

    if not extracted:
        # Nothing to decode.
        return []

    return list(self.format_decoded(extracted))
def __decode_v2(self, data):
    """
    Decodes the v2.

    The pattern targets data which starts and ends with a :code:`|`
    and which has a dot in between. Whatever the regex helper extracts
    is handed over to :code:`self.format_decoded`.

    :param str data: A string to decode.

    :return: The decoded entries, an empty list if nothing was extracted.
    :rtype: list
    """

    extracted = helpers.Regex(r"^\|(.*\..*)\|$").match(
        data, return_match=True, group=0, rematch=True
    )

    # An empty list is given back when there is nothing to decode.
    return list(self.format_decoded(extracted)) if extracted else []
def __format_it(self, data):
    """
    Formats the given data.

    The patterns of :code:`self.regex_dates` are tried one after the
    other. The first one which matches, and whose match is successfully
    processed by the format management, provides the result.

    :param data: The data to format.

    :return:
        The parts given back by the format management joined with
        :code:`-`, :code:`None` if nothing could be formatted.
    """

    for identifier, pattern in self.regex_dates.items():
        groups = helpers.Regex(pattern).match(
            data, return_match=True, rematch=True
        )

        if groups:
            # The identifier of the pattern tells the format management
            # how the matched groups have to be handled.
            parts = self.__format_management(identifier, groups)

            if parts:
                return "-".join(parts)

    return None  # pragma: no cover
def __decode_v1(self, data):
    """
    Decodes the v1.

    The pattern targets what follows a :code:`||` marker, up to the
    first :code:`/`, :code:`$` or :code:`^`.

    When the given data carries options (:code:`self.options_separator`
    is part of it), the options decide whether the extracted base is
    kept and whether extra entries, given by
    :code:`self.extract_from_options`, are added.

    :param str data: A string to decode.

    :return: The decoded entries, an empty list if nothing was extracted.
    :rtype: list
    """

    matched = helpers.Regex(r"^(?:.*\|\|)([^\/\$\^]{1,}).*$").match(
        data, return_match=True, group=0, rematch=True
    )

    if not matched:
        # Nothing to decode.
        return []

    if self.options_separator not in data:
        # There is no option to take into account: the base is kept as is.
        return list(self.extract_base(matched))

    # Only what follows the last options separator is considered as the
    # list of options.
    options = data.split(self.options_separator)[-1].split(self.option_separator)

    decoded = []

    # The options for which the base is kept.
    accepted_options = (
        "third-party",
        "script",
        "popup",
        "xmlhttprequest",
        "all",
        "document",
    )

    if not options[-1] or any(name in options for name in accepted_options):
        decoded.extend(self.extract_base(matched))

    extra = self.extract_from_options(options)

    if not extra:
        return decoded

    if isinstance(extra, list):  # pragma: no cover
        # We got a list of entries: the base is appended to it, then
        # everything goes through the base extraction.
        extra.extend(self.extract_base(matched))
        decoded.extend(self.extract_base(extra))
    else:
        # We only got a positive answer: the base is added.
        decoded.extend(self.extract_base(matched))

    return decoded
def extract_from_options(self, options):
    """
    Extracts the relevant data from the list of options.

    Each option is searched for a :code:`domain=` declaration.

    .. note::
        The kind of the returned value depends on
        :code:`self.aggressive`: callers have to check what they get
        (with :code:`isinstance` for example).

    :param list options: The list of options of a rule.

    :return:
        - If :code:`self.aggressive` is activated: the list of the
          declared entries (split on :code:`|`), without the empty ones
          and without the ones which start with :code:`~`.
        - Otherwise: :code:`True` as soon as one option declares
          something, an empty list if none does.
    :rtype: list|bool
    """

    result = []

    # The regex which is used to extract what is listed under the
    # option domain=
    regex_domain_option = r"domain=(.*)"

    for option in options:
        # Only the extraction itself is protected: a TypeError raised
        # while processing an extracted value must not be hidden.
        try:
            domains = helpers.Regex(regex_domain_option).match(
                option, return_match=True, rematch=True, group=0
            )[-1]
        except TypeError:
            # Best effort: the option could not be matched or the helper
            # gave back something which can't be indexed (presumably its
            # "no match" value). There is nothing to extract from it.
            continue

        if not domains:
            # Nothing was declared.
            continue

        if not self.aggressive:
            # We only report that something was declared.
            return True

        result.extend(  # pragma: no cover
            [x for x in domains.split("|") if x and not x.startswith("~")]
        )

    return result