Exemplos de Check.is_domain_valid em Python

Linguagem de programação: Python

Namespace / nome do pacote: PyFunceble.check

Classe / Tipo: Check

Método / Função: is_domain_valid

Exemplos em hotexamples.com: 2

Check.is_domain_valid em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de PyFunceble.check.Check.is_domain_valid em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir / Ocultar

Check(30)

is_domain(3)

is_domain_valid(2)

endswith(1)

Métodos Frequentes

Check (30)

is_domain (3)

is_domain_valid (2)

endswith (1)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: expiration_date.py Projeto: mrijinm/PyFunceble

class ExpirationDate:  # pylint: disable=too-few-public-methods
    """
    Get, format and return the expiration date of a domain, if it exists.

    The expiration date is extracted from the WHOIS record and converted
    to the unified ``dd-mmm-yyyy`` format (for example ``02-jan-2017``).
    """

    # Accepted spellings of every month, keyed by the unified short name.
    _MONTHS = {
        "jan": [str(1), "01", "Jan", "January"],
        "feb": [str(2), "02", "Feb", "February"],
        "mar": [str(3), "03", "Mar", "March"],
        "apr": [str(4), "04", "Apr", "April"],
        "may": [str(5), "05", "May"],
        "jun": [str(6), "06", "Jun", "June"],
        "jul": [str(7), "07", "Jul", "July"],
        "aug": [str(8), "08", "Aug", "August"],
        "sep": [str(9), "09", "Sep", "September"],
        "oct": [str(10), "Oct", "October"],
        "nov": [str(11), "Nov", "November"],
        "dec": [str(12), "Dec", "December"],
    }

    # Date regex identifiers grouped by the order of their captured groups.
    # Each entry is ``(identifiers, [day index, month index, year index])``:
    # ``[2, 1, 0]`` is for example a date like ``2017-01-02`` where ``01``
    # is the month.
    _GROUP_ORDERS = (
        ((1, 2, 3, 10, 11, 22, 26, 27, 28, 29, 32, 34, 38), [0, 1, 2]),
        ((14, 15, 31, 33, 36, 37), [1, 0, 2]),
        (
            (4, 5, 6, 7, 8, 9, 12, 13, 16, 17, 18, 19, 20, 21, 23, 24, 25, 30, 35),
            [2, 1, 0],
        ),
    )

    # Supported date formats. The key is the identifier which has to be
    # given to self._cases_management(); the insertion order is the order in
    # which the formats are tried.
    _DATE_REGEXES = {
        # Date in format: 02-jan-2017
        "1": r"([0-9]{2})-([a-z]{3})-([0-9]{4})",
        # Date in format: 02.01.2017 // Month: jan
        "2": r"([0-9]{2})\.([0-9]{2})\.([0-9]{4})$",
        # Date in format: 02/01/2017 // Month: jan
        "3": r"([0-3][0-9])\/(0[1-9]|1[012])\/([0-9]{4})",
        # Date in format: 2017-01-02 // Month: jan
        "4": r"([0-9]{4})-([0-9]{2})-([0-9]{2})$",
        # Date in format: 2017.01.02 // Month: jan
        "5": r"([0-9]{4})\.([0-9]{2})\.([0-9]{2})$",
        # Date in format: 2017/01/02 // Month: jan
        "6": r"([0-9]{4})\/([0-9]{2})\/([0-9]{2})$",
        # Date in format: 2017.01.02 15:00:00
        "7": r"([0-9]{4})\.([0-9]{2})\.([0-9]{2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}",
        # Date in format: 20170102 15:00:00 // Month: jan
        "8": r"([0-9]{4})([0-9]{2})([0-9]{2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}",
        # Date in format: 2017-01-02 15:00:00 // Month: jan
        "9": r"([0-9]{4})-([0-9]{2})-([0-9]{2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}",
        # Date in format: 02.01.2017 15:00:00 // Month: jan
        "10": r"([0-9]{2})\.([0-9]{2})\.([0-9]{4})\s[0-9]{2}:[0-9]{2}:[0-9]{2}",
        # Date in format: 02-Jan-2017 15:00:00 UTC
        "11": r"([0-9]{2})-([A-Z]{1}[a-z]{2})-([0-9]{4})\s[0-9]{2}:[0-9]{2}:[0-9]{2}\s[A-Z]{1}.*",  # pylint: disable=line-too-long
        # Date in format: 2017/01/02 01:00:00 (+0900) // Month: jan
        # The parentheses around the offset are literal, hence escaped.
        "12": r"([0-9]{4})\/([0-9]{2})\/([0-9]{2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}\s\(.*\)",
        # Date in format: 2017/01/02 01:00:00 // Month: jan
        "13": r"([0-9]{4})\/([0-9]{2})\/([0-9]{2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}$",
        # Date in format: Mon Jan 02 15:00:00 GMT 2017
        "14": r"[a-zA-Z]{3}\s([a-zA-Z]{3})\s([0-9]{2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}\s[A-Z]{3}\s([0-9]{4})",  # pylint: disable=line-too-long
        # Date in format: Mon Jan 02 2017
        "15": r"[a-zA-Z]{3}\s([a-zA-Z]{3})\s([0-9]{2})\s([0-9]{4})",
        # Date in format: 2017-01-02T15:00:00 // Month: jan
        "16": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}$",
        # Date in format: 2017-01-02T15:00:00Z // Month: jan
        "17": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}[A-Z].*",
        # Date in format: 2017-01-02T15:00:00+0200 // Month: jan
        "18": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}[+-][0-9]{4}",
        # Date in format: 2017-01-02T15:00:00+0200.622265+03:00 //
        # Month: jan
        "19": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9].*[+-][0-9]{2}:[0-9]{2}",  # pylint: disable=line-too-long
        # Date in format: 2017-01-02T15:00:00+0200.622265 // Month: jan
        "20": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}$",
        # Date in format: 2017-01-02T23:59:59.0Z // Month: jan
        "21": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9].*[A-Z]",
        # Date in format: 02-01-2017 // Month: jan
        "22": r"([0-9]{2})-([0-9]{2})-([0-9]{4})",
        # Date in format: 2017. 01. 02. // Month: jan
        "23": r"([0-9]{4})\.\s([0-9]{2})\.\s([0-9]{2})\.",
        # Date in format: 2017-01-02T00:00:00+13:00 // Month: jan
        "24": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}[+-][0-9]{2}:[0-9]{2}",  # pylint: disable=line-too-long
        # Date in format: 20170102 // Month: jan
        "25": r"(?=[0-9]{8})(?=([0-9]{4})([0-9]{2})([0-9]{2}))",
        # Date in format: 02-Jan-2017
        "26": r"([0-9]{2})-([A-Z]{1}[a-z]{2})-([0-9]{4})$",
        # Date in format: 02.1.2017 // Month: jan
        "27": r"([0-9]{2})\.([0-9]{1})\.([0-9]{4})",
        # Date in format: 02 Jan 2017
        "28": r"([0-9]{1,2})\s([A-Z]{1}[a-z]{2})\s([0-9]{4})",
        # Date in format: 02-January-2017
        "29": r"([0-9]{2})-([A-Z]{1}[a-z]*)-([0-9]{4})",
        # Date in format: 2017-Jan-02.
        "30": r"([0-9]{4})-([A-Z]{1}[a-z]{2})-([0-9]{2})\.",
        # Date in format: Mon Jan 02 15:00:00 2017
        "31": r"[a-zA-Z]{3}\s([a-zA-Z]{3})\s([0-9]{1,2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}\s([0-9]{4})",  # pylint: disable=line-too-long
        # Date in format: Mon Jan 2017 15:00:00
        # The empty leading group stands in for the missing day.
        "32": r"()[a-zA-Z]{3}\s([a-zA-Z]{3})\s([0-9]{4})\s[0-9]{2}:[0-9]{2}:[0-9]{2}",
        # Date in format: January 02 2017
        "33": r"([A-Z]{1}[a-z]*)\s([0-9]{1,2})\s([0-9]{4})",
        # Date in format: 2.1.2017 // Month: jan
        "34": r"([0-9]{1,2})\.([0-9]{1,2})\.([0-9]{4})",
        # Date in format: 20170102000000 // Month: jan
        "35": r"([0-9]{4})([0-9]{2})([0-9]{2})[0-9]+",
        # Date in format: 01/02/2017 // Month: jan
        "36": r"(0[1-9]|1[012])\/([0-3][0-9])\/([0-9]{4})",
        # Date in format: January  2 2017
        "37": r"([A-Z]{1}[a-z].*)\s\s([0-9]{1,2})\s([0-9]{4})",
        # Date in format: 2nd January 2017
        "38": r"([0-9]{1,})[a-z]{1,}\s([A-Z].*)\s(2[0-9]{3})",
    }

    # Regex which capture the (unformatted) expiration date in a WHOIS
    # record. They are tried in order.
    _WHOIS_EXPIRATION_REGEXES = [
        r"expire:(.*)",
        r"expire on:(.*)",
        r"Expiry Date:(.*)",
        r"free-date(.*)",
        r"expires:(.*)",
        r"Expiration date:(.*)",
        r"Expiry date:(.*)",
        r"Expire Date:(.*)",
        r"renewal date:(.*)",
        r"Expires:(.*)",
        r"validity:(.*)",
        r"Expiration Date :(.*)",
        r"Expiry :(.*)",
        r"expires at:(.*)",
        r"domain_datebilleduntil:(.*)",
        r"Data de expiração \/ Expiration Date \(dd\/mm\/yyyy\):(.*)",
        r"Fecha de expiración \(Expiration date\):(.*)",
        r"\[Expires on\](.*)",
        r"Record expires on(.*)(\(YYYY-MM-DD\))",
        r"status: OK-UNTIL(.*)",
        r"renewal:(.*)",
        r"expires............:(.*)",
        r"expire-date:(.*)",
        r"Exp date:(.*)",
        r"Valid-date(.*)",
        r"Expires On:(.*)",
        r"Fecha de vencimiento:(.*)",
        r"Expiration:.........(.*)",
        r"Fecha de Vencimiento:(.*)",
        r"Registry Expiry Date:(.*)",
        r"Expires on..............:(.*)",
        r"Expiration Time:(.*)",
        r"Expiration Date:(.*)",
        r"Expired:(.*)",
        r"Date d'expiration:(.*)",
        r"expiration date:(.*)",
    ]

    def __init__(self):
        # We set the log separator.
        self.log_separator = "=" * 100 + " \n"

        # We initiate a variable which will save the extracted expiration date.
        self.expiration_date = ""

        # We initiate a variable which will save our WHOIS record.
        self.whois_record = ""

        # We initiate an instance of Check.
        self.checker = Check()

    def get(self):  # pragma: no cover
        """
        Execute the logic behind the meaning of ExpirationDate and return
        the matched status.

        :return:
            - the result of :code:`self._extract()` when the element is a
              valid domain (or the local mode is on), a referer is found
              and the element is not a subdomain;
            - the (falsy) referer when no referer could be found;
            - :code:`None` when there is no expiration date to look for;
            - :code:`False` when the element passes neither the domain nor
              the IP syntax validation.
        """

        # We get the status of the domain validation.
        domain_validation = self.checker.is_domain_valid()

        # We get the status of the IPv4 validation.
        ip_validation = self.checker.is_ip_valid()

        if "current_test_data" in PyFunceble.INTERN:
            # The end-user wants more information with his test.

            # We update some index.
            PyFunceble.INTERN["current_test_data"].update(
                {
                    "domain_syntax_validation": domain_validation,
                    "ip4_syntax_validation": ip_validation,
                }
            )

        if domain_validation or PyFunceble.CONFIGURATION["local"]:
            # * The element is a valid domain.
            # or
            # * We are in local mode.

            # We get the HTTP status code and the referer of the currently
            # tested element.
            PyFunceble.INTERN.update(
                {"http_code": HTTPCode().get(), "referer": Referer().get()}
            )

            if not PyFunceble.INTERN["referer"]:
                # We could not get the referer.

                # We pass the referer status to the upstream call.
                return PyFunceble.INTERN["referer"]

            if PyFunceble.INTERN["referer"] and not self.checker.is_subdomain():
                # * We have a referer.
                # and
                # * The domain we are testing is not a subdomain.

                # We try to extract the expiration date from the WHOIS record.
                # And we return the matched status.
                return self._extract()

            # The element is a subdomain.

            # We log our whois record.
            Logs().whois(self.whois_record)

            # And we return None, we could not extract the expiration date.
            return None

        if ip_validation or PyFunceble.CONFIGURATION["local"]:
            # The element is a valid IPv4.

            # We get the HTTP status code.
            PyFunceble.INTERN["http_code"] = HTTPCode().get()

            # We log our whois record.
            Logs().whois(self.whois_record)

            # And we return None, there is no expiration date to look for.
            return None

        # The validation was not passed.

        # We log our whois record.
        Logs().whois(self.whois_record)

        # And we return False, the element could not pass the IP and domain
        # syntax validation.
        return False

    @classmethod
    def _convert_1_to_2_digits(cls, number):
        """
        Convert 1 digit number to two digits.

        :param number: A number or a digit string.
        :type number: str|int

        :return: A 2 or more digit string.
        :rtype: str
        """

        return str(number).zfill(2)

    @classmethod
    def _convert_or_shorten_month(cls, data):
        """
        Convert a given month into our unified format.

        :param data: The month to convert or shorten.
        :type data: str

        :return:
            The unified (3 lowercase letters) month name, or the given
            data untouched if it is not a known month spelling.
        :rtype: str
        """

        for month, spellings in cls._MONTHS.items():
            if data in spellings:
                # The given data is one of the spellings of this month.
                return month

        # The given data is not in our map, we return it as is.
        return data

    def _cases_management(self, regex_number, matched_result):
        """
        A little internal helper of self._format. (Avoiding of nested loops)

        .. note::
            The group order of each regex is stored as
            :code:`[day, month, year]` indexes. This means that a
            :code:`[2, 1, 0]` is for example for a date in format
            :code:`2017-01-02` where :code:`01` is the month.

        :param regex_number: The identifier of the regex.
        :type regex_number: int|str

        :param matched_result: The matched result to format.
        :type matched_result: list

        :return:
            A list representing the expiration date, to read as
            :code:`[day, month, year]`. If the regex identifier is not
            mapped, the matched result is returned untouched.
        :rtype: list
        """

        regex_id = int(regex_number)

        for identifiers, (day, month, year) in self._GROUP_ORDERS:
            if regex_id in identifiers:
                # The regex identifier belongs to this group order.

                # 1. We convert the day to 2 digits.
                # 2. We convert the month to the unified format.
                # 3. We return the year as a string.
                return [
                    self._convert_1_to_2_digits(matched_result[day]),
                    self._convert_or_shorten_month(matched_result[month]),
                    str(matched_result[year]),
                ]

        # The regex number is not mapped.
        # We return the given data.
        return matched_result  # pragma: no cover

    def _format(self, date_to_convert=None):
        """
        Format the expiration date into an unified format (01-jan-1970).

        :param date_to_convert:
            The date to convert. In other words, the extracted date.
            Defaults to :code:`self.expiration_date`.
        :type date_to_convert: str

        :return:
            The formatted expiration date, or an empty string if no known
            date format matches.
        :rtype: str
        """

        if not date_to_convert:  # pragma: no cover
            # The date to convert is not given.

            # We work with the previously extracted expiration date.
            date_to_convert = self.expiration_date

        for regex_id, regex in self._DATE_REGEXES.items():
            # We try to get the matched groups if the date to convert
            # matches the currently read regex.
            matched_result = Regex(
                date_to_convert, regex, return_data=True, rematch=True
            ).match()

            if matched_result:
                # We put the matched groups in the [day, month, year] order.
                date = self._cases_management(regex_id, matched_result)

                if date:
                    # We return the formatted date.
                    return "-".join(date)

        # We return an empty string as we were not able to match the date
        # format.
        return ""

    def _extract(self):  # pragma: no cover
        """
        Extract the expiration date from the whois record.

        :return:
            The official "up" status if an expiration date could be found
            (in the database or in the WHOIS record), :code:`None`
            otherwise.
        :rtype: str|None
        """

        # We try to get the expiration date from the database.
        expiration_date_from_database = Whois().get_expiration_date()

        if expiration_date_from_database:
            # The database knows the expiration date.

            # We generate the files and print the status.
            # It's an active element!
            Generate(
                PyFunceble.STATUS["official"]["up"],
                "WHOIS",
                expiration_date_from_database,
            ).status_file()

            # We return the official up status.
            return PyFunceble.STATUS["official"]["up"]

        # We get the whois record.
        self.whois_record = Lookup().whois(PyFunceble.INTERN["referer"])

        if not self.whois_record:
            # The whois record is empty.

            # We return None, we could not get the whois record.
            return None

        if "current_test_data" in PyFunceble.INTERN:
            # The end-user wants more information with his test.

            # We update the whois_record index.
            PyFunceble.INTERN["current_test_data"]["whois_record"] = self.whois_record

        for marker in self._WHOIS_EXPIRATION_REGEXES:
            # We try to extract the expiration date from the WHOIS record.
            expiration_date = Regex(
                self.whois_record, marker, return_data=True, rematch=True, group=0
            ).match()

            if not expiration_date:
                # The currently read regex does not match, we try the next one.
                continue

            # We get the extracted expiration date.
            self.expiration_date = expiration_date[0].strip()

            if not Regex(self.expiration_date, r"[0-9]", return_data=False).match():
                # The extracted expiration date does not have a number.

                # We log the whois record.
                Logs().whois(self.whois_record)

                # We return None, we could not get the expiration date.
                return None

            # The extracted expiration date has a number.
            # We format the extracted expiration date.
            self.expiration_date = self._format()

            if (
                self.expiration_date
                and not Regex(
                    self.expiration_date,
                    r"[0-9]{2}\-[a-z]{3}\-2[0-9]{3}",
                    return_data=False,
                ).match()
            ):
                # The formatted expiration date does not match our unified
                # format.

                # We log the problem.
                Logs().expiration_date(self.expiration_date)

                # We log the whois record.
                Logs().whois(self.whois_record)

            if "current_test_data" in PyFunceble.INTERN:
                # The end-user wants more information with his test.

                # We update the expiration_date index.
                PyFunceble.INTERN["current_test_data"][
                    "expiration_date"
                ] = self.expiration_date

            # We generate the files and print the status.
            # It's an active element!
            Generate(
                PyFunceble.STATUS["official"]["up"], "WHOIS", self.expiration_date
            ).status_file()

            # We log the whois record.
            Logs().whois(self.whois_record)

            # We save the expiration date into the database.
            Whois(expiration_date=self.expiration_date).add()

            # We return the official up status.
            return PyFunceble.STATUS["official"]["up"]

        # None of our regex matched the whois record.
        # We return None, we could not get the expiration date.
        return None

Exemplo n.º 2

0

Exibir arquivo

Arquivo: adblock.py Projeto: mrijinm/PyFunceble

class AdBlock:  # pylint: disable=too-few-public-methods
    """
    Provide the adblock decoding logic.

    :param list_from_file: The file in list format.
    :type list_from_file: list

    :param aggressive:
        When truthy, the domains listed under the :code:`domain=` option of
        a rule are extracted too.
    :type aggressive: bool
    """

    def __init__(self, list_from_file, aggressive=False):
        self.to_format = self._remove_ignored(list_from_file)
        self.aggressive = aggressive

        # We set the separator between a rule and its options.
        self.options_separator = "$"

        # We set the separator between the options themselves.
        self.option_separator = ","

        # We create an instance of the checker.
        self.checker = Check()

    def _remove_ignored(self, list_from_file):
        """
        Remove the ignored elements from the given list.

        :param list_from_file:
            The list which represents the file we are decoding.
        :type list_from_file: list

        :return: The filtered list.
        :rtype: list
        """

        return [x for x in list_from_file if not self._is_to_ignore(x)]

    @classmethod
    def _is_to_ignore(cls, line):
        """
        Check if we have to ignore the given line.

        :param line: The line from the file.
        :type line: str

        :return: :code:`True` if the line has to be ignored.
        :rtype: bool
        """

        # The list of regex to match to be considered as ignored.
        to_ignore = [r"(^!|^@@|^\/|^\[|^\.|^-|^_|^\?|^&)"]

        # The line is ignored as soon as one of the regex matches.
        return any(
            Regex(line, element, return_data=False).match() for element in to_ignore
        )

    def _handle_options(self, options):
        """
        Handle the data from the options.

        :param options: The list of options from the rule.
        :type options: list

        :return:
            In aggressive mode, the list of domains found under the
            :code:`domain=` options (without the negated :code:`~` ones).
            Otherwise :code:`True` as soon as a :code:`domain=` option is
            found, or an empty list if there is none.
        :rtype: list|bool
        """

        # We initiate a variable which will save our result.
        result = []

        # We initiate the regex which will be used to extract the domains
        # listed under the option domain=
        regex_domain_option = r"domain=(.*)"

        for option in options:
            try:
                # We try to extract the list of domains from the currently
                # read option.
                domains = Regex(
                    option,
                    regex_domain_option,
                    return_data=True,
                    rematch=True,
                    group=0,
                ).match()[-1]
            except TypeError:
                # The match result is not subscriptable: the option is not
                # a domain= option, we go to the next one.
                continue

            if not domains:
                # Nothing is listed under the option.
                continue

            if self.aggressive:  # pragma: no cover
                # We keep every listed domain which is not negated.
                result.extend(
                    [x for x in domains.split("|") if x and not x.startswith("~")]
                )
            else:
                # We only report that a domain= option exists.
                return True

        # We return the result.
        return result

    def _extract_base(self, element):
        """
        Extract the base of the given element.

        .. example:
            given "hello/world?world=beautiful" return "hello"

        :param element: The element we are working with.
        :type element: str|list

        :return:
            The base of the element, or the list of the bases if a list is
            given.
        :rtype: str|list
        """

        if isinstance(element, list):
            # The given element is a list.

            # We get the base of each element of the list.
            return [self._extract_base(x) for x in element]

        # We get the base if it is an URL.
        base = self.checker.is_url_valid(url=element, return_base=True)

        if base:
            # It is an URL.

            # We return the extracted base.
            return base

        if "/" in element:
            # We return what precedes the first /
            return element.split("/")[0]

        # / is not in the given element.
        # We return the given element.
        return element

    def decode(self):
        """
        Decode/extract the domains to test from the adblock formatted file.

        :return: The list of domains to test.
        :rtype: list
        """

        # We initiate a variable which will save what we are going to return.
        result = []

        # The first regex we are going to use to get the element to format
        # (rules containing "||").
        regex = r"^(?:.*\|\|)([^\/\$\^]{1,}).*$"

        # The third regex we are going to use to get the element to format
        # (element hiding rules with a quoted attribute value).
        regex_v3 = (
            r"(?:#+(?:[a-z]+?)?\[[a-z]+(?:\^|\*)\=(?:\'|\"))(.*\..*)(?:(?:\'|\")\])"
        )

        # The fourth regex we are going to use to get the element to format
        # (rules enclosed in "|").
        regex_v4 = r"^\|(.*\..*)\|$"

        for line in self.to_format:
            # We extract the different groups from our first regex.
            rematch = Regex(
                line, regex, return_data=True, rematch=True, group=0
            ).match()

            # We extract the different groups from our fourth regex.
            #
            # Note: We execute the following in second because it is more
            # specific than the others.
            rematch_v4 = Regex(
                line, regex_v4, return_data=True, rematch=True, group=0
            ).match()

            # We extract the different groups from our third regex.
            rematch_v3 = Regex(
                line, regex_v3, return_data=True, rematch=True, group=0
            ).match()

            if rematch:
                # The first extraction was successful.

                if self.options_separator in line:
                    # The rule has options, we split them.
                    options = line.split(self.options_separator)[-1].split(
                        self.option_separator
                    )

                    if (
                        not options[-1]
                        or "third-party" in options
                        or "script" in options
                        or "popup" in options
                        or "xmlhttprequest" in options
                    ):
                        # We extend the result with the extracted elements.
                        result.extend(self._extract_base(rematch))

                    extra = self._handle_options(options)

                    if extra and isinstance(extra, list):  # pragma: no cover
                        # We got the domains of the domain= option
                        # (aggressive mode): we keep them along with the
                        # extracted elements.
                        extra.extend(self._extract_base(rematch))
                        result.extend(self._extract_base(extra))
                    elif extra:
                        # A domain= option exists: we keep the extracted
                        # elements.
                        result.extend(self._extract_base(rematch))
                else:
                    # We extend the result with the extracted elements.
                    result.extend(self._extract_base(rematch))

            if rematch_v4:
                # The fourth extraction was successful.

                # We extend the result with the formatted elements.
                result.extend(List(self._format_decoded(rematch_v4)).format())

            if rematch_v3:
                # The third extraction was successful.

                # We extend the result with the formatted elements.
                result.extend(List(self._format_decoded(rematch_v3)).format())

        # We return the result.
        return List(result).format()

    def _format_decoded(self, to_format, result=None):  # pragma: no cover
        """
        Format the extracted adblock line before passing it to the system.

        :param to_format: The extracted line(s) from the file.
        :type to_format: str|list

        :param result: A list of the result of this method.
        :type result: list

        :return: The list of domains or IP to test.
        :rtype: list
        """

        if not result:
            # The result is not given (or is empty).

            # We set the result as an empty list.
            result = []

        # The separators we split on, in the order they are checked.
        separators = ("^", "#", ",", "!", "|")

        for data in List(to_format).format():
            if not data:
                # The currently read line is empty.
                continue

            for separator in separators:
                if separator in data:
                    # We recall this method with the current result state
                    # and the splitted data.
                    #
                    # NOTE(review): returning here means that the entries
                    # which follow in to_format are not processed; confirm
                    # that this is intended.
                    return self._format_decoded(data.split(separator), result)

            data = self._extract_base(data)

            if not data:
                # The extracted base is empty.
                continue

            if self.checker.is_domain_valid(data) or self.checker.is_ip_valid(data):
                # The extracted base is a valid domain or a valid IP.

                # We append it to the result.
                result.append(data)
            else:
                # We try to get the url base.
                url_base = self.checker.is_url_valid(data, return_base=True)

                if url_base:
                    # We append the url base to the result.
                    result.append(url_base)

        # We return the result element.
        return result