def init(self):
    """Validate dependencies and prepare the bot's runtime configuration.

    Steps performed, in order:

    * Ensure ``url_normalize`` and ``get_tld`` are not ``None`` (presumably
      they are set to ``None`` when the optional import fails).
    * Reject ``default_scheme`` when the installed ``url-normalize`` is older
      than 1.4.1.
    * Refresh the TLD names cache; a ``TldIOError`` is logged and ignored.
    * Split ``domain_whitelist`` on ``,`` and append the items to
      ``self._domain_whitelist``.
    * Split ``substitutions`` on ``;`` and append consecutive pairs, as
      two-item lists, to ``self._substitutions``.
    * Reset ``classification_type`` to ``'unknown'`` when
      ``ClassificationType.is_valid`` rejects it.
    * Build ``self.url_kwargs`` (contains ``default_scheme`` only when set).

    Raises:
        MissingDependencyError: ``url_normalize`` or ``get_tld`` is ``None``.
        ValueError: ``default_scheme`` is set but ``url-normalize`` is older
            than 1.4.1.
        InvalidArgument: ``substitutions`` has an odd number of items.
    """
    if url_normalize is None:
        raise MissingDependencyError("url-normalize")

    url_version = pkg_resources.get_distribution("url-normalize").version
    # Compare parsed versions instead of int()-converting every dotted
    # component: the latter raised an unrelated ValueError for version
    # strings with a non-numeric part (e.g. '2.0.0rc1').
    minimum_version = pkg_resources.parse_version("1.4.1")
    if (self.default_scheme is not None
            and pkg_resources.parse_version(url_version) < minimum_version):
        raise ValueError(
            "Parameter 'default_scheme' given but 'url-normalize' version %r does not support it. "
            "Get at least version '1.4.1'." % url_version)

    if get_tld is None:
        raise MissingDependencyError("tld")
    try:
        update_tld_names()
    except tld.exceptions.TldIOError:
        # Best effort: a failed cache refresh is reported but not fatal.
        self.logger.info("Could not update TLD names cache.")

    # NOTE(review): _domain_whitelist and _substitutions are extended, not
    # reset, so calling init() more than once presumably accumulates
    # entries - confirm where these lists are created.
    # Truthiness (rather than `!= ''`) also treats None as "not configured"
    # instead of failing on `.split`.
    if self.domain_whitelist:
        self._domain_whitelist.extend(self.domain_whitelist.split(','))

    if self.substitutions:
        items = self.substitutions.split(';')
        if len(items) % 2 != 0:
            raise InvalidArgument(
                'substitutions',
                got=self.substitutions,
                expected="even number of ; separated strings")
        # Pair item 0 with 1, 2 with 3, ...
        self._substitutions.extend(
            [first, second]
            for first, second in zip(items[::2], items[1::2]))

    if not ClassificationType.is_valid(self.classification_type):
        self.classification_type = 'unknown'

    if self.default_scheme is not None:
        self.url_kwargs = {'default_scheme': self.default_scheme}
    else:
        self.url_kwargs = {}
def init(self):
    """Validate dependencies and load the bot's parameters.

    Steps performed, in order:

    * Ensure ``url_normalize`` and ``get_tld`` are not ``None``.
    * Reject a configured ``default_scheme`` parameter when the installed
      ``url-normalize`` is older than 1.4.1.
    * Refresh the TLD names cache; a ``TldIOError`` is logged and ignored.
    * ``self.domain_whitelist``: the ``domain_whitelist`` parameter split on
      ``,`` (empty list when the parameter is absent or empty).
    * ``self.substitutions``: the ``substitutions`` parameter split on ``;``
      and grouped into consecutive two-item lists.
    * ``self.classification_type``: the parameter value, or ``'unknown'``
      when absent or rejected by ``ClassificationType.is_valid``.
    * ``self.url_kwargs``: contains ``default_scheme`` only when the
      parameter exists.

    Raises:
        ValueError: a dependency is missing, or ``default_scheme`` is given
            but ``url-normalize`` is older than 1.4.1.
        InvalidArgument: ``substitutions`` has an odd number of items.
    """
    if url_normalize is None:
        raise ValueError("Could not import 'url-normalize'. Please install it.")

    url_version = pkg_resources.get_distribution("url-normalize").version
    has_default_scheme = hasattr(self.parameters, 'default_scheme')
    # Compare parsed versions instead of int()-converting every dotted
    # component: the latter raised an unrelated ValueError for version
    # strings with a non-numeric part (e.g. '2.0.0rc1').
    minimum_version = pkg_resources.parse_version("1.4.1")
    if (has_default_scheme
            and pkg_resources.parse_version(url_version) < minimum_version):
        raise ValueError("Parameter 'default_scheme' given but 'url-normalize' version %r does not support it. "
                         "Get at least version '1.4.1'." % url_version)

    if get_tld is None:
        raise ValueError("Could not import 'tld'. Please install it.")
    try:
        update_tld_names()
    except tld.exceptions.TldIOError:
        # Best effort: a failed cache refresh is reported but not fatal.
        self.logger.info("Could not update TLD names cache.")

    # Truthiness (rather than `!= ''`) also treats a None parameter as
    # "not configured" instead of failing on `.split`.
    raw_whitelist = getattr(self.parameters, "domain_whitelist", '')
    self.domain_whitelist = raw_whitelist.split(',') if raw_whitelist else []

    self.substitutions = []
    raw_substitutions = getattr(self.parameters, "substitutions", '')
    if raw_substitutions:
        items = raw_substitutions.split(';')
        if len(items) % 2 != 0:
            raise InvalidArgument(
                'substitutions',
                got=raw_substitutions,
                expected="even number of ; separated strings")
        # Pair item 0 with 1, 2 with 3, ...
        self.substitutions.extend(
            [first, second]
            for first, second in zip(items[::2], items[1::2]))

    self.classification_type = getattr(self.parameters, "classification_type", "unknown")
    if not ClassificationType.is_valid(self.classification_type):
        self.classification_type = 'unknown'

    if has_default_scheme:
        self.url_kwargs = {'default_scheme': self.parameters.default_scheme}
    else:
        self.url_kwargs = {}
def init(self):
    """Validate dependencies and load the bot's parameters.

    Steps performed, in order:

    * Ensure ``url_normalize`` and ``get_tld`` are not ``None``.
    * Refresh the TLD names cache via ``update_tld_names()``.
    * ``self.domain_whitelist``: the ``domain_whitelist`` parameter split on
      ``,`` (empty list when the parameter is absent or empty).
    * ``self.substitutions``: the ``substitutions`` parameter split on ``;``
      and grouped into consecutive two-item lists.
    * ``self.classification_type``: the parameter value, or ``'unknown'``
      when absent or rejected by ``ClassificationType.is_valid``.

    Raises:
        ValueError: ``url_normalize`` or ``get_tld`` is ``None``.
        InvalidArgument: ``substitutions`` has an odd number of items.
    """
    if url_normalize is None:
        raise ValueError("Could not import 'url-normalize'. Please install it.")
    if get_tld is None:
        raise ValueError("Could not import 'tld'. Please install it.")

    # NOTE(review): any exception raised by update_tld_names() propagates
    # and aborts initialisation - confirm whether a failed cache refresh
    # should be fatal here.
    update_tld_names()

    # Truthiness (rather than `!= ''`) also treats a None parameter as
    # "not configured" instead of failing on `.split`.
    raw_whitelist = getattr(self.parameters, "domain_whitelist", '')
    self.domain_whitelist = raw_whitelist.split(',') if raw_whitelist else []

    self.substitutions = []
    raw_substitutions = getattr(self.parameters, "substitutions", '')
    if raw_substitutions:
        items = raw_substitutions.split(';')
        if len(items) % 2 != 0:
            raise InvalidArgument(
                'substitutions',
                got=raw_substitutions,
                expected="even number of ; separated strings")
        # Pair item 0 with 1, 2 with 3, ...
        self.substitutions.extend(
            [first, second]
            for first, second in zip(items[::2], items[1::2]))

    self.classification_type = getattr(self.parameters, "classification_type", "unknown")
    if not ClassificationType.is_valid(self.classification_type):
        self.classification_type = 'unknown'