示例#1
0
文件: parser.py 项目: motok/intelmq
    def init(self):
        """Check optional dependencies and derive the bot's runtime settings.

        Steps, in order:

        * ``url_normalize`` and ``get_tld`` must not be ``None``.
        * If ``default_scheme`` is configured, the installed ``url-normalize``
          distribution must be at least version 1.4.1.
        * ``update_tld_names()`` is attempted; a ``TldIOError`` is only logged.
        * ``domain_whitelist`` (comma separated) is appended to
          ``self._domain_whitelist``.
        * ``substitutions`` (``;`` separated) is split and consecutive items
          are appended to ``self._substitutions`` as two-element lists.
        * ``classification_type`` is replaced by ``'unknown'`` when
          ``ClassificationType.is_valid`` rejects it.
        * ``self.url_kwargs`` is set to ``{'default_scheme': ...}`` when a
          default scheme is configured, otherwise to an empty dict.

        Raises:
            MissingDependencyError: if ``url_normalize`` or ``get_tld`` is None.
            ValueError: if ``default_scheme`` is set but ``url-normalize`` is
                older than 1.4.1.
            InvalidArgument: if ``substitutions`` does not split into an even
                number of items.
        """
        if url_normalize is None:
            raise MissingDependencyError("url-normalize")
        url_version = pkg_resources.get_distribution("url-normalize").version
        # Compare as real versions: splitting on '.' and calling int() on each
        # part breaks on suffixes such as 'rc1' or 'post1'.
        url_normalize_too_old = (pkg_resources.parse_version(url_version) <
                                 pkg_resources.parse_version('1.4.1'))
        if url_normalize_too_old and self.default_scheme is not None:
            raise ValueError(
                "Parameter 'default_scheme' given but 'url-normalize' version %r does not support it. "
                "Get at least version '1.4.1'." % url_version)
        if get_tld is None:
            raise MissingDependencyError("tld")
        try:
            update_tld_names()
        except tld.exceptions.TldIOError:
            # Best effort: a failed refresh is logged and init continues.
            self.logger.info("Could not update TLD names cache.")
        if self.domain_whitelist != '':
            # NOTE(review): this extends the existing list in place; if
            # _domain_whitelist is shared at class level, entries would pile up
            # across repeated init() calls - confirm against the class definition.
            self._domain_whitelist.extend(self.domain_whitelist.split(','))
        if self.substitutions != '':
            parts = self.substitutions.split(';')
            if len(parts) % 2 != 0:
                raise InvalidArgument(
                    'substitutions',
                    got=self.substitutions,
                    expected="even number of ; separated strings")
            # Consecutive items form one entry: (0, 1), (2, 3), ...
            self._substitutions.extend(
                [first, second]
                for first, second in zip(parts[::2], parts[1::2]))
        if not ClassificationType.is_valid(self.classification_type):
            self.classification_type = 'unknown'

        if self.default_scheme is not None:
            self.url_kwargs = {'default_scheme': self.default_scheme}
        else:
            self.url_kwargs = {}
示例#2
0
    def init(self):
        """Check optional dependencies and derive settings from ``self.parameters``.

        Steps, in order:

        * ``url_normalize`` and ``get_tld`` must not be ``None``.
        * If the ``default_scheme`` parameter exists, the installed
          ``url-normalize`` distribution must be at least version 1.4.1.
        * ``update_tld_names()`` is attempted; a ``TldIOError`` is only logged.
        * ``self.domain_whitelist`` becomes the list of comma separated items
          of the ``domain_whitelist`` parameter (empty list if unset or '').
        * ``self.substitutions`` becomes a list of two-element lists built
          from consecutive ``;`` separated items of the ``substitutions``
          parameter (empty list if unset or '').
        * ``self.classification_type`` is taken from the parameter of the same
          name, falling back to ``'unknown'`` when it is missing or rejected
          by ``ClassificationType.is_valid``.
        * ``self.url_kwargs`` is set to ``{'default_scheme': ...}`` when that
          parameter exists, otherwise to an empty dict.

        Raises:
            ValueError: if ``url_normalize`` or ``get_tld`` is None, or if
                ``default_scheme`` is given but ``url-normalize`` is older
                than 1.4.1.
            InvalidArgument: if ``substitutions`` does not split into an even
                number of items.
        """
        if url_normalize is None:
            raise ValueError("Could not import 'url-normalize'. Please install it.")
        url_version = pkg_resources.get_distribution("url-normalize").version
        # Compare as real versions: splitting on '.' and calling int() on each
        # part breaks on suffixes such as 'rc1' or 'post1'.
        url_normalize_too_old = (pkg_resources.parse_version(url_version) <
                                 pkg_resources.parse_version('1.4.1'))
        if url_normalize_too_old and hasattr(self.parameters, 'default_scheme'):
            raise ValueError("Parameter 'default_scheme' given but 'url-normalize' version %r does not support it. "
                             "Get at least version '1.4.1'." % url_version)
        if get_tld is None:
            raise ValueError("Could not import 'tld'. Please install it.")
        try:
            update_tld_names()
        except tld.exceptions.TldIOError:
            # Best effort: a failed refresh is logged and init continues.
            self.logger.info("Could not update TLD names cache.")
        self.domain_whitelist = []
        if getattr(self.parameters, "domain_whitelist", '') != '':
            self.domain_whitelist.extend(self.parameters.domain_whitelist.split(','))
        self.substitutions = []
        if getattr(self.parameters, "substitutions", '') != '':
            parts = self.parameters.substitutions.split(';')
            if len(parts) % 2 != 0:
                raise InvalidArgument(
                    'substitutions',
                    got=self.parameters.substitutions,
                    expected="even number of ; separated strings")
            # Consecutive items form one entry: (0, 1), (2, 3), ...
            self.substitutions.extend(
                [first, second]
                for first, second in zip(parts[::2], parts[1::2]))
        self.classification_type = getattr(self.parameters, "classification_type", "unknown")
        if not ClassificationType.is_valid(self.classification_type):
            self.classification_type = 'unknown'

        if hasattr(self.parameters, 'default_scheme'):
            self.url_kwargs = {'default_scheme': self.parameters.default_scheme}
        else:
            self.url_kwargs = {}
示例#3
0
 def init(self):
     """Check optional dependencies and derive settings from ``self.parameters``.

     Steps, in order:

     * ``url_normalize`` and ``get_tld`` must not be ``None``.
     * ``update_tld_names()`` is called.
     * ``self.domain_whitelist`` becomes the list of comma separated items
       of the ``domain_whitelist`` parameter (empty list if unset or '').
     * ``self.substitutions`` becomes a list of two-element lists built from
       consecutive ``;`` separated items of the ``substitutions`` parameter
       (empty list if unset or '').
     * ``self.classification_type`` is taken from the parameter of the same
       name, falling back to ``'unknown'`` when it is missing or rejected by
       ``ClassificationType.is_valid``.

     Raises:
         ValueError: if ``url_normalize`` or ``get_tld`` is None.
         InvalidArgument: if ``substitutions`` does not split into an even
             number of items.
     """
     if url_normalize is None:
         raise ValueError("Could not import 'url-normalize'. Please install it.")
     if get_tld is None:
         raise ValueError("Could not import 'tld'. Please install it.")
     # NOTE(review): this call is not guarded, so any exception it raises
     # propagates out of init() - confirm that is intended.
     update_tld_names()
     self.domain_whitelist = []
     if getattr(self.parameters, "domain_whitelist", '') != '':
         self.domain_whitelist.extend(self.parameters.domain_whitelist.split(','))
     self.substitutions = []
     if getattr(self.parameters, "substitutions", '') != '':
         parts = self.parameters.substitutions.split(';')
         if len(parts) % 2 != 0:
             raise InvalidArgument(
                 'substitutions',
                 got=self.parameters.substitutions,
                 expected="even number of ; separated strings")
         # Consecutive items form one entry: (0, 1), (2, 3), ...
         self.substitutions.extend(
             [first, second]
             for first, second in zip(parts[::2], parts[1::2]))
     self.classification_type = getattr(self.parameters, "classification_type", "unknown")
     if not ClassificationType.is_valid(self.classification_type):
         self.classification_type = 'unknown'