Пример #1
0
    def parse(self, data_string):
        """
        Scan a string for IP addresses and URLs.

        Strings shorter than four characters are ignored. A string with at
        least two dots or two colons is checked as an IPv4 address, then as
        an IPv6 address. A string with at least one dot and four ASCII
        letters is checked as a URL, first as given and then, when it has no
        '://', with 'http://' prepended.

        Every confidence penalty is applied to ``self.confidence`` before
        the corresponding result is yielded.

        :param data_string: the string we want to parse
        :return: yields ``self.result(...)`` for every match
        """
        if len(data_string) < 4:
            return

        periods = data_string.count(".")
        looks_like_ip = periods >= 2 or data_string.count(":") >= 2

        if looks_like_ip and self.is_ipv4_address(data_string):
            # "Technically" a dotted quad could also be a phone number,
            # so shave a little off the confidence
            self.confidence -= 5
            # whitespace inside an ip address makes it far less plausible
            if strings_intersect(string.whitespace, data_string):
                self.confidence -= 50
            yield self.result("IP Address (v4)")
        elif looks_like_ip and self.is_ipv6_address(data_string):
            yield self.result("IP Address (v6)")

        # only the number of ascii letters matters for the url heuristic
        letter_total = sum(
            1 for char in data_string if char in string.ascii_letters
        )

        if periods <= 0 or letter_total < 4:
            return

        if self.is_valid_url(data_string):
            yield self.result("URL")
        elif '://' not in data_string:
            with_scheme = 'http://' + data_string
            if self.is_valid_url(with_scheme):
                # the data had to be modified to validate: confidence hit
                self.confidence -= 25
                yield self.result("URL", data=with_scheme)
Пример #2
0
    def parse(self, data_string):
        """
        looks for ip addresses (v4 / v6) and urls in the given string

        strings shorter than four characters are skipped; confidence
        penalties are applied to self.confidence before a result is yielded

        :param data_string: the string we want to parse
        :type data_string: str
        :return: yields parse result(s) if there are any
        :rtype: ParseResult
        """
        minimum_length = 4
        if len(data_string) < minimum_length:
            return

        num_dots = data_string.count(".")

        # ip candidates need at least two dots (v4) or two colons (v6)
        ip_candidate = num_dots >= 2 or data_string.count(":") >= 2
        if ip_candidate:
            if self.is_ipv4_address(data_string):
                # an ipv4 address could "Technically" be a phone number,
                # hence the small confidence reduction
                self.confidence -= 5
                # whitespace in the ip address lowers the confidence further
                has_whitespace = strings_intersect(
                    string.whitespace, data_string
                )
                if has_whitespace:
                    self.confidence -= 50
                yield self.result("IP Address (v4)")
            elif self.is_ipv6_address(data_string):
                yield self.result("IP Address (v6)")

        num_letters = len(
            [char for char in data_string if char in string.ascii_letters]
        )

        # url candidates need a dot and at least four ascii letters
        if num_dots > 0 and num_letters >= 4:
            if self.is_valid_url(data_string):
                yield self.result("URL")
            elif '://' not in data_string and self.is_valid_url(
                    'http://' + data_string):
                # confidence hit since we had to modify the data
                self.confidence -= 25
                yield self.result("URL", data='http://' + data_string)
Пример #3
0
    def test_strings_intersect(self):
        # "abc" and "def" have no character in common
        disjoint = strings_intersect("abc", "def")
        self.assertFalse(disjoint)

        # "abc" and "cde" share the character "c"
        overlapping = strings_intersect("abc", "cde")
        self.assertTrue(overlapping)