def parse(self, data_string):
    """
    Scan ``data_string`` for an IP address and/or a URL.

    Strings shorter than four characters produce nothing. Each confidence
    deduction is applied to ``self.confidence`` in place before the
    corresponding result is built, and is not restored by this method.

    :param data_string: the string we want to parse
    :type data_string: str
    :return: yields one ``self.result(...)`` value per match, if any
    """
    if len(data_string) < 4:
        return

    dot_count = data_string.count(".")
    # Only strings with several dots or colons are worth testing as addresses.
    address_like = dot_count >= 2 or data_string.count(":") >= 2

    if address_like and self.is_ipv4_address(data_string):
        # small deduction: "technically" an ipv4 address could also be
        # a phone number
        self.confidence -= 5
        # whitespace inside the address costs a lot more confidence
        if strings_intersect(string.whitespace, data_string):
            self.confidence -= 50
        yield self.result("IP Address (v4)")
    elif address_like and self.is_ipv6_address(data_string):
        yield self.result("IP Address (v6)")

    # A URL candidate needs at least one dot and four ASCII letters.
    letter_total = sum(ch in string.ascii_letters for ch in data_string)
    if dot_count > 0 and letter_total >= 4:
        if self.is_valid_url(data_string):
            yield self.result("URL")
        elif '://' not in data_string:
            # No scheme present: retry with a default http scheme.
            prefixed = 'http://' + data_string
            if self.is_valid_url(prefixed):
                # confidence hit since the data had to be modified
                self.confidence -= 25
                yield self.result("URL", data=prefixed)
def parse(self, data_string):
    """
    parses for uris: IPv4/IPv6 addresses and URLs

    Inputs shorter than four characters are skipped. Confidence penalties
    are subtracted from ``self.confidence`` in place and are not undone
    by this method.

    :param data_string: the string we want to parse
    :type data_string: str
    :return: yields parse result(s) if there are any
    :rtype: ParseResult
    """
    if len(data_string) < 4:
        return

    dots = data_string.count(".")

    # --- IP address stage: needs at least two dots or two colons ---
    if dots >= 2 or data_string.count(":") >= 2:
        if self.is_ipv4_address(data_string):
            # "Technically" an ipv4 address could be a phone number,
            # so take a little confidence away.
            self.confidence -= 5
            # Whitespace inside the address lowers confidence further.
            if strings_intersect(string.whitespace, data_string):
                self.confidence -= 50
            yield self.result("IP Address (v4)")
        elif self.is_ipv6_address(data_string):
            yield self.result("IP Address (v6)")

    # --- URL stage: needs a dot and at least four ASCII letters ---
    alpha_count = sum(1 for ch in data_string if ch in string.ascii_letters)
    if dots <= 0 or alpha_count < 4:
        return

    if self.is_valid_url(data_string):
        yield self.result("URL")
        return

    if '://' in data_string:
        # A scheme is already present, so prepending one is not attempted.
        return

    with_scheme = 'http://' + data_string
    if self.is_valid_url(with_scheme):
        # confidence hit since we had to modify the data
        self.confidence -= 25
        yield self.result("URL", data=with_scheme)
def test_strings_intersect(self):
    """Check strings_intersect on a disjoint pair and an overlapping pair."""
    # "abc" and "def" have no character in common.
    disjoint_outcome = strings_intersect("abc", "def")
    self.assertFalse(disjoint_outcome)

    # "abc" and "cde" both contain "c".
    overlapping_outcome = strings_intersect("abc", "cde")
    self.assertTrue(overlapping_outcome)