class TestCheckURL(TestCase):
    """Property-based checks for ``config._check_url``."""

    # A complete URL must be handed back unchanged.
    @given(urls())
    @example("http://example.com/some/path?x=2")
    @example("http://example.com/some/path")
    @example("http://example.com/")
    @example("http://example.com")
    def test_full_url_is_ok(self, url):
        checked = config._check_url(url)
        self.assertEqual(checked, url)

    # Bare domains, None and a scheme-less "://" prefix must be rejected.
    @given(domains())
    @example(None)
    @example("example.com")
    @example("://example.com")
    def test_raises_for_missing_or_wrong_scheme(self, url):
        with self.assertRaises(ValueError):
            config._check_url(url)

    # URLs drawn from _urls_with_out_of_bounds_port() must be rejected.
    @given(_urls_with_out_of_bounds_port())
    def test_raises_for_out_of_bounds_port_number(self, url):
        with self.assertRaises(ValueError):
            config._check_url(url)

    # Anything that is not an int, used as the port, must be rejected.
    @given(domain=domains(), port=_everything_except(int))
    def test_raises_for_bogus_port_number(self, domain, port):
        # urllib bug: https://bugs.python.org/issue36338
        assume(str(port) not in ("[]", ""))
        candidate = "http://%s:%s" % (domain, port)
        with self.assertRaises(ValueError):
            config._check_url(candidate)
def repo_url_strategy():
    """Return a strategy that produces VCS repository URL strings.

    Each example is the concatenation of seven drawn pieces: scheme, auth,
    lowercased domain, either a port or a ``|`` placeholder, path, ref and
    ``#egg=`` fragment.  The placeholder is finally replaced by ``:`` when the
    URL contains ``git+git@`` and by ``/`` otherwise.
    """

    def url_encode(s):
        # Percent-encode every character that is not in URL_SAFE_CHARACTERS.
        encoded = []
        for char in s:
            if char in URL_SAFE_CHARACTERS:
                encoded.append(char)
            else:
                encoded.append("%%%02X" % ord(char))
        return "".join(encoded)

    def blank_if_only(prefix):
        # When the drawn text was empty only the prefix is left; drop it.
        def collapse(text):
            return "" if text == prefix else text

        return collapse

    def strip_empty_password(text):
        # "user:@" (empty password) becomes "user@".
        if text.endswith(":@"):
            return text.replace(":@", "@")
        return text

    def lowercase(text):
        return text.lower()

    def settle_separator(url):
        separator = ":" if "git+git@" in url else "/"
        return url.replace("|", separator)

    ref_alphabet = "abcdefghijklmnopqrstuvwxyz0123456789_-"

    paths = st.lists(st.text(string.printable).map(url_encode)).map("/".join)
    ports = st.integers(min_value=0, max_value=65535).map(":{}".format)
    fragments = (
        st.text(alphabet=ref_alphabet + ".")
        .map(url_encode)
        .map("#egg={}".format)
        .map(blank_if_only("#egg="))
    )
    refs = (
        st.text(alphabet=ref_alphabet)
        .map(url_encode)
        .map("@{}".format)
        .map(blank_if_only("@"))
    )
    # Only schemes of the "<vcs>+<transport>" form are used.
    scheme = (
        st.sampled_from(vcs_schemes)
        .filter(lambda candidate: "+" in candidate)
        .map("{}://".format)
        .map(lambda prefix: prefix.replace("file://", "file:///"))
    )
    auth = (
        auth_strings()
        .map("{}@".format)
        .map(blank_if_only("@"))
        .map(strip_empty_password)
    )
    domain = domains().map(lowercase)

    assembled = st.builds(
        "{}{}{}{}{}{}{}".format,
        scheme,
        auth,
        domain,
        st.one_of(st.just("|"), ports),
        paths,
        refs,
        fragments,
    )
    return assembled.map(settle_separator)
def urls():
    """
    Build a strategy that generates ``URI`` instances.

    Every example comes from calling ``URI`` with a scheme sampled from
    ``uri_schemes``, a host from ``domains()``, a port in 1..65535, a
    percent-encoded path, a two-part query joined by ``=``, lowercase
    alphanumeric ``ref`` and ``subdirectory`` values, and up to ten extras.
    """

    def url_encode(s):
        # Percent-encode every character that is not in URL_SAFE_CHARACTERS.
        encoded = []
        for char in s:
            if char in URL_SAFE_CHARACTERS:
                encoded.append(char)
            else:
                encoded.append("%%%02X" % ord(char))
        return "".join(encoded)

    lower_alnum = "abcdefghijklmnopqrstuvwxyz0123456789"

    path_segments = st.lists(st.text(string.printable).map(url_encode))
    # Query halves exclude "/", "?", "#" and surrogate code points.
    query_half = st.text(
        max_size=10,
        alphabet=st.characters(
            blacklist_characters="/?#",
            blacklist_categories=("Cs", ),
        ),
    )
    query_halves = st.lists(query_half, min_size=2, max_size=2)
    extra_name = st.text(max_size=20, alphabet=lower_alnum + "_-.")

    return st.builds(
        URI,
        scheme=st.sampled_from(uri_schemes),
        host=domains(),
        port=st.integers(min_value=1, max_value=65535),
        path=path_segments.map("/".join),
        query=query_halves.map("=".join).map(vistir.misc.to_text),
        ref=st.text(max_size=64, alphabet=lower_alnum),
        subdirectory=st.text(max_size=64, alphabet=lower_alnum),
        extras=st.lists(extra_name, min_size=0, max_size=10),
    )
def vcs_requirements():
    """Return a strategy that builds ``parsed_url`` values for VCS links.

    The scheme is sampled from ``vcs_schemes``, the netloc comes from
    ``domains()``, the path is a "/"-joined list of percent-encoded segments
    and the fragment comes from ``valid_names()``.
    """

    def url_encode(s):
        # Percent-encode every character that is not in URL_SAFE_CHARACTERS.
        encoded = []
        for char in s:
            if char in URL_SAFE_CHARACTERS:
                encoded.append(char)
            else:
                encoded.append("%%%02X" % ord(char))
        return "".join(encoded)

    segments = st.lists(st.text(string.printable).map(url_encode))
    return st.builds(
        parsed_url,
        scheme=st.sampled_from(vcs_schemes),
        netloc=domains(),
        path=segments.map("/".join),
        fragment=valid_names(),
    )
def string_schema(schema: dict) -> st.SearchStrategy[str]:
    """Return a strategy for strings matching a JSON-schema string schema.

    Reads the ``minLength``, ``maxLength``, ``format`` and ``pattern`` keys.
    A ``format`` wins over a ``pattern``: when both are given, the pattern is
    only applied as a filter on top of the format strategy.  A pattern that
    Python's ``re`` cannot compile yields ``st.nothing()``.

    Raises:
        InvalidArgument: if ``schema["format"]`` is not a known format.
    """
    # also https://json-schema.org/latest/json-schema-validation.html#rfc.section.7
    min_size = schema.get("minLength", 0)
    max_size = schema.get("maxLength", float("inf"))

    def within_bounds(s):
        # TODO: mypy should be able to tell that this returns a bool without
        # the explicit cast, but can't as of v 0.720 - report upstream.
        return bool(min_size <= len(s) <= max_size)

    def matches_pattern(s):
        return re.search(schema["pattern"], string=s) is not None

    strategy = st.text(min_size=min_size, max_size=schema.get("maxLength"))
    if "format" in schema:
        requested = schema["format"]
        hostnames = prov.domains()  # type: ignore
        uri_like = ("uri", "uri-reference", "iri", "iri-reference",
                    "uri-template")
        formats = {
            # A value of None indicates a known but unsupported format.
            **{name: rfc3339(name) for name in RFC3339_FORMATS},
            "date": rfc3339("full-date"),
            "time": rfc3339("full-time"),
            # Hypothesis' provisional strategies are not type-annotated.
            # We should get a principled plan for them at some point I guess...
            "email": st.emails(),  # type: ignore
            "idn-email": st.emails(),  # type: ignore
            "hostname": hostnames,
            "idn-hostname": hostnames,
            "ipv4": prov.ip4_addr_strings(),  # type: ignore
            "ipv6": prov.ip6_addr_strings(),  # type: ignore
            **{name: hostnames.map("https://{}".format) for name in uri_like},
            "json-pointer": st.just(""),
            "relative-json-pointer": st.just(""),
            "regex": REGEX_PATTERNS,
        }
        if requested not in formats:
            raise InvalidArgument(f"Unsupported string format={requested}")
        strategy = formats[requested]
        if "pattern" in schema:  # pragma: no cover
            # This isn't really supported, but we'll do our best.
            strategy = strategy.filter(matches_pattern)
    elif "pattern" in schema:
        try:
            re.compile(schema["pattern"])
            strategy = st.from_regex(schema["pattern"])
        except re.error:
            # Patterns that are invalid in Python, or just malformed
            return st.nothing()
    return strategy.filter(within_bounds)
def urls():
    """Return a strategy that builds http/https ``URL`` objects.

    The keyword arguments for ``URL.build`` are drawn as a fixed dictionary:
    scheme, host, optional port (None or 1..65535), a path that always begins
    with a slash, and free-text query string and fragment.
    """
    highest_port = 2**16 - 1
    parts = st.fixed_dictionaries({
        'scheme': st.sampled_from(['http', 'https']),
        'host': domains(),
        'port': st.none() | st.integers(min_value=1, max_value=highest_port),
        # Path must start with a slash
        'path': st.text().map(lambda tail: '/' + tail),
        'query_string': st.text(),
        'fragment': st.text(),
    })
    return parts.map(lambda kwargs: URL.build(**kwargs))
def string_schema(schema: dict) -> st.SearchStrategy[str]:
    """Return a strategy for strings matching a JSON-schema string schema.

    Reads the ``minLength``, ``maxLength``, ``format`` and ``pattern`` keys.
    A ``format`` wins over a ``pattern``: when both are given, the pattern is
    only applied as a filter on top of the format strategy.  A pattern that
    Python's ``re`` cannot compile yields an empty strategy.

    Raises:
        InvalidArgument: if ``schema["format"]`` is not a known format.
    """
    # also https://json-schema.org/latest/json-schema-validation.html#rfc.section.7
    min_size = schema.get("minLength", 0)
    max_size = schema.get("maxLength", float("inf"))
    # The local holds a strategy, not a string, so it is annotated as such.
    strategy: st.SearchStrategy[str] = st.text(
        min_size=min_size, max_size=schema.get("maxLength"))
    if "format" in schema:
        url_synonyms = [
            "uri", "uri-reference", "iri", "iri-reference", "uri-template"
        ]
        domains = prov.domains()
        formats = {
            # A value of None indicates a known but unsupported format.
            **{name: rfc3339(name) for name in RFC3339_FORMATS},
            "date": rfc3339("full-date"),
            "time": rfc3339("full-time"),
            "email": st.emails(),
            "idn-email": st.emails(),
            "hostname": domains,
            "idn-hostname": domains,
            "ipv4": prov.ip4_addr_strings(),
            "ipv6": prov.ip6_addr_strings(),
            **{
                name: domains.map("https://{}".format)
                for name in url_synonyms
            },
            "json-pointer": st.just(""),
            "relative-json-pointer": st.just(""),
            "regex": REGEX_PATTERNS,
        }
        # Keep the Optional lookup result apart from the typed local so the
        # annotation above stays truthful.
        chosen = formats.get(schema["format"])
        if chosen is None:
            raise InvalidArgument(
                f"Unsupported string format={schema['format']}")
        strategy = chosen
        if "pattern" in schema:  # pragma: no cover
            # This isn't really supported, but we'll do our best.
            strategy = strategy.filter(
                lambda s: re.search(schema["pattern"], string=s) is not None)
    elif "pattern" in schema:
        try:
            re.compile(schema["pattern"])
            strategy = st.from_regex(schema["pattern"])
        except re.error:
            # Patterns that are invalid in Python, or just malformed
            strategy = st.nothing()
    return strategy.filter(lambda s: min_size <= len(s) <= max_size)
def string_schema(schema: dict) -> st.SearchStrategy[str]:
    """Return a strategy for strings matching a JSON-schema string schema.

    Reads the ``minLength``, ``maxLength``, ``format`` and ``pattern`` keys.
    ``format`` and ``pattern`` are each supported, but a schema carrying both
    fails the assertion below.

    Raises:
        InvalidArgument: if ``schema["format"]`` is not a known format.
    """
    # also https://json-schema.org/latest/json-schema-validation.html#rfc.section.7
    min_size = schema.get("minLength", 0)
    max_size = schema.get("maxLength", float("inf"))

    def within_bounds(s):
        return min_size <= len(s) <= max_size

    strategy: Any = st.text(min_size=min_size, max_size=schema.get("maxLength"))
    assert (
        "format" not in schema or "pattern" not in schema
    ), "format and regex constraints are supported, but not both at once."
    if "pattern" in schema:
        strategy = st.from_regex(schema["pattern"])
    elif "format" in schema:
        hostnames = prov.domains()  # type: ignore
        uri_like = ("uri", "uri-reference", "iri", "iri-reference",
                    "uri-template")
        formats = {
            # A value of None indicates a known but unsupported format.
            **{name: rfc3339(name) for name in RFC3339_FORMATS},
            "date": rfc3339("full-date"),
            "time": rfc3339("full-time"),
            "email": st.emails(),  # type: ignore
            "idn-email": st.emails(),  # type: ignore
            "hostname": hostnames,
            "idn-hostname": hostnames,
            "ipv4": prov.ip4_addr_strings(),  # type: ignore
            "ipv6": prov.ip6_addr_strings(),  # type: ignore
            **{name: hostnames.map("https://{}".format) for name in uri_like},
            "json-pointer": st.just(""),
            "relative-json-pointer": st.just(""),
            "regex": REGEX_PATTERNS,
        }
        strategy = formats.get(schema["format"])
        if strategy is None:
            raise InvalidArgument(
                f"Unsupported string format={schema['format']}")
    return strategy.filter(within_bounds)  # type: ignore
def url():
    """Return a strategy that builds http/https ``URL`` objects.

    The keyword arguments for ``URL.build`` are drawn as a fixed dictionary:
    scheme, host, optional port (None or 10..65535), a path that always begins
    with a slash, and free-text query string and fragment.
    """
    highest_port = 2**16 - 1
    parts = st.fixed_dictionaries({
        "scheme": st.sampled_from(["http", "https"]),
        "host": domains(),
        "port": st.none() | st.integers(min_value=10, max_value=highest_port),
        # Path must start with a slash
        "path": st.text().map(lambda tail: "/" + tail),
        "query_string": st.text(),
        "fragment": st.text(),
    })
    return parts.map(lambda kwargs: URL.build(**kwargs))
def auth_url_strategy():
    """Return a strategy that builds ``AuthUrl`` values.

    Schemes are every entry of ``uri_schemes`` except ``file`` with ``://``
    appended, plus ``file:///``.  Auth strings equal to ``":"`` are discarded
    and non-empty ones get a trailing ``@``; domains are non-empty and
    lowercased; the port is in 0..65535; path segments are percent-encoded and
    never ``""``, ``"."`` or ``".."``.
    """

    # taken from the hypothesis provisional url generation strategy
    def url_encode(s):
        encoded = []
        for char in s:
            if char in URL_SAFE_CHARACTERS:
                encoded.append(char)
            else:
                encoded.append("%%%02X" % ord(char))
        return "".join(encoded)

    def with_at_sign(credentials):
        if not credentials:
            return ""
        return "{0}@".format(credentials)

    def is_real_segment(segment):
        return segment not in ["", ".", ".."]

    schemes = []
    for scheme in uri_schemes:
        if scheme != "file":
            schemes.append("{0}://".format(scheme))
    schemes.append("file:///")

    segment = st.text(string.printable).map(url_encode).filter(is_real_segment)
    return st.builds(
        AuthUrl,
        scheme=st.sampled_from(schemes),
        auth=auth_strings().filter(lambda text: text != ":").map(with_at_sign),
        domain=domains().filter(lambda name: name != "").map(
            lambda name: name.lower()),
        port=st.integers(min_value=0, max_value=65535),
        path=st.lists(segment).map("/".join),
    )
@pytest.mark.parametrize("max_length", [-1, 0, 3, 4.0, 256])
@pytest.mark.parametrize("max_element_length", [-1, 0, 4.0, 64, 128])
def test_invalid_domain_arguments(max_length, max_element_length):
    """Every combination of these bounds must raise InvalidArgument."""
    limits = {
        "max_length": max_length,
        "max_element_length": max_element_length,
    }
    with pytest.raises(InvalidArgument):
        domains(**limits).example()


@pytest.mark.parametrize("max_length", [None, 4, 8, 255])
@pytest.mark.parametrize("max_element_length", [None, 1, 2, 4, 8, 63])
def test_valid_domains_arguments(max_length, max_element_length):
    """Every combination of these bounds must yield an example."""
    limits = {
        "max_length": max_length,
        "max_element_length": max_element_length,
    }
    domains(**limits).example()


@pytest.mark.parametrize("strategy", [domains(), urls()])
def test_find_any_non_empty(strategy):
    """Each strategy can produce at least one non-empty value."""

    def is_non_empty(value):
        return len(value) > 0

    find_any(strategy, is_non_empty)


@given(_url_fragments_strategy)
# There's a lambda in the implementation that only gets run if we generate at
# least one percent-escape sequence, so we derandomize to ensure that coverage
# isn't flaky.
@settings(derandomize=True)
def test_url_fragments_contain_legal_chars(fragment):
    """Fragments start with "#"; legal escapes are stripped from the rest."""
    assert fragment.startswith("#")

    # Strip all legal escape sequences. Any remaining % characters were not
    # part of a legal escape sequence.
    # NOTE(review): no assertion on the stripped text is visible here.
    after_hash = fragment[1:]
    without_escapes = re.sub(r"(?ai)%[0-9a-f][0-9a-f]", "", after_hash)
@given(ip6_addr_strings())
def test_is_IP6_addr(address):
    """Addresses are upper-case, eight 4-character groups, 16 bytes of hex."""
    # Works for non-normalised addresses produced by this strategy, but not
    # a particularly general test
    assert address == address.upper()
    groups = address.split(":")
    assert len(groups) == 8
    assert all(len(group) == 4 for group in groups)
    hex_digits = address.replace(u":", u"")
    decoded = unhexlify(hex_digits.encode("ascii"))
    assert len(decoded) == 16


@pytest.mark.parametrize("max_length", [-1, 0, 3, 4.0, 256])
@pytest.mark.parametrize("max_element_length", [-1, 0, 4.0, 64, 128])
def test_invalid_domain_arguments(max_length, max_element_length):
    """Every combination of these bounds must raise InvalidArgument."""
    limits = {
        "max_length": max_length,
        "max_element_length": max_element_length,
    }
    with pytest.raises(InvalidArgument):
        domains(**limits).example()


@pytest.mark.parametrize("max_length", [None, 4, 8, 255])
@pytest.mark.parametrize("max_element_length", [None, 1, 2, 4, 8, 63])
def test_valid_domains_arguments(max_length, max_element_length):
    """Every combination of these bounds must yield an example."""
    limits = {
        "max_length": max_length,
        "max_element_length": max_element_length,
    }
    domains(**limits).example()


@pytest.mark.parametrize(
    "strategy", [domains(), ip4_addr_strings(), ip6_addr_strings(), urls()]
)
def test_find_any_non_empty(strategy):
    """Each strategy can produce at least one non-empty value."""

    def is_non_empty(value):
        return len(value) > 0

    find_any(strategy, is_non_empty)
url = url.lower() iocs = find_iocs(url) failure = False try: assert len(iocs['urls']) == 1 assert iocs['urls'][0] == url except AssertionError as e: failure = True print('Failed on url: {}'.format(url)) if failure: raise AssertionError('Error parsing urls') @given(domains()) @settings(deadline=None) def test_domain_parsing(domain): domain = domain.lower() iocs = find_iocs(domain) failure = False try: assert len(iocs['domains']) == 1 assert iocs['domains'][0] == domain except AssertionError as e: failure = True print('Failed on domain: {}'.format(domain)) if failure: raise AssertionError('Error parsing domains')
def collector_fqdns(draw):
    """Draw a domain and append ``.routeviews.org`` or ``.ripe.net`` to it.

    ``draw`` is called first for the domain and then for the suffix.
    NOTE(review): presumably wrapped by a composite-strategy decorator that is
    not visible here - confirm.
    """
    host = draw(domains())
    suffix = draw(sampled_from([".routeviews.org", ".ripe.net"]))
    return host + suffix
re.compile(result) except re.error: assume(False) return result REGEX_PATTERNS = regex_patterns() STRING_FORMATS = { **{name: rfc3339(name) for name in RFC3339_FORMATS}, "date": rfc3339("full-date"), "time": rfc3339("full-time"), "email": st.emails(), "idn-email": st.emails(), "hostname": prov.domains(), "idn-hostname": prov.domains(), "ipv4": st.ip_addresses(v=4).map(str), "ipv6": st.ip_addresses(v=6).map(str), **{ name: prov.domains().map("https://{}".format) for name in [ "uri", "uri-reference", "iri", "iri-reference", "uri-template" ] }, "json-pointer": st.just(""), "relative-json-pointer": st.just(""), "regex": REGEX_PATTERNS, }
def _urls_with_bogus_port(draw):
    """Build an ``http://domain:port`` string whose port is not an int.

    The domain is drawn before the port.
    """
    # Tuple elements are evaluated left to right, so the draw order is kept.
    pieces = (draw(domains()), draw(_everything_except(int)))
    return "http://%s:%s" % pieces
def test_invalid_domain_arguments(max_length, max_element_length):
    """Drawing an example with these bounds must raise InvalidArgument.

    NOTE(review): the arguments are presumably supplied by parametrization
    that is not visible here - confirm.
    """
    limits = {
        "max_length": max_length,
        "max_element_length": max_element_length,
    }
    with pytest.raises(InvalidArgument):
        domains(**limits).example()
REGEX_PATTERNS.map("{}+".format), REGEX_PATTERNS.map("{}?".format), REGEX_PATTERNS.map("{}*".format), ) result = draw(st.lists(fragments, min_size=1, max_size=3).map("".join)) try: re.compile(result) except re.error: assume(False) # The @composite decorator *is* well-typed, but expressing this is painful :/ return result # type: ignore REGEX_PATTERNS = regex_patterns() _domains = prov.domains() # type: ignore STRING_FORMATS = { # A value of None indicates a known but unsupported format. **{name: rfc3339(name) for name in RFC3339_FORMATS}, "date": rfc3339("full-date"), "time": rfc3339("full-time"), # Hypothesis' provisional strategies are not type-annotated. # We should get a principled plan for them at some point I guess... "email": st.emails(), # type: ignore "idn-email": st.emails(), # type: ignore "hostname": _domains, "idn-hostname": _domains, "ipv4": prov.ip4_addr_strings(), # type: ignore "ipv6": prov.ip6_addr_strings(), # type: ignore **{
def test_valid_domains_arguments(max_length, max_element_length):
    """Drawing an example with these bounds must succeed.

    NOTE(review): the arguments are presumably supplied by parametrization
    that is not visible here - confirm.
    """
    limits = {
        "max_length": max_length,
        "max_element_length": max_element_length,
    }
    domains(**limits).example()
def _urls_with_out_of_bounds_port(draw):
    """Build an ``http://domain:port`` string with a port from PORTS_INVALID.

    The domain is drawn before the port; the port is formatted with ``%d``.
    """
    # Tuple elements are evaluated left to right, so the draw order is kept.
    pieces = (draw(domains()), draw(PORTS_INVALID))
    return "http://%s:%d" % pieces