def test_uri_part_tokenizer(): text = "http://a.b/foo/bar?c=d#stuff" pattern = ANALYSIS_SETTINGS["tokenizer"]["uri_part"]["pattern"] assert re.split(pattern, text) == [ "http", "", "", "a", "b", "foo", "bar", "c", "d", "stuff", ] text = url_quote_plus(text) assert re.split(pattern, "http://jump.to/?u=" + text) == [ "http", "", "", "jump", "to", "", "u", "http", "", "", "a", "b", "foo", "bar", "c", "d", "stuff", ]
def _normalize_queryvalue(value):
    return url_quote_plus(url_unquote_plus(value), safe=UNRESERVED_QUERY_VALUE)
def _normalize_queryname(name):
    return url_quote_plus(url_unquote_plus(name), safe=UNRESERVED_QUERY_NAME)
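# A minimal usage sketch (hypothetical test, not part of the original module):
# the two helpers above canonicalize query names/values by fully decoding and
# then re-encoding them, so strings that differ only in how they were
# percent-encoded compare equal afterwards. It assumes the space character is
# not in UNRESERVED_QUERY_VALUE, so both "%20" and "+" decode to a space and
# are re-encoded identically.
def test_normalize_queryvalue_collapses_equivalent_encodings():
    assert _normalize_queryvalue("a%20b") == _normalize_queryvalue("a+b")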