Exemplo n.º 1
0
    def test_finds_query(self):
        """Check that query parameters are exposed under ``result['query']``.

        The cases cover a bare host, a file, a nested path with a file and
        a trailing-slash directory before the ``?``. A parameter written
        without ``=value`` is expected to map to ``None``.
        """
        cases = (
            ('http://mysubdomain.example.com?myquery=test',
             (('myquery', 'test'),)),
            ('http://mysubdomain.example.com?myquery=test&one=two&test',
             (('myquery', 'test'), ('one', 'two'), ('test', None))),
            ('http://mysubdomain.example.com/file.js?myquery=test&one=two',
             (('myquery', 'test'), ('one', 'two'))),
            ('http://mysubdomain.example.com/path/and/file.js?myquery=test&one=two',
             (('myquery', 'test'), ('one', 'two'))),
            ('http://mysubdomain.example.com/path/?myquery=test&one=two',
             (('myquery', 'test'), ('one', 'two'))),
        )
        for address, expected_params in cases:
            parsed = url_parser.parse_url(address)
            for name, value in expected_params:
                if value is None:
                    # Value-less parameter: must be exactly None.
                    self.assertIsNone(parsed['query'][name])
                else:
                    self.assertEqual(parsed['query'][name], value)
Exemplo n.º 2
0
    def test_finds_top_domain(self):
        """Check ``result['top_domain']`` for a single- and a two-label suffix."""
        cases = (
            ('http://mysubdomain.example.com', 'com'),
            ('http://mysubdomain.example.co.uk', 'co.uk'),
        )
        for address, expected_suffix in cases:
            parsed = url_parser.parse_url(address)
            self.assertEqual(parsed['top_domain'], expected_suffix)
Exemplo n.º 3
0
    def test_finds_file(self):
        """Check that ``result['file']`` holds the last path segment's file name."""
        cases = (
            ('http://mysubdomain.example.com/cool.jpg', 'cool.jpg'),
            ('http://mysubdomain.example.com/directory/here/sample.mp4',
             'sample.mp4'),
        )
        for address, expected_file in cases:
            parsed = url_parser.parse_url(address)
            self.assertEqual(parsed['file'], expected_file)
Exemplo n.º 4
0
    def test_finds_dir(self):
        """Check that ``result['dir']`` is the directory part of the path.

        The expected value keeps its leading and trailing slash and leaves
        out a trailing file name.
        """
        cases = (
            ('http://mysubdomain.example.com/folder/', '/folder/'),
            ('http://mysubdomain.example.com/multiple/folders/',
             '/multiple/folders/'),
            ('http://mysubdomain.example.com/multiple/folders/with_a_file.js',
             '/multiple/folders/'),
        )
        for address, expected_dir in cases:
            parsed = url_parser.parse_url(address)
            self.assertEqual(parsed['dir'], expected_dir)
Exemplo n.º 5
0
    def test_finds_path(self):
        """Check that ``result['path']`` is the full path, file name included."""
        cases = (
            ('http://mysubdomain.example.com/path', '/path'),
            ('http://mysubdomain.example.com/this/is/the/path',
             '/this/is/the/path'),
            ('http://mysubdomain.example.com/path/with/file.js',
             '/path/with/file.js'),
        )
        for address, expected_path in cases:
            parsed = url_parser.parse_url(address)
            self.assertEqual(parsed['path'], expected_path)
Exemplo n.º 6
0
    def test_finds_protocol(self):
        """Check that ``result['protocol']`` is the scheme without ``://``."""
        for scheme in ('http', 'https', 'ftp'):
            parsed = url_parser.parse_url(scheme + '://mysubdomain.example.com')
            self.assertEqual(parsed['protocol'], scheme)
Exemplo n.º 7
0
 def test_domain_that_includes_a_top_domain_in_query(self):
     """Check host splitting when the query value itself looks like a host.

     The sub-domain contains ``.com`` and the query value is
     ``www.test.com``; neither is expected to be taken for the top domain.
     """
     parsed = url_parser.parse_url(
         'http://test.com.hello.nogo.no?my_query_domain=www.test.com')
     expected_host_parts = (
         ('top_domain', 'no'),
         ('domain', 'nogo'),
         ('sub_domain', 'test.com.hello'),
     )
     for part, expected in expected_host_parts:
         self.assertEqual(parsed[part], expected)
     self.assertEqual(parsed['query']['my_query_domain'], 'www.test.com')
Exemplo n.º 8
0
 def test_domain_that_starts_with_same_letters_as_top_domain(self):
     """Check host splitting when domain and top domain share a prefix.

     Here the domain ``nogo`` begins with the top domain ``no``.
     """
     parsed = url_parser.parse_url(
         'http://domains-stars-with-same-top-domain.nogo.no/')
     expected_host_parts = (
         ('top_domain', 'no'),
         ('domain', 'nogo'),
         ('sub_domain', 'domains-stars-with-same-top-domain'),
     )
     for part, expected in expected_host_parts:
         self.assertEqual(parsed[part], expected)
Exemplo n.º 9
0
 def test_catastrophic_backtracking(self):
     """Check that a long sub-domain mixing ``_`` and ``-`` is split correctly."""
     parsed = url_parser.parse_url(
         'http://very_long-and-complixated_subdomaind-for-page.mywebpageishere.com/')
     expected_host_parts = (
         ('top_domain', 'com'),
         ('domain', 'mywebpageishere'),
         ('sub_domain', 'very_long-and-complixated_subdomaind-for-page'),
     )
     for part, expected in expected_host_parts:
         self.assertEqual(parsed[part], expected)
Exemplo n.º 10
0
 def getVideoId(self, url):
     """Return the value of the ``v`` query parameter of *url*.

     The URL is parsed with ``url_parser.parse_url`` and the parsed dict is
     printed to stdout before the lookup.

     Raises:
         excepCust.Invalid_Url: if the ``v`` parameter cannot be read from
             the parsed URL.
     """
     url_data = url_parser.parse_url(url)
     print('url_data:', url_data)
     try:
         return url_data['query']['v']
     except (KeyError, TypeError) as err:
         # KeyError: no 'v' parameter (or no 'query' key at all).
         # TypeError: 'query' is not subscriptable -- presumably parse_url
         # yields None when the URL has no query string (TODO confirm).
         # Either way the URL is unusable, so report it uniformly and keep
         # the original error as the cause for debugging.
         raise excepCust.Invalid_Url() from err
Exemplo n.º 11
0
def getkey():
    """Extract the ``appIdKey`` value from the URL that ``ses.get`` lands on.

    Requests ``https://ss.apple.com`` through the module-level session
    ``ses``, parses the response's URL with ``url_parser.parse_url`` and
    returns the text between ``appIdKey=`` and the next ``&`` in the parsed
    ``path`` component. The session cookies are cleared before returning.

    Raises:
        IndexError: if the ``appIdKey=...&`` pattern is not found.
    """
    landing_url = ses.get("https://ss.apple.com").url
    path_text = str(url_parser.parse_url(landing_url)['path'])
    # First match only; an empty match list raises IndexError here.
    app_id_key = re.findall("appIdKey=(.*?)&", path_text)[0]
    ses.cookies.clear()
    return app_id_key
Exemplo n.º 12
0
    def test_finds_fragment(self):
        """Check that ``result['fragment']`` is the text after ``#``.

        The cases cover a bare host, a directory, a file, and fragments
        followed by a ``?...`` suffix, which is expected to be excluded.
        """
        addresses = (
            'http://mysubdomain.example.com#my_fragment',
            'http://mysubdomain.example.com/path/#my_fragment',
            'http://mysubdomain.example.com/path/file.js#my_fragment',
            'http://mysubdomain.example.com#my_fragment?myargs=test',
            'http://mysubdomain.example.com/test/path.js#my_fragment?myargs=test',
        )
        for address in addresses:
            parsed = url_parser.parse_url(address)
            self.assertEqual(parsed['fragment'], 'my_fragment')
Exemplo n.º 13
0
 def test_removes_extra_dot_from_www(self):
     """Check that ``result['www']`` contains no dot for ``www..example.com``."""
     parsed = url_parser.parse_url('http://www..example.com')
     self.assertNotIn('.', parsed['www'])
Exemplo n.º 14
0
from url_parser import parse_url, get_url, get_base_url

# One sample address shared by the three calls below.
sample_url = (
    "https://open.prospecta.app/my_user_login?user=url-parser&password=H3ll0"
)

# Returns the URL sections as a dict.
url = parse_url(sample_url)
# Does the same, but returns an object.
url_object = get_url(sample_url)
# Returns just the main URL.
basic_url = get_base_url(sample_url)

print(url["domain"])  # Outputs -> prospecta
print(url_object.domain)  # Outputs -> prospecta
print(basic_url)  # Outputs -> https://open.prospecta.app
Exemplo n.º 15
0
 def test_does_not_mistake_file_for_dir(self):
     """Check that an extension-less last segment is not counted in ``dir``."""
     directory = url_parser.parse_url(
         'http://mysubdomain.example.com/folder/test')['dir']
     self.assertEqual(directory, '/folder/')
     self.assertNotEqual(directory, '/folder/test')
Exemplo n.º 16
0
 def test_returns_null_if_protocol_is_missing(self):
     """Check that ``result['protocol']`` is ``None`` for a scheme-less URL."""
     parsed = url_parser.parse_url('www.example.com')
     self.assertIsNone(parsed['protocol'])
Exemplo n.º 17
0
 def test_domain_that_includes_a_top_domain_in_sub_domain(self):
     """Check host splitting when the sub-domain itself contains ``.com``."""
     parsed = url_parser.parse_url('http://test.com.hello.nogo.no/')
     expected_host_parts = (
         ('top_domain', 'no'),
         ('domain', 'nogo'),
         ('sub_domain', 'test.com.hello'),
     )
     for part, expected in expected_host_parts:
         self.assertEqual(parsed[part], expected)
Exemplo n.º 18
0
 def test_parses_url_without_www(self):
     """Check domain and top domain for a bare ``example.com`` input."""
     parsed = url_parser.parse_url('example.com')
     for part, expected in (('domain', 'example'), ('top_domain', 'com')):
         self.assertEqual(parsed[part], expected)
Exemplo n.º 19
0
# implement pip as a subprocess:
#subprocess.check_call([sys.executable, '-m', 'pip', 'install',
#'url_parser'])

#subprocess.check_call([sys.executable, '-m', 'pip', 'install',
#'selenium'])

from url_parser import parse_url
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

url = 'https://nomdeplume4reb.github.io/portfolio/'

# Use the url_parser library to split the given URL into its parts.
parsed = parse_url(url)


def convert_None(i):
    """Return *i* unchanged if truthy, otherwise an empty string."""
    return i or ''


# Create variables for the parts of the URL.
tld = parsed['top_domain']
domain = parsed['domain'] + '.' + tld

# url_parser does not provide a 'hostname' entry, so it is assembled by hand
# below. `sub` is '.' when a sub-domain is present and '' otherwise --
# presumably the separator placed after the sub-domain (TODO confirm against
# the hostname expression that follows).
sub = '' if parsed['sub_domain'] is None else '.'
hostname = convert_None(
Exemplo n.º 20
0
 def test_finds_multiple_subdomains(self):
     """Check that a two-label sub-domain is returned as one dotted string."""
     parsed = url_parser.parse_url('my.subdomain.example.com')
     self.assertEqual(parsed['sub_domain'], 'my.subdomain')
Exemplo n.º 21
0
 def test_returns_null_if_sub_domain_is_missing(self):
     """Check that ``result['sub_domain']`` is ``None`` when the host has none."""
     parsed = url_parser.parse_url('http://example.com')
     self.assertIsNone(parsed['sub_domain'])