Exemplo n.º 1
0
 def test_join_url(self):
     """Check that geturl() reflects edits made to a parsed URL.

     Covers adding a query parameter, replacing the fragment, replacing a
     repeated query parameter with a shorter list, assigning a path to a
     URL that had none, and rebuilding a bare scheme-and-host URL.
     """
     # Adding a query parameter keeps the existing one and the fragment.
     parsed = urlparse3.parse_url('http://yandex.ru/mail/?id=123#anchor')
     parsed.query['name'] = 'alex'
     expected = 'http://yandex.ru/mail/?id=123&name=alex#anchor'
     self.assertEqual(expected, parsed.geturl())

     # Replacing the fragment changes only the part after '#'.
     parsed.fragment = 'fragment'
     expected = 'http://yandex.ru/mail/?id=123&name=alex#fragment'
     self.assertEqual(expected, parsed.geturl())

     # A repeated parameter exposes all of its values.
     source = 'http://yandex.ru/path/?id=1&id=2&id=3&name=alex#anchor'
     parsed = urlparse3.parse_url(source)
     distinct_ids = set(parsed.query['id'])
     self.assertEqual(len(distinct_ids), 3, 'Missing parameters in query')
     for value in parsed.query['id']:
         self.assertIn(value, ['1', '2', '3'])

     # Assigning a shorter list drops the removed value from the URL.
     parsed.query['id'] = ['1', '2']
     expected = 'http://yandex.ru/path/?id=1&id=2&name=alex#anchor'
     self.assertEqual(expected, parsed.geturl())

     # A path assigned without slashes is expected to come back wrapped
     # in them.
     parsed = urlparse3.parse_url('http://yandex.ru')
     parsed.path = 'search'
     self.assertEqual('http://yandex.ru/search/', parsed.geturl())

     # A URL without a path is expected to gain a trailing slash.
     parsed = urlparse3.parse_url('http://yandex.ru')
     self.assertEqual('http://yandex.ru/', parsed.geturl())
Exemplo n.º 2
0
 def checkConnection(self):
     """Try to resolve the domain of ``self.url``.

     Parses ``self.url`` with urlparse3 and passes the parsed domain to
     ``socket.getaddrinfo``. Any exception raised along the way is printed
     and reported as ``False``. The code shown here has no explicit return
     on the success path.
     """
     try:
         parsedUrl = urlparse3.parse_url(self.url)
         # Check whether the domain name can be resolved.
         socket.getaddrinfo(parsedUrl.domain, None)
     except Exception as e:
         # "except ... as" and print(e) behave the same on Python 2.6+ and
         # are also valid on Python 3, unlike the comma/statement forms.
         print(e)
         return False
Exemplo n.º 3
0
 def test_parse_query_with_abs_path(self):
     """A percent-encoded absolute URL used as a query value is kept as-is.

     The encoded value must be readable from ``query['location']``
     unchanged, and geturl() must reproduce the original URL.
     """
     encoded_location = 'http%3A%2F%2Fwww.google.ru%2Fc%2F922%2Fsubdir-one-two%2F%3Fsitelink%3DtopmenuW%26l%3D8'
     template = 'https://domain.com/subdirectory/?location={0}'
     full_url = template.format(encoded_location)

     parsed = urlparse3.parse_url(full_url)

     self.assertEqual(parsed.query['location'], encoded_location)
     self.assertEqual(full_url, parsed.geturl())
Exemplo n.º 4
0
def urlparse(url):
    """Parse *url* with urlparse3 and print each component on its own line.

    Output order: domain, fragment, the rebuilt URL from geturl(),
    username, password, path, port, query, scheme.
    """
    weburl = urlparse3.parse_url(url)

    for attribute in ('domain', 'fragment'):
        print(getattr(weburl, attribute))

    # geturl is a method, so it is called rather than read like the
    # attributes around it.
    print(weburl.geturl())

    for attribute in ('username', 'password', 'path', 'port', 'query',
                      'scheme'):
        print(getattr(weburl, attribute))
Exemplo n.º 5
0
 def test_parse_url(self):
     """Test that a parsed URL exposes its components.

     Checks the individual components of a simple URL, the values of a
     repeated query parameter, and that a parameter added to a URL that
     had no query shows up in geturl().
     """
     parsed = urlparse3.parse_url('http://yandex.ru/mail/?id=123#anchor')
     expected_components = (
         ('scheme', 'http'),
         ('domain', 'yandex.ru'),
         ('path', '/mail/'),
         ('query', {'id': '123'}),
         ('fragment', 'anchor'),
     )
     for attribute, expected in expected_components:
         self.assertEqual(getattr(parsed, attribute), expected)

     # Every value of a repeated parameter must be one of those supplied.
     parsed = urlparse3.parse_url(
         'http://yandex.ru/mail/?id=123&id=321&id=43#anchor')
     self.assertIsNotNone(parsed.query.get('id'))
     for value in parsed.query['id']:
         self.assertIn(value, ['123', '321', '43'])

     # A parameter added to a query-less URL appears after '?'.
     parsed = urlparse3.parse_url('http://google.com/path/')
     parsed.query['cardNumber'] = '12345678910'
     self.assertEqual(parsed.geturl(),
                      'http://google.com/path/?cardNumber=12345678910')
Exemplo n.º 6
0
 def test_parse_http_auth_url(self):
     """Test parse url with username and password (http basic auth)"""
     # The credentials in the fixture must be the literal values asserted
     # below; a masked "*****:*****" user-info part can never satisfy the
     # username/password assertions.
     url = 'http://admin:password@domain.com/path/?id=123#anchor'
     parsed_url = urlparse3.parse_url(url)
     self.assertEqual(parsed_url.username, 'admin')
     self.assertEqual(parsed_url.password, 'password')
     self.assertEqual(parsed_url.domain, 'domain.com')
     self.assertEqual(parsed_url.path, '/path/')
     self.assertEqual(parsed_url.query, {'id': '123'})
     self.assertEqual(parsed_url.fragment, 'anchor')
     # Rebuilding the URL must keep the user-info part intact.
     self.assertEqual(parsed_url.geturl(), url)
Exemplo n.º 7
0
 def test_query_parameters_order(self):
     """
     Parameters appended to the query must show up in geturl()
     after the existing ones, in the order they were added.
     """
     url = 'http://domain.com/subdir/?url=http://google.com'
     parsed = urlparse3.parse_url(url)

     action_value, order_value = 'sort', 'price'
     for key, value in (('action', action_value), ('order', order_value)):
         parsed.query[key] = value

     rebuilt = parsed.geturl()
     expected = '{0}&action={1}&order={2}'.format(url, action_value,
                                                  order_value)
     self.assertEqual(expected, rebuilt)
Exemplo n.º 8
0
 def conn_destnation(self):
     """Connect the destination socket and forward the stored request.

     Reads the host part from the parsed ``self.headers['path']``,
     resolves it and prints the resulting ip/port pair, then connects
     ``self.destnation`` and sends the request line followed by
     ``self.request``. The sent payload is printed as well.
     """
     parsed = urlparse3.parse_url(self.headers['path'])
     # Index 1 is used as the host part (presumably mirroring the tuple
     # layout of urlparse.urlparse — verify against urlparse3).
     hostname = parsed[1]

     # Split off an explicit port when present; otherwise use "80".
     if hostname.find(':') > 0:
         addr, port_text = hostname.split(':')
     else:
         addr, port_text = hostname, "80"
     port = int(port_text)

     ip = socket.gethostbyname(addr)
     print(ip, port)

     # NOTE(review): the resolved ip/port are only printed; the connection
     # itself always goes to the fixed local address below.
     self.destnation.connect(('127.0.0.1', 8080))

     request_line = "%s %s %s\r\n" % (self.headers['method'],
                                      self.headers['path'],
                                      self.headers['protocol'])
     payload = request_line + self.request
     self.destnation.send(payload)
     print(payload)
Exemplo n.º 9
0
 def test_rearrage_parameters_order(self):
     """
     A parameter that is popped and re-added after new ones must
     appear last in the rebuilt URL.
     """
     base_url = 'http://domain.com/subdir/'
     parsed = urlparse3.parse_url('{0}?url=http://google.com'.format(base_url))

     # Take 'url' out, add the new parameters, then put 'url' back last.
     popped_url = parsed.query.pop('url')
     action_value, order_value = 'sort', 'price'
     parsed.query['action'] = action_value
     parsed.query['order'] = order_value
     parsed.query['url'] = popped_url

     rebuilt = parsed.geturl()
     expected = '{0}?action={1}&order={2}&url={3}'.format(
         base_url, action_value, order_value, popped_url)
     self.assertEqual(expected, rebuilt)
Exemplo n.º 10
0
def url_to_filename(url):
    """Return the last component of the path of *url*.

    The path comes from urlparse3 and its final component is taken with
    ``posixpath.basename``.
    """
    return posixpath.basename(urlparse3.parse_url(url).path)
Exemplo n.º 11
0
    # Accumulators filled by the scraping loops below.
    ids = []
    links = []
    names = []
    episodes = []
    episodes_numbers = []

    realises = []

    # Collect the href of every details link and the id derived from it.
    for el in html(".content_body .a_details"):
        href = pq(el).attr.href
        links.append(href)
        # NOTE(review): str.lstrip removes any leading characters contained
        # in the given set, not a literal prefix; an id starting with one of
        # those characters would lose them — confirm ids are always numeric.
        ids.append(href.lstrip('/details.php?id='))

    # Read season/episode numbers from the query of each discussion link.
    for el in html(".content_body .a_discuss"):
        parsed = urlparse3.parse_url(baseUrl + pq(el).attr.href)

        em = EpisodeNumber()
        # int(float(...)) also accepts values written like "1.0".
        em.season = int(float(parsed.query['s']))
        em.episode = int(float(parsed.query['e']))

        episodes_numbers.append(em)

    # The title attribute of each category icon is stored as a name.
    for el in html(".content_body .category_icon"):
        names.append(pq(el).attr.title)

    # The text of the <b> element inside each torrent title is stored as
    # an episode entry.
    for el in html(".content_body .torrent_title"):
        episodes.append(pq(el)('b').text())

    # One Realize object is created per collected id.
    for i in range(0, len(ids)):
        r = Realize()
Exemplo n.º 12
0
import urlparse3
from timeit import default_timer as timer

# Benchmark: total time spent parsing every URL listed in urls.txt and
# reading the parsed components.
total = 0

with open('urls.txt') as f:
    for line in f:
        # Iterating a file yields each line with its trailing newline;
        # strip it so the parser is timed on the URL itself. This clean-up
        # stays outside the timed region.
        url = line.strip()
        if not url:
            # Blank lines carry no URL to parse.
            continue

        start = timer()

        a = urlparse3.parse_url(url)
        # Component reads are part of the measured work.
        host = a.domain
        scheme = a.scheme
        path = a.path
        query = a.query
        fragment = a.fragment

        end = timer()

        total += end - start

print("the total time is", total, "seconds")
Exemplo n.º 13
0
def get_url_query(url):
    """Return the query of *url*, as parsed by urlparse3, as a new dict."""
    return dict(urlparse3.parse_url(url).query)
Exemplo n.º 14
0
 def test_parse_semicolo_url(self):
     """Query parameters separated by ';' must be parsed as separate keys."""
     parsed = urlparse3.parse_url('http://google.com/?name=alex;id=321')
     for key, expected in (('name', 'alex'), ('id', '321')):
         self.assertEqual(parsed.query[key], expected)
Exemplo n.º 15
0
 def def_parsed_3d_level_domain(self):
     """Test parsing a URL whose host is a third-level domain.

     NOTE(review): the method name starts with ``def_`` rather than
     ``test``; if this belongs to a unittest.TestCase, default test
     discovery will not run it — confirm whether that is intended.
     """
     parsed = urlparse3.parse_url('http://domain.com.ru/?id=123')
     expected_components = (('domain', 'domain.com.ru'),
                            ('query', {'id': '123'}))
     for attribute, expected in expected_components:
         self.assertEqual(getattr(parsed, attribute), expected)
Exemplo n.º 16
0
 def test_url_with_absolute_url_in_query_param(self):
     """An absolute URL used as a query value must survive a round trip."""
     original = 'http://yandex.ru/sub/?dir=http://google.com/'
     rebuilt = urlparse3.parse_url(original).geturl()
     self.assertEqual(original, rebuilt)
Exemplo n.º 17
0
 def test_parse_url_with_dash(self):
     """A host containing dashes, with a port, must survive a round trip."""
     original = 'http://local-domain.sub-domain.ru:8000/news/1/'
     rebuilt = urlparse3.parse_url(original).geturl()
     self.assertEqual(original, rebuilt)
Exemplo n.º 18
0
 def test_parse_url_with_port(self):
     """A URL with an explicit port must survive a parse/geturl round trip."""
     original = 'http://localhost:8000/news/1/'
     rebuilt = urlparse3.parse_url(original).geturl()
     self.assertEqual(original, rebuilt)