示例#1
0
 def get(self, url, **kw):
     """Return a mocked RDF/XML response for ``url``, or ``None``.

     The resource name is the last path segment of ``url``; when the URL has
     no path segments the name ``'dataset'`` is used instead. The content is
     looked up with ``get_resource(name, self.prefix)``.

     Args:
         url: The URL to fetch; it is wrapped in ``URL`` before use.
         **kw: Accepted for call compatibility and not used here.

     Returns:
         A ``MagicMock`` carrying ``mimetype``, ``content``, ``links`` and
         ``canonical_url``, or ``None`` when ``get_resource`` returns a
         falsy value.
     """
     url = URL(url)
     name = url.path_segment(-1) if url.path_segments() else 'dataset'
     content = get_resource(name, self.prefix)
     if not content:
         return None

     # ``as_string`` is a method and has to be called; formatting the bare
     # attribute would embed the bound-method repr in the alternate link.
     canonical_url = url.as_string()
     html_alternate = dict(
         url='{0}.html'.format(canonical_url),
         ext='.html',
         rel='alternate',
         type='text/html')
     return MagicMock(
         mimetype='application/rdf+xml',
         content=content,
         links=[html_alternate],
         canonical_url=canonical_url)
示例#2
0
 def latest_posts(self):
     """Fetch the latest articles from the vnru.ru news page.

     Requests the news index, keeps the absolute links whose first path
     segment is ``news`` or ``korotkoj-strokoj`` and that do not end with
     ``#comments``, then downloads each kept link and extracts the article
     fields from every page that contains an ``.article`` element.

     Returns:
         A list of dicts with the keys ``url``, ``title``, ``text``,
         ``date``, ``views`` and ``likes``. The list is returned empty when
         the index request does not answer with status 200.
     """
     posts = []
     index_page = self.session.get('https://vnru.ru/news.html')
     if index_page.status_code != 200:
         return posts
     self._collect_external_links(index_page)

     # Decide which links to download before issuing any further requests.
     wanted_sections = ('news', 'korotkoj-strokoj')
     article_links = [
         href for href in index_page.html.absolute_links
         if URL(href).path_segment(0) in wanted_sections
         and not href.endswith('#comments')
     ]

     for href in article_links:
         page = self.session.get(href)
         if page.status_code != 200:
             continue
         self._collect_external_links(page)

         article = page.html.find('.article', first=True)
         if not article:
             # Not an article page; nothing to extract.
             continue

         raw_date = page.html.find('.article__date', first=True).text
         published = self._format_date(raw_date)
         head = article.find('.article-head', first=True)
         statistics = head.find('div.icons', first=True)

         title = page.html.find('h1', first=True).text
         body = article.find('.article-text', first=True).text
         # The first ``div.icon__value`` inside the icons block is used as
         # the view counter.
         views = statistics.find('div.icon__value')[0].text
         likes = article.find('.article-share__like', first=True).text

         posts.append({
             'url': href,
             'title': title,
             'text': body,
             'date': published,
             'views': views,
             'likes': likes,
         })
     return posts
示例#3
0
文件: url_tests.py 项目: sbraz/purl
 def test_path_extraction_without_trailing_slash(self):
     # The path has no trailing slash; the segment at index 2 must be '1'.
     url = URL(host='google.com', path='/blog/article/1')
     third_segment = url.path_segment(2)
     self.assertEqual('1', third_segment)
示例#4
0
 def test_path_extraction_without_trailing_slash(self):
     # Build a URL whose path does not end in '/', then read the segment at
     # index 2 and compare it with the expected value '1'.
     article_url = URL(host='google.com', path='/blog/article/1')
     extracted = article_url.path_segment(2)
     self.assertEqual('1', extracted)
示例#5
0
from purl import Template, expand

# Parse a URL string and print the result of each zero-argument accessor,
# one per line, in the order listed.
u = URL('postgres://*****:*****@localhost:1234/test?ssl=true')
for _accessor in ('scheme', 'host', 'domain', 'username', 'password',
                  'netloc', 'port', 'path', 'query', 'path_segments'):
    print(getattr(u, _accessor)())

# Query-string helpers for the 'ssl' parameter, followed by the subdomains.
print(u.query_param('ssl'))
print(u.query_param('ssl', as_list=True))
print(u.query_params())
print(u.has_query_param('ssl'))
print(u.subdomains())

# Build a URL with the from_string constructor and read its first segment.
u = URL.from_string('https://github.com/minwook-shin')
print(u.path_segment(0))

# add_path_segment's result is bound to a new name and printed as a string.
new_url = u.add_path_segment('minwook-shin.github.com')
print(new_url.as_string())

# Expand a URI template directly from a pattern string and a variables dict.
print(expand(u"{/path*}", {'path': ['sub', 'index']}))

# Expand the same variables through a Template object and print the result.
template = Template("http://example.com{/path*}")
url = template.expand({'path': ['sub', 'index']})
print(url.as_string())
示例#6
0
 def test_path_extraction_without_trailing_slash(self):
     # Path without a trailing slash: index 2 is expected to yield "1".
     target = URL(host="google.com", path="/blog/article/1")
     segment = target.path_segment(2)
     self.assertEqual("1", segment)