def get(self, url, **kw):
    """Return a mocked RDF/XML response for ``url``.

    The resource name is the last path segment of ``url``; a URL without
    any path segments falls back to ``'dataset'``. The name is looked up
    with ``get_resource(name, self.prefix)`` (presumably a test-fixture
    loader -- confirm against its definition).

    Args:
        url: The URL being requested; wrapped in ``URL`` before use.
        **kw: Accepted for signature compatibility and ignored.

    Returns:
        A ``MagicMock`` with ``mimetype``, ``content``, ``links`` and
        ``canonical_url`` attributes when the resource has content,
        otherwise ``None``.
    """
    url = URL(url)
    name = url.path_segment(-1) if url.path_segments() else 'dataset'
    content = get_resource(name, self.prefix)
    if not content:
        return None

    # ``as_string`` is a method: it must be called, otherwise the alternate
    # link would be built from the bound method's repr instead of the URL.
    canonical = url.as_string()
    return MagicMock(
        mimetype='application/rdf+xml',
        content=content,
        links=[dict(
            url='{0}.html'.format(canonical),
            ext='.html',
            rel='alternate',
            type='text/html')],
        canonical_url=canonical)
def latest_posts(self):
    """Fetch the latest articles.

    Requests the news index page, keeps every absolute link whose first
    path segment is ``news`` or ``korotkoj-strokoj`` (skipping links that
    end in ``#comments``), then downloads each kept link and extracts the
    article fields from pages that contain an ``.article`` element.

    ``self._collect_external_links`` is called on the index response and
    on every article response that comes back with status 200.

    Returns:
        A list of dicts with the keys ``url``, ``title``, ``text``,
        ``date``, ``views`` and ``likes``. The list is empty when the
        index page does not return status 200.
    """
    items = []
    response = self.session.get('https://vnru.ru/news.html')
    if response.status_code != 200:
        return items
    self._collect_external_links(response)

    links_to_download = [
        link
        for link in response.html.absolute_links
        if URL(link).path_segment(0) in ('news', 'korotkoj-strokoj')
        and not link.endswith('#comments')
    ]

    for link in links_to_download:
        page = self.session.get(link)
        if page.status_code != 200:
            continue
        self._collect_external_links(page)

        # Look the article element up once and reuse it for every field.
        article = page.html.find('.article', first=True)
        if not article:
            continue

        date = self._format_date(
            page.html.find('.article__date', first=True).text)
        statistics = article.find(
            '.article-head', first=True).find('div.icons', first=True)
        items.append({
            'url': link,
            'title': page.html.find('h1', first=True).text,
            'text': article.find('.article-text', first=True).text,
            'date': date,
            # The first ``div.icon__value`` inside the icons block is used
            # as the view counter.
            'views': statistics.find('div.icon__value')[0].text,
            'likes': article.find('.article-share__like', first=True).text,
        })
    return items
def test_path_extraction_without_trailing_slash(self):
    """Segment index 2 of '/blog/article/1' is '1' without a trailing slash."""
    url = URL(path='/blog/article/1', host='google.com')
    third_segment = url.path_segment(2)
    self.assertEqual('1', third_segment)
# --- Inspecting the components of a parsed URL ---------------------------
u = URL('postgres://*****:*****@localhost:1234/test?ssl=true')

# Zero-argument accessors, printed in a fixed order.
for accessor in (
    u.scheme,
    u.host,
    u.domain,
    u.username,
    u.password,
    u.netloc,
    u.port,
    u.path,
    u.query,
    u.path_segments,
):
    print(accessor())

# Query-string helpers.
print(u.query_param('ssl'))
print(u.query_param('ssl', as_list=True))
print(u.query_params())
print(u.has_query_param('ssl'))
print(u.subdomains())

# --- Building a URL from a string and extending its path -----------------
u = URL.from_string('https://github.com/minwook-shin')
print(u.path_segment(0))
new_url = u.add_path_segment('minwook-shin.github.com')
print(new_url.as_string())

# --- URI template expansion ----------------------------------------------
from purl import Template, expand

expanded = expand(u"{/path*}", {'path': ['sub', 'index']})
print(expanded)

template = Template("http://example.com{/path*}")
url = template.expand({'path': ['sub', 'index']})
print(url.as_string())
def test_path_extraction_without_trailing_slash(self):
    """A path with no trailing slash still exposes its final segment by index."""
    expected = "1"
    parsed = URL(host="google.com", path="/blog/article/1")
    self.assertEqual(expected, parsed.path_segment(2))