"""Crawl the Microsoft press-release listing pages for the 'Microsoft PR' scroll.

Visible behaviour: log in to a local Unscroll API, cache the microsoft.com
favicon as the scroll thumbnail, create or retrieve the scroll, then fetch
listing pages 1 through 957 and collect the ``a.f-post-link`` anchors found
on each page.

NOTE(review): the available copy of this script stops at the per-link loop
header, so what is done with each link is not documented here.
"""
import datetime
import re
from pprint import pprint
from random import random

import requests
from bs4 import BeautifulSoup
from unscroll import UnscrollClient

URL = 'https://news.microsoft.com/category/press-releases/page/{}/'

# Seconds to wait for a listing page. Without a timeout a single stalled
# connection would block the whole crawl indefinitely.
REQUEST_TIMEOUT = 30

# NOTE(review): credentials are inlined (masked in this copy); consider
# reading them from the environment instead.
c = UnscrollClient(api='http://127.0.0.1',
                   username='******',
                   password='******')
c.login()

# Use the site's favicon as the thumbnail of the scroll.
favicon_url = c.fetch_favicon_url('https://www.microsoft.com')
favthumb = c.cache_thumbnail(favicon_url['url'])
print(favthumb)
c.create_or_retrieve_scroll('Microsoft PR', thumbnail=favthumb['url'])

for i in range(1, 958):
    pr_url = URL.format(i)
    print(pr_url)
    r = requests.get(pr_url, timeout=REQUEST_TIMEOUT)
    parsed = BeautifulSoup(r.content, 'html.parser')

    # Each press release on a listing page is an <a class="f-post-link">.
    els = parsed.find_all('a', class_='f-post-link')
    events = []
    for el in els:
        # NOTE(review): the loop body is missing from the available source;
        # this placeholder only keeps the script syntactically valid.
        pass
"""Crawl the Adobe press-release AJAX listing for the 'Adobe PR' scroll.

Visible behaviour: log in to a local Unscroll API, cache the adobe.com
favicon as the scroll thumbnail, create or retrieve the scroll, then fetch
listing pages 1 through 91 from the AJAX endpoint. Each response is JSON
whose ``display`` entry holds an HTML fragment; every
``div.view-inner-wrapper`` in that fragment is visited and the text of its
``div.views-field-created`` child is read.

NOTE(review): the available copy of this script ends right after that text
is read, so later processing of each entry is not documented here.
"""
from pprint import pprint
from random import random

import datefinder
import requests
from bs4 import BeautifulSoup
from unscroll import UnscrollClient

ADOBE_URL = "http://news.adobe.com/views/ajax?js=1&page={}&view_name=bw_press_release&view_display_id=panel_pane_7&view_args=all%2Fall&view_path=news&view_base_path=null&view_dom_id=1&pager_element=0"

# Seconds to wait for a listing page. Without a timeout a single stalled
# connection would block the whole crawl indefinitely.
REQUEST_TIMEOUT = 30

# NOTE(review): credentials are inlined (masked in this copy); consider
# reading them from the environment instead.
c = UnscrollClient(api='http://127.0.0.1',
                   username='******',
                   password='******')
c.login()

# Use the site's favicon as the thumbnail of the scroll.
favicon_url = c.fetch_favicon_url('https://www.adobe.com')
favthumb = c.cache_thumbnail(favicon_url['url'])
c.create_or_retrieve_scroll('Adobe PR', thumbnail=favthumb['url'])

for i in range(1, 92):
    pr_url = ADOBE_URL.format(i)
    r = requests.get(pr_url, timeout=REQUEST_TIMEOUT)

    # The endpoint answers with JSON; the rendered listing HTML is stored
    # under the 'display' key.
    r_as_data = r.json()
    r_html = r_as_data['display']
    parsed = BeautifulSoup(r_html, 'html.parser')

    els = parsed.find_all('div', class_='view-inner-wrapper')
    events = []
    for el in els:
        # Text of the entry's "created" field.
        # NOTE(review): presumably the publication date - confirm.
        date_source = el.find('div', class_='views-field-created')
        date_source_txt = date_source.text
        # NOTE(review): the available source ends here.
"""Crawl the Apple newsroom archive for the Apple press-release scroll.

Visible behaviour: log in to a local Unscroll API, cache the apple.com
favicon as the scroll thumbnail, create or retrieve the scroll named
'Apple Press Releases, 2000-2017', then fetch archive pages 1 through 65 and
read the ``<h3>`` text of every ``a.result__item`` anchor on each page.

NOTE(review): the available copy of this script ends right after the title
is read, so later processing of each entry is not documented here.
"""
import re
from pprint import pprint
from random import random

import datefinder
# `requests` and `BeautifulSoup` are used below but were not imported in the
# visible script; without these two imports the first page fetch raises
# NameError.
import requests
from bs4 import BeautifulSoup
from dateparser import parse
from unscroll import UnscrollClient

APPLE_URL = 'https://www.apple.com'
APPLE_PR_URL = 'https://www.apple.com/pr/library'

# Seconds to wait for an archive page. Without a timeout a single stalled
# connection would block the whole crawl indefinitely.
REQUEST_TIMEOUT = 30

# NOTE(review): credentials are inlined (masked in this copy); consider
# reading them from the environment instead.
c = UnscrollClient(api='http://127.0.0.1',
                   username='******',
                   password='******')
c.login()

# Use the site's favicon as the thumbnail of the scroll.
favicon_url = c.fetch_favicon_url(APPLE_URL)
favthumb = c.cache_thumbnail(favicon_url['url'])
print(favthumb)
c.create_or_retrieve_scroll('Apple Press Releases, 2000-2017',
                            thumbnail=favthumb['url'])

for i in range(1, 66):
    pr_url = 'https://www.apple.com/newsroom/archive/?page={}'.format(i)
    print(pr_url)
    r = requests.get(pr_url, timeout=REQUEST_TIMEOUT)
    parsed = BeautifulSoup(r.content, 'html.parser')

    # Each archive entry is an <a class="result__item"> wrapping an <h3>.
    dts = parsed.find_all('a', class_='result__item')
    events = []
    for dt in dts:
        title = dt.find('h3').text
        # NOTE(review): the available source ends here.