def update_all(self):
    """ Update all pages in self.pages list once (don't update again if failed) """
    logging.info(cb('[update all] starting...', 'magenta'))
    for page in self.pages:
        logging.info(cb('[update all] update ', 'magenta') + cb(page.name, 'green'))
        try:
            page.update()
        except (HTTPError, Timeout, ConnectionError):
            # Skip this page so one failure does not stop the remaining updates
            logging.info(cb('update failed: ', 'red') + cb(page.name, 'green'))
    logging.info(cb('[update all] finished', 'green'))
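# The functions in this section all colorize their log output with a small
# cb() helper whose definition is not shown here. A minimal sketch of what
# such a helper could look like, assuming it simply wraps the text in raw
# ANSI escape codes (the color table below is an assumption, not the
# project's actual mapping; the real code may use a library like termcolor):
ANSI_CODES = {
    'red': '31', 'green': '32', 'yellow': '33',
    'blue': '34', 'magenta': '35',
}

def cb(text, color):
    """Return `text` wrapped in the ANSI escape sequence for `color`."""
    return '\033[{}m{}\033[0m'.format(ANSI_CODES.get(color, '0'), text)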
def update(self):
    """ Subclass of Page. Use rss feed to update content """
    logging.info(
        cb('update ', 'yellow') +
        cb(self.name, 'green') +
        cb("'s content", 'yellow'))

    # pylint: disable=no-member
    feed = feedparser.parse(self.url['feed'])
    for entry in feed.entries:
        headline = {
            'title': entry.title,
            'link': entry.link,
            'date': time.mktime(entry.published_parsed)
        }
        self.content.append(headline)
    self._filter()
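# Note: feedparser normalizes published_parsed to a UTC time.struct_time,
# while time.mktime interprets its argument as local time. If a UTC-correct
# timestamp matters, calendar.timegm is the usual alternative. A small
# standalone illustration (the feed URL is hypothetical):
import calendar
import feedparser

feed = feedparser.parse('https://example.com/feed.xml')
if feed.entries:
    # timegm does not apply the local timezone offset, unlike mktime
    timestamp = calendar.timegm(feed.entries[0].published_parsed)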
def main():
    """ main function """
    arg = get_args()
    if arg.log == 'debug':
        set_up_logging()
    units = arg.unit
    api_key = get_api_key()
    city_id = get_city_id()

    while True:
        try:
            update_weather(city_id, units, api_key)
        except MyInternetIsShitty:
            logging.info(cb('update failed: ', 'red'))
            time.sleep(3)
        else:
            logging.info(cb('update success', 'green'))
            time.sleep(700)
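# main() relies on get_args(), get_api_key() and get_city_id(), which are not
# shown here. A hypothetical sketch of get_args() using argparse, inferred
# only from the attributes main() reads (arg.log and arg.unit); the real flag
# names and defaults may differ:
import argparse

def get_args():
    """Parse the command-line flags used by main()."""
    parser = argparse.ArgumentParser(description='periodic weather updater')
    parser.add_argument('--log', default='info', help="logging level, e.g. 'debug'")
    parser.add_argument('--unit', default='metric', help='units passed to the weather API')
    return parser.parse_args()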
def start(self):
    """ Start endless loop of scraping and displaying news """
    update = Thread(target=self.update_news)
    display = Thread(target=self.display_news)

    update.start()
    logging.info(cb('update.start()', 'blue'))

    # Only start displaying once at least one page has been fetched successfully,
    # because the display thread will keep rolling for another page if the
    # last one was not successful
    while not self._is_content_avail():
        logging.info(cb('content not available', 'red'))
        time.sleep(3)

    display.start()
    logging.info(cb('display.start()', 'blue'))

    update.join()
    display.join()
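# _is_content_avail() is referenced above but not defined in this section.
# A minimal sketch under the assumption that each page keeps its fetched
# headlines in a `content` list (an assumption, not the project's actual API):
def _is_content_avail(self):
    """Return True if at least one page has fetched some content."""
    return any(page.content for page in self.pages)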
def display_news(self):
    """ Display news periodically, endless loop, use in parallel with update_news """
    page_index, title_index = self._get_random_index()
    while True:
        try:
            self.pages[page_index].display(title_index)
        except TypeError:
            # self.content is empty => title_index is None
            logging.info(cb('display failed', 'red'))
            time.sleep(0)
        else:
            logging.info(cb('display success', 'green'))
            self._export_link(self.pages[page_index].get_link(title_index))
            time.sleep(20)
        finally:
            page_index, title_index = self._get_random_index()
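# _get_random_index() is not shown here. A hypothetical sketch that matches
# the behavior display_news() relies on: title_index comes back as None when
# the chosen page has no content yet, which triggers the TypeError handled above.
import random

def _get_random_index(self):
    """Pick a random page and a random headline index inside it."""
    page_index = random.randrange(len(self.pages))
    content = self.pages[page_index].content
    title_index = random.randrange(len(content)) if content else None
    return page_index, title_index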
def update_news(self):
    """ Update news periodically, endless loop, use in parallel with display_news """
    self.update_all()
    index = self._get_index()
    while True:
        try:
            self.pages[index].update()
        except (HTTPError, Timeout, ConnectionError):
            logging.info(cb('update failed: ', 'red'))
            time.sleep(2)
        else:
            logging.info(cb('update success', 'green'))
            time.sleep(30)
        finally:
            index = self._get_index()
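# _get_index() is also not shown. One plausible sketch, assuming pages are
# updated round-robin (the real project may choose the next page differently):
def _get_index(self):
    """Return the index of the next page to update, cycling through self.pages."""
    self._next_index = (getattr(self, '_next_index', -1) + 1) % len(self.pages)
    return self._next_index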
def update(self):
    """ Update reddit using API """
    logging.info(
        cb('update ', 'yellow') +
        cb(self.name, 'green') +
        cb("'s content", 'yellow'))

    url = self.url['api']
    page = requests.get(url, headers={'User-agent': 'news'})
    # Parse the JSON body once instead of re-parsing it for every field
    children = page.json()['data']['children']

    for i in range(self.limit):
        post = children[i]['data']
        headline = {
            'title': post['title'],
            'href': post['permalink'],
            'nsfw': post['over_18'],
            'upvote': str(post['ups']),
            'date': post['created']
        }
        self.content.append(headline)
    self._filter()
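# For reference, the fields read above come from Reddit's listing JSON, which
# roughly has the shape below (the values are illustrative only):
example_listing = {
    'data': {
        'children': [
            {
                'data': {
                    'title': 'Some post title',
                    'permalink': '/r/news/comments/abc123/some_post_title/',
                    'over_18': False,
                    'ups': 1234,
                    'created': 1600000000.0,
                },
            },
            # ...one entry per post...
        ],
    },
}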
def update(self):
    """ Update self.content using self.selector to target the right elements """
    logging.info(
        cb('update ', 'yellow') +
        cb(self.name, 'green') +
        cb("'s content", 'yellow'))

    url = self.url['working'] if 'working' in self.url else self.url['base']
    page_html = requests.get(url, headers={'User-agent': 'news'}).text
    page_soup = soup(page_html, 'html.parser')

    titles = page_soup.select(self.selector['title'])
    dates = page_soup.select(self.selector['date'])
    assert len(titles) == len(dates), 'title and date lengths are not equal'

    for title, date in zip(titles, dates):
        self.content.append({
            'title': title.text.strip(),
            'href': title['href'],
            'date': self._parse_time(date)
        })
    self._filter()
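# The RSS, Reddit and HTML updaters above all end with self._filter(), whose
# definition is not part of this section. A hypothetical sketch of what such a
# helper might do, assuming it deduplicates headlines and sorts them by date
# (the real behavior may differ):
def _filter(self):
    """Drop duplicate headlines (by link/href) and keep the newest first."""
    seen = set()
    unique = []
    for headline in self.content:
        key = headline.get('href') or headline.get('link')
        if key not in seen:
            seen.add(key)
            unique.append(headline)
    self.content = sorted(unique, key=lambda h: h['date'], reverse=True)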