Example No. 1
	def update_all(self):
		""" Update all pages in self.pages list once (don't retry a page if it fails) """
		logging.info(cb('[update all] starting...', 'magenta'))
		for index in range(self.size):
			logging.info(cb('[update all] update ', 'magenta') + cb(self.pages[index].name, 'green'))
			try:
				self.pages[index].update()
			except (HTTPError, Timeout, ConnectionError) as error:
				# Skip the failed page and keep going with the rest
				logging.info(cb('update failed: ', 'red') + str(error))
		logging.info(cb('[update all] finished', 'green'))
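Every snippet here calls a cb() helper to colorize log messages, but its definition never appears in these examples. A minimal sketch, assuming it simply wraps text in ANSI escape codes (the color table and reset handling are assumptions), could be:

def cb(text, color):
	""" Hypothetical sketch: wrap text in ANSI color escape codes """
	codes = {
		'red': '\033[31m',
		'green': '\033[32m',
		'yellow': '\033[33m',
		'blue': '\033[34m',
		'magenta': '\033[35m',
	}
	return codes[color] + text + '\033[0m'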
Example No. 2
    def update(self):
        """ Subclass of Page. Use rss feed to update content """

        logging.info(
            cb('update ', 'yellow') + cb(self.name, 'green') +
            cb("'s content", 'yellow'))

        # pylint: disable=no-member
        feed = feedparser.parse(self.url['feed'])

        for entry in feed.entries:
            headline = {
                'title': entry.title,
                'link': entry.link,
                'date': time.mktime(entry.published_parsed)
            }
            self.content.append(headline)

        self._filter()
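For context, the same feedparser pattern works standalone; the feed URL below is only a placeholder:

import time
import feedparser

feed = feedparser.parse('https://example.com/rss')  # placeholder URL
for entry in feed.entries:
    # published_parsed is a time.struct_time; time.mktime turns it
    # into the Unix timestamp that update() stores under 'date'
    print(entry.title, entry.link, time.mktime(entry.published_parsed))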
Example No. 3
def main():
	""" main function """

	arg = get_args()
	if arg.log == 'debug':
		set_up_logging()

	units = arg.unit
	api_key = get_api_key()
	city_id = get_city_id()

	while True:
		try:
			update_weather(city_id, units, api_key)
		except MyInternetIsShitty:
			logging.info(cb('update failed', 'red'))
			time.sleep(3)   # short back-off before retrying
		else:
			logging.info(cb('update success', 'green'))
			time.sleep(700)  # wait ~12 minutes before the next update
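get_args(), set_up_logging(), get_api_key() and get_city_id() are defined elsewhere in the project. A hedged argparse sketch of get_args(), with the flag names inferred only from how main() reads arg.log and arg.unit, might look like:

import argparse

def get_args():
	""" Hypothetical reconstruction of the argument parser """
	parser = argparse.ArgumentParser(description='terminal weather updater')
	parser.add_argument('--log', default='info', help="pass 'debug' to enable logging")
	parser.add_argument('--unit', default='metric', help='unit passed to the weather API')
	return parser.parse_args()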
Example No. 4
	def start(self):
		""" Start endless loop of scraping and displaying news """

		update = Thread(target=self.update_news)
		display = Thread(target=self.display_news)

		update.start()
		logging.info(cb('update.start()', 'blue'))

		# Only start displaying once at least one page has been fetched
		# successfully, because the display thread will keep rolling the
		# dice for another page whenever the last pick has no content
		while not self._is_content_avail():
			logging.info(cb('content not available', 'red'))
			time.sleep(3)
		display.start()
		logging.info(cb('display.start()', 'blue'))

		update.join()
		display.join()
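_is_content_avail() is not shown; given how start() polls it, a plausible one-line sketch (an assumption, not the project's actual code) would be:

	def _is_content_avail(self):
		""" Hypothetical sketch: True once any page has fetched content """
		return any(page.content for page in self.pages)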
Example No. 5
	def display_news(self):
		"""
		Display news periodically, endless loop,
		use in parellel with update_news
		"""

		page_index, title_index = self._get_random_index()

		while True:
			try:
				self.pages[page_index].display(title_index)
			except TypeError: # self.content is empty => title_index is None
				logging.info(cb('display failed', 'red'))
				time.sleep(0)  # yield briefly, then retry with another random pick
			else:
				logging.info(cb('display success', 'green'))
				self._export_link(self.pages[page_index].get_link(title_index))
				time.sleep(20)
			finally:
				page_index, title_index = self._get_random_index()
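_get_random_index() is also defined elsewhere. A sketch consistent with the TypeError comment above, i.e. returning None for the title index while a page's content is still empty, could be:

import random

def _get_random_index(self):
	""" Hypothetical sketch: pick a random page and a random headline """
	page_index = random.randrange(len(self.pages))
	content = self.pages[page_index].content
	# None while the page has no content yet; display() then raises
	# the TypeError handled in display_news()
	title_index = random.randrange(len(content)) if content else None
	return page_index, title_index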
Example No. 6
	def update_news(self):
		"""
		Update news periodically, endless loop,
		use in parellel with display_news
		"""

		self.update_all()

		index = self._get_index()
		while True:
			try:
				self.pages[index].update()
			except (HTTPError, Timeout, ConnectionError) as error:
				logging.info(cb('update failed: ', 'red') + str(error))
				time.sleep(2)
			else:
				logging.info(cb('update success', 'green'))
				time.sleep(30)
			finally:
				index = self._get_index()
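_get_index() is not shown either; one plausible reading is a simple round-robin over the pages (purely an assumption, it could just as well pick at random):

def _get_index(self):
	""" Hypothetical sketch: cycle through pages round-robin """
	index = getattr(self, '_next_index', 0)
	self._next_index = (index + 1) % self.size
	return index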
Example No. 7
    def update(self):
        """ Update reddit using API """

        logging.info(
            cb('update ', 'yellow') + cb(self.name, 'green') +
            cb("'s content", 'yellow'))

        url = self.url['api']
        page = requests.get(url, headers={'User-agent': 'news'})
        # Decode the JSON body once instead of re-parsing it for every field
        posts = page.json()['data']['children']

        for i in range(0, self.limit):
            post = posts[i]['data']
            headline = {
                'title': post['title'],
                'href': post['permalink'],
                'nsfw': post['over_18'],
                'upvote': str(post['ups']),
                'date': post['created']
            }
            self.content.append(headline)

        self._filter()
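For reference, the listing JSON this method walks is shaped roughly as below; the field values are illustrative, but the keys match the lookups above:

example_listing = {
    'data': {
        'children': [
            {'data': {
                'title': 'Example post',
                'permalink': '/r/news/comments/abc123/example_post/',
                'over_18': False,
                'ups': 42,
                'created': 1500000000.0,
            }},
        ]
    }
}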
Example No. 8
    def update(self):
        """ Update self.content using self.selector to target the right elements """

        logging.info(
            cb('update ', 'yellow') + cb(self.name, 'green') +
            cb("'s content", 'yellow'))

        url = self.url['working'] if 'working' in self.url else self.url['base']
        page_html = requests.get(url, headers={'User-agent': 'news'}).text

        page_soup = soup(page_html, 'html.parser')

        titles = page_soup.select(self.selector['title'])
        dates = page_soup.select(self.selector['date'])
        assert len(titles) == len(dates), 'title and date lists differ in length'

        for title, date in zip(titles, dates):
            self.content.append({
                'title': title.text.strip(),
                'href': title['href'],
                'date': self._parse_time(date)
            })

        self._filter()
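_parse_time() normalizes the scraped date element into the same Unix-timestamp format the other update() variants store. A hedged sketch, assuming a plain 'YYYY-MM-DD' text format (real sites would need per-site parsing rules), might be:

import time
from datetime import datetime

def _parse_time(self, date_tag):
    """ Hypothetical sketch: scraped date element -> Unix timestamp """
    text = date_tag.text.strip()
    # The format string is an assumption for illustration only
    return time.mktime(datetime.strptime(text, '%Y-%m-%d').timetuple())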