def process(url):
    """
    Fetches news items from the rss url and parses them.
    Returns a list of NewsStory-s.

    The publication date is parsed with a named zone ("%Z") first and with
    a numeric UTC offset ("%z") as the fallback. A ValueError propagates
    when the date matches neither format.
    """
    feed = feedparser.parse(url)
    entries = feed.entries
    ret = []
    for entry in entries:
        guid = entry.guid
        title = translate_html(entry.title)
        link = entry.link
        description = translate_html(entry.description)
        published = translate_html(entry.published)
        try:
            pubdate = datetime.strptime(published, "%a, %d %b %Y %H:%M:%S %Z")
        except ValueError:
            pubdate = datetime.strptime(published, "%a, %d %b %Y %H:%M:%S %z")
        else:
            # datetime.replace() returns a new object instead of mutating,
            # so the result has to be kept; the previous code discarded it
            # and the GMT stamp was never applied.
            pubdate = pubdate.replace(tzinfo=pytz.timezone("GMT"))
        newsStory = NewsStory(guid, title, description, link, pubdate)
        ret.append(newsStory)
    return ret
def process(url):
    """
    Download the RSS feed at `url` and convert every entry into a NewsStory.

    Each story is constructed as
    NewsStory(guid, title, published, summary, link); title, published and
    summary are passed through translate_html first.
    Returns the stories as a list, in feed order.
    """
    # NOTE(review): this swaps the module-wide default HTTPS context for the
    # unverified one, which appears to switch off certificate checks for the
    # whole process, not just this call -- confirm that is intended.
    if hasattr(ssl, '_create_unverified_context'):
        ssl._create_default_https_context = ssl._create_unverified_context

    def build_story(item):
        # Attribute access order mirrors the feed fields the story needs.
        story_id = item.guid
        headline = translate_html(item.title)
        when = translate_html(item.published)
        address = item.link
        body = translate_html(item.summary)
        return NewsStory(story_id, headline, when, body, address)

    parsed = feedparser.parse(url)
    return [build_story(item) for item in parsed.entries]
def process(url):
    """
    Fetches news items from the rss url and parses them.
    Returns a list of NewsStory-s.

    Each story is built as NewsStory(guid, title, subject, summary, link).
    The subject is the 'term' of the entry's first tag, or "" when the
    entry has no usable tag.
    """
    feed = feedparser.parse(url)
    entries = feed.entries
    ret = []
    for entry in entries:
        guid = entry.guid
        title = translate_html(entry.title)
        link = entry.link
        summary = translate_html(entry.summary)
        try:
            subject = translate_html(entry.tags[0]['term'])
        except (AttributeError, IndexError, KeyError):
            # AttributeError: lookup of tags failed on the entry.
            # IndexError: tags is present but empty.
            # KeyError: the first tag carries no 'term'.
            # Any of these means "no subject", not a reason to abort the
            # whole feed.
            subject = ""
        newsStory = NewsStory(guid, title, subject, summary, link)
        ret.append(newsStory)
    return ret
def process(url):
    """
    Fetches news items from the rss url and parses them.
    Returns a list of NewsStory-s.

    Entries without a description get an empty one. The publication date
    is tried against three formats in order: RFC-822 style with a named
    zone ("%Z"), the same with a numeric offset ("%z"), and
    "%Y-%m-%dT%H:%M:%S%z". A ValueError propagates when none of them match.
    """
    feed = feedparser.parse(url)
    entries = feed.entries
    ret = []
    for entry in entries:
        guid = entry.guid
        title = translate_html(entry.title)
        link = entry.link
        # Had to add error handling because yahoo news doesn't give
        # descriptions anymore
        try:
            description = translate_html(entry.description)
        except AttributeError:
            description = ''
        published = translate_html(entry.published)
        try:
            pubdate = datetime.strptime(published, "%a, %d %b %Y %H:%M:%S %Z")
        except ValueError:
            # added error handling because date format in google page changed
            try:
                pubdate = datetime.strptime(
                    published, "%a, %d %b %Y %H:%M:%S %z")
            except ValueError:
                # added new date-time format that google outputs
                pubdate = datetime.strptime(published, "%Y-%m-%dT%H:%M:%S%z")
        else:
            # datetime.replace() returns a new object instead of mutating,
            # so the result has to be kept; the previous code discarded it
            # and the GMT stamp was never applied.
            pubdate = pubdate.replace(tzinfo=pytz.timezone("GMT"))
        newsStory = NewsStory(guid, title, description, link, pubdate)
        ret.append(newsStory)
    return ret
def fprocess(entry, timeout=10):
    """
    Builds a NewsStory from one feed entry, taking the summary from the
    page the entry links to.

    entry: object exposing guid, title, published, source.title and link.
    timeout: seconds handed to requests.get for the page download; added
        so a stalled server cannot block the caller forever. Pass None to
        wait without limit, as the code did before.

    Returns NewsStory(guid, title, summary, published, source, link).
    Exceptions raised by requests.get (including timeouts) propagate.
    """
    guid = entry.guid
    # Keep only the text before the first " - " separator.
    title = entry.title.split(" - ")[0]
    published = entry.published
    source = entry.source.title
    link = entry.link
    response = requests.get(link, timeout=timeout)
    web_content = readability.Document(response.text)
    summary = translate_html(web_content.summary())
    newsStory = NewsStory(guid, title, summary, published, source, link)
    return newsStory