def get_news(lang="en-CA", max_items=4, max_age=30):
    """Pull news items from the Library & Archives feed."""
    import atoma

    url = "https://biblio.laurentian.ca/research/news.xml"
    news_link = "https://biblio.laurentian.ca/research/news"
    news_heading = "News"
    if lang.startswith("fr"):
        url = "https://biblio.laurentian.ca/research/fr/news.xml"
        news_link = "https://biblio.laurentian.ca/research/fr/nouvelles"
        news_heading = "Nouvelles"
    r = requests.get(url)
    feed = atoma.parse_rss_bytes(r.content)
    news_items = []
    x = 0
    for item in feed.items:
        # Stop at the item cap, or (keeping at least one item) once posts
        # are older than max_age days.
        too_old = item.pub_date.timestamp() < (
            datetime.datetime.today() - datetime.timedelta(days=max_age)
        ).timestamp()
        if x == max_items or (x > 0 and too_old):
            break
        news_items.append({
            "title": item.title,
            "url": item.link,
            "published": item.pub_date.date(),
        })
        x += 1
    return news_items
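A quick usage sketch for get_news above; the call shape follows the signature, while the printing loop is illustrative, not from the source.

# Hypothetical usage (assumes requests and datetime are imported at module level).
for entry in get_news(lang="fr-CA", max_items=2):
    print(entry["published"], entry["title"], entry["url"])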
def fetch(self):
    get = self.session.get if self.session is not None else requests.get
    resp = get(self.url, headers=generate_headers(self.url))
    try:
        return parse_atom_bytes(resp.content)
    except FeedParseError:
        # Not an Atom document; fall back to parsing it as RSS.
        # FeedParseError comes from atoma, alongside the two parse functions.
        return parse_rss_bytes(resp.content)
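The same format-detection idea in isolation, as a minimal self-contained sketch; parse_any_feed and feed_bytes are illustrative names, not from the source. atoma raises FeedParseError when the bytes are valid XML but not the document type being parsed.

# Minimal standalone sketch of the try-Atom-then-RSS pattern.
import atoma

def parse_any_feed(feed_bytes: bytes):
    try:
        return atoma.parse_atom_bytes(feed_bytes)
    except atoma.FeedParseError:
        # Valid XML but not Atom: try RSS instead.
        return atoma.parse_rss_bytes(feed_bytes)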
def retrieve_feed(self, url):
    response = requests.get(url)
    feed = atoma.parse_rss_bytes(response.content)
    items = []
    c = self.conn.cursor()
    for item in feed.items:
        items.append({
            "title": item.title,
            "link": item.link,
            "description": item.description,
            "description_text": item.description,
            "pubDate": str(item.pub_date),
        })
        c.execute(
            """INSERT INTO news (title, link, description, published, feed, liked)
               VALUES (?, ?, ?, ?, ?, ?)""",
            (item.title, item.link, item.description, item.pub_date,
             feed.link, False))
    self.conn.commit()
    return {
        "channel": {
            "title": feed.title,
            "link": feed.link,
            "url": feed.link,
        },
        "items": items,
    }
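The INSERT above assumes an existing news table; here is a hypothetical schema sketch, with column names inferred from the statement and types assumed.

# Hypothetical schema for the 'news' table; inferred from the INSERT above,
# not taken from the source project.
import sqlite3

conn = sqlite3.connect("news.db")
conn.execute("""
    CREATE TABLE IF NOT EXISTS news (
        title TEXT,
        link TEXT,
        description TEXT,
        published TIMESTAMP,
        feed TEXT,
        liked BOOLEAN
    )
""")
conn.commit()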
def test_feed(client, post: Post, second_post: Post, hidden_post: Post):
    # set the post dates so they are listed in a predictable order
    post.created = datetime(2019, 12, 2, tzinfo=utc)
    second_post.created = datetime(2019, 11, 1, tzinfo=utc)
    post.save()
    second_post.save()

    # get the feed
    url = reverse('blog:feed')
    response = client.get(url)
    feed = atoma.parse_rss_bytes(response.content)

    # check the title and description match those in the config
    assert feed.title == "Chocolate"
    assert feed.description == "Ice Cream"

    # check the link is correct
    assert urlparse(feed.link).path == reverse('blog:list')

    # check we only have two visible items (hidden_post is excluded)
    assert len(feed.items) == 2

    # first post
    item_one = feed.items[0]
    assert urlparse(item_one.link).path == post.get_absolute_url()
    assert item_one.title == post.title
    assert item_one.pub_date == post.created

    # second post
    item_two = feed.items[1]
    assert urlparse(item_two.link).path == second_post.get_absolute_url()
    assert item_two.title == second_post.title
    assert item_two.pub_date == second_post.created
def serializer(self, content: bytes):
    feed = None
    try:
        feed = atoma.parse_rss_bytes(content)
    except Exception:
        self._logger.error("can't parse RSS")
    return feed
async def fetch(self):
    async with self.session.get(
            self.url, headers=generate_headers(self.url)) as response:
        content = await response.read()
        try:
            return parse_atom_bytes(content)
        except FeedParseError:
            # Not an Atom document; fall back to parsing it as RSS.
            return parse_rss_bytes(content)
def __init__(self, link):
    super().__init__()
    xml = get(link)
    feed = atoma.parse_rss_bytes(xml.content)
    self.title = feed.title
    for item in feed.items:
        self.items.append(
            RssArticle(item.title, item.description, item.link, item.pub_date))
    # Reverse so the oldest article comes first.
    self.items.reverse()
def _get_feed(feed_content, payload):
    if payload['source']['type'] == 'atom':
        feed = atoma.parse_atom_bytes(feed_content)
    elif payload['source']['type'] == 'rss':
        feed = atoma.parse_rss_bytes(feed_content)
    else:
        raise Exception(
            "SourceError",
            "Unknown feed type '%s'. Choose 'rss' or 'atom'."
            % payload['source']['type'])
    return feed
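The payload shape this dispatcher expects, inferred from the key accesses above; the example values are assumptions.

# Example payload inferred from the code above.
payload = {'source': {'type': 'rss'}}
feed = _get_feed(feed_content, payload)  # feed_content: raw feed bytes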
def parse(self, url):
    try:
        content = self.session.get(url).content
    except requests.exceptions.ConnectionError as e:
        logger.exception(e)
        raise SGFeedUpdateFailedException
    try:
        return atoma.parse_rss_bytes(content)
    except atoma.exceptions.FeedXMLError as e:
        logger.error(f"{e}: {content}")
        raise SGFeedUpdateFailedException
def feed(request):
    # get the feed
    feed = requests.get(
        'https://www.standaard.be/rss/section/1f2838d4-99ea-49f0-9102-138784c7ea7c'
    )
    articles = atoma.parse_rss_bytes(feed.content).items

    # derive a URL path for each article from its GUID
    for article in articles:
        article.path = article.guid.replace("https://www.standaard.be/cnt/", "")

    # pass context to the template
    return render(request, 'articles/feed.html', {'articles': articles})
def fetch_and_parse_rssfeed_atom(url_file_stream_or_string,
                                 site_cookies_dict=None,
                                 user_agent=None,
                                 request_headers=None,
                                 timeout=10):
    result = http.download_file(url_file_stream_or_string,
                                site_cookies_dict=site_cookies_dict,
                                user_agent=user_agent,
                                request_headers=request_headers,
                                timeout=timeout)
    import atoma
    # Project-specific tweak to atoma's RSS version check.
    atoma.rss.supported_rss_versions = []
    parsed_feeds = {}
    try:
        atoma_result = atoma.parse_rss_bytes(result['content'])
        parsed_feeds = atoma_result_to_dict(atoma_result)
    except atoma.FeedXMLError as err:
        # Mirror feedparser's "bozo" convention for malformed feeds.
        readable_body = http.clean_html_body(result['content'])
        parsed_feeds["raw_result"] = readable_body
        parsed_feeds["bozo"] = 1
        parsed_feeds["feed"] = {}
        parsed_feeds["items"] = []
        parsed_feeds["bozo_exception"] = err
    parsed_feeds['parser'] = "atoma"
    return parsed_feeds
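atoma_result_to_dict is referenced above but not shown; here is a minimal hypothetical sketch of such a converter, mapping atoma's RSSChannel onto the feedparser-style dict the error path also produces.

# Hypothetical converter; the project's real atoma_result_to_dict is not shown.
def atoma_result_to_dict(channel):
    return {
        "bozo": 0,
        "feed": {
            "title": channel.title,
            "link": channel.link,
            "description": channel.description,
        },
        "items": [
            {
                "title": item.title,
                "link": item.link,
                "description": item.description,
                "published": item.pub_date,
            }
            for item in channel.items
        ],
    }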
def detail(request, article_path):
    # fetch the feed again
    feed = requests.get(
        'https://www.standaard.be/rss/section/1f2838d4-99ea-49f0-9102-138784c7ea7c'
    )
    articles = atoma.parse_rss_bytes(feed.content).items

    # find the article whose path matches article_path
    # (assumes a match exists; otherwise lookup_article_index is unbound)
    for index, article in enumerate(articles):
        article.path = article.guid.replace("https://www.standaard.be/cnt/", "")
        if article_path == article.path:
            lookup_article_index = index

    # look up the matched article
    lookup_article = articles[lookup_article_index]

    # strip paragraph tags from the description
    lookup_article.description = lookup_article.description.replace(
        "<P>", "").replace("</P>", "").replace("<p>", "").replace("</p>", "")

    # pass context to the template
    return render(request, 'articles/detail.html', {'article': lookup_article})
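The feed and detail views above imply URL patterns along these lines; the route strings and names are assumptions, not taken from the source.

# Hypothetical urls.py wiring for the two views above.
from django.urls import path
from . import views

urlpatterns = [
    path('feed/', views.feed, name='feed'),
    path('feed/<str:article_path>/', views.detail, name='detail'),
]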
def RssRead():
    # Create the JSON database - db.json
    db = TinyDB('./db.json', sort_keys=True, indent=4, separators=(',', ': '),
                ensure_ascii=False, encoding='utf-8')
    titulo_postagem = Query()

    # The three feeds to read: Aleteia, Vatican News and Aci Digital
    feeds = [
        ('Aleteia', 'https://pt.aleteia.org/feed/',
         'RSS Aleteia lido com sucesso!'),
        ('VaticanNews', 'https://www.vaticannews.va/pt.rss.xml',
         'RSS VaticanNews lido com sucesso!'),
        ('AciDigital', 'https://www.acidigital.com/rss/rss.php',
         'RSS Acidigital lido com sucesso!'),
    ]
    data_hoje = datetime.date.today().strftime('%d/%m/%Y')
    for source, url, success_message in feeds:
        # Read the feed and save its items to the JSON database
        response = requests.get(url)
        feed = atoma.parse_rss_bytes(response.content)
        for post in feed.items:
            data_postagem = post.pub_date.strftime('%d/%m/%Y')
            # Only keep posts published today that are not already stored
            if data_hoje == data_postagem and not db.contains(
                    titulo_postagem.titulo == post.title):
                db.insert({
                    'source': source,
                    'date': data_postagem,
                    'titulo': post.title,
                    'descrição': post.description,
                    'link': post.link,
                    'categorias': post.categories,
                    'postado': 'não',
                })
        print(success_message)
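A small follow-up sketch of how the stored rows might be consumed later, e.g. fetching items not yet posted; the field names come from the inserts above, the query itself is an assumption.

# Fetch stored items that have not been posted yet ('postado' == 'não').
from tinydb import TinyDB, Query

db = TinyDB('./db.json')
registro = Query()
for pendente in db.search(registro.postado == 'não'):
    print(pendente['date'], pendente['titulo'], pendente['link'])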
def _extract_news_feed_items(self, proxies):
    content = self.parser.get_content(proxies=proxies)
    news_feed = atoma.parse_rss_bytes(content)
    return news_feed.items
import atoma
import requests

response = requests.get('???')
feed = atoma.parse_rss_bytes(response.content)
feed.items
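Each entry in feed.items is an atoma RSSItem; a short sketch of the commonly used attributes:

# Iterate the parsed items; title, link, description and pub_date are
# standard atoma RSSItem attributes.
for item in feed.items:
    print(item.title)
    print(item.link)
    print(item.pub_date)  # timezone-aware datetime, or None if absent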
def get_episodes():
    response = requests.get(os.getenv("FEED_URI"))
    feed = atoma.parse_rss_bytes(response.content)
    return feed.items
import os
import base64
import random
from datetime import datetime
from urllib.request import urlopen, Request
from urllib.parse import urlencode

from atoma import parse_rss_bytes
from utils import escape, html_unescape, u, s

TWITTER_API_VERSION = '1.0'
TWITTER_API_METHOD = 'HMAC-SHA1'
TWITTER_API_END = 'https://api.twitter.com/1.1/statuses/update.json'
TWITTER_CONSUMER_KEY = os.environ.get('TWITTER_CONSUMER_KEY')
TWITTER_CONSUMER_SECRET = os.environ.get('TWITTER_CONSUMER_SECRET')
TWITTER_OAUTH_TOKEN = os.environ.get('TWITTER_OAUTH_TOKEN')
TWITTER_OAUTH_SECRET = os.environ.get('TWITTER_OAUTH_SECRET')
FEED_URL = os.environ.get('FEED_URL')

FEED_DATA = parse_rss_bytes(urlopen(FEED_URL).read())

for post in FEED_DATA.items:
    ITEM_TIMESTAMP = int(post.pub_date.strftime('%Y%m%d%H%M%S'))
    # Roughly one hour ago, in YYYYMMDDHHMMSS integer form.
    LAST_TIMESTAMP = int(datetime.now().strftime('%Y%m%d%H%M%S')) - 10000
    ITEM_TITLE = u(html_unescape(post.title))
    ITEM_LINK = u(post.guid)
    TWITTER_STATUS = ITEM_TITLE + ' ' + ITEM_LINK
    if ITEM_TIMESTAMP >= LAST_TIMESTAMP:
        SIGNATURE_TIMESTAMP = datetime.now().strftime('%s')
        SIGNATURE_ONCE = base64.b64encode(
            s(''.join([str(random.randint(0, 9)) for i in range(24)])))
        SIGNATURE_BASE_STRING_AUTH = 'oauth_consumer_key=' + escape(
import os
import sys
from datetime import datetime
from urllib.request import urlopen

from atoma import parse_rss_bytes
from utils import u, html_unescape, escape, filter_json_index_by_year

json_index_content = {}
twitter_api_version = '1.0'
twitter_api_method = 'HMAC-SHA1'
twitter_api_end = 'https://api.twitter.com/1.1/statuses/update.json'
twitter_consumer_key = os.environ.get('TWITTER_CONSUMER_KEY')
twitter_consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET')
twitter_oauth_token = os.environ.get('TWITTER_OAUTH_TOKEN')
twitter_oauth_secret = os.environ.get('TWITTER_OAUTH_SECRET')
feed_url = os.environ.get('FEED_URL')

feed_data = parse_rss_bytes(urlopen(feed_url).read())
current_timestamp = int(datetime.now().strftime('%Y%m%d%H%M%S'))
current_hour = int(datetime.now().strftime('%H'))

if current_hour not in [2, 6, 9, 14, 17, 21]:
    print("Script wasn't called in a recommended hour. Aborting.")
    sys.exit(0)

for post in feed_data.items:
    post_timestamp = post.pub_date.strftime('%Y%m%d%H%M%S')
    json_index_content[post_timestamp] = {
        'title': post.title,
        'url': post.guid,
        'date': post.pub_date,
    }