forked from Findus23/rss2wallabag
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
121 lines (103 loc) · 4.04 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import asyncio
import logging
import sys
from time import mktime
from urllib.parse import urljoin
import aiohttp
import feedparser
import yaml
from raven import Client
from raven.handlers.logging import SentryHandler
from raven.conf import setup_logging
from wallabag_api.wallabag import Wallabag
import github_stars
# --- Logging setup ----------------------------------------------------------
logger = logging.getLogger()
logger.handlers = []
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
logger.setLevel(logging.DEBUG)

# Load the main configuration; abort early if it is missing or malformed.
# BUGFIX: open() used to sit outside the try block, so a missing config.yaml
# raised FileNotFoundError past the handler that was written to catch it.
try:
    with open("config.yaml", 'r') as stream:
        # safe_load: yaml.load without an explicit Loader is deprecated
        # (PyYAML >= 5.1) and a TypeError on PyYAML >= 6, and safe_load
        # avoids arbitrary-object construction from the YAML file.
        config = yaml.safe_load(stream)
except (yaml.YAMLError, FileNotFoundError):
    config = None
    sys.exit(1)

# Console handler: DEBUG when config["debug"] is truthy, WARNING otherwise.
ch = logging.StreamHandler(stream=sys.stdout)
ch.setLevel(logging.WARNING if "debug" not in config or not config["debug"] else logging.DEBUG)
ch.setFormatter(formatter)
logger.addHandler(ch)

# File handler always records everything to debug.log.
fh = logging.FileHandler('debug.log')
fh.setFormatter(formatter)
fh.setLevel(logging.DEBUG)
logger.addHandler(fh)

# Sentry error reporting is wired up only outside debug mode.
if "sentry_url" in config and ("debug" not in config or not config["debug"]):
    client = Client(
        dsn=config["sentry_url"],
        processors=(
            'raven.processors.SanitizePasswordsProcessor',
        )
    )
    handler = SentryHandler(client)
    handler.setLevel(logging.WARNING)
    setup_logging(handler)

# Load the per-site feed definitions; abort if missing or malformed.
try:
    with open("sites.yaml", 'r') as stream:
        sites = yaml.safe_load(stream)
except (yaml.YAMLError, FileNotFoundError) as exception:
    logger.error(exception)
    sites = None
    sys.exit(1)
async def fetch(session, url):
    """Download *url* through the given aiohttp session.

    Returns the response body as text, or ``None`` when the request fails.
    Failures are logged with a traceback; feed fetching is best-effort, so
    one broken feed must not abort the whole run.
    """
    try:
        async with session.get(url) as response:
            return await response.text()
    except Exception:
        # Broad catch is deliberate at this boundary (network/DNS/TLS/HTTP
        # errors all end up here).  Use the module logger and lazy %-style
        # arguments instead of the root logger with eager str.format().
        logger.exception("failed to fetch %s", url)
        return None
async def main(loop, sites):
    """Authenticate against wallabag, then process every configured feed
    concurrently on the given event loop."""
    wb_conf = config["wallabag"]
    token = await Wallabag.get_token(**wb_conf)
    async with aiohttp.ClientSession(loop=loop) as session:
        wall = Wallabag(
            host=wb_conf["host"],
            client_secret=wb_conf["client_secret"],
            client_id=wb_conf["client_id"],
            token=token,
            aio_sess=session,
        )
        feed_jobs = [
            handle_feed(session, wall, title, site)
            for title, site in sites.items()
        ]
        await asyncio.gather(*feed_jobs)
async def handle_feed(session, wall, sitetitle, site):
    """Fetch one RSS feed and push every entry newer than the remembered
    ``latest_article`` title to wallabag.

    Side effect: updates the global ``sites`` dict with the newest entry
    title so the next run knows where to stop.
    """
    logger.info("Downloading feed: " + sitetitle)
    rss = await fetch(session, site["url"])
    logger.info("Parsing feed: " + sitetitle)
    f = feedparser.parse(rss)
    logger.debug("finished parsing: " + sitetitle)
    # feedtitle = f["feed"]["title"]
    if "latest_article" in site:
        for article in f.entries:
            # Entries are assumed newest-first; stop at the last seen title.
            # TODO confirm every configured feed is actually newest-first.
            if article.title == site["latest_article"]:
                logger.debug("already added: " + article.title)
                break
            logger.info("article found: " + article.title)
            taglist = [sitetitle]
            # BUGFIX: use .get() — sites without a "tags" key used to raise
            # KeyError here and kill the whole feed.
            if site.get("tags"):
                taglist.extend(site["tags"])
            tags = ",".join(taglist)
            if "published_parsed" in article:
                published = mktime(article.published_parsed)
            elif "updated_parsed" in article:
                published = mktime(article.updated_parsed)
            else:
                published = None
            # NOTE(review): 'published' is computed but never passed to
            # wallabag — wire it up or drop it.
            logger.info("add to wallabag: " + article.title)
            if "github" in site and site["github"]:
                title = sitetitle + ": " + article.title
            else:
                title = article.title
            url = urljoin(site["url"], article.link)
            exists = await wall.entries_exists(url)
            if exists["exists"]:
                logger.info("already found in wallabag: " + article.title)
                # BUGFIX: previously this only logged and then posted the
                # article again anyway, creating duplicates in wallabag.
                continue
            # Never write to wallabag in debug mode.
            if "debug" not in config or not config["debug"]:
                await wall.post_entries(url=url, title=title, tags=tags)
    else:
        logger.debug("no latest_article: " + sitetitle)
    if f.entries:
        sites[sitetitle]["latest_article"] = f.entries[0].title
if __name__ == '__main__':
    # Run the whole import pipeline, then persist the updated
    # latest_article markers back to sites.yaml for the next run.
    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(main(event_loop, sites))
    with open("sites.yaml", 'w') as outfile:
        yaml.dump(sites, outfile, default_flow_style=False)