forked from snarfed/bridgy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
webmention.py
156 lines (127 loc) · 4.69 KB
/
webmention.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
"""Base handler class and common utilities for handling webmentions.
Used in publish.py and blog_webmention.py.
Webmention spec: http://webmention.org/
"""
__author__ = ['Ryan Barrett <bridgy@ryanb.org>']
import logging
import json
import pprint
import sys
import urllib2
import appengine_config
from appengine_config import HTTP_TIMEOUT
from bs4 import BeautifulSoup
from mf2py import parser
import models
import requests
import util
import webapp2
class WebmentionGetHandler(util.Handler):
    """Renders placeholder responses for webmention endpoints.

    head() advertises the publish webmention endpoint in a Link response
    header; get() sends that same header plus a tiny HTML page pointing at the
    docs.
    """

    def head(self, site=None):
        """Sets the Link response header to the publish webmention endpoint.

        Args:
          site: not used by this method
        """
        host_url = self.request.host_url
        link_value = '<%s/publish/webmention>; rel="webmention"' % host_url
        self.response.headers['Link'] = link_value

    def get(self, site=None):
        """Writes the placeholder HTML page, with the same header as head().

        Args:
          site: passed through to head()
        """
        self.head(site)
        # The page repeats the endpoint as a <link> element so it can be
        # discovered from the markup as well as from the header.
        page = """\
<!DOCTYPE html>
<html><head>
<link rel="webmention" href="%s/publish/webmention">
</head>
<body>Nothing here! <a href="/about">Try the docs instead.</a></body>
</html>""" % self.request.host_url
        self.response.out.write(page)
class WebmentionHandler(WebmentionGetHandler):
    """Webmention handler.

    Attributes:
      source: the Source for this webmention
      entity: the Publish or Webmention entity for this webmention
    """
    source = None
    entity = None

    def fetch_mf2(self, url):
        """Fetches a URL and extracts its mf2 data.

        Side effects: sets self.entity.html (when self.entity is set) as soon as
        the fetch succeeds, even if no microformats2 are found afterwards. Calls
        self.error() on errors.

        Args:
          url: string

        Returns:
          (requests.Response, mf2 data dict) on success, None on failure
        """
        try:
            fetched = requests.get(url, timeout=HTTP_TIMEOUT)
            fetched.raise_for_status()
        except Exception:
            # Deliberately Exception, not BaseException: KeyboardInterrupt,
            # SystemExit and GeneratorExit must propagate instead of being
            # reported as a failed fetch. self.error() logs the active
            # traceback since its log_exception argument defaults to True.
            return self.error('Could not fetch source URL %s' % url)

        if self.entity:
            self.entity.html = fetched.text

        doc = BeautifulSoup(fetched.text)

        # special case tumblr's markup: div#content > div.post > div.copy
        # convert to mf2.
        contents = doc.find_all(id='content')
        if contents:
            post = contents[0].find_next(class_='post')
            if post:
                # Overwrite the tumblr class names with their mf2 equivalents.
                post['class'] = 'h-entry'
                copy = post.find_next(class_='copy')
                if copy:
                    copy['class'] = 'e-content'
                photo = post.find_next(class_='photo-wrapper')
                if photo:
                    img = photo.find_next('img')
                    if img:
                        img['class'] = 'u-photo'
                # From here on only the rewritten post element is parsed, not
                # the whole page.
                doc = unicode(post)

        # parse microformats, convert to ActivityStreams
        data = parser.Parser(doc=doc, url=fetched.url).to_dict()
        logging.debug('Parsed microformats2: %s', pprint.pformat(data))
        items = data.get('items', [])
        if not items or not items[0]:
            return self.error('No microformats2 data found in ' + fetched.url,
                              data=data, html="""
No <a href="http://microformats.org/get-started">microformats</a> or
<a href="http://microformats.org/wiki/microformats2">microformats2</a> found in
<a href="%s">%s</a>! See <a href="http://indiewebify.me/">indiewebify.me</a>
for details (skip to level 2, <em>Publishing on the IndieWeb</em>).
""" % (fetched.url, util.pretty_link(fetched.url)))

        return fetched, data

    def error(self, error, html=None, status=400, data=None, log_exception=True,
              mail=False):
        """Handle an error. May be overridden by subclasses.

        Logs the message, marks self.entity (if set) as failed and stores it,
        sets the response status, and writes a JSON body containing the error
        and, when given, the parsed mf2 data. Returns None.

        Args:
          error: string human-readable error message
          html: string HTML human-readable error message. Not used by this
            implementation.
          status: int HTTP response status code
          data: mf2 data dict parsed from source page
          log_exception: boolean, whether to include a stack trace in the log msg
          mail: boolean, whether to email me
        """
        logging.warning(error, exc_info=log_exception)

        if self.entity:
            self.entity.status = 'failed'
            self.entity.put()

        self.response.set_status(status)
        resp = {'error': error}
        if data:
            resp['parsed'] = data
        resp = json.dumps(resp, indent=2)

        # don't email about specific known failures
        if (mail and
            # https://github.com/snarfed/bridgy/issues/161
            '"error": "invalid_input"' not in error and
            # https://github.com/snarfed/bridgy/issues/175
            'bX-2i87au' not in error and
            # https://github.com/snarfed/bridgy/issues/177
            "Invalid argument, 'thread': Unable to find thread" not in error
            ):
            self.mail_me(resp)

        self.response.write(resp)

    def mail_me(self, resp):
        """Emails a report of this request and its response via util.email_me.

        The subject is the handler class name followed by the entity's type and
        status, or 'failed' when there is no entity. When self.source is set,
        its label is appended to the subject and its bridgy URL is prepended to
        the body.

        Args:
          resp: string, the response body that is being sent to the client
        """
        subject = '%s %s' % (self.__class__.__name__,
                             '%s %s' % (self.entity.type, self.entity.status)
                             if self.entity else 'failed')
        body = 'Request:\n%s\n\nResponse:\n%s' % (self.request.params.items(), resp)

        if self.source:
            body = 'Source: %s\n\n%s' % (self.source.bridgy_url(self), body)
            subject += ': %s' % self.source.label()

        util.email_me(subject=subject, body=body)