def post(self, source_short_name):
    """Handle a webmention POST and publish it as a comment on the target blog.

    Reads the required 'source' and 'target' request params, resolves the
    target through redirects, looks up the enabled, webmention-capable source
    account for the target's domain, fetches the source page's microformats2,
    and calls self.source.create_comment() with the mention's author and
    content.

    Args:
      source_short_name: key into models.sources selecting the source class.

    Returns:
      Whatever self.error() returns on the failure paths; None otherwise.
    """
    # '%s', not '%self': the old format string appended a literal "elf".
    logging.info('Params: %s', self.request.params.items())

    # strip fragments from source and target url
    self.source_url = urlparse.urldefrag(
        util.get_required_param(self, 'source'))[0]
    self.target_url = urlparse.urldefrag(
        util.get_required_param(self, 'target'))[0]

    # follow target url through any redirects, strip utm_* query params
    resp = util.follow_redirects(self.target_url)
    redirected_target_urls = [r.url for r in resp.history]
    self.target_url = util.clean_url(resp.url)

    # parse and validate target URL
    domain = util.domain_from_link(self.target_url)
    if not domain:
        return self.error('Could not parse target URL %s' % self.target_url)

    # look up source by domain
    source_cls = models.sources[source_short_name]
    domain = domain.lower()
    self.source = (source_cls.query()
                   .filter(source_cls.domains == domain)
                   .filter(source_cls.features == 'webmention')
                   .filter(source_cls.status == 'enabled')
                   .get())
    if not self.source:
        return self.error(
            'Could not find %s account for %s. Is it registered with Bridgy?'
            % (source_cls.GR_CLASS.NAME, domain))

    if urlparse.urlparse(self.target_url).path in ('', '/'):
        return self.error('Home page webmentions are not currently supported.')

    # create BlogWebmention entity, keyed on "<source url> <target url>"
    entity_id = u'%s %s' % (self.source_url, self.target_url)
    self.entity = BlogWebmention.get_or_insert(
        entity_id, source=self.source.key,
        redirected_target_urls=redirected_target_urls)
    if self.entity.status == 'complete':
        # TODO: response message saying update isn't supported
        self.response.write(self.entity.published)
        return
    logging.debug('BlogWebmention entity: %s', self.entity.key.urlsafe())

    # fetch source page
    resp = self.fetch_mf2(self.source_url)
    if not resp:
        return
    self.fetched, data = resp

    item = self.find_mention_item(data)
    if not item:
        return self.error(
            'Could not find target URL %s in source page %s'
            % (self.target_url, self.fetched.url),
            data=data, log_exception=False)

    # default author to target domain
    author_name = domain
    author_url = 'http://%s/' % domain

    # extract author name and URL from h-card, if any
    props = item['properties']
    author = first_value(props, 'author')
    if author:
        if isinstance(author, basestring):
            author_name = author
        else:
            author_props = author.get('properties', {})
            author_name = first_value(author_props, 'name')
            author_url = first_value(author_props, 'url')

    # if present, u-url overrides source url
    u_url = first_value(props, 'url')
    if u_url:
        self.entity.u_url = u_url

    # generate content
    content = props['content'][0]  # find_mention_item() guaranteed this is here
    text = (content.get('html') or content.get('value')).strip()
    source_url = self.entity.source_url()
    text += ' <br /> <a href="%s">via %s</a>' % (
        source_url, util.domain_from_link(source_url))

    # write comment
    try:
        self.entity.published = self.source.create_comment(
            self.target_url, author_name, author_url, text)
    except Exception as e:
        code, body = util.interpret_http_exception(e)
        msg = 'Error: %s %s; %s' % (code, e, body)
        if code == '401':
            # the source's credentials were rejected; stop using it
            logging.warning('Disabling source!')
            self.source.status = 'disabled'
            self.source.put()
            return self.error(msg, status=code, mail=False)
        elif code == '404':
            # post is gone
            return self.error(msg, status=code, mail=False)
        elif code or body:
            return self.error(msg, status=code, mail=True)
        else:
            # not an HTTP error we can interpret; let it propagate
            raise

    # NOTE(review): nothing visible here marks self.entity 'complete' or
    # stores it after a successful create_comment() - confirm that happens
    # elsewhere.
def post(self, source_short_name):
    """Handle a webmention POST and publish it as a comment on the target blog.

    Reads the required 'source' and 'target' request params, resolves the
    target through redirects, and looks up the enabled, webmention-capable
    source account for the target's domain, falling back to the domains of the
    target page's rel-canonical links. It then fetches the source page's
    microformats2, calls self.source.create_comment() with the mention's
    author and content, marks the BlogWebmention entity complete, and writes
    the published result to the response as JSON.

    Args:
      source_short_name: key into models.sources selecting the source class.

    Returns:
      Whatever self.error() returns on the failure paths; None otherwise.
    """
    logging.info('Params: %s', list(self.request.params.items()))

    # strip fragments from source and target url
    self.source_url = urllib.parse.urldefrag(
        util.get_required_param(self, 'source'))[0]
    self.target_url = urllib.parse.urldefrag(
        util.get_required_param(self, 'target'))[0]

    # follow target url through any redirects, strip utm_* query params
    resp = util.follow_redirects(self.target_url)
    redirected_target_urls = [r.url for r in resp.history]
    self.target_url = util.clean_url(resp.url)

    # parse and validate target URL
    domain = util.domain_from_link(self.target_url)
    if not domain:
        return self.error('Could not parse target URL %s' % self.target_url)

    # look up source by domain
    source_cls = models.sources[source_short_name]
    domain = domain.lower()
    self.source = (source_cls.query()
                   .filter(source_cls.domains == domain)
                   .filter(source_cls.features == 'webmention')
                   .filter(source_cls.status == 'enabled')
                   .get())
    if not self.source:
        # check for a rel-canonical link. Blogger uses these when it serves a
        # post from multiple domains, e.g country TLDs like
        # epeus.blogspot.co.uk vs epeus.blogspot.com.
        # https://github.com/snarfed/bridgy/issues/805
        mf2 = self.fetch_mf2(self.target_url, require_mf2=False)
        if not mf2:
            # fetch_mf2() already wrote the error response
            return

        # tolerate parsed output with no 'rels' key instead of raising KeyError
        canonicals = mf2[1].get('rels', {}).get('canonical', [])
        domains = util.dedupe_urls(
            util.domain_from_link(url) for url in canonicals)
        if domains:
            self.source = (source_cls.query()
                           .filter(source_cls.domains.IN(domains))
                           .filter(source_cls.features == 'webmention')
                           .filter(source_cls.status == 'enabled')
                           .get())

    if not self.source:
        return self.error(
            'Could not find %s account for %s. Is it registered with Bridgy?'
            % (source_cls.GR_CLASS.NAME, domain))

    # check that the target URL path is supported
    target_path = urllib.parse.urlparse(self.target_url).path
    if target_path in ('', '/'):
        return self.error(
            'Home page webmentions are not currently supported.', status=202)
    for pattern in self.source.PATH_BLOCKLIST:
        if pattern.match(target_path):
            return self.error(
                '%s webmentions are not supported for URL path: %s'
                % (self.source.GR_CLASS.NAME, target_path),
                status=202)

    # create BlogWebmention entity, keyed on "<source url> <target url>"
    entity_id = '%s %s' % (self.source_url, self.target_url)
    self.entity = BlogWebmention.get_or_insert(
        entity_id, source=self.source.key,
        redirected_target_urls=redirected_target_urls)
    if self.entity.status == 'complete':
        # TODO: response message saying update isn't supported
        self.response.write(self.entity.published)
        return
    logging.debug("BlogWebmention entity: '%s'",
                  self.entity.key.urlsafe().decode())

    # fetch source page
    fetched = self.fetch_mf2(self.source_url)
    if not fetched:
        return
    resp, mf2 = fetched

    item = self.find_mention_item(mf2.get('items', []))
    if not item:
        return self.error(
            'Could not find target URL %s in source page %s'
            % (self.target_url, resp.url),
            data=mf2, log_exception=False)

    # default author to target domain
    author_name = domain
    author_url = 'http://%s/' % domain

    # extract author name and URL from h-card, if any
    props = item['properties']
    author = first_value(props, 'author')
    if author:
        if isinstance(author, str):
            author_name = author
        else:
            author_props = author.get('properties', {})
            author_name = first_value(author_props, 'name')
            author_url = first_value(author_props, 'url')

    # if present, u-url overrides source url
    u_url = first_value(props, 'url')
    if u_url:
        self.entity.u_url = u_url

    # generate content
    content = props['content'][0]  # find_mention_item() guaranteed this is here
    text = (content.get('html') or content.get('value')).strip()
    source_url = self.entity.source_url()
    text += ' <br /> <a href="%s">via %s</a>' % (
        source_url, util.domain_from_link(source_url))

    # write comment
    try:
        self.entity.published = self.source.create_comment(
            self.target_url, author_name, author_url, text)
    except Exception as e:
        code, body = util.interpret_http_exception(e)
        msg = 'Error: %s %s; %s' % (code, e, body)
        if code == '401':
            # exc_info (not stack_info) so the exception's own traceback is
            # what gets logged
            logging.warning('Disabling source due to: %s', e, exc_info=True)
            self.source.status = 'disabled'
            self.source.put()
            return self.error(msg, status=code,
                              report=self.source.is_beta_user())
        elif code == '404':
            # post is gone
            return self.error(msg, status=code, report=False)
        elif util.is_connection_failure(e) or (code and int(code) // 100 == 5):
            # connection failure or 5xx from the remote side
            return self.error(msg, status=util.ERROR_HTTP_RETURN_CODE,
                              report=False)
        elif code or body:
            return self.error(msg, status=code, report=True)
        else:
            # not an HTTP error we can interpret; let it propagate
            raise

    # write results to datastore
    self.entity.status = 'complete'
    self.entity.put()
    self.response.write(json_dumps(self.entity.published))
def dispatch_request(self, site):
    """Handle a webmention request and publish it as a comment on the blog.

    Reads 'source' and 'target' from the request form, resolves the target
    through redirects, and looks up the enabled, webmention-capable source
    account for the target's domain, falling back to the domains of the target
    page's rel-canonical links. It then fetches the source page's
    microformats2, calls self.source.create_comment() with the mention's
    author and content, and marks the BlogWebmention entity complete.

    Args:
      site: key into models.sources selecting the source class.

    Returns:
      self.entity.published on success or when the entity was already
      complete; an ({'error': msg}, 202) tuple for unsupported target paths.
    """
    # NOTE(review): several self.error() calls below are not followed by a
    # return; presumably self.error() raises to abort the request - confirm.
    logger.info(f'Params: {list(request.values.items())}')

    # strip fragments from source and target url
    self.source_url = urllib.parse.urldefrag(request.form['source'])[0]
    self.target_url = urllib.parse.urldefrag(request.form['target'])[0]

    # follow target url through any redirects, strip utm_* query params
    resp = util.follow_redirects(self.target_url)
    redirected_target_urls = [r.url for r in resp.history]
    self.target_url = util.clean_url(resp.url)

    # parse and validate target URL
    domain = util.domain_from_link(self.target_url)
    if not domain:
        self.error(f'Could not parse target URL {self.target_url}')

    # look up source by domain
    source_cls = models.sources[site]
    domain = domain.lower()
    self.source = (source_cls.query()
                   .filter(source_cls.domains == domain)
                   .filter(source_cls.features == 'webmention')
                   .filter(source_cls.status == 'enabled')
                   .get())
    if not self.source:
        # check for a rel-canonical link. Blogger uses these when it serves a
        # post from multiple domains, e.g country TLDs like
        # epeus.blogspot.co.uk vs epeus.blogspot.com.
        # https://github.com/snarfed/bridgy/issues/805
        mf2 = self.fetch_mf2(self.target_url, require_mf2=False)
        if not mf2:
            # fetch_mf2() already wrote the error response
            return

        # tolerate parsed output with no 'rels' key instead of raising KeyError
        canonicals = mf2[1].get('rels', {}).get('canonical', [])
        domains = util.dedupe_urls(
            util.domain_from_link(url) for url in canonicals)
        if domains:
            self.source = (source_cls.query()
                           .filter(source_cls.domains.IN(domains))
                           .filter(source_cls.features == 'webmention')
                           .filter(source_cls.status == 'enabled')
                           .get())

    if not self.source:
        self.error(
            f'Could not find {source_cls.GR_CLASS.NAME} account for {domain}. Is it registered with Bridgy?')

    # check that the target URL path is supported
    target_path = urllib.parse.urlparse(self.target_url).path
    if target_path in ('', '/'):
        msg = 'Home page webmentions are not currently supported.'
        logger.info(msg)
        return {'error': msg}, 202
    for pattern in self.source.PATH_BLOCKLIST:
        if pattern.match(target_path):
            msg = f'{self.source.GR_CLASS.NAME} webmentions are not supported for URL path: {target_path}'
            logger.info(msg)
            return {'error': msg}, 202

    # create BlogWebmention entity, keyed on "<source url> <target url>"
    entity_id = f'{self.source_url} {self.target_url}'
    self.entity = BlogWebmention.get_or_insert(
        entity_id, source=self.source.key,
        redirected_target_urls=redirected_target_urls)
    if self.entity.status == 'complete':
        # TODO: response message saying update isn't supported
        return self.entity.published
    logger.debug(f'BlogWebmention entity: {self.entity.key.urlsafe().decode()}')

    # fetch source page
    fetched = self.fetch_mf2(self.source_url)
    if not fetched:
        return
    resp, mf2 = fetched

    item = self.find_mention_item(mf2.get('items', []))
    if not item:
        self.error(
            f'Could not find target URL {self.target_url} in source page {resp.url}',
            data=mf2, log_exception=False)

    # default author to target domain
    author_name = domain
    author_url = f'http://{domain}/'

    # extract author name and URL from h-card, if any
    props = item['properties']
    author = get_first(props, 'author')
    if author:
        if isinstance(author, str):
            author_name = author
        else:
            author_props = author.get('properties', {})
            author_name = get_first(author_props, 'name')
            author_url = get_first(author_props, 'url')

    # if present, u-url overrides source url
    u_url = get_first(props, 'url')
    if u_url:
        self.entity.u_url = u_url

    # generate content
    content = props['content'][0]  # find_mention_item() guaranteed this is here
    text = (content.get('html') or content.get('value')).strip()
    source_url = self.entity.source_url()
    text += f' <br /> <a href="{source_url}">via {util.domain_from_link(source_url)}</a>'

    # write comment
    try:
        self.entity.published = self.source.create_comment(
            self.target_url, author_name, author_url, text)
    except Exception as e:
        code, body = util.interpret_http_exception(e)
        msg = f'Error: {code}: {e}; {body}'
        if code == '401':
            # the source's credentials were rejected; stop using it
            logger.warning(f'Disabling source due to: {e}', exc_info=True)
            self.source.status = 'disabled'
            self.source.put()
            self.error(msg, status=code, report=self.source.is_beta_user())
        elif code == '404':
            # post is gone
            self.error(msg, status=code, report=False)
        elif util.is_connection_failure(e) or (code and int(code) // 100 == 5):
            # connection failure or 5xx from the remote side
            self.error(msg, status=502, report=False)
        elif code or body:
            self.error(msg, status=code, report=True)
        else:
            # not an HTTP error we can interpret; let it propagate
            raise

    # write results to datastore
    self.entity.status = 'complete'
    self.entity.put()
    return self.entity.published
def post(self, source_short_name):
    """Handle a webmention POST and publish it as a comment on the target blog.

    Reads the required 'source' and 'target' request params, cleans the target
    URL, looks up the enabled, webmention-capable source account for the
    target's domain, fetches the source page's microformats2, and calls
    self.source.create_comment() with the mention's author and content.

    Args:
      source_short_name: key into SOURCES selecting the source class.

    Returns:
      Whatever self.error() returns on the failure paths; None otherwise.
    """
    # '%s', not '%self': the old format string appended a literal "elf".
    logging.info('Params: %s', self.request.params.items())

    # strip fragments from source and target url
    self.source_url = urlparse.urldefrag(
        util.get_required_param(self, 'source'))[0]
    self.target_url = urlparse.urldefrag(
        util.get_required_param(self, 'target'))[0]

    # clean target url (strip utm_* query params)
    self.target_url = util.clean_webmention_url(self.target_url)

    # parse and validate target URL
    domain = util.domain_from_link(self.target_url)
    if not domain:
        # this used to pass an undefined `msg` as the first argument, which
        # raised NameError instead of reporting the error
        return self.error('Could not parse target URL %s' % self.target_url)

    # look up source by domain
    source_cls = SOURCES[source_short_name]
    domain = domain.lower()
    self.source = (source_cls.query()
                   .filter(source_cls.domains == domain)
                   .filter(source_cls.features == 'webmention')
                   .filter(source_cls.status == 'enabled')
                   .get())
    if not self.source:
        return self.error(
            'Could not find %s account for %s. Is it registered with Bridgy?'
            % (source_cls.AS_CLASS.NAME, domain),
            mail=False)

    # create BlogWebmention entity, keyed on "<source url> <target url>"
    entity_id = '%s %s' % (self.source_url, self.target_url)
    self.entity = BlogWebmention.get_or_insert(entity_id,
                                               source=self.source.key)
    if self.entity.status == 'complete':
        # TODO: response message saying update isn't supported
        self.response.write(self.entity.published)
        return
    logging.debug('BlogWebmention entity: %s', self.entity.key.urlsafe())

    # fetch source page
    resp = self.fetch_mf2(self.source_url)
    if not resp:
        return
    self.fetched, data = resp

    item = self.find_mention_item(data)
    if not item:
        return self.error(
            'Could not find target URL %s in source page %s'
            % (self.target_url, self.fetched.url),
            data=data, log_exception=False)

    # default author to target domain
    author_name = domain
    author_url = 'http://%s/' % domain

    # extract author name and URL from h-card, if any
    props = item['properties']
    author = first_value(props, 'author')
    if author:
        if isinstance(author, basestring):
            author_name = author
        else:
            author_props = author.get('properties', {})
            author_name = first_value(author_props, 'name')
            author_url = first_value(author_props, 'url')

    # if present, u-url overrides source url
    u_url = first_value(props, 'url')
    if u_url:
        self.entity.u_url = u_url

    # generate content
    content = props['content'][0]  # find_mention_item() guaranteed this is here
    text = (content.get('html') or content.get('value')).strip()
    text += ' <br /> <a href="%s">via %s</a>' % (
        self.entity.source_url(),
        util.domain_from_link(self.entity.source_url()))

    # write comment
    try:
        self.entity.published = self.source.create_comment(
            self.target_url, author_name, author_url, text)
    except urllib2.HTTPError as e:
        body = e.read()
        logging.error('Error response body: %r', body)
        return self.error('Error: %s; %s' % (e, body), status=e.code)

    # NOTE(review): nothing visible here marks self.entity 'complete' or
    # stores it after a successful create_comment() - confirm that happens
    # elsewhere.
def post(self, source_short_name):
    """Handle a webmention POST and publish it as a comment on the target blog.

    Reads the required 'source' and 'target' request params, resolves the
    target through redirects, and looks up the enabled, webmention-capable
    source account for the target's domain, falling back to the domains of the
    target page's rel-canonical links. It then fetches the source page's
    microformats2, calls self.source.create_comment() with the mention's
    author and content, marks the BlogWebmention entity complete, and writes
    the published result to the response as JSON.

    Args:
      source_short_name: key into models.sources selecting the source class.

    Returns:
      Whatever self.error() returns on the failure paths; None otherwise.
    """
    # '%s', not '%self': the old format string appended a literal "elf".
    logging.info('Params: %s', self.request.params.items())

    # strip fragments from source and target url
    self.source_url = urlparse.urldefrag(
        util.get_required_param(self, 'source'))[0]
    self.target_url = urlparse.urldefrag(
        util.get_required_param(self, 'target'))[0]

    # follow target url through any redirects, strip utm_* query params
    resp = util.follow_redirects(self.target_url)
    redirected_target_urls = [r.url for r in resp.history]
    self.target_url = util.clean_url(resp.url)

    # parse and validate target URL
    domain = util.domain_from_link(self.target_url)
    if not domain:
        return self.error('Could not parse target URL %s' % self.target_url)

    # look up source by domain
    source_cls = models.sources[source_short_name]
    domain = domain.lower()
    self.source = (source_cls.query()
                   .filter(source_cls.domains == domain)
                   .filter(source_cls.features == 'webmention')
                   .filter(source_cls.status == 'enabled')
                   .get())
    if not self.source:
        # check for a rel-canonical link. Blogger uses these when it serves a
        # post from multiple domains, e.g country TLDs like
        # epeus.blogspot.co.uk vs epeus.blogspot.com.
        # https://github.com/snarfed/bridgy/issues/805
        mf2 = self.fetch_mf2(self.target_url, require_mf2=False)
        if not mf2:
            # fetch_mf2() already wrote the error response
            return

        canonicals = mf2[1].get('rels', {}).get('canonical', [])
        domains = util.dedupe_urls(
            util.domain_from_link(url) for url in canonicals)
        if domains:
            self.source = (source_cls.query()
                           .filter(source_cls.domains.IN(domains))
                           .filter(source_cls.features == 'webmention')
                           .filter(source_cls.status == 'enabled')
                           .get())

    if not self.source:
        return self.error(
            'Could not find %s account for %s. Is it registered with Bridgy?'
            % (source_cls.GR_CLASS.NAME, domain))

    # check that the target URL path is supported
    target_path = urlparse.urlparse(self.target_url).path
    if target_path in ('', '/'):
        return self.error(
            'Home page webmentions are not currently supported.', status=202)
    for pattern in self.source.PATH_BLACKLIST:
        if pattern.match(target_path):
            return self.error(
                '%s webmentions are not supported for URL path: %s'
                % (self.source.GR_CLASS.NAME, target_path),
                status=202)

    # create BlogWebmention entity, keyed on "<source url> <target url>"
    entity_id = '%s %s' % (self.source_url, self.target_url)
    self.entity = BlogWebmention.get_or_insert(
        entity_id, source=self.source.key,
        redirected_target_urls=redirected_target_urls)
    if self.entity.status == 'complete':
        # TODO: response message saying update isn't supported
        self.response.write(self.entity.published)
        return
    logging.debug("BlogWebmention entity: '%s'", self.entity.key.urlsafe())

    # fetch source page
    resp = self.fetch_mf2(self.source_url)
    if not resp:
        return
    self.fetched, data = resp

    item = self.find_mention_item(data.get('items', []))
    if not item:
        return self.error(
            'Could not find target URL %s in source page %s'
            % (self.target_url, self.fetched.url),
            data=data, log_exception=False)

    # default author to target domain
    author_name = domain
    author_url = 'http://%s/' % domain

    # extract author name and URL from h-card, if any
    props = item['properties']
    author = first_value(props, 'author')
    if author:
        if isinstance(author, basestring):
            author_name = author
        else:
            author_props = author.get('properties', {})
            author_name = first_value(author_props, 'name')
            author_url = first_value(author_props, 'url')

    # if present, u-url overrides source url
    u_url = first_value(props, 'url')
    if u_url:
        self.entity.u_url = u_url

    # generate content
    content = props['content'][0]  # find_mention_item() guaranteed this is here
    text = (content.get('html') or content.get('value')).strip()
    source_url = self.entity.source_url()
    text += ' <br /> <a href="%s">via %s</a>' % (
        source_url, util.domain_from_link(source_url))

    # write comment
    try:
        self.entity.published = self.source.create_comment(
            self.target_url, author_name, author_url, text)
    except Exception as e:
        code, body = util.interpret_http_exception(e)
        msg = 'Error: %s %s; %s' % (code, e, body)
        if code == '401':
            # the source's credentials were rejected; stop using it
            logging.warning('Disabling source due to: %s', e, exc_info=True)
            self.source.status = 'disabled'
            self.source.put()
            return self.error(msg, status=code,
                              mail=self.source.is_beta_user())
        elif code == '404':
            # post is gone
            return self.error(msg, status=code, mail=False)
        elif util.is_connection_failure(e) or (code and int(code) // 100 == 5):
            # connection failure or 5xx from the remote side
            return self.error(msg, status=util.ERROR_HTTP_RETURN_CODE,
                              mail=False)
        elif code or body:
            return self.error(msg, status=code, mail=True)
        else:
            # not an HTTP error we can interpret; let it propagate
            raise

    # write results to datastore
    self.entity.status = 'complete'
    self.entity.put()
    self.response.write(json.dumps(self.entity.published))
def post(self, source_short_name):
    """Handle a webmention POST and publish it as a comment on the target blog.

    Reads the required 'source' and 'target' request params, resolves the
    target through redirects, looks up the enabled, webmention-capable source
    account for the target's domain, fetches the source page's microformats2,
    and calls self.source.create_comment() with the mention's author and
    content. On success it marks the BlogWebmention entity complete and writes
    the published result to the response as JSON.

    Args:
      source_short_name: key into models.sources selecting the source class.

    Returns:
      Whatever self.error() returns on the failure paths; None otherwise.
    """
    # '%s', not '%self': the old format string appended a literal "elf".
    logging.info('Params: %s', self.request.params.items())

    # strip fragments from source and target url
    self.source_url = urlparse.urldefrag(
        util.get_required_param(self, 'source'))[0]
    self.target_url = urlparse.urldefrag(
        util.get_required_param(self, 'target'))[0]

    # follow target url through any redirects, strip utm_* query params
    resp = util.follow_redirects(self.target_url)
    redirected_target_urls = [r.url for r in resp.history]
    self.target_url = util.clean_url(resp.url)

    # parse and validate target URL
    domain = util.domain_from_link(self.target_url)
    if not domain:
        return self.error('Could not parse target URL %s' % self.target_url)

    # look up source by domain
    source_cls = models.sources[source_short_name]
    domain = domain.lower()
    self.source = (source_cls.query()
                   .filter(source_cls.domains == domain)
                   .filter(source_cls.features == 'webmention')
                   .filter(source_cls.status == 'enabled')
                   .get())
    if not self.source:
        return self.error(
            'Could not find %s account for %s. Is it registered with Bridgy?'
            % (source_cls.GR_CLASS.NAME, domain))

    if urlparse.urlparse(self.target_url).path in ('', '/'):
        return self.error(
            'Home page webmentions are not currently supported.')

    # create BlogWebmention entity, keyed on "<source url> <target url>"
    entity_id = u'%s %s' % (self.source_url, self.target_url)
    self.entity = BlogWebmention.get_or_insert(
        entity_id, source=self.source.key,
        redirected_target_urls=redirected_target_urls)
    if self.entity.status == 'complete':
        # TODO: response message saying update isn't supported
        self.response.write(self.entity.published)
        return
    logging.debug("BlogWebmention entity: '%s'", self.entity.key.urlsafe())

    # fetch source page
    resp = self.fetch_mf2(self.source_url)
    if not resp:
        return
    self.fetched, data = resp

    item = self.find_mention_item(data)
    if not item:
        return self.error(
            'Could not find target URL %s in source page %s'
            % (self.target_url, self.fetched.url),
            data=data, log_exception=False)

    # default author to target domain
    author_name = domain
    author_url = 'http://%s/' % domain

    # extract author name and URL from h-card, if any
    props = item['properties']
    author = first_value(props, 'author')
    if author:
        if isinstance(author, basestring):
            author_name = author
        else:
            author_props = author.get('properties', {})
            author_name = first_value(author_props, 'name')
            author_url = first_value(author_props, 'url')

    # if present, u-url overrides source url
    u_url = first_value(props, 'url')
    if u_url:
        self.entity.u_url = u_url

    # generate content
    content = props['content'][0]  # find_mention_item() guaranteed this is here
    text = (content.get('html') or content.get('value')).strip()
    source_url = self.entity.source_url()
    text += ' <br /> <a href="%s">via %s</a>' % (
        source_url, util.domain_from_link(source_url))

    # write comment
    try:
        self.entity.published = self.source.create_comment(
            self.target_url, author_name, author_url, text)
    except Exception as e:
        code, body = util.interpret_http_exception(e)
        msg = 'Error: %s %s; %s' % (code, e, body)
        if code == '401':
            # the source's credentials were rejected; stop using it
            logging.warning('Disabling source due to: %s', e, exc_info=True)
            self.source.status = 'disabled'
            self.source.put()
            return self.error(msg, status=code,
                              mail=self.source.is_beta_user())
        elif code == '404':
            # post is gone
            return self.error(msg, status=code, mail=False)
        elif util.is_connection_failure(e) or (code and int(code) // 100 == 5):
            # connection failure or 5xx from the remote side
            return self.error(msg, status=util.ERROR_HTTP_RETURN_CODE,
                              mail=False)
        elif code or body:
            return self.error(msg, status=code, mail=True)
        else:
            # not an HTTP error we can interpret; let it propagate
            raise

    # write results to datastore
    self.entity.status = 'complete'
    self.entity.put()
    self.response.write(json.dumps(self.entity.published))