def test_webmention_endpoint_cache_key(self):
    """Check the cache key generated for several representative URLs.

    Each case pairs the expected key with the URL that is passed to
    ``util.webmention_endpoint_cache_key``.
    """
    cases = (
        ('W http foo.com', 'http://foo.com/x'),
        ('W https foo.com', 'https://foo.com/x/y'),
        ('W http foo.com /', 'http://foo.com'),
        ('W http foo.com /', 'http://foo.com/'),
    )
    for expected, url in cases:
        got = util.webmention_endpoint_cache_key(url)
        # assertEqual rather than the deprecated assertEquals alias, which
        # was removed in Python 3.12. The (url, got) tuple is the failure
        # message, so a failing case can be identified.
        self.assertEqual(expected, got, (url, got))
def restart(self):
    """Reset this entity to 'new' and add a propagate task for it.

    All target URLs, whichever list they are currently in, are merged back
    into ``unsent`` (de-duplicated with ``util.dedupe_urls``) and the other
    target lists are emptied. Cached webmention endpoints for those URLs
    are deleted from memcache, then the entity is stored and the task is
    added inside a single ndb transaction.
    """
    self.status = 'new'

    every_target = (self.unsent + self.sent + self.error + self.failed +
                    self.skipped)
    self.unsent = util.dedupe_urls(every_target)
    self.sent = self.error = self.failed = self.skipped = []

    # clear any cached webmention endpoints
    stale_keys = (util.webmention_endpoint_cache_key(url)
                  for url in self.unsent)
    memcache.delete_multi(stale_keys)

    @ndb.transactional
    def save_and_enqueue():
        self.put()
        self.add_task(transactional=True)

    save_and_enqueue()
def restart(self):
    """Reset this entity to 'new' and add a propagate task for it.

    All target URLs are merged back into ``unsent`` (de-duplicated with
    ``util.dedupe_urls``), the other target lists are emptied, and the
    cached webmention endpoints for those URLs are evicted while holding
    ``util.webmention_endpoint_cache_lock``.
    """
    self.status = 'new'

    every_target = (self.unsent + self.sent + self.error + self.failed +
                    self.skipped)
    self.unsent = util.dedupe_urls(every_target)
    self.sent = self.error = self.failed = self.skipped = []

    # clear any cached webmention endpoints
    with util.webmention_endpoint_cache_lock:
        for target in self.unsent:
            key = util.webmention_endpoint_cache_key(target)
            util.webmention_endpoint_cache.pop(key, None)

    # this datastore put and task add should be transactional, but Cloud Tasks
    # doesn't support that :(
    # https://cloud.google.com/appengine/docs/standard/python/taskqueue/push/migrating-push-queues#features-not-available
    self.put()
    self.add_task()
def restart(self):
    """Move status and targets back to 'new' and queue a propagate task.

    The ``sent``, ``error``, ``failed`` and ``skipped`` URLs are folded
    into ``unsent`` (de-duplicated with ``util.dedupe_urls``). Memcache
    entries for the webmention endpoints of those URLs are deleted, and the
    entity put and task add run together in one ndb transaction.
    """
    self.status = 'new'

    merged = (self.unsent + self.sent + self.error + self.failed +
              self.skipped)
    self.unsent = util.dedupe_urls(merged)
    self.sent = self.error = self.failed = self.skipped = []

    # clear any cached webmention endpoints
    endpoint_keys = (util.webmention_endpoint_cache_key(url)
                     for url in self.unsent)
    memcache.delete_multi(endpoint_keys)

    @ndb.transactional
    def store_and_add_task():
        self.put()
        self.add_task(transactional=True)

    store_and_add_task()
def post(self):
    """Retry webmention propagation for the entity named by the ``key`` param.

    Loads the entity from its urlsafe key, moves all of its target URLs
    back into ``unsent``, clears the cached webmention endpoints for those
    URLs, adds a propagate task matching the entity's kind, and finally
    redirects to the ``redirect_to`` request parameter or, if that is
    empty, to the source's Bridgy URL.

    Aborts with HTTP 400 if the key does not resolve to an entity or if the
    entity's kind is neither 'Response' nor 'BlogPost'.
    """
    entity = ndb.Key(urlsafe=util.get_required_param(self, 'key')).get()
    if not entity:
        self.abort(400, 'key not found')

    # start all target URLs over
    if entity.status == 'complete':
        entity.status = 'new'

    targets = set(entity.unsent + entity.sent + entity.skipped +
                  entity.error + entity.failed)
    entity.sent = entity.skipped = entity.error = entity.failed = []

    # run OPD to pick up any new SyndicatedPosts. note that we don't refetch
    # their h-feed, so if they've added a syndication URL since we last
    # crawled, retry won't make us pick it up. background in #524.
    if entity.key.kind() == 'Response':
        source = entity.source.get()
        for activity in [json.loads(a) for a in entity.activities_json]:
            originals, mentions = original_post_discovery.discover(
                source, activity, fetch_hfeed=False,
                include_redirect_sources=False)
            targets |= original_post_discovery.targets_for_response(
                json.loads(entity.response_json), originals=originals,
                mentions=mentions)

    entity.unsent = targets
    entity.put()

    # clear any cached webmention endpoints
    memcache.delete_multi(
        util.webmention_endpoint_cache_key(url) for url in targets)

    if entity.key.kind() == 'Response':
        util.add_propagate_task(entity)
    elif entity.key.kind() == 'BlogPost':
        util.add_propagate_blogpost_task(entity)
    else:
        # Interpolate the kind into the message here: abort() does not do
        # %-formatting, and extra positional arguments are handed on to the
        # HTTP exception rather than being substituted into the text.
        self.abort(400, 'Unexpected key kind %s' % entity.key.kind())

    self.messages.add('Retrying. Refresh in a minute to see the results!')
    self.redirect(self.request.get('redirect_to').encode('utf-8') or
                  entity.source.get().bridgy_url(self))
def restart(self):
    """Move status and targets back to 'new' and queue a propagate task.

    The ``sent``, ``error``, ``failed`` and ``skipped`` URLs are folded
    into ``unsent`` (de-duplicated with ``util.dedupe_urls``), and the
    cached webmention endpoints for those URLs are evicted while holding
    ``util.webmention_endpoint_cache_lock``.
    """
    self.status = 'new'

    merged = (self.unsent + self.sent + self.error + self.failed +
              self.skipped)
    self.unsent = util.dedupe_urls(merged)
    self.sent = self.error = self.failed = self.skipped = []

    # clear any cached webmention endpoints
    with util.webmention_endpoint_cache_lock:
        for target in self.unsent:
            util.webmention_endpoint_cache.pop(
                util.webmention_endpoint_cache_key(target), None)

    # this datastore put and task add should be transactional, but Cloud Tasks
    # doesn't support that :(
    # https://cloud.google.com/appengine/docs/standard/python/taskqueue/push/migrating-push-queues#features-not-available
    # https://github.com/googleapis/python-tasks/issues/26
    #
    # The new "bundled services" bridge for the old App Engine APIs still
    # supports them, but only because that's literally on the old backends,
    # which seems like a dead end.
    # https://groups.google.com/g/google-appengine/c/22BKInlWty0/m/05ObNEdsAgAJ
    self.put()
    self.add_task()
def do_send_webmentions(self):
    """Attempt a webmention to every pending target URL of ``self.entity``.

    The entity's ``unsent``, ``error`` and ``failed`` URLs are re-validated
    with ``util.get_webmention_target`` and then tried one at a time. Each
    target ends up appended to ``sent``, ``skipped``, ``failed`` or
    ``error`` depending on the outcome. Endpoint discovery results are
    cached in memcache under ``util.webmention_endpoint_cache_key(target)``.
    """
    urls = self.entity.unsent + self.entity.error + self.entity.failed
    unsent = set()
    self.entity.error = []
    self.entity.failed = []

    for orig_url in urls:
        # recheck the url here since the checks may have failed during the
        # poll or streaming add.
        url, domain, ok = util.get_webmention_target(orig_url)
        if ok:
            if len(url) <= _MAX_STRING_LENGTH:
                unsent.add(url)
            else:
                logging.warning('Giving up on target URL over %s chars! %s',
                                _MAX_STRING_LENGTH, url)
                self.entity.failed.append(orig_url)
    self.entity.unsent = sorted(unsent)

    while self.entity.unsent:
        target = self.entity.unsent.pop(0)
        source_url = self.source_url(target)
        logging.info('Webmention from %s to %s', source_url, target)

        # see if we've cached webmention discovery for this domain. the cache
        # value is a string URL endpoint if discovery succeeded, a
        # WebmentionSend error dict if it failed (semi-)permanently, or None.
        cache_key = util.webmention_endpoint_cache_key(target)
        cached = memcache.get(cache_key)
        if cached:
            logging.info('Using cached webmention endpoint %r: %s',
                         cache_key, cached)

        # send! and handle response or error
        error = None
        if isinstance(cached, dict):
            error = cached
        else:
            mention = send.WebmentionSend(source_url, target, endpoint=cached)
            logging.info('Sending...')
            try:
                if not mention.send(timeout=999,
                                    headers=util.USER_AGENT_HEADER):
                    error = mention.error
            except BaseException as e:
                logging.warning('', exc_info=True)
                # Default to None so a mention that never recorded an error
                # cannot raise AttributeError here and hide the real
                # exception; the fallback below then builds the error dict.
                error = getattr(mention, 'error', None)
                if not error:
                    error = ({'code': 'BAD_TARGET_URL', 'http_status': 499}
                             if 'DNS lookup failed for URL:' in str(e)
                             else {'code': 'EXCEPTION'})

            if not cached:
                # cache the error dict for these two codes, otherwise the
                # endpoint that the mention object ended up with.
                val = (error
                       if error and error['code'] in ('NO_ENDPOINT',
                                                      'BAD_TARGET_URL')
                       else mention.receiver_endpoint)
                memcache.set(cache_key, val,
                             time=WEBMENTION_DISCOVERY_CACHE_TIME)

        if error is None:
            logging.info('Sent! %s', mention.response)
            self.record_source_webmention(mention)
            self.entity.sent.append(target)
        else:
            code = error['code']
            status = error.get('http_status', 0)
            if (code == 'NO_ENDPOINT' or
                    (code == 'BAD_TARGET_URL' and status == 204)):
                # 204 is No Content
                logging.info('Giving up this target. %s', error)
                self.entity.skipped.append(target)
            elif status // 100 == 4:
                # Give up on 4XX errors; we don't expect later retries to
                # succeed.
                logging.info('Giving up this target. %s', error)
                self.entity.failed.append(target)
            else:
                self.fail('Error sending to endpoint: %s' % error)
                self.entity.error.append(target)

        if target in self.entity.unsent:
            self.entity.unsent.remove(target)
def do_send_webmentions(self):
    """Attempt a webmention to every pending target URL of ``self.entity``.

    The entity's ``unsent``, ``error`` and ``failed`` URLs are re-validated
    with ``util.get_webmention_target`` and then tried one at a time. Each
    target ends up appended to ``sent``, ``skipped``, ``failed`` or
    ``error`` depending on the outcome. Endpoint discovery results are
    stored in ``util.webmention_endpoint_cache``. When the loop is done the
    task is released with 'error' if any target landed in ``error``;
    otherwise it is completed.
    """
    urls = self.entity.unsent + self.entity.error + self.entity.failed
    unsent = set()
    self.entity.error = []
    self.entity.failed = []

    for orig_url in urls:
        # recheck the url here since the checks may have failed during the
        # poll or streaming add.
        url, domain, ok = util.get_webmention_target(orig_url)
        if ok:
            if len(url) <= _MAX_STRING_LENGTH:
                unsent.add(url)
            else:
                logging.info('Giving up on target URL over %s chars! %s',
                             _MAX_STRING_LENGTH, url)
                self.entity.failed.append(orig_url)
    self.entity.unsent = sorted(unsent)

    while self.entity.unsent:
        target = self.entity.unsent.pop(0)
        source_url = self.source_url(target)
        logging.info('Webmention from %s to %s', source_url, target)

        # see if we've cached webmention discovery for this domain. the cache
        # value is a string URL endpoint if discovery succeeded, a
        # WebmentionSend error dict if it failed (semi-)permanently, or None.
        cache_key = util.webmention_endpoint_cache_key(target)
        cached = util.webmention_endpoint_cache.get(cache_key)
        if cached:
            logging.info('Using cached webmention endpoint %r: %s',
                         cache_key, cached)

        # send! and handle response or error
        error = None
        if isinstance(cached, dict):
            error = cached
        else:
            mention = send.WebmentionSend(source_url, target, endpoint=cached)
            headers = util.request_headers(source=self.source)
            logging.info('Sending...')
            try:
                if not mention.send(timeout=999, headers=headers):
                    error = mention.error
            except BaseException as e:
                # exc_info (not stack_info) so that the exception and its
                # traceback are what gets logged; the message is empty.
                logging.info('', exc_info=True)
                # Default to None so a mention that never recorded an error
                # cannot raise AttributeError here and hide the real
                # exception; the fallback below then builds the error dict.
                error = getattr(mention, 'error', None)
                if not error:
                    error = ({'code': 'BAD_TARGET_URL', 'http_status': 499}
                             if 'DNS lookup failed for URL:' in str(e)
                             else {'code': 'EXCEPTION'})

        error_code = error['code'] if error else None
        if error_code != 'BAD_TARGET_URL' and not cached:
            # cache the NO_ENDPOINT error dict, otherwise the endpoint that
            # the mention object ended up with.
            val = (error if error_code == 'NO_ENDPOINT'
                   else mention.receiver_endpoint)
            with util.webmention_endpoint_cache_lock:
                util.webmention_endpoint_cache[cache_key] = val

        if error is None:
            logging.info('Sent! %s', mention.response)
            self.record_source_webmention(mention)
            self.entity.sent.append(target)
        else:
            status = error.get('http_status', 0)
            if (error_code == 'NO_ENDPOINT' or
                    (error_code == 'BAD_TARGET_URL' and status == 204)):
                # No Content
                logging.info('Giving up this target. %s', error)
                self.entity.skipped.append(target)
            elif status // 100 == 4:
                # Give up on 4XX errors; we don't expect later retries to
                # succeed.
                logging.info('Giving up this target. %s', error)
                self.entity.failed.append(target)
            else:
                self.fail('Error sending to endpoint: %s' % error,
                          level=logging.INFO)
                self.entity.error.append(target)

        if target in self.entity.unsent:
            self.entity.unsent.remove(target)

    if self.entity.error:
        logging.info('Propagate task failed')
        self.release('error')
    else:
        self.complete()
def do_send_webmentions(self):
    """Attempt a webmention to every pending target URL of ``self.entity``.

    The entity's ``unsent``, ``error`` and ``failed`` URLs are re-validated
    with ``util.get_webmention_target`` and then tried one at a time. Each
    target ends up appended to ``sent``, ``skipped``, ``failed`` or
    ``error`` depending on the outcome. Endpoint discovery results are
    cached in memcache under ``util.webmention_endpoint_cache_key(target)``.
    """
    urls = self.entity.unsent + self.entity.error + self.entity.failed
    unsent = set()
    self.entity.error = []
    self.entity.failed = []

    for orig_url in urls:
        # recheck the url here since the checks may have failed during the
        # poll or streaming add.
        url, domain, ok = util.get_webmention_target(orig_url)
        if ok:
            if len(url) <= _MAX_STRING_LENGTH:
                unsent.add(url)
            else:
                logging.warning("Giving up on target URL over %s chars! %s",
                                _MAX_STRING_LENGTH, url)
                self.entity.failed.append(orig_url)
    self.entity.unsent = sorted(unsent)

    while self.entity.unsent:
        target = self.entity.unsent.pop(0)
        source_url = self.source_url(target)
        logging.info("Webmention from %s to %s", source_url, target)

        # see if we've cached webmention discovery for this domain. the cache
        # value is a string URL endpoint if discovery succeeded, a
        # WebmentionSend error dict if it failed (semi-)permanently, or None.
        cache_key = util.webmention_endpoint_cache_key(target)
        cached = memcache.get(cache_key)
        if cached:
            logging.info("Using cached webmention endpoint %r: %s",
                         cache_key, cached)

        # send! and handle response or error
        error = None
        if isinstance(cached, dict):
            error = cached
        else:
            mention = send.WebmentionSend(source_url, target, endpoint=cached)
            logging.info("Sending...")
            try:
                if not mention.send(timeout=999,
                                    headers=util.REQUEST_HEADERS):
                    error = mention.error
            except BaseException as e:
                logging.warning("", exc_info=True)
                # Default to None so a mention that never recorded an error
                # cannot raise AttributeError here and hide the real
                # exception; the fallback below then builds the error dict.
                error = getattr(mention, "error", None)
                if not error:
                    error = (
                        {"code": "BAD_TARGET_URL", "http_status": 499}
                        if "DNS lookup failed for URL:" in str(e)
                        else {"code": "EXCEPTION"}
                    )

        error_code = error["code"] if error else None
        if error_code != "BAD_TARGET_URL" and not cached:
            # cache the NO_ENDPOINT error dict, otherwise the endpoint that
            # the mention object ended up with.
            val = (error if error_code == "NO_ENDPOINT"
                   else mention.receiver_endpoint)
            memcache.set(cache_key, val, time=WEBMENTION_DISCOVERY_CACHE_TIME)

        if error is None:
            logging.info("Sent! %s", mention.response)
            self.record_source_webmention(mention)
            self.entity.sent.append(target)
        else:
            status = error.get("http_status", 0)
            if error_code == "NO_ENDPOINT" or (
                error_code == "BAD_TARGET_URL" and status == 204
            ):
                # No Content
                logging.info("Giving up this target. %s", error)
                self.entity.skipped.append(target)
            elif status // 100 == 4:
                # Give up on 4XX errors; we don't expect later retries to
                # succeed.
                logging.info("Giving up this target. %s", error)
                self.entity.failed.append(target)
            else:
                self.fail("Error sending to endpoint: %s" % error)
                self.entity.error.append(target)

        if target in self.entity.unsent:
            self.entity.unsent.remove(target)
def do_send_webmentions(self):
    """Discover endpoints for and send webmentions to all pending targets.

    Pending targets are the entity's ``unsent``, ``error`` and ``failed``
    URLs, re-validated with ``util.get_webmention_target``. Each one is
    then filed under ``sent``, ``skipped``, ``failed`` or ``error``.
    Afterwards the task is released with 'error' if anything landed in
    ``error``; otherwise it is completed.
    """
    pending = self.entity.unsent + self.entity.error + self.entity.failed
    self.entity.error = []
    self.entity.failed = []

    valid_targets = set()
    for original in pending:
        # recheck the url here since the checks may have failed during the
        # poll or streaming add.
        resolved, _domain, usable = util.get_webmention_target(original)
        if not usable:
            continue
        if len(resolved) > _MAX_STRING_LENGTH:
            logging.info('Giving up on target URL over %s chars! %s',
                         _MAX_STRING_LENGTH, resolved)
            self.entity.failed.append(original)
            continue
        valid_targets.add(resolved)
    self.entity.unsent = sorted(valid_targets)

    while self.entity.unsent:
        target = self.entity.unsent.pop(0)
        source_url = self.source_url(target)
        logging.info('Webmention from %s to %s', source_url, target)

        # see if we've cached webmention discovery for this domain. the cache
        # value is a string URL endpoint if discovery succeeded, NO_ENDPOINT
        # if no endpoint was found.
        cache_key = util.webmention_endpoint_cache_key(target)
        endpoint = util.webmention_endpoint_cache.get(cache_key)
        if endpoint:
            logging.info('Using cached webmention endpoint %r: %s',
                         cache_key, endpoint)

        # send! and handle response or error
        resp = None
        try:
            headers = util.request_headers(source=self.source)
            if not endpoint:
                endpoint, resp = webmention.discover(target, headers=headers)
                with util.webmention_endpoint_cache_lock:
                    util.webmention_endpoint_cache[cache_key] = (
                        endpoint or NO_ENDPOINT)

            if not endpoint or endpoint == NO_ENDPOINT:
                logging.info('Giving up this target.')
                self.entity.skipped.append(target)
            else:
                logging.info('Sending...')
                resp = webmention.send(endpoint, source_url, target,
                                       timeout=999, headers=headers)
                logging.info('Sent! %s', resp)
                self.record_source_webmention(endpoint, target)
                self.entity.sent.append(target)

        except ValueError:
            logging.info('Bad URL; giving up this target.')
            self.entity.skipped.append(target)

        except BaseException as e:
            logging.info('', exc_info=True)
            # Give up on 4XX and DNS errors; we don't expect retries to
            # succeed.
            code, _ = util.interpret_http_exception(e)
            give_up = ((code and code.startswith('4')) or
                       'DNS lookup failed' in str(e))
            if give_up:
                logging.info('Giving up this target.')
                self.entity.failed.append(target)
            else:
                self.fail(f'Error sending to endpoint: {resp}',
                          level=logging.INFO)
                self.entity.error.append(target)

        if target in self.entity.unsent:
            self.entity.unsent.remove(target)

    if not self.entity.error:
        self.complete()
    else:
        logging.info('Propagate task failed')
        self.release('error')