def post(self):
    """Run one poll task for a source and queue the following one.

    Reads ``source_key`` and ``last_polled`` from the request params. The
    task is dropped when the source is missing, disabled, or lacks the
    ``listen`` feature, and also when ``last_polled`` does not match the
    value stored on the source (treated as a duplicate task).

    Raises:
      anything raised by ``self.do_post()`` or ``util.add_poll_task()`` other
      than ``models.DisableSource``; the source's status is set to
      ``'error'`` before re-raising.
    """
    params = self.request.params
    logging.debug('Params: %s', params)

    source = ndb.Key(urlsafe=params['source_key']).get()
    pollable = (source and source.status != 'disabled'
                and 'listen' in source.features)
    if not pollable:
        logging.error('Source not found or disabled. Dropping task.')
        return

    logging.info('Source: %s %s, %s', source.label(), source.key.string_id(),
                 source.bridgy_url(self))

    task_stamp = params['last_polled']
    stored_stamp = source.last_polled.strftime(util.POLL_TASK_DATETIME_FORMAT)
    if task_stamp != stored_stamp:
        logging.warning('duplicate poll task! deferring to the other task.')
        return

    source.last_poll_attempt = now_fn()

    # Jitter the next task's ETA by +/- 20% so that tasks spread out instead
    # of all firing at once (thundering herd).
    period_secs = source.poll_period().total_seconds()
    task_countdown = period_secs * random.uniform(.8, 1.2)

    try:
        self.do_post(source)
        util.add_poll_task(source, countdown=task_countdown)
    except models.DisableSource:
        # The user deauthorized the bridgy app, so disable this source. The
        # exception is swallowed so the task completes and isn't retried.
        source.status = 'disabled'
        logging.warning('Disabling source!')
    except:
        source.status = 'error'
        raise
    finally:
        # Collecting here might help avoid hitting the instance memory limit.
        gc.collect()
        # Always save, so status changes made above are persisted.
        source.put()
def create_new(cls, user_url=None, **kwargs):
    """Creates and saves a new :class:`Source` and adds a poll task for it.

    If an entity with the same key already exists, its features are merged
    into the new source and a fixed set of bookkeeping fields is copied over
    from it before saving.

    Args:
      user_url: a string, optional. if provided, supersedes other urls when
        determining the author_url
      **kwargs: passed to :meth:`new()`

    Returns:
      newly created :class:`Source`, or None if :meth:`new()` returned None
    """
    source = cls.new(**kwargs)
    if source is None:
        return None

    # Pick the feature for the confirmation message *before* merging with any
    # existing entity below. The merge goes through set(), so afterwards
    # features[0] is no longer guaranteed to be the feature requested now.
    feature = source.features[0] if source.features else 'listen'

    if not source.domain_urls:  # defer to the source if it already set this
        auth_entity = kwargs.get('auth_entity')
        if auth_entity and hasattr(auth_entity, 'user_json'):
            source.domain_urls, source.domains = source.urls_and_domains(
                auth_entity, user_url)
    logger.debug(f'URLs/domains: {source.domain_urls} {source.domains}')

    # check if this source already exists
    existing = source.key.get()
    if existing:
        # merge some fields
        source.features = set(source.features + existing.features)
        source.populate(**existing.to_dict(include=(
            'created', 'last_hfeed_refetch', 'last_poll_attempt',
            'last_polled', 'last_syndication_url', 'last_webmention_sent',
            'superfeedr_secret', 'webmention_endpoint')))
        verb = 'Updated'
    else:
        verb = 'Added'

    author_urls = source.get_author_urls()
    link = ('http://indiewebify.me/send-webmentions/?url=' + author_urls[0]
            if author_urls else 'http://indiewebify.me/#send-webmentions')

    if feature == 'publish':
        next_step = 'Try previewing a post from your web site!'
    elif feature == 'webmention':
        next_step = '<a href="%s">Try a webmention!</a>' % link
    else:
        next_step = "Refresh in a minute to see what we've found!"
    blurb = '%s %s. %s' % (verb, source.label(), next_step)
    logger.info(f'{blurb} {source.bridgy_url()}')

    source.verify()
    if source.verified():
        # only show the confirmation once the source reports itself verified
        flash(blurb)

    source.put()

    if 'webmention' in source.features:
        superfeedr.subscribe(source)

    if 'listen' in source.features and source.AUTO_POLL:
        # one task to poll right away, one more with the default scheduling
        util.add_poll_task(source, now=True)
        util.add_poll_task(source)

    return source
def post(self):
    """Queue an immediate poll for the source named by the ``key`` param.

    Aborts with HTTP 400 if no entity exists for that key; otherwise adds a
    message for the user and redirects to the source's bridgy URL.
    """
    urlsafe_key = util.get_required_param(self, 'key')
    source = ndb.Key(urlsafe=urlsafe_key).get()
    if not source:
        self.abort(400, 'source not found')

    util.add_poll_task(source, now=True)
    self.messages.add("Polling now. Refresh in a minute to see what's new!")

    destination = source.bridgy_url(self)
    self.redirect(destination)
def post(self, *path_args):
    """Run one poll task for a source, then queue the next poll task.

    Reads ``source_key`` and ``last_polled`` from the request params. The
    task is dropped when the source is missing, disabled, or lacks the
    ``listen`` feature, and also when ``last_polled`` does not match the
    value stored on the source (treated as a duplicate task).

    Args:
      *path_args: unused here

    Raises:
      any exception from ``self.poll()`` that is neither a disable condition
      nor a rate limit; ``poll_status`` is set to ``'error'`` and the pending
      updates are stored before it propagates.
    """
    # Set before the params are read below.
    # NOTE(review): presumably needed so the form body gets parsed - confirm.
    self.request.headers['Content-Type'] = 'application/x-www-form-urlencoded'
    logging.debug('Params: %s', list(self.request.params.items()))

    key = self.request.params['source_key']
    source = self.source = ndb.Key(urlsafe=key).get()
    if not source or source.status == 'disabled' or 'listen' not in source.features:
        logging.error('Source not found or disabled. Dropping task.')
        return
    logging.info('Source: %s %s, %s', source.label(), source.key.string_id(),
                 source.bridgy_url(self))

    last_polled = self.request.params['last_polled']
    if last_polled != source.last_polled.strftime(util.POLL_TASK_DATETIME_FORMAT):
        logging.warning('duplicate poll task! deferring to the other task.')
        return

    logging.info('Last poll: %s', self._last_poll_url(source))

    # mark this source as polling
    source.updates = {
        'poll_status': 'polling',
        'last_poll_attempt': util.now_fn(),
        'rate_limited': False,
    }
    source = models.Source.put_updates(source)

    source.updates = {}
    try:
        self.poll(source)
    except Exception as e:
        source.updates['poll_status'] = 'error'
        # Only the status code decides what happens; the body is not needed.
        code, _ = util.interpret_http_exception(e)
        if code in source.DISABLE_HTTP_CODES or isinstance(e, models.DisableSource):
            # the user deauthorized the bridgy app, so disable this source.
            # let the task complete successfully so that it's not retried.
            logging.warning('Disabling source due to: %s', e, stack_info=True)
            source.updates.update({
                'status': 'disabled',
                'poll_status': 'ok',
            })
        elif code in source.RATE_LIMIT_HTTP_CODES:
            logging.info('Rate limited. Marking as error and finishing. %s', e)
            source.updates['rate_limited'] = True
        else:
            raise
    finally:
        # store whatever updates were collected, on success and on failure
        source = models.Source.put_updates(source)

    util.add_poll_task(source)

    # feeble attempt to avoid hitting the instance memory limit
    source = None
    gc.collect()
def get(self):
    """Add poll tasks for enabled ``listen`` sources that look stale.

    A source counts as stale when the time since its ``last_poll_attempt``
    exceeds the larger of twice its poll period and two hours.
    """
    now = datetime.datetime.now()
    minimum_age = datetime.timedelta(hours=2)

    listening = Source.features == 'listen'
    enabled = Source.status == 'enabled'
    queries = [cls.query(listening, enabled) for cls in models.sources.values()]

    for source in itertools.chain.from_iterable(queries):
        age = now - source.last_poll_attempt
        threshold = max(source.poll_period() * 2, minimum_age)
        if age <= threshold:
            continue
        logging.info('%s last polled %s ago. Adding new poll task.',
                     source.bridgy_url(self), age)
        util.add_poll_task(source)
def get(self):
    """Add poll tasks for ``listen`` sources whose last poll is too old.

    Considers sources with status ``enabled`` or ``error``. A source gets a
    new task when the time since ``last_polled`` exceeds twice its poll
    period.
    """
    now = datetime.datetime.now()

    listening = Source.features == 'listen'
    active = Source.status.IN(('enabled', 'error'))
    queries = [cls.query(listening, active) for cls in handlers.SOURCES.values()]

    for source in itertools.chain.from_iterable(queries):
        age = now - source.last_polled
        allowed = source.poll_period() * 2
        if age <= allowed:
            continue
        logging.info('%s last polled %s ago. Adding new poll task.',
                     source.bridgy_url(self), age)
        util.add_poll_task(source)
def post(self, *path_args):
    """Run one poll task for a source, then queue the next one with jitter.

    Reads ``source_key`` and ``last_polled`` from the request params. The
    task is dropped when the source is missing, disabled, or lacks the
    ``listen`` feature, and also when ``last_polled`` does not match the
    value stored on the source (treated as a duplicate task).

    Args:
      *path_args: unused here

    Raises:
      anything raised by ``self.poll()`` other than ``models.DisableSource``;
      ``poll_status`` is set to ``'error'`` and the pending updates are
      stored before it propagates.
    """
    params = self.request.params
    logging.debug('Params: %s', params)

    source = ndb.Key(urlsafe=params['source_key']).get()
    pollable = (source and source.status != 'disabled'
                and 'listen' in source.features)
    if not pollable:
        logging.error('Source not found or disabled. Dropping task.')
        return

    logging.info('Source: %s %s, %s', source.label(), source.key.string_id(),
                 source.bridgy_url(self))

    task_stamp = params['last_polled']
    stored_stamp = source.last_polled.strftime(util.POLL_TASK_DATETIME_FORMAT)
    if task_stamp != stored_stamp:
        logging.warning('duplicate poll task! deferring to the other task.')
        return

    host = self.request.host_url
    attempt_epoch = calendar.timegm(source.last_poll_attempt.utctimetuple())
    logging.info('Last poll: %s/log?start_time=%s&key=%s',
                 host, attempt_epoch, source.key.urlsafe())

    # Flag the source as being polled right now.
    source.updates = {
        'poll_status': 'polling',
        'last_poll_attempt': util.now_fn(),
    }
    source = models.Source.put_updates(source)

    source.updates = {}
    try:
        self.poll(source)
    except models.DisableSource:
        # The user deauthorized the bridgy app, so disable this source. The
        # exception is swallowed so the task completes and isn't retried.
        source.updates['status'] = 'disabled'
        logging.warning('Disabling source!')
    except:
        source.updates['poll_status'] = 'error'
        raise
    finally:
        source = models.Source.put_updates(source)

    # Queue the next poll. Its ETA is jittered by +/- 20% so that tasks
    # spread out instead of all firing at once (thundering herd).
    period_secs = source.poll_period().total_seconds()
    task_countdown = period_secs * random.uniform(.8, 1.2)
    util.add_poll_task(source, countdown=task_countdown)

    # feeble attempt to avoid hitting the instance memory limit
    source = None
    gc.collect()
def post(self, *path_args):
    """Poll a single source for this task and schedule the follow-up task.

    The request must carry ``source_key`` and ``last_polled`` params.
    Nothing is polled when the source cannot be loaded, is disabled, does not
    have the ``listen`` feature, or when ``last_polled`` differs from the
    source's stored value (another task is assumed to own the poll).

    Args:
      *path_args: unused here

    Raises:
      anything raised by ``self.poll()`` other than ``models.DisableSource``;
      ``poll_status`` is set to ``'error'`` and the pending updates are
      stored before it propagates.
    """
    request_params = self.request.params
    logging.debug('Params: %s', request_params)

    source_key = ndb.Key(urlsafe=request_params['source_key'])
    source = source_key.get()
    if not source:
        skip = True
    else:
        skip = (source.status == 'disabled'
                or 'listen' not in source.features)
    if skip:
        logging.error('Source not found or disabled. Dropping task.')
        return

    logging.info('Source: %s %s, %s', source.label(), source.key.string_id(),
                 source.bridgy_url(self))

    requested = request_params['last_polled']
    current = source.last_polled.strftime(util.POLL_TASK_DATETIME_FORMAT)
    if requested != current:
        logging.warning('duplicate poll task! deferring to the other task.')
        return

    logging.info(
        'Last poll: %s/log?start_time=%s&key=%s',
        self.request.host_url,
        calendar.timegm(source.last_poll_attempt.utctimetuple()),
        source.key.urlsafe())

    # Record that a poll is in progress.
    source.updates = dict([
        ('poll_status', 'polling'),
        ('last_poll_attempt', util.now_fn()),
    ])
    source = models.Source.put_updates(source)

    source.updates = {}
    try:
        self.poll(source)
    except models.DisableSource:
        # The user deauthorized the bridgy app, so disable this source.
        # Finishing normally keeps the task from being retried.
        source.updates['status'] = 'disabled'
        logging.warning('Disabling source!')
    except:
        source.updates['poll_status'] = 'error'
        raise
    finally:
        source = models.Source.put_updates(source)

    # Schedule the next poll with a randomized ETA (within +/- 20%) to spread
    # tasks out and prevent thundering herds.
    base_delay = source.poll_period().total_seconds()
    jitter = random.uniform(.8, 1.2)
    util.add_poll_task(source, countdown=base_delay * jitter)

    # feeble attempt to avoid hitting the instance memory limit
    source = None
    gc.collect()
def crawl_now():
    """Reset the loaded source's h-feed refetch state and poll it right away.

    Inside a transaction, sets ``last_hfeed_refetch`` to
    ``models.REFETCH_HFEED_TRIGGER`` and clears ``last_feed_syndication_url``,
    then queues an immediate poll task and redirects to the source's bridgy
    URL.
    """
    source = None

    @ndb.transactional()
    def mark_for_refetch():
        # Load and modify inside the same transaction.
        nonlocal source
        loaded = util.load_source()
        source = loaded
        loaded.last_hfeed_refetch = models.REFETCH_HFEED_TRIGGER
        loaded.last_feed_syndication_url = None
        loaded.put()

    mark_for_refetch()

    util.add_poll_task(source, now=True)
    flash("Crawling now. Refresh in a minute to see what's new!")
    target = source.bridgy_url()
    return redirect(target)
def replace_poll_tasks():
    """Add poll tasks for sources that appear to have lost theirs.

    Looks at enabled ``listen`` sources, for classes with ``AUTO_POLL`` set,
    whose ``last_poll_attempt`` is more than two days old.

    Returns:
      an empty string
    """
    queries = []
    for cls in models.sources.values():
        if not cls.AUTO_POLL:
            continue
        cutoff = util.now_fn() - timedelta(days=2)
        queries.append(cls.query(Source.features == 'listen',
                                 Source.status == 'enabled',
                                 Source.last_poll_attempt < cutoff))

    for source in itertools.chain.from_iterable(queries):
        age = util.now_fn() - source.last_poll_attempt
        logger.info(
            f'{source.bridgy_url()} last polled {age} ago. Adding new poll task.')
        util.add_poll_task(source)

    return ''
def create_new(cls, handler, **kwargs):
    """Build a new Source, store it, and queue a poll task for it.

    Args:
      handler: the current RequestHandler; user-facing messages are set on it
      **kwargs: passed to new()

    Returns:
      the stored Source. None if new() returned None, or if the first feature
      is ``publish`` and the profile's web site was not usable.
    """
    source = cls.new(handler, **kwargs)
    if source is None:
        return None

    feature = source.features[0] if source.features else 'listen'

    # Only look at the profile when the source didn't set its own URLs.
    auth_entity = None
    if not source.domain_urls:
        # extract domain from the URL set on the user's profile, if any
        auth_entity = kwargs.get('auth_entity')

    if auth_entity and hasattr(auth_entity, 'user_json'):
        url, domain, ok = source._url_and_domain(auth_entity)

        if not ok and feature == 'publish':
            site_name = cls.AS_CLASS.NAME
            if not url:
                problem = ('Your %s profile is missing the website field. '
                           'Please add it and try again!' % site_name)
            elif not domain:
                problem = ('Could not parse the web site in your %s profile: '
                           '%s\n Please update it and try again!'
                           % (site_name, url))
            else:
                problem = ("Could not connect to the web site in your %s profile: "
                           "%s\n Please update it and try again!"
                           % (site_name, url))
            handler.messages = {problem}
            return None

        if ok:
            if not source.domain_urls:
                source.domain_urls = [url]
            if not source.domains:
                source.domains = [domain]

    # An entity with this key may already be stored; if so, merge some fields.
    verb = 'Added'
    existing = source.key.get()
    if existing:
        source.features = set(source.features + existing.features)
        carried_over = existing.to_dict(include=(
            'created', 'last_hfeed_fetch', 'last_poll_attempt', 'last_polled',
            'last_syndication_url', 'last_webmention_sent',
            'superfeedr_secret'))
        source.populate(**carried_over)
        verb = 'Updated'

    if source.domain_urls:
        link = ('http://indiewebify.me/send-webmentions/?url='
                + source.domain_urls[0])
    else:
        link = 'http://indiewebify.me/#send-webmentions'

    next_steps = {
        'listen': "Refresh to see what we've found!",
        'publish': 'Try previewing a post from your web site!',
        'webmention': '<a href="%s">Try a webmention!</a>' % link,
    }
    blurb = '%s %s. %s' % (verb, source.label(), next_steps.get(feature, ''))
    logging.info('%s %s', blurb, source.bridgy_url(handler))

    if not existing:
        util.email_me(subject=blurb, body=source.bridgy_url(handler))

    source.verify()
    if source.verified():
        handler.messages = {blurb}

    if 'webmention' in source.features:
        superfeedr.subscribe(source, handler)

    # TODO: ugh, *all* of this should be transactional
    source.put()

    if 'listen' in source.features:
        util.add_poll_task(source)

    return source
def post(self):
    """Set up an h-feed refetch, queue an immediate poll, and redirect.

    Calls ``self.setup_refetch_hfeed()`` first, then works with
    ``self.source``.
    """
    self.setup_refetch_hfeed()

    source = self.source
    util.add_poll_task(source, now=True)
    self.messages.add("Crawling now. Refresh in a minute to see what's new!")

    destination = self.source.bridgy_url(self)
    self.redirect(destination)
def post(self):
    """Queue an immediate poll for ``self.source`` and redirect to its page.

    Calls ``self.get_source()`` first, then works with ``self.source``.
    """
    self.get_source()

    source = self.source
    util.add_poll_task(source, now=True)
    self.messages.add("Polling now. Refresh in a minute to see what's new!")

    destination = self.source.bridgy_url(self)
    self.redirect(destination)
def create_new(cls, handler, user_url=None, **kwargs):
    """Build a new Source, store it, and queue poll tasks for it.

    Args:
      handler: the current RequestHandler; user-facing messages are set on it
      user_url: a string, optional. if provided, supersedes other urls when
        determining the author_url
      **kwargs: passed to new()

    Returns:
      the stored Source. None if new() returned None, or if ``publish`` was
      requested and no domain URLs or domains were found.
    """
    source = cls.new(handler, **kwargs)
    if source is None:
        return None

    new_features = source.features or ['listen']

    # Only look at the profile when the source didn't set its own URLs.
    auth_entity = None
    if not source.domain_urls:
        auth_entity = kwargs.get('auth_entity')
    if auth_entity and hasattr(auth_entity, 'user_json'):
        found_urls, found_domains = source._urls_and_domains(
            auth_entity, user_url)
        source.domain_urls, source.domains = found_urls, found_domains
    logging.debug('URLs/domains: %s %s', source.domain_urls, source.domains)

    if 'publish' in new_features:
        if not source.domain_urls or not source.domains:
            handler.messages = {'No valid web sites found in your %s profile. '
                                'Please update it and try again!'
                                % cls.GR_CLASS.NAME}
            return None

    # An entity with this key may already be stored; if so, merge some fields.
    verb = 'Added'
    existing = source.key.get()
    if existing:
        source.features = set(source.features + existing.features)
        carried_over = existing.to_dict(include=(
            'created', 'last_hfeed_refetch', 'last_poll_attempt',
            'last_polled', 'last_syndication_url', 'last_webmention_sent',
            'superfeedr_secret'))
        source.populate(**carried_over)
        verb = 'Updated'

    author_urls = source.get_author_urls()
    if author_urls:
        link = 'http://indiewebify.me/send-webmentions/?url=' + author_urls[0]
    else:
        link = 'http://indiewebify.me/#send-webmentions'

    next_steps = {
        'listen': "Refresh in a minute to see what we've found!",
        'publish': 'Try previewing a post from your web site!',
        'webmention': '<a href="%s">Try a webmention!</a>' % link,
    }
    blurb = '%s %s. %s' % (verb, source.label(),
                           next_steps.get(new_features[0], ''))
    logging.info('%s %s', blurb, source.bridgy_url(handler))

    # uncomment to send email notification for each new user
    # if not existing:
    #   util.email_me(subject=blurb, body=source.bridgy_url(handler))

    source.verify()
    if source.verified():
        handler.messages = {blurb}

    if 'webmention' in source.features:
        superfeedr.subscribe(source, handler)

    # TODO: ugh, *all* of this should be transactional
    source.put()

    if 'listen' in source.features:
        # one task to poll right away, one more a full poll period from now
        util.add_poll_task(source, now=True)
        next_poll_secs = source.poll_period().total_seconds()
        util.add_poll_task(source, countdown=next_poll_secs)

    return source
def post(self, *path_args):
    """Run one poll task for a source, then queue the next one with jitter.

    Reads ``source_key`` and ``last_polled`` from the request params. The
    task is dropped when the source is missing, disabled, or lacks the
    ``listen`` feature, and also when ``last_polled`` does not match the
    value stored on the source (treated as a duplicate task).

    Args:
      *path_args: unused here

    Raises:
      any exception from ``self.poll()`` that matches none of the handled
      cases (disable, rate limit, transient failure). ``poll_status`` is set
      to ``'error'`` and the pending updates are stored before it propagates.
      Transient failures end in ``self.abort(util.ERROR_HTTP_RETURN_CODE)``.
    """
    logging.debug('Params: %s', self.request.params)

    key = self.request.params['source_key']
    source = ndb.Key(urlsafe=key).get()
    if not source or source.status == 'disabled' or 'listen' not in source.features:
        logging.error('Source not found or disabled. Dropping task.')
        return
    logging.info('Source: %s %s, %s', source.label(), source.key.string_id(),
                 source.bridgy_url(self))

    last_polled = self.request.params['last_polled']
    if last_polled != source.last_polled.strftime(util.POLL_TASK_DATETIME_FORMAT):
        logging.warning('duplicate poll task! deferring to the other task.')
        return

    logging.info('Last poll: %s', self._last_poll_url(source))

    # mark this source as polling
    source.updates = {
        'poll_status': 'polling',
        'last_poll_attempt': util.now_fn(),
        'rate_limited': False,
    }
    source = models.Source.put_updates(source)

    source.updates = {}
    try:
        self.poll(source)
    except Exception as e:
        source.updates['poll_status'] = 'error'
        code, body = util.interpret_http_exception(e)
        if code in source.DISABLE_HTTP_CODES or isinstance(e, models.DisableSource):
            # the user deauthorized the bridgy app, so disable this source.
            # let the task complete successfully so that it's not retried.
            logging.warning('Disabling source due to: %s', e, exc_info=True)
            source.updates.update({
                'status': 'disabled',
                'poll_status': 'ok',
            })
        elif code in source.RATE_LIMIT_HTTP_CODES:
            logging.info('Rate limited. Marking as error and finishing. %s', e)
            source.updates['rate_limited'] = True
        elif ((code and int(code) // 100 == 5)
              or code in source.TRANSIENT_ERROR_HTTP_CODES
              or util.is_connection_failure(e)):
            # 5xx, a listed transient code, or a connection failure
            logging.error(
                'API call failed. Marking as error and finishing. %s: %s\n%s',
                code, body, e)
            self.abort(util.ERROR_HTTP_RETURN_CODE)
        else:
            raise
    finally:
        # store whatever updates were collected, on success and on failure
        source = models.Source.put_updates(source)

    # add new poll task. randomize task ETA to within +/- 20% to try to spread
    # out tasks and prevent thundering herds.
    task_countdown = source.poll_period().total_seconds() * random.uniform(.8, 1.2)
    util.add_poll_task(source, countdown=task_countdown)

    # feeble attempt to avoid hitting the instance memory limit
    source = None
    gc.collect()
def create_new(cls, handler, user_url=None, **kwargs):
    """Build a new :class:`Source`, store it, and queue poll tasks for it.

    Args:
      handler: the current :class:`webapp2.RequestHandler`; user-facing
        messages are set on it
      user_url: a string, optional. if provided, supersedes other urls when
        determining the author_url
      **kwargs: passed to :meth:`new()`

    Returns:
      the stored :class:`Source`. None if :meth:`new()` returned None, or if
      ``publish`` was requested and no domain URLs or domains were found.
    """
    source = cls.new(handler, **kwargs)
    if source is None:
        return None

    new_features = source.features or ['listen']

    # Only look at the profile when the source didn't set its own URLs.
    auth_entity = None
    if not source.domain_urls:
        auth_entity = kwargs.get('auth_entity')
    if auth_entity and hasattr(auth_entity, 'user_json'):
        found_urls, found_domains = source._urls_and_domains(
            auth_entity, user_url)
        source.domain_urls, source.domains = found_urls, found_domains
    logging.debug('URLs/domains: %s %s', source.domain_urls, source.domains)

    if 'publish' in new_features:
        if not source.domain_urls or not source.domains:
            handler.messages = {
                'No valid web sites found in your %s profile. '
                'Please update it and try again!' % cls.GR_CLASS.NAME
            }
            return None

    # An entity with this key may already be stored; if so, merge some fields.
    verb = 'Added'
    existing = source.key.get()
    if existing:
        source.features = set(source.features + existing.features)
        carried_over = existing.to_dict(include=(
            'created', 'last_hfeed_refetch', 'last_poll_attempt',
            'last_polled', 'last_syndication_url', 'last_webmention_sent',
            'superfeedr_secret'))
        source.populate(**carried_over)
        verb = 'Updated'

    author_urls = source.get_author_urls()
    if author_urls:
        link = 'http://indiewebify.me/send-webmentions/?url=' + author_urls[0]
    else:
        link = 'http://indiewebify.me/#send-webmentions'

    next_steps = {
        'listen': "Refresh in a minute to see what we've found!",
        'publish': 'Try previewing a post from your web site!',
        'webmention': '<a href="%s">Try a webmention!</a>' % link,
    }
    blurb = '%s %s. %s' % (verb, source.label(),
                           next_steps.get(new_features[0], ''))
    logging.info('%s %s', blurb, source.bridgy_url(handler))

    # uncomment to send email notification for each new user
    # if not existing:
    #   util.email_me(subject=blurb, body=source.bridgy_url(handler))

    source.verify()
    if source.verified():
        handler.messages = {blurb}

    # TODO: ugh, *all* of this should be transactional
    source.put()

    if 'webmention' in source.features:
        superfeedr.subscribe(source, handler)

    if 'listen' in source.features:
        # one task to poll right away, one more a full poll period from now
        util.add_poll_task(source, now=True)
        next_poll_secs = source.poll_period().total_seconds()
        util.add_poll_task(source, countdown=next_poll_secs)

    return source
source.updates.update({"status": "disabled", "poll_status": "ok"}) elif code in util.HTTP_RATE_LIMIT_CODES: logging.warning("Rate limited. Marking as error and finishing. %s", e) source.updates["rate_limited"] = True elif (code and int(code) / 100 == 5) or util.is_connection_failure(e): logging.error("API call failed. Marking as error and finishing. %s: %s\n%s", code, body, e) self.abort(ERROR_HTTP_RETURN_CODE) else: raise finally: source = models.Source.put_updates(source) # add new poll task. randomize task ETA to within +/- 20% to try to spread # out tasks and prevent thundering herds. task_countdown = source.poll_period().total_seconds() * random.uniform(0.8, 1.2) util.add_poll_task(source, countdown=task_countdown) # feeble attempt to avoid hitting the instance memory limit source = None gc.collect() def poll(self, source): """Actually runs the poll. Stores property names and values to update in source.updates. """ if source.last_activities_etag or source.last_activity_id: logging.debug("Using ETag %s, last activity id %s", source.last_activities_etag, source.last_activity_id) # # Step 1: fetch activities:
def poll_now():
    """Queue an immediate poll for the loaded source and redirect to its page.

    Returns:
      the result of ``redirect()`` to the source's bridgy URL
    """
    source = util.load_source()
    util.add_poll_task(source, now=True)

    flash("Polling now. Refresh in a minute to see what's new!")

    destination = source.bridgy_url()
    return redirect(destination)
def post(self):
    """Add a poll task for the source returned by ``self.auth()``.

    Writes ``'OK'`` via ``self.output()`` afterwards.
    """
    util.add_poll_task(self.auth())
    self.output('OK')
def dispatch_request(self):
    """Queue an immediate poll for the source returned by ``self.auth()``.

    Returns:
      ``jsonify('OK')``
    """
    util.add_poll_task(self.auth(), now=True)
    return jsonify('OK')
source.updates['rate_limited'] = True elif ((code and int(code) / 100 == 5) or (code == '400' and isinstance(source, flickr.Flickr)) or util.is_connection_failure(e)): logging.error('API call failed. Marking as error and finishing. %s: %s\n%s', code, body, e) self.abort(util.ERROR_HTTP_RETURN_CODE) else: raise finally: source = models.Source.put_updates(source) # add new poll task. randomize task ETA to within +/- 20% to try to spread # out tasks and prevent thundering herds. task_countdown = source.poll_period().total_seconds() * random.uniform(.8, 1.2) util.add_poll_task(source, countdown=task_countdown) # feeble attempt to avoid hitting the instance memory limit source = None gc.collect() def poll(self, source): """Actually runs the poll. Stores property names and values to update in source.updates. """ if source.last_activities_etag or source.last_activity_id: logging.debug('Using ETag %s, last activity id %s', source.last_activities_etag, source.last_activity_id) #
def dispatch_request(self):
    """Run one poll task for a source and, if it auto-polls, queue the next.

    Reads ``source_key`` from the request values. The task is dropped when
    the source is missing, disabled, or lacks the ``listen`` feature. For
    sources with ``AUTO_POLL`` set, it is also dropped when the
    ``last_polled`` value does not match the one stored on the source
    (treated as a duplicate task).

    Returns:
      ``''`` when the task is dropped, ``'OK'`` otherwise

    Raises:
      any exception from ``self.poll()`` that is neither a disable condition
      nor a rate limit; ``poll_status`` is set to ``'error'`` and the pending
      updates are stored before it propagates.
    """
    values = request.values
    logger.debug(f'Params: {list(values.items())}')

    source = g.source = ndb.Key(urlsafe=values['source_key']).get()
    pollable = (source and source.status != 'disabled'
                and 'listen' in source.features)
    if not pollable:
        logger.error('Source not found or disabled. Dropping task.')
        return ''

    logger.info(
        f'Source: {source.label()} {source.key_id()}, {source.bridgy_url()}')

    if source.AUTO_POLL:
        task_stamp = request.values['last_polled']
        stored_stamp = source.last_polled.strftime(
            util.POLL_TASK_DATETIME_FORMAT)
        if task_stamp != stored_stamp:
            logger.warning('duplicate poll task! deferring to the other task.')
            return ''

    logger.info(f'Last poll: {self._last_poll_url(source)}')

    # Flag the source as being polled right now.
    source.updates = {
        'poll_status': 'polling',
        'last_poll_attempt': util.now_fn(),
        'rate_limited': False,
    }
    source = models.Source.put_updates(source)

    source.updates = {}
    try:
        self.poll(source)
    except Exception as e:
        source.updates['poll_status'] = 'error'
        code, _ = util.interpret_http_exception(e)
        must_disable = (code in source.DISABLE_HTTP_CODES
                        or isinstance(e, models.DisableSource))
        if must_disable:
            # The user deauthorized the bridgy app, so disable this source.
            # Finishing normally keeps the task from being retried.
            logger.warning(f'Disabling source due to: {e}', exc_info=True)
            source.updates.update({
                'status': 'disabled',
                'poll_status': 'ok',
            })
        elif code in source.RATE_LIMIT_HTTP_CODES:
            logger.info(f'Rate limited. Marking as error and finishing. {e}')
            source.updates['rate_limited'] = True
        else:
            raise
    finally:
        source = models.Source.put_updates(source)

    if source.AUTO_POLL:
        util.add_poll_task(source)

    # feeble attempt to avoid hitting the instance memory limit
    source = None
    gc.collect()

    return 'OK'