Example #1
  def fetch_mf2(self, url):
    """Fetches a URL and extracts its mf2 data.

    Side effects: sets self.entity.html on success, calls self.error() on
    errors.

    Args:
      url: string

    Returns:
      (requests.Response, mf2 data dict) on success, None on failure
    """
    try:
      fetched = util.requests_get(url)
      fetched.raise_for_status()
    except BaseException as e:
      util.interpret_http_exception(e)  # log exception
      return self.error('Could not fetch source URL %s' % url)

    if self.entity:
      self.entity.html = fetched.text

    # .text is decoded unicode string, .content is raw bytes. if the HTTP
    # headers didn't specify a charset, pass raw bytes to BeautifulSoup so it
    # can look for a <meta> tag with a charset and decode.
    text = (fetched.text if 'charset' in fetched.headers.get('content-type', '')
            else fetched.content)
    doc = BeautifulSoup(text)

    # special case tumblr's markup: div#content > div.post > div.copy
    # convert to mf2.
    contents = doc.find_all(id='content')
    if contents:
      post = contents[0].find_next(class_='post')
      if post:
        post['class'] = 'h-entry'
        copy = post.find_next(class_='copy')
        if copy:
          copy['class'] = 'e-content'
        photo = post.find_next(class_='photo-wrapper')
        if photo:
          img = photo.find_next('img')
          if img:
            img['class'] = 'u-photo'
        doc = unicode(post)

    # parse microformats, convert to ActivityStreams
    data = parser.Parser(doc=doc, url=fetched.url).to_dict()
    logging.debug('Parsed microformats2: %s', json.dumps(data, indent=2))
    items = data.get('items', [])
    if not items or not items[0]:
      return self.error('No microformats2 data found in ' + fetched.url,
                        data=data, html="""
No <a href="http://microformats.org/get-started">microformats</a> or
<a href="http://microformats.org/wiki/microformats2">microformats2</a> found in
<a href="%s">%s</a>! See <a href="http://indiewebify.me/">indiewebify.me</a>
for details (skip to level 2, <em>Publishing on the IndieWeb</em>).
""" % (fetched.url, util.pretty_link(fetched.url)))

    return fetched, data
Example #2
    def get(self, type, source_short_name, string_id, *ids):
        source_cls = SOURCES.get(source_short_name)
        if not source_cls:
            self.abort(400, "Source type '%s' not found. Known sources: %s" % (source_short_name, SOURCES))

        self.source = source_cls.get_by_id(string_id)
        if not self.source:
            self.abort(400, "%s %s not found" % (source_short_name, string_id))

        format = self.request.get("format", "html")
        if format not in ("html", "json"):
            self.abort(400, "Invalid format %s, expected html or json" % format)

        for id in ids:
            if not self.VALID_ID.match(id):
                self.abort(404, "Invalid id %s" % id)

        label = "%s:%s %s %s" % (source_short_name, string_id, type, ids)
        logging.info("Fetching %s", label)
        try:
            obj = self.get_item(*ids)
        except Exception, e:
            # pass through all API HTTP errors if we can identify them
            code, body = util.interpret_http_exception(e)
            if code:
                self.response.status_int = int(code)
                self.response.headers["Content-Type"] = "text/plain"
                self.response.write("%s error:\n%s" % (self.source.AS_CLASS.NAME, body))
                return
            else:
                raise
Example #3
    def get(self):
        sources = {source.key.id(): source for source in Twitter.query()}
        if not sources:
            return

        # just auth as me or the first user. TODO: use app-only auth instead.
        auther = sources.get('schnarfed') or list(sources.values())[0]
        usernames = list(sources.keys())
        users = []
        for i in range(0, len(usernames), TWITTER_USERS_PER_LOOKUP):
            username_batch = usernames[i:i + TWITTER_USERS_PER_LOOKUP]
            url = TWITTER_API_USER_LOOKUP % ','.join(username_batch)
            try:
                users += auther.gr_source.urlopen(url)
            except Exception as e:
                code, body = util.interpret_http_exception(e)
                if not (code == '404' and len(username_batch) == 1):
                    # 404 for a single user means they deleted their account. otherwise...
                    raise

        updated = False
        for user in users:
            source = sources.get(user['screen_name'])
            if source:
                new_actor = auther.gr_source.user_to_actor(user)
                updated = maybe_update_picture(source, new_actor, self)

        if updated:
            util.CachedPage.invalidate('/users')
Example #4
  def get_activities_response(self, **kwargs):
    type = self.auth_entity.get().type
    kwargs.setdefault('fetch_events', True)
    kwargs.setdefault('fetch_news', type == 'user')
    kwargs.setdefault('event_owner_id', self.key.id())

    # temporary workaround for http://github.com/snarfed/bridgy/issues/689
    if self.key.id() == '10207093222641618':
      kwargs['count'] = 38

    try:
      activities = super(FacebookPage, self).get_activities_response(**kwargs)
    except urllib2.HTTPError as e:
      code, body = util.interpret_http_exception(e)
      # use a function so any new exceptions (JSON decoding, missing keys) don't
      # clobber the original exception so we can re-raise it below.
      def dead_token():
        try:
          err = json.loads(body)['error']
          return (err.get('code') in DEAD_TOKEN_ERROR_CODES or
                  err.get('error_subcode') in DEAD_TOKEN_ERROR_SUBCODES or
                  err.get('message') in DEAD_TOKEN_ERROR_MESSAGES)
        except:
          logging.exception("Couldn't determine whether token is still valid")
          return False

      if code == '401':
        if not dead_token() and type == 'user':
          # ask the user to reauthenticate. if this API call fails, it will raise
          # urllib2.HTTPError instead of DisableSource, so that we don't disable
          # the source without notifying.
          #
          # TODO: for pages, fetch the owners/admins and notify them.
          self.gr_source.create_notification(
            self.key.id(),
            "Brid.gy's access to your account has expired. Click here to renew it now!",
            'https://brid.gy/facebook/start')
        raise models.DisableSource()

      raise

    # update the resolved_object_ids and post_publics caches
    def parsed_post_id(id):
      parsed = gr_facebook.Facebook.parse_id(id)
      return parsed.post if parsed.post else id

    resolved = self._load_cache('resolved_object_ids')
    for activity in activities['items']:
      obj = activity.get('object', {})
      obj_id = parsed_post_id(obj.get('fb_id'))
      ids = obj.get('fb_object_for_ids')
      if obj_id and ids:
        resolved[obj_id] = obj_id
        for id in ids:
          resolved[parsed_post_id(id)] = obj_id

    for activity in activities['items']:
      self.is_activity_public(activity)

    return activities
Example #5
File: handlers.py Project: snarfed/webutil
def handle_exception(self, e, debug):
  """A webapp2 exception handler that propagates HTTP exceptions into the response.

  Use this as a :meth:`webapp2.RequestHandler.handle_exception()` method by
  adding this line to your handler class definition::

    handle_exception = handlers.handle_exception

  I originally tried to put this in a :class:`webapp2.RequestHandler` subclass,
  but it gave me this exception::

    File ".../webapp2-2.5.1/webapp2_extras/local.py", line 136, in _get_current_object
      raise RuntimeError('no object bound to %s' % self.__name__)
    RuntimeError: no object bound to app

  These are probably related:

  * http://eemyop.blogspot.com/2013/05/digging-around-in-webapp2-finding-out.html
  * http://code.google.com/p/webapp-improved/source/detail?r=d962ac4625ce3c43a3e59fd7fc07daf8d7b7c46a

  """
  code, body = util.interpret_http_exception(e)
  if code:
    self.response.set_status(int(code))
    self.response.write('HTTP Error %s: %s' % (code, body))
  elif util.is_connection_failure(e):
    self.response.set_status(502)
    self.response.write('Upstream server request failed: %s' % e)
  else:
    raise
Example #6
File: facebook.py Project: kylewm/bridgy
    def get_activities_response(self, **kwargs):
        kwargs.setdefault("fetch_events", True)
        kwargs.setdefault("event_owner_id", self.key.id())

        try:
            return super(FacebookPage, self).get_activities_response(**kwargs)
        except urllib2.HTTPError as e:
            code, body = util.interpret_http_exception(e)
            # use a function so any new exceptions (JSON decoding, missing keys) don't
            # clobber the original exception so we can re-raise it below.
            def dead_token():
                try:
                    err = json.loads(body)["error"]
                    return err["code"] in DEAD_TOKEN_ERROR_CODES or err["error_subcode"] in DEAD_TOKEN_ERROR_SUBCODES
                except:
                    return False

            if code == "401":
                if not dead_token():
                    # ask the user to reauthenticate. if this API call fails, it will raise
                    # urllib2.HTTPError instead of DisableSource, so that we don't disable
                    # the source without notifying.
                    self.gr_source.create_notification(
                        self.key.id(),
                        "Brid.gy's access to your account has expired. Click here to renew it now!",
                        "https://brid.gy/facebook/start",
                    )
                raise models.DisableSource()

            raise
Example #7
File: facebook.py Project: dougbeal/bridgy
    def get_activities_response(self, **kwargs):
        type = self.auth_entity.get().type
        kwargs.setdefault('fetch_events', True)
        kwargs.setdefault('fetch_news', type == 'user')
        kwargs.setdefault('event_owner_id', self.key.id())

        try:
            activities = super(FacebookPage,
                               self).get_activities_response(**kwargs)
        except urllib2.HTTPError as e:
            code, body = util.interpret_http_exception(e)

            # use a function so any new exceptions (JSON decoding, missing keys) don't
            # clobber the original exception so we can re-raise it below.
            def dead_token():
                try:
                    err = json.loads(body)['error']
                    return (err.get('code') in DEAD_TOKEN_ERROR_CODES
                            or err.get('error_subcode')
                            in DEAD_TOKEN_ERROR_SUBCODES
                            or err.get('message') in DEAD_TOKEN_ERROR_MESSAGES)
                except:
                    logging.exception(
                        "Couldn't determine whether token is still valid")
                    return False

            if code == '401':
                if not dead_token() and type == 'user':
                    # ask the user to reauthenticate. if this API call fails, it will raise
                    # urllib2.HTTPError instead of DisableSource, so that we don't disable
                    # the source without notifying.
                    #
                    # TODO: for pages, fetch the owners/admins and notify them.
                    self.gr_source.create_notification(
                        self.key.id(),
                        "Bridgy's access to your account has expired. Click here to renew it now!",
                        'https://brid.gy/facebook/start')
                raise models.DisableSource()

            raise

        # update the resolved_object_ids and post_publics caches
        def parsed_post_id(id):
            parsed = gr_facebook.Facebook.parse_id(id)
            return parsed.post if parsed.post else id

        resolved = self._load_cache('resolved_object_ids')
        for activity in activities['items']:
            obj = activity.get('object', {})
            obj_id = parsed_post_id(obj.get('fb_id'))
            ids = obj.get('fb_object_for_ids')
            if obj_id and ids:
                resolved[obj_id] = obj_id
                for id in ids:
                    resolved[parsed_post_id(id)] = obj_id

        for activity in activities['items']:
            self.is_activity_public(activity)

        return activities
Example #8
File: tasks.py Project: priscila225/bridgy
  def poll(self, source):
    """Actually runs the poll.

    Returns: dict of source property names and values to update (transactionally)
    """
    if source.last_activities_etag or source.last_activity_id:
      logging.debug('Using ETag %s, last activity id %s',
                    source.last_activities_etag, source.last_activity_id)
    source_updates = {}

    #
    # Step 1: fetch activities
    #
    cache = util.CacheDict()
    if source.last_activities_cache_json:
      cache.update(json.loads(source.last_activities_cache_json))

    try:
      response = source.get_activities_response(
        fetch_replies=True, fetch_likes=True, fetch_shares=True, count=50,
        etag=source.last_activities_etag, min_id=source.last_activity_id,
        cache=cache)
    except Exception, e:
      code, body = util.interpret_http_exception(e)
      if code == '401':
        msg = 'Unauthorized error: %s' % e
        logging.warning(msg, exc_info=True)
        raise models.DisableSource(msg)
      elif code in util.HTTP_RATE_LIMIT_CODES:
        logging.warning('Rate limited. Marking as error and finishing. %s', e)
        source_updates.update({'status': 'error', 'rate_limited': True})
        return source_updates
      else:
        raise
Example #9
File: cron.py Project: sheyril/bridgy
    def get(self):
        updated = False
        for source in self.source_query():
            if source.features and source.status != 'disabled':
                logging.debug('checking for updated profile pictures for: %s',
                              source.bridgy_url(self))
                try:
                    actor = source.gr_source.get_actor(self.user_id(source))
                except requests.HTTPError as e:
                    # Mastodon API returns HTTP 404 for deleted (etc) users
                    util.interpret_http_exception(e)
                    continue
                updated = maybe_update_picture(source, actor, self)

        if updated:
            util.CachedPage.invalidate('/users')
Example #10
File: app.py Project: snarfed/bridgy
  def post(self):
    source = self.load_source(param='key')
    module = self.OAUTH_MODULES[source.key.kind()]
    feature = util.get_required_param(self, 'feature')
    state = util.encode_oauth_state({
      'operation': 'delete',
      'feature': feature,
      'source': source.key.urlsafe(),
      'callback': self.request.get('callback'),
    })

    # Blogger doesn't support redirect_url() yet
    if module is oauth_blogger_v2:
      return self.redirect('/blogger/delete/start?state=%s' % state)

    path = ('/instagram/callback' if module is indieauth
            else '/wordpress/add' if module is oauth_wordpress_rest
            else '/%s/delete/finish' % source.SHORT_NAME)
    kwargs = {}
    if module is oauth_twitter:
      kwargs['access_type'] = 'read' if feature == 'listen' else 'write'

    handler = module.StartHandler.to(path, **kwargs)(self.request, self.response)
    try:
      self.redirect(handler.redirect_url(state=state))
    except Exception as e:
      code, body = util.interpret_http_exception(e)
      if not code and util.is_connection_failure(e):
        code = '-'
        body = unicode(e)
      if code:
        self.messages.add('%s API error %s: %s' % (source.GR_CLASS.NAME, code, body))
        self.redirect(source.bridgy_url(self))
      else:
        raise
Example #11
File: tasks.py Project: dougbeal/bridgy
  def post(self):
    logging.debug('Params: %s', self.request.params)

    key = util.get_required_param(self, 'source_key')
    source = ndb.Key(urlsafe=key).get()
    if not source or source.status == 'disabled' or 'listen' not in source.features:
      logging.error('Source not found or disabled. Dropping task.')
      return
    logging.info('Source: %s %s, %s', source.label(), source.key.string_id(),
                 source.bridgy_url(self))

    post_id = util.get_required_param(self, 'post_id')
    source.updates = {}

    try:
      activities = source.get_activities(
        fetch_replies=True, fetch_likes=True, fetch_shares=True,
        activity_id=post_id, user_id=source.key.id())
      if not activities:
        logging.info('Post %s not found.', post_id)
        return
      assert len(activities) == 1
      self.backfeed(source, activities={activities[0]['id']: activities[0]})
    except Exception, e:
      code, body = util.interpret_http_exception(e)
      if (code and (code in util.HTTP_RATE_LIMIT_CODES or code == '400' or
                    int(code) / 100 == 5)
            or util.is_connection_failure(e)):
        logging.error('API call failed; giving up. %s: %s\n%s', code, body, e)
        self.abort(util.ERROR_HTTP_RETURN_CODE)
      else:
        raise
Example #12
  def get(self, type, source_short_name, string_id, *ids):
    source_cls = models.sources.get(source_short_name)
    if not source_cls:
      self.abort(400, "Source type '%s' not found. Known sources: %s" %
                 (source_short_name, models.sources))

    self.source = source_cls.get_by_id(string_id)
    if not self.source:
      self.abort(400, '%s %s not found' % (source_short_name, string_id))

    format = self.request.get('format', 'html')
    if format not in ('html', 'json'):
      self.abort(400, 'Invalid format %s, expected html or json' % format)

    for id in ids:
      if not self.VALID_ID.match(id):
        self.abort(404, 'Invalid id %s' % id)

    label = '%s:%s %s %s' % (source_short_name, string_id, type, ids)
    logging.info('Fetching %s', label)
    try:
      obj = self.get_item(*ids)
    except Exception, e:
      # pass through all API HTTP errors if we can identify them
      code, body = util.interpret_http_exception(e)
      if code:
        self.response.status_int = int(code)
        self.response.headers['Content-Type'] = 'text/plain'
        self.response.write('%s error:\n%s' % (self.source.GR_CLASS.NAME, body))
        return
      else:
        raise
Example #13
  def do_post(self, source):
    if source.last_activities_etag or source.last_activity_id:
      logging.debug('Using ETag %s, last activity id %s',
                    source.last_activities_etag, source.last_activity_id)

    #
    # Step 1: fetch activities
    #
    try:
      response = source.get_activities_response(
        fetch_replies=True, fetch_likes=True, fetch_shares=True, count=50,
        etag=source.last_activities_etag, min_id=source.last_activity_id,
        cache=memcache)
    except Exception, e:
      code, body = util.interpret_http_exception(e)
      if code == '401':
        # TODO: also interpret oauth2client.AccessTokenRefreshError with
        # {'error': 'invalid_grant'} as disabled? it can mean the user revoked
        # access. it can also mean the token expired, or they deleted their
        # account, or even other things.
        # http://code.google.com/p/google-api-python-client/issues/detail?id=187#c1
        msg = 'Unauthorized error: %s' % e
        logging.exception(msg)
        raise models.DisableSource(msg)
      elif code in util.HTTP_RATE_LIMIT_CODES:
        logging.warning('Rate limited. Marking as error and finishing. %s', e)
        source.status = 'error'
        return
      else:
        raise
Example #14
  def get_activities_response(self, **kwargs):
    kwargs.setdefault('fetch_events', True)
    kwargs.setdefault('fetch_news', self.auth_entity.get().type == 'user')
    kwargs.setdefault('event_owner_id', self.key.id())

    try:
      return super(FacebookPage, self).get_activities_response(**kwargs)
    except urllib2.HTTPError as e:
      code, body = util.interpret_http_exception(e)
      # use a function so any new exceptions (JSON decoding, missing keys) don't
      # clobber the original exception so we can re-raise it below.
      def dead_token():
        try:
          err = json.loads(body)['error']
          return (err.get('code') in DEAD_TOKEN_ERROR_CODES or
                  err.get('error_subcode') in DEAD_TOKEN_ERROR_SUBCODES or
                  err.get('message') in DEAD_TOKEN_ERROR_MESSAGES)
        except:
          logging.exception("Couldn't determine whether token is still valid")
          return False

      if code == '401':
        if not dead_token():
          # ask the user to reauthenticate. if this API call fails, it will raise
          # urllib2.HTTPError instead of DisableSource, so that we don't disable
          # the source without notifying.
          self.gr_source.create_notification(
            self.key.id(),
            "Brid.gy's access to your account has expired. Click here to renew it now!",
            'https://brid.gy/facebook/start')
        raise models.DisableSource()

      raise
Example #15
File: app.py Project: sheyril/bridgy
  def post(self):
    source = self.load_source(param='key')
    kind = source.key.kind()
    feature = util.get_required_param(self, 'feature')
    state = util.encode_oauth_state({
      'operation': 'delete',
      'feature': feature,
      'source': source.key.urlsafe().decode(),
      'callback': self.request.get('callback'),
    })

    # Blogger doesn't support redirect_url() yet
    if kind == 'Blogger':
      return self.redirect('/blogger/delete/start?state=%s' % state)

    path = ('/reddit/callback' if kind == 'Reddit'
            else '/wordpress/add' if kind == 'WordPress'
            else '/%s/delete/finish' % source.SHORT_NAME)
    kwargs = {}
    if kind == 'Twitter':
      kwargs['access_type'] = 'read' if feature == 'listen' else 'write'

    handler = source.OAUTH_START_HANDLER.to(path, **kwargs)(self.request, self.response)
    try:
      self.redirect(handler.redirect_url(state=state))
    except Exception as e:
      code, body = util.interpret_http_exception(e)
      if not code and util.is_connection_failure(e):
        code = '-'
        body = str(e)
      if code:
        self.messages.add('%s API error %s: %s' % (source.GR_CLASS.NAME, code, body))
        self.redirect(source.bridgy_url(self))
      else:
        raise
Example #16
    def create_comment(self, post_url, author_name, author_url, content):
        """Creates a new comment in the source silo.

    If the last part of the post URL is numeric, e.g. http://site/post/123999,
    it's used as the post id. Otherwise, we extract the last part of
    the path as the slug, e.g. http://site/post/the-slug,
    and look up the post id via the API.

    Args:
      post_url: string
      author_name: string
      author_url: string
      content: string

    Returns:
      JSON response dict with 'id' and other fields
    """
        auth_entity = self.auth_entity.get()
        logging.info('Determining WordPress.com post id for %s', post_url)

        # extract the post's slug and look up its post id
        path = urllib.parse.urlparse(post_url).path
        if path.endswith('/'):
            path = path[:-1]
        slug = path.split('/')[-1]
        try:
            post_id = int(slug)
        except ValueError:
            logging.info('Looking up post id for slug %s', slug)
            url = API_POST_SLUG_URL % (auth_entity.blog_id, slug)
            post_id = self.urlopen(auth_entity, url).get('ID')
            if not post_id:
                return self.error('Could not find post id', report=False)

        logging.info('Post id is %d', post_id)

        # create the comment
        url = API_CREATE_COMMENT_URL % (auth_entity.blog_id, post_id)
        content = '<a href="%s">%s</a>: %s' % (author_url, author_name,
                                               content)
        data = {'content': content.encode()}
        try:
            resp = self.urlopen(auth_entity,
                                url,
                                data=urllib.parse.urlencode(data))
        except urllib.error.HTTPError as e:
            code, body = util.interpret_http_exception(e)
            try:
                parsed = json_loads(body) if body else {}
                if ((code == '400' and parsed.get('error') == 'invalid_input')
                        or (code == '403' and parsed.get('message')
                            == 'Comments on this post are closed')):
                    return parsed  # known error: https://github.com/snarfed/bridgy/issues/161
            except ValueError:
                pass  # fall through
            raise e

        resp['id'] = resp.pop('ID', None)
        return resp
Example #17
  def post(self):
    try:
      self.redirect(self.redirect_url(state=util.get_required_param(self, 'token')))
    except Exception as e:
      if util.is_connection_failure(e) or util.interpret_http_exception(e)[0]:
        self.messages.add("Couldn't fetch your web site: %s" % e)
        return self.redirect('/')
      raise
Example #18
File: handlers.py Project: frankk00/bridgy
  def get_post(self, id):
    """Fetch a post.

    Args:
      id: string, site-specific post id
      is_event: bool

    Returns: ActivityStreams object dict
    """
    try:
      posts = self.source.get_activities(
          activity_id=id, user_id=self.source.key.id())
      if posts:
        return posts[0]
      logging.warning('Source post %s not found', id)
    except Exception as e:
      util.interpret_http_exception(e)
Example #19
    def finish(self, auth_entity, state=None):
        if auth_entity:
            user_json = json.loads(auth_entity.user_json)

            # find instagram profile URL
            urls = user_json.get('rel-me', [])
            logging.info('rel-mes: %s', urls)
            for url in util.trim_nulls(urls):
                if util.domain_from_link(url) == gr_instagram.Instagram.DOMAIN:
                    username = urllib.parse.urlparse(url).path.strip('/')
                    break
            else:
                self.messages.add(
                    'No Instagram profile found. Please <a href="https://indieauth.com/setup">add an Instagram rel-me link</a>, then try again.'
                )
                return self.redirect('/')

            # check that instagram profile links to web site
            try:
                actor = gr_instagram.Instagram(scrape=True).get_actor(
                    username, ignore_rate_limit=True)
            except Exception as e:
                code, _ = util.interpret_http_exception(e)
                if code in Instagram.RATE_LIMIT_HTTP_CODES:
                    self.messages.add(
                        '<a href="https://github.com/snarfed/bridgy/issues/665#issuecomment-524977427">Apologies, Instagram is temporarily blocking us.</a> Please try again later!'
                    )
                    return self.redirect('/')
                else:
                    raise

            if not actor:
                self.messages.add(
                    "Couldn't find Instagram user '%s'. Please check your site's rel-me link and your Instagram account."
                    % username)
                return self.redirect('/')

            canonicalize = util.UrlCanonicalizer(redirects=False)
            website = canonicalize(auth_entity.key.id())
            urls = [canonicalize(u) for u in microformats2.object_urls(actor)]
            logging.info('Looking for %s in %s', website, urls)
            if website not in urls:
                self.messages.add(
                    "Please add %s to your Instagram profile's website or bio field and try again."
                    % website)
                return self.redirect('/')

            # check that the instagram account is public
            if not gr_source.Source.is_public(actor):
                self.messages.add(
                    'Your Instagram account is private. Bridgy only supports public accounts.'
                )
                return self.redirect('/')

        self.maybe_add_or_delete_source(Instagram,
                                        auth_entity,
                                        state,
                                        actor=actor)
Example #20
File: tasks.py Project: mblaney/bridgy
  def post(self, *path_args):
    logging.debug('Params: %s', self.request.params)

    key = self.request.params['source_key']
    source = ndb.Key(urlsafe=key).get()
    if not source or source.status == 'disabled' or 'listen' not in source.features:
      logging.error('Source not found or disabled. Dropping task.')
      return
    logging.info('Source: %s %s, %s', source.label(), source.key.string_id(),
                 source.bridgy_url(self))

    last_polled = self.request.params['last_polled']
    if last_polled != source.last_polled.strftime(util.POLL_TASK_DATETIME_FORMAT):
      logging.warning('duplicate poll task! deferring to the other task.')
      return

    logging.info('Last poll: %s', self._last_poll_url(source))

    # mark this source as polling
    source.updates = {
      'poll_status': 'polling',
      'last_poll_attempt': util.now_fn(),
      'rate_limited': False,
    }
    source = models.Source.put_updates(source)

    source.updates = {}
    try:
      self.poll(source)
    except Exception, e:
      source.updates['poll_status'] = 'error'
      code, body = util.interpret_http_exception(e)
      if code == '401' or isinstance(e, models.DisableSource):
        # the user deauthorized the bridgy app, so disable this source.
        # let the task complete successfully so that it's not retried.
        logging.warning('Disabling source due to: %s' % e, exc_info=True)
        source.updates.update({
          'status': 'disabled',
          'poll_status': 'ok',
        })
        body = '%s\nLast poll: %s' % (source.bridgy_url(self),
                                      self._last_poll_url(source))
        if source.is_beta_user():
          util.email_me(subject='Bridgy: disabled %s' % source.label(), body=body)

      elif code in util.HTTP_RATE_LIMIT_CODES:
        logging.info('Rate limited. Marking as error and finishing. %s', e)
        source.updates['rate_limited'] = True
      elif ((code and int(code) / 100 == 5) or
            (code == '400' and isinstance(source, flickr.Flickr)) or
            util.is_connection_failure(e)):
        logging.error('API call failed. Marking as error and finishing. %s: %s\n%s',
                      code, body, e)
        self.abort(util.ERROR_HTTP_RETURN_CODE)
      else:
        raise
Example #21
File: tasks.py Project: jamietanna/bridgy
  def post(self, *path_args):
    self.request.headers['Content-Type'] = 'application/x-www-form-urlencoded'
    logging.debug('Params: %s', list(self.request.params.items()))

    key = self.request.params['source_key']
    source = self.source = ndb.Key(urlsafe=key).get()
    if not source or source.status == 'disabled' or 'listen' not in source.features:
      logging.error('Source not found or disabled. Dropping task.')
      return
    logging.info('Source: %s %s, %s', source.label(), source.key.string_id(),
                 source.bridgy_url(self))

    last_polled = self.request.params['last_polled']
    if last_polled != source.last_polled.strftime(util.POLL_TASK_DATETIME_FORMAT):
      logging.warning('duplicate poll task! deferring to the other task.')
      return

    logging.info('Last poll: %s', self._last_poll_url(source))

    # mark this source as polling
    source.updates = {
      'poll_status': 'polling',
      'last_poll_attempt': util.now_fn(),
      'rate_limited': False,
    }
    source = models.Source.put_updates(source)

    source.updates = {}
    try:
      self.poll(source)
    except Exception as e:
      source.updates['poll_status'] = 'error'
      code, body = util.interpret_http_exception(e)
      if code in source.DISABLE_HTTP_CODES or isinstance(e, models.DisableSource):
        # the user deauthorized the bridgy app, so disable this source.
        # let the task complete successfully so that it's not retried.
        logging.warning('Disabling source due to: %s' % e, stack_info=True)
        source.updates.update({
          'status': 'disabled',
          'poll_status': 'ok',
        })
        body = '%s\nLast poll: %s' % (source.bridgy_url(self),
                                      self._last_poll_url(source))
      elif code in source.RATE_LIMIT_HTTP_CODES:
        logging.info('Rate limited. Marking as error and finishing. %s', e)
        source.updates['rate_limited'] = True
      else:
        raise
    finally:
      source = models.Source.put_updates(source)

    util.add_poll_task(source)

    # feeble attempt to avoid hitting the instance memory limit
    source = None
    gc.collect()
Example #22
File: tasks.py Project: murindwaz/bridgy
  def post(self, *path_args):
    logging.debug('Params: %s', self.request.params)

    key = self.request.params['source_key']
    source = ndb.Key(urlsafe=key).get()
    if not source or source.status == 'disabled' or 'listen' not in source.features:
      logging.error('Source not found or disabled. Dropping task.')
      return
    logging.info('Source: %s %s, %s', source.label(), source.key.string_id(),
                 source.bridgy_url(self))

    last_polled = self.request.params['last_polled']
    if last_polled != source.last_polled.strftime(util.POLL_TASK_DATETIME_FORMAT):
      logging.warning('duplicate poll task! deferring to the other task.')
      return

    logging.info('Last poll: %s', self._last_poll_url(source))

    # mark this source as polling
    source.updates = {
      'poll_status': 'polling',
      'last_poll_attempt': util.now_fn(),
      'rate_limited': False,
    }
    source = models.Source.put_updates(source)

    source.updates = {}
    try:
      self.poll(source)
    except Exception, e:
      source.updates['poll_status'] = 'error'
      code, body = util.interpret_http_exception(e)
      if code in source.DISABLE_HTTP_CODES or isinstance(e, models.DisableSource):
        # the user deauthorized the bridgy app, so disable this source.
        # let the task complete successfully so that it's not retried.
        logging.warning('Disabling source due to: %s' % e, exc_info=True)
        source.updates.update({
          'status': 'disabled',
          'poll_status': 'ok',
        })
        body = '%s\nLast poll: %s' % (source.bridgy_url(self),
                                      self._last_poll_url(source))
        if source.is_beta_user():
          util.email_me(subject='Bridgy: disabled %s' % source.label(), body=body)

      elif code in source.RATE_LIMIT_HTTP_CODES:
        logging.info('Rate limited. Marking as error and finishing. %s', e)
        source.updates['rate_limited'] = True
      elif ((code and int(code) / 100 == 5) or
            (code == '400' and isinstance(source, flickr.Flickr)) or
            util.is_connection_failure(e)):
        logging.error('API call failed. Marking as error and finishing. %s: %s\n%s',
                      code, body, e)
        self.abort(util.ERROR_HTTP_RETURN_CODE)
      else:
        raise
Example #23
File: handlers.py Project: stedn/bridgy
  def get_post(self, id, **kwargs):
    """Fetch a post.

    Args:
      id: string, site-specific post id
      is_event: bool
      kwargs: passed through to :meth:`get_activities`

    Returns:
      ActivityStreams object dict
    """
    try:
      posts = self.source.get_activities(
          activity_id=id, user_id=self.source.key_id(), **kwargs)
      if posts:
        return posts[0]
      logging.warning('Source post %s not found', id)
    except Exception as e:
      util.interpret_http_exception(e)
Example #24
File: instagram.py Project: swamim/bridgy
  def post(self):
    ia_start = util.oauth_starter(indieauth.StartHandler).to('/instagram/callback')(
      self.request, self.response)

    try:
      self.redirect(ia_start.redirect_url(me=util.get_required_param(self, 'user_url')))
    except Exception as e:
      if util.is_connection_failure(e) or util.interpret_http_exception(e)[0]:
        self.messages.add("Couldn't fetch your web site: %s" % e)
        return self.redirect('/')
      raise
Example #25
File: instagram.py Project: snarfed/bridgy
  def post(self):
    ia_start = util.oauth_starter(indieauth.StartHandler).to('/instagram/callback')(
      self.request, self.response)

    try:
      self.redirect(ia_start.redirect_url(me=util.get_required_param(self, 'user_url')))
    except Exception as e:
      if util.is_connection_failure(e) or util.interpret_http_exception(e)[0]:
        self.messages.add("Couldn't fetch your web site: %s" % e)
        return self.redirect('/')
      raise
Example #26
    def get(self):
        # https://cloud.google.com/appengine/docs/standard/python/ndb/admin#Metadata_queries
        kinds = [k for k in metadata.get_kinds() if not k.startswith('_')]
        kinds.remove('Response')
        kinds.remove('SyndicatedPost')
        logging.info('Backing up %s', kinds)

        access_token, _ = app_identity.get_access_token(
            'https://www.googleapis.com/auth/datastore')
        app_id = app_identity.get_application_id()

        request = {
            'project_id': app_id,
            'output_url_prefix': ('gs://brid-gy.appspot.com/weekly/' +
                                  datetime.datetime.now().strftime('%Y%m%d')),
            'entity_filter': {
                'kinds': kinds,
                # 'namespace_ids': self.request.get_all('namespace_id'),
            },
        }
        headers = {
            'Content-Type': 'application/json',
            'Authorization': 'Bearer ' + access_token,
        }

        try:
            result = urlfetch.fetch(
                url='https://datastore.googleapis.com/v1/projects/%s:export' %
                app_id,
                payload=json_dumps(request),
                method=urlfetch.POST,
                headers=headers)
            if result.status_code == http.client.OK:
                logging.info(result.content)
            else:
                logging.error(result.content)
                self.abort(result.status_code)
        except urlfetch.Error as e:
            util.interpret_http_exception(e)
            raise
Example #27
    def create_comment(self, post_url, author_name, author_url, content):
        """Creates a new comment in the source silo.

    If the last part of the post URL is numeric, e.g. http://site/post/123999,
    it's used as the post id. Otherwise, we extract the last part of
    the path as the slug, e.g. http://site/post/the-slug,
    and look up the post id via the API.

    Args:
      post_url: string
      author_name: string
      author_url: string
      content: string

    Returns:
      JSON response dict with 'id' and other fields
    """
        auth_entity = self.auth_entity.get()
        logging.info("Determining WordPress.com post id for %s", post_url)

        # extract the post's slug and look up its post id
        path = urlparse.urlparse(post_url).path
        if path.endswith("/"):
            path = path[:-1]
        slug = path.split("/")[-1]
        try:
            post_id = int(slug)
        except ValueError:
            logging.info("Looking up post id for slug %s", slug)
            url = API_POST_SLUG_URL % (auth_entity.blog_id, slug)
            post_id = self.urlopen(auth_entity, url).get("ID")
            if not post_id:
                return self.error("Could not find post id")

        logging.info("Post id is %d", post_id)

        # create the comment
        url = API_CREATE_COMMENT_URL % (auth_entity.blog_id, post_id)
        content = u'<a href="%s">%s</a>: %s' % (author_url, author_name, content)
        data = {"content": content.encode("utf-8")}
        try:
            resp = self.urlopen(auth_entity, url, data=urllib.urlencode(data))
        except urllib2.HTTPError, e:
            code, body = util.interpret_http_exception(e)
            try:
                parsed = json.loads(body) if body else {}
                if (code == "400" and parsed.get("error") == "invalid_input") or (
                    code == "403" and parsed.get("message") == "Comments on this post are closed"
                ):
                    return parsed  # known error: https://github.com/snarfed/bridgy/issues/161
            except ValueError:
                pass  # fall through
            raise e
Example #28
File: tasks.py Project: v1cker/bridgy
    def post(self):
        logging.debug('Params: %s', self.request.params)

        type = self.request.get('type')
        if type:
            assert type in ('event', )

        source = util.load_source(self)
        if not source or source.status == 'disabled' or 'listen' not in source.features:
            logging.error('Source not found or disabled. Dropping task.')
            return
        logging.info('Source: %s %s, %s', source.label(),
                     source.key.string_id(), source.bridgy_url(self))

        post_id = util.get_required_param(self, 'post_id')
        source.updates = {}

        try:
            if type == 'event':
                activities = [source.gr_source.get_event(post_id)]
            else:
                activities = source.get_activities(fetch_replies=True,
                                                   fetch_likes=True,
                                                   fetch_shares=True,
                                                   activity_id=post_id,
                                                   user_id=source.key.id())

            if not activities or not activities[0]:
                logging.info('Post %s not found.', post_id)
                return
            assert len(activities) == 1, activities
            self.backfeed(source,
                          activities={activities[0]['id']: activities[0]})

            obj = activities[0].get('object') or activities[0]
            in_reply_to = util.get_first(obj, 'inReplyTo')
            if in_reply_to:
                parsed = util.parse_tag_uri(in_reply_to.get(
                    'id', ''))  # TODO: fall back to url
                if parsed:
                    util.add_discover_task(source, parsed[1])

        except Exception, e:
            code, body = util.interpret_http_exception(e)
            if (code and (code in source.RATE_LIMIT_HTTP_CODES
                          or code in ('400', '404') or int(code) / 100 == 5)
                    or util.is_connection_failure(e)):
                logging.error('API call failed; giving up. %s: %s\n%s', code,
                              body, e)
                self.abort(util.ERROR_HTTP_RETURN_CODE)
            else:
                raise
Example #29
File: tasks.py Project: tantek/bridgy
  def poll(self, source):
    """Actually runs the poll.

    Stores property names and values to update in source.updates.
    """
    if source.last_activities_etag or source.last_activity_id:
      logging.debug('Using ETag %s, last activity id %s',
                    source.last_activities_etag, source.last_activity_id)

    #
    # Step 1: fetch activities:
    # * posts by the user
    # * search all posts for the user's domain URLs to find links
    #
    cache = util.CacheDict()
    if source.last_activities_cache_json:
      cache.update(json.loads(source.last_activities_cache_json))

    try:
      # search for links first so that the user's activities and responses
      # override them if they overlap
      links = source.search_for_links()

      # this user's own activities (and user mentions)
      resp = source.get_activities_response(
        fetch_replies=True, fetch_likes=True, fetch_shares=True,
        fetch_mentions=True, count=50, etag=source.last_activities_etag,
        min_id=source.last_activity_id, cache=cache)
      etag = resp.get('etag')  # used later
      user_activities = resp.get('items', [])

      # these map ids to AS objects
      responses = {a['id']: a for a in links}
      activities = {a['id']: a for a in links + user_activities}

    except Exception, e:
      code, body = util.interpret_http_exception(e)
      if code == '401':
        msg = 'Unauthorized error: %s' % e
        logging.warning(msg, exc_info=True)
        source.updates['poll_status'] = 'ok'
        raise models.DisableSource(msg)
      elif code in util.HTTP_RATE_LIMIT_CODES:
        logging.warning('Rate limited. Marking as error and finishing. %s', e)
        source.updates.update({'poll_status': 'error', 'rate_limited': True})
        return
      elif (code and int(code) / 100 == 5) or util.is_connection_failure(e):
        logging.error('API call failed. Marking as error and finishing. %s: %s\n%s',
                      code, body, e)
        self.abort(ERROR_HTTP_RETURN_CODE)
      else:
        raise
Example #30
File: indieauth.py Project: snarfed/bridgy
    def dispatch_request(self):
        token = request.form['token']

        try:
            to_url = self.redirect_url(state=token)
        except Exception as e:
            if util.is_connection_failure(e) or util.interpret_http_exception(
                    e)[0]:
                flash(f"Couldn't fetch your web site: {e}")
                return redirect('/')
            raise

        return redirect(to_url)
Example #31
File: cron.py Project: snarfed/bridgy
  def get(self):
    # https://cloud.google.com/appengine/docs/standard/python/ndb/admin#Metadata_queries
    kinds = [k for k in metadata.get_kinds() if not k.startswith('_')]
    kinds.remove('Response')
    kinds.remove('SyndicatedPost')
    logging.info('Backing up %s', kinds)

    access_token, _ = app_identity.get_access_token(
      'https://www.googleapis.com/auth/datastore')
    app_id = app_identity.get_application_id()

    request = {
        'project_id': app_id,
        'output_url_prefix': ('gs://brid-gy.appspot.com/weekly/' +
                              datetime.datetime.now().strftime('%Y%m%d')),
        'entity_filter': {
          'kinds': kinds,
          # 'namespace_ids': self.request.get_all('namespace_id'),
        },
    }
    headers = {
        'Content-Type': 'application/json',
        'Authorization': 'Bearer ' + access_token,
    }

    try:
      result = urlfetch.fetch(
          url='https://datastore.googleapis.com/v1/projects/%s:export' % app_id,
          payload=json.dumps(request),
          method=urlfetch.POST,
          headers=headers)
      if result.status_code == httplib.OK:
        logging.info(result.content)
      else:
        logging.error(result.content)
        self.abort(result.status_code)
    except urlfetch.Error as e:
      util.interpret_http_exception(e)
      raise
Example #32
    def dispatch_request(self):
        g.TRANSIENT_ERROR_HTTP_CODES = (
            self.SOURCE_CLS.TRANSIENT_ERROR_HTTP_CODES +
            self.SOURCE_CLS.RATE_LIMIT_HTTP_CODES)

        query = self.SOURCE_CLS.query().order(self.SOURCE_CLS.key)
        last = LastUpdatedPicture.get_by_id(self.SOURCE_CLS.SHORT_NAME)
        if last and last.last:
            query = query.filter(self.SOURCE_CLS.key > last.last)

        results, _, more = query.fetch_page(PAGE_SIZE)
        for source in results:
            if source.features and source.status != 'disabled':
                user_id = self.user_id(source)
                logger.debug(
                    f'checking for updated profile pictures for {source.bridgy_url()} {user_id}'
                )
                try:
                    actor = source.gr_source.get_actor(user_id)
                except BaseException as e:
                    logging.debug('Failed', exc_info=True)
                    # Mastodon API returns HTTP 404 for deleted (etc) users, and
                    # often one or more users' Mastodon instances are down.
                    code, _ = util.interpret_http_exception(e)
                    if code:
                        continue
                    raise

                if not actor:
                    logger.info(f"Couldn't fetch user")
                    continue

                new_pic = actor.get('image', {}).get('url')
                if not new_pic or source.picture == new_pic:
                    logger.info(f'No new picture found')
                    continue

                @ndb.transactional()
                def update():
                    src = source.key.get()
                    src.picture = new_pic
                    src.put()

                logger.info(
                    f'Updating profile picture from {source.picture} to {new_pic}'
                )
                update()

        LastUpdatedPicture(id=self.SOURCE_CLS.SHORT_NAME,
                           last=source.key if more else None).put()
        return 'OK'
Example #33
File: tasks.py Project: mblaney/bridgy
  def post(self):
    logging.debug('Params: %s', self.request.params)

    type = self.request.get('type')
    if type:
      assert type in ('event',)

    key = util.get_required_param(self, 'source_key')
    source = ndb.Key(urlsafe=key).get()
    if not source or source.status == 'disabled' or 'listen' not in source.features:
      logging.error('Source not found or disabled. Dropping task.')
      return
    logging.info('Source: %s %s, %s', source.label(), source.key.string_id(),
                 source.bridgy_url(self))

    post_id = util.get_required_param(self, 'post_id')
    source.updates = {}

    try:
      if type == 'event':
        activities = [source.gr_source.get_event(post_id)]
      else:
        activities = source.get_activities(
          fetch_replies=True, fetch_likes=True, fetch_shares=True,
          activity_id=post_id, user_id=source.key.id())

      if not activities or not activities[0]:
        logging.info('Post %s not found.', post_id)
        return
      assert len(activities) == 1, activities
      self.backfeed(source, activities={activities[0]['id']: activities[0]})

      in_reply_to = util.get_first(activities[0]['object'], 'inReplyTo')
      if in_reply_to:
        parsed = util.parse_tag_uri(in_reply_to.get('id', ''))  # TODO: fall back to url
        if parsed:
          util.add_discover_task(source, parsed[1])

    except Exception, e:
      code, body = util.interpret_http_exception(e)
      if (code and (code in util.HTTP_RATE_LIMIT_CODES or
                    code in ('400', '404') or
                    int(code) / 100 == 5)
            or util.is_connection_failure(e)):
        logging.error('API call failed; giving up. %s: %s\n%s', code, body, e)
        self.abort(util.ERROR_HTTP_RETURN_CODE)
      else:
        raise
Example #34
    def get(self, type, source_short_name, string_id, *ids):
        source_cls = models.sources.get(source_short_name)
        if not source_cls:
            self.abort(
                400, "Source type '%s' not found. Known sources: %s" %
                (source_short_name, filter(None, models.sources.keys())))

        self.source = source_cls.get_by_id(string_id)
        if not self.source:
            self.abort(
                400, 'Source %s %s not found' % (source_short_name, string_id))

        format = self.request.get('format', 'html')
        if format not in ('html', 'json'):
            self.abort(400,
                       'Invalid format %s, expected html or json' % format)

        for id in ids:
            if not self.VALID_ID.match(id):
                self.abort(404, 'Invalid id %s' % id)

        label = '%s:%s %s %s' % (source_short_name, string_id, type, ids)
        cache_key = 'H ' + label
        obj = memcache.get(cache_key)
        if obj:
            logging.info('Using cached object for %s', label)
        else:
            logging.info('Fetching %s', label)
            try:
                obj = self.get_item(*ids)
            except Exception, e:
                # pass through all API HTTP errors if we can identify them
                code, body = util.interpret_http_exception(e)
                if not code and util.is_connection_failure(e):
                    code = 503
                    body = str(e)
                if code:
                    self.response.status_int = int(code)
                    self.response.headers['Content-Type'] = 'text/plain'
                    self.response.write('%s error:\n%s' %
                                        (self.source.GR_CLASS.NAME, body))
                    return
                else:
                    raise
            memcache.set(cache_key, obj, time=CACHE_TIME)
Example #35
File: tasks.py Project: snarfed/bridgy
    def post(self, *path_args):
        logging.debug("Params: %s", self.request.params)

        key = self.request.params["source_key"]
        source = ndb.Key(urlsafe=key).get()
        if not source or source.status == "disabled" or "listen" not in source.features:
            logging.error("Source not found or disabled. Dropping task.")
            return
        logging.info("Source: %s %s, %s", source.label(), source.key.string_id(), source.bridgy_url(self))

        last_polled = self.request.params["last_polled"]
        if last_polled != source.last_polled.strftime(util.POLL_TASK_DATETIME_FORMAT):
            logging.warning("duplicate poll task! deferring to the other task.")
            return

        logging.info(
            "Last poll: %s/log?start_time=%s&key=%s",
            self.request.host_url,
            calendar.timegm(source.last_poll_attempt.utctimetuple()),
            source.key.urlsafe(),
        )

        # mark this source as polling
        source.updates = {"poll_status": "polling", "last_poll_attempt": util.now_fn()}
        source = models.Source.put_updates(source)

        source.updates = {}
        try:
            self.poll(source)
        except Exception, e:
            source.updates["poll_status"] = "error"
            code, body = util.interpret_http_exception(e)
            if code == "401" or isinstance(e, models.DisableSource):
                # the user deauthorized the bridgy app, so disable this source.
                # let the task complete successfully so that it's not retried.
                logging.warning("Disabling source due to: %s" % e, exc_info=True)
                source.updates.update({"status": "disabled", "poll_status": "ok"})
            elif code in util.HTTP_RATE_LIMIT_CODES:
                logging.warning("Rate limited. Marking as error and finishing. %s", e)
                source.updates["rate_limited"] = True
            elif (code and int(code) / 100 == 5) or util.is_connection_failure(e):
                logging.error("API call failed. Marking as error and finishing. %s: %s\n%s", code, body, e)
                self.abort(ERROR_HTTP_RETURN_CODE)
            else:
                raise
Example #36
    def post(self):
        key = ndb.Key(urlsafe=util.get_required_param(self, 'key'))
        module = self.OAUTH_MODULES[key.kind()]
        feature = util.get_required_param(self, 'feature')
        state = util.encode_oauth_state({
            'operation': 'delete',
            'feature': feature,
            'source': key.urlsafe(),
            'callback': self.request.get('callback'),
        })

        # Google+ and Blogger don't support redirect_url() yet
        if module is oauth_googleplus:
            return self.redirect('/googleplus/delete/start?state=%s' % state)

        if module is oauth_blogger_v2:
            return self.redirect('/blogger/delete/start?state=%s' % state)

        source = key.get()
        path = ('/instagram/callback' if module is indieauth else
                '/wordpress/add' if module is oauth_wordpress_rest else
                '/%s/delete/finish' % source.SHORT_NAME)
        kwargs = {}
        if module is oauth_twitter:
            kwargs['access_type'] = 'read' if feature == 'listen' else 'write'

        handler = module.StartHandler.to(path, **kwargs)(self.request,
                                                         self.response)
        try:
            self.redirect(handler.redirect_url(state=state))
        except Exception as e:
            code, body = util.interpret_http_exception(e)
            if not code and util.is_connection_failure(e):
                code = '-'
                body = unicode(e)
            if code:
                self.messages.add('%s API error %s: %s' %
                                  (source.GR_CLASS.NAME, code, body))
                self.redirect(source.bridgy_url(self))
            else:
                raise
Example #37
  def get_site_info(cls, auth_entity):
    """Fetches the site info from the API.

    Args:
      auth_entity: :class:`oauth_dropins.wordpress_rest.WordPressAuth`

    Returns:
      site info dict, or None if API calls are disabled for this blog
    """
    try:
      return cls.urlopen(auth_entity, API_SITE_URL % auth_entity.blog_id)
    except urllib.error.HTTPError as e:
      code, body = util.interpret_http_exception(e)
      if (code == '403' and '"API calls to this blog have been disabled."' in body):
        flash(f'You need to <a href="http://jetpack.me/support/json-api/">enable the Jetpack JSON API</a> in {util.pretty_link(auth_entity.blog_url)}\'s WordPress admin console.')
        redirect('/')
        return None
      raise
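
The docstring's "or None" return value matters to callers: None means the Jetpack JSON API is disabled, the user has already been shown a message and redirected, and the signup flow should simply stop. A hypothetical caller under that assumption (WordPress stands in for whatever class defines this classmethod):

site_info = WordPress.get_site_info(auth_entity)
if site_info is None:
    # Jetpack JSON API is disabled; a flash message and redirect were already
    # issued above, so quietly abort the rest of the signup flow.
    return None
# ...otherwise continue creating the source from site_info (a dict from the API)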
Example #38
0
  def get_post(self, post_id, source_fn=None):
    """Utility method fetches the original post
    Args:
      post_id: string, site-specific post id
      source_fn: optional reference to a Source method,
        defaults to Source.get_post.

    Returns: ActivityStreams object dict
    """
    try:
      post = (source_fn or self.source.get_post)(post_id)
      if not post:
        logging.warning('Source post %s not found', post_id)
      return post
    except Exception, e:
      # use interpret_http_exception to log HTTP errors
      if not util.interpret_http_exception(e)[0]:
        logging.warning(
          'Error fetching source post %s', post_id, exc_info=True)
Example #39
0
File: cron.py Project: mblaney/bridgy
  def get(self):
    sources = {source.key.id(): source for source in Twitter.query()}
    if not sources:
      return

    # just auth as me or the first user. TODO: use app-only auth instead.
    auther = sources.get('schnarfed') or sources.values()[0]
    usernames = sources.keys()
    users = []
    for i in range(0, len(usernames), TWITTER_USERS_PER_LOOKUP):
      username_batch = usernames[i:i + TWITTER_USERS_PER_LOOKUP]
      url = TWITTER_API_USER_LOOKUP % ','.join(username_batch)
      try:
        users += auther.gr_source.urlopen(url)
      except Exception, e:
        code, body = util.interpret_http_exception(e)
        if not (code == '404' and len(username_batch) == 1):
          # 404 for a single user means they deleted their account. otherwise...
          raise
Example #41
0
  def get(self, type, source_short_name, string_id, *ids):
    source_cls = models.sources.get(source_short_name)
    if not source_cls:
      self.abort(400, "Source type '%s' not found. Known sources: %s" %
                 (source_short_name, filter(None, models.sources.keys())))

    self.source = source_cls.get_by_id(string_id)
    if not self.source:
      self.abort(400, 'Source %s %s not found' % (source_short_name, string_id))

    format = self.request.get('format', 'html')
    if format not in ('html', 'json'):
      self.abort(400, 'Invalid format %s, expected html or json' % format)

    for id in ids:
      if not self.VALID_ID.match(id):
        self.abort(404, 'Invalid id %s' % id)

    label = '%s:%s %s %s' % (source_short_name, string_id, type, ids)
    cache_key = 'H ' + label
    obj = memcache.get(cache_key)
    if obj:
      logging.info('Using cached object for %s', label)
    else:
      logging.info('Fetching %s', label)
      try:
        obj = self.get_item(*ids)
      except Exception, e:
        # pass through all API HTTP errors if we can identify them
        code, body = util.interpret_http_exception(e)
        if not code and util.is_connection_failure(e):
          code = 503
          body = str(e)
        if code:
          self.response.status_int = int(code)
          self.response.headers['Content-Type'] = 'text/plain'
          self.response.write('%s error:\n%s' % (self.source.GR_CLASS.NAME, body))
          return
        else:
          raise
      memcache.set(cache_key, obj, time=CACHE_TIME)
Example #42
0
  def get_site_info(cls, handler, auth_entity):
    """Fetches the site info from the API.

    Args:
      handler: the current RequestHandler
      auth_entity: oauth_dropins.wordpress.WordPressAuth

    Returns: site info dict, or None if API calls are disabled for this blog
    """
    try:
      return cls.urlopen(auth_entity, API_SITE_URL % auth_entity.blog_id)
    except urllib2.HTTPError, e:
      code, body = util.interpret_http_exception(e)
      if (code == '403' and '"API calls to this blog have been disabled."' in body):
        handler.messages.add(
          'You need to <a href="http://jetpack.me/support/json-api/">enable '
          'the Jetpack JSON API</a> in %s\'s WordPress admin console.' %
          util.pretty_link(auth_entity.blog_url))
        handler.redirect('/')
        return None
      raise
Example #43
0
def background_handle_exception(e):
    """Common exception handler for background tasks.

  Catches failed outbound HTTP requests and returns HTTP 304.
  """
    if isinstance(e, HTTPException):
        # raised by this app itself, pass it through
        return str(e), e.code

    transients = getattr(g, 'TRANSIENT_ERROR_HTTP_CODES', ())
    source = getattr(g, 'source', None)
    if source:
        transients += source.RATE_LIMIT_HTTP_CODES + source.TRANSIENT_ERROR_HTTP_CODES

    code, body = util.interpret_http_exception(e)
    if ((code and int(code) // 100 == 5) or code in transients
            or util.is_connection_failure(e)):
        logger.error(f'Marking as error and finishing. {code}: {body}\n{e}')
        return '', util.ERROR_HTTP_RETURN_CODE

    raise e
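
background_handle_exception() is shaped like a Flask error handler: it reads flask.g and returns a (body, status) tuple or re-raises. Assuming that is how it is meant to be wired up, registration would look roughly like this; the app object and placement are illustrative, not bridgy's actual setup.

from flask import Flask

app = Flask(__name__)

# route any uncaught exception from the background task views through the
# common handler, so transient HTTP failures finish instead of retrying forever
app.register_error_handler(Exception, background_handle_exception)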
Example #44
0
File: handlers.py Project: tantek/bridgy
    def get_post(self, id, is_event=False):
        """Fetch a post.

    Args:
      id: string, site-specific post id
      is_event: bool

    Returns: ActivityStreams object dict
    """
        try:
            if is_event:
                post = self.source.gr_source.get_event(id)
            else:
                posts = self.source.get_activities(activity_id=id, user_id=self.source.key.id())
                post = posts[0] if posts else None
            if not post:
                logging.warning("Source post %s not found", id)
            return post
        except Exception, e:
            # use interpret_http_exception to log HTTP errors
            if not util.interpret_http_exception(e)[0]:
                logging.warning("Error fetching source post %s", id, exc_info=True)
Example #45
0
  def poll(self, source):
    """Actually runs the poll.

    Returns: dict of source property names and values to update (transactionally)
    """
    if source.last_activities_etag or source.last_activity_id:
      logging.debug('Using ETag %s, last activity id %s',
                    source.last_activities_etag, source.last_activity_id)
    source_updates = {}

    #
    # Step 1: fetch activities
    #
    cache = util.CacheDict()
    if source.last_activities_cache_json:
      cache.update(json.loads(source.last_activities_cache_json))

    try:
      response = source.get_activities_response(
        fetch_replies=True, fetch_likes=True, fetch_shares=True, count=50,
        etag=source.last_activities_etag, min_id=source.last_activity_id,
        cache=cache)
    except Exception, e:
      code, body = util.interpret_http_exception(e)
      if code == '401':
        # TODO: also interpret oauth2client.AccessTokenRefreshError with
        # {'error': 'invalid_grant'} as disabled? it can mean the user revoked
        # access. it can also mean the token expired, or they deleted their
        # account, or even other things.
        # http://code.google.com/p/google-api-python-client/issues/detail?id=187#c1
        msg = 'Unauthorized error: %s' % e
        logging.warning(msg, exc_info=True)
        raise models.DisableSource(msg)
      elif code in util.HTTP_RATE_LIMIT_CODES:
        logging.warning('Rate limited. Marking as error and finishing. %s', e)
        source_updates.update({'status': 'error', 'rate_limited': True})
        return source_updates
      else:
        raise
Example #47
0
File: pages.py Project: snarfed/bridgy
def delete_start():
    source = util.load_source()
    kind = source.key.kind()
    feature = request.form['feature']
    state = util.encode_oauth_state({
        'operation': 'delete',
        'feature': feature,
        'source': source.key.urlsafe().decode(),
        'callback': request.values.get('callback'),
    })

    # Blogger doesn't support redirect_url() yet
    if kind == 'Blogger':
        return redirect(f'/blogger/delete/start?state={state}')

    path = ('/reddit/callback' if kind == 'Reddit' else '/wordpress/add'
            if kind == 'WordPress' else f'/{source.SHORT_NAME}/delete/finish')
    kwargs = {}
    if kind == 'Twitter':
        kwargs['access_type'] = 'read' if feature == 'listen' else 'write'

    try:
        return redirect(source.OAUTH_START(path).redirect_url(state=state))
    except werkzeug.exceptions.HTTPException:
        # raised by us, probably via self.error()
        raise
    except Exception as e:
        code, body = util.interpret_http_exception(e)
        if not code and util.is_connection_failure(e):
            code = '-'
            body = str(e)
        if code:
            flash(f'{source.GR_CLASS.NAME} API error {code}: {body}')
            return redirect(source.bridgy_url())
        else:
            raise
Example #48
0
    def _run(self):
        """Returns CreationResult on success, None otherwise."""
        logging.info('Params: %s', self.request.params.items())
        assert self.PREVIEW in (True, False)

        # parse and validate target URL
        try:
            parsed = urlparse.urlparse(self.target_url())
        except BaseException:
            return self.error('Could not parse target URL %s' %
                              self.target_url())

        domain = parsed.netloc
        path_parts = parsed.path.rsplit('/', 1)
        source_cls = SOURCE_NAMES.get(path_parts[-1])
        if (domain not in ('brid.gy', 'www.brid.gy', 'localhost:8080')
                or len(path_parts) != 2 or path_parts[0] != '/publish'
                or not source_cls):
            return self.error(
                'Target must be brid.gy/publish/{facebook,flickr,twitter,instagram}'
            )
        elif source_cls == GooglePlusPage:
            return self.error('Sorry, %s is not yet supported.' %
                              source_cls.GR_CLASS.NAME)

        # resolve source URL
        url, domain, ok = util.get_webmention_target(
            self.source_url(), replace_test_domains=False)
        # show nice error message if they're trying to publish a silo post
        if domain in SOURCE_DOMAINS:
            return self.error(
                "Looks like that's a %s URL. Try one from your web site instead!"
                % SOURCE_DOMAINS[domain].GR_CLASS.NAME)
        elif not ok:
            return self.error('Unsupported source URL %s' % url)
        elif not domain:
            return self.error('Could not parse source URL %s' % url)

        # look up source by domain
        domain = domain.lower()
        sources = source_cls.query().filter(
            source_cls.domains == domain).fetch(100)
        if not sources:
            return self.error(
                "Could not find <b>%(type)s</b> account for <b>%(domain)s</b>. Check that your %(type)s profile has %(domain)s in its <em>web site</em> or <em>link</em> field, then try signing up again."
                % {
                    'type': source_cls.GR_CLASS.NAME,
                    'domain': domain
                })

        current_url = ''
        for source in sources:
            logging.info('Source: %s , features %s, status %s, poll status %s',
                         source.bridgy_url(self), source.features,
                         source.status, source.poll_status)
            if source.status != 'disabled' and 'publish' in source.features:
                # use a source that has a domain_url matching the url provided.
                # look through each source to find the one with the closest match.
                for domain_url in source.domain_urls:
                    if (url.lower().startswith(domain_url.lower().strip('/'))
                            and len(domain_url) > len(current_url)):
                        self.source = source
                        current_url = domain_url

        if not self.source:
            return self.error(
                'Publish is not enabled for your account. Please visit https://brid.gy and sign up!'
            )

        content_param = 'bridgy_%s_content' % self.source.SHORT_NAME
        if content_param in self.request.params:
            return self.error('The %s parameter is not supported' %
                              content_param)

        # show nice error message if they're trying to publish their home page
        for domain_url in self.source.domain_urls:
            domain_url_parts = urlparse.urlparse(domain_url)
            source_url_parts = urlparse.urlparse(self.source_url())
            if (source_url_parts.netloc == domain_url_parts.netloc
                    and source_url_parts.path.strip('/')
                    == domain_url_parts.path.strip('/')
                    and not source_url_parts.query):
                return self.error(
                    "Looks like that's your home page. Try one of your posts instead!"
                )

        # done with the sanity checks, ready to fetch the source url. create the
        # Publish entity so we can store the result.
        entity = self.get_or_add_publish_entity(url)
        if (entity.status == 'complete' and entity.type != 'preview'
                and not self.PREVIEW and not appengine_config.DEBUG):
            return self.error(
                "Sorry, you've already published that page, and Bridgy Publish doesn't yet support updating or deleting existing posts. Ping Ryan if you want that feature!"
            )
        self.entity = entity

        # fetch source page
        resp = self.fetch_mf2(url)
        if not resp:
            return
        self.fetched, data = resp

        # find rel-shortlink, if any
        # http://microformats.org/wiki/rel-shortlink
        # https://github.com/snarfed/bridgy/issues/173
        soup = util.beautifulsoup_parse(self.fetched.text)
        shortlinks = (soup.find_all('link', rel='shortlink') +
                      soup.find_all('a', rel='shortlink') +
                      soup.find_all('a', class_='shortlink'))
        if shortlinks:
            self.shortlink = shortlinks[0]['href']

        # loop through each item and its children and try to preview/create it. if
        # it fails, try the next one. break after the first one that works.
        resp = None
        types = set()
        queue = collections.deque(data.get('items', []))
        while queue:
            item = queue.popleft()
            item_types = set(item.get('type'))
            if 'h-feed' in item_types and 'h-entry' not in item_types:
                queue.extend(item.get('children', []))
                continue
            elif not item_types & PUBLISHABLE_TYPES:
                continue

            try:
                result = self.attempt_single_item(item)
                if self.entity.published:
                    break
                if result.abort:
                    if result.error_plain:
                        self.error(result.error_plain,
                                   html=result.error_html,
                                   data=item)
                    return
                # try the next item
                for embedded in ('rsvp', 'invitee', 'repost', 'repost-of',
                                 'like', 'like-of', 'in-reply-to'):
                    if embedded in item.get('properties', []):
                        item_types.add(embedded)
                logging.info(
                    'Object type(s) %s not supported; error=%s; trying next.',
                    item_types, result.error_plain)
                types = types.union(item_types)
                queue.extend(item.get('children', []))
            except BaseException, e:
                code, body = util.interpret_http_exception(e)
                return self.error('Error: %s %s' % (body or '', e),
                                  status=code or 500,
                                  mail=True)
Example #49
0
    def fetch_mf2(self, url, id=None, require_mf2=True, raise_errors=False):
        """Fetches a URL and extracts its mf2 data.

    Side effects: sets :attr:`entity`\ .html on success, calls :attr:`error()`
    on errors.

    Args:
      url: string
      id: string, optional id of specific element to extract and parse. defaults
        to the whole page.
      require_mf2: boolean, whether to return error if no mf2 are found
      raise_errors: boolean, whether to let error exceptions propagate up or
        handle them

    Returns:
      (:class:`requests.Response`, mf2 data dict) on success, None on failure
    """
        try:
            resp = util.requests_get(url)
            resp.raise_for_status()
        except werkzeug.exceptions.HTTPException:
            # raised by us, probably via self.error()
            raise
        except BaseException as e:
            if raise_errors:
                raise
            util.interpret_http_exception(e)  # log exception
            self.error(f'Could not fetch source URL {url}')

        if self.entity:
            self.entity.html = resp.text

        # parse microformats
        soup = util.parse_html(resp)
        mf2 = util.parse_mf2(soup, url=resp.url, id=id)
        if id and not mf2:
            self.error(f'Got fragment {id} but no element found with that id.')

        # special case tumblr's markup: div#content > div.post > div.copy
        # convert to mf2 and re-parse
        if not mf2.get('items'):
            contents = soup.find_all(id='content')
            if contents:
                post = contents[0].find_next(class_='post')
                if post:
                    post['class'] = 'h-entry'
                    copy = post.find_next(class_='copy')
                    if copy:
                        copy['class'] = 'e-content'
                    photo = post.find_next(class_='photo-wrapper')
                    if photo:
                        img = photo.find_next('img')
                        if img:
                            img['class'] = 'u-photo'
                    # TODO: i should be able to pass post or contents[0] to mf2py instead
                    # here, but it returns no items. mf2py bug?
                    doc = str(post)
                    mf2 = util.parse_mf2(doc, resp.url)

        logger.debug(f'Parsed microformats2: {json_dumps(mf2, indent=2)}')
        items = mf2.get('items', [])
        if require_mf2 and (not items or not items[0]):
            self.error('No microformats2 data found in ' + resp.url,
                       data=mf2,
                       html=f"""
No <a href="http://microformats.org/get-started">microformats</a> or
<a href="http://microformats.org/wiki/microformats2">microformats2</a> found in
<a href="{resp.url}">{util.pretty_link(resp.url)}</a>! See <a href="http://indiewebify.me/">indiewebify.me</a>
for details (skip to level 2, <em>Publishing on the IndieWeb</em>).
""")

        return resp, mf2
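
A caller of this newer fetch_mf2() receives the fetched requests.Response plus the parsed mf2 dict and typically walks the top-level items. A minimal hypothetical caller, assuming it runs inside a handler that defines fetch_mf2() as above and a module-level logger:

fetched, mf2 = self.fetch_mf2(url, require_mf2=False)

# pick out the first h-entry on the page, if there is one
entry = next((item for item in mf2.get('items', [])
              if 'h-entry' in item.get('type', [])), None)
if entry:
    props = entry.get('properties', {})
    logger.debug(f'Found h-entry with properties {list(props.keys())}')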
Example #50
0
  def post(self, source_short_name):
    logging.info('Params: %s', self.request.params.items())
    # strip fragments from source and target url
    self.source_url = urlparse.urldefrag(util.get_required_param(self, 'source'))[0]
    self.target_url = urlparse.urldefrag(util.get_required_param(self, 'target'))[0]

    # follow target url through any redirects, strip utm_* query params
    resp = util.follow_redirects(self.target_url)
    redirected_target_urls = [r.url for r in resp.history]
    self.target_url = util.clean_url(resp.url)

    # parse and validate target URL
    domain = util.domain_from_link(self.target_url)
    if not domain:
      return self.error('Could not parse target URL %s' % self.target_url)

    # look up source by domain
    source_cls = models.sources[source_short_name]
    domain = domain.lower()
    self.source = (source_cls.query()
                   .filter(source_cls.domains == domain)
                   .filter(source_cls.features == 'webmention')
                   .filter(source_cls.status == 'enabled')
                   .get())
    if not self.source:
      # check for a rel-canonical link. Blogger uses these when it serves a post
      # from multiple domains, e.g country TLDs like epeus.blogspot.co.uk vs
      # epeus.blogspot.com.
      # https://github.com/snarfed/bridgy/issues/805
      mf2 = self.fetch_mf2(self.target_url, require_mf2=False)
      if not mf2:
        # fetch_mf2() already wrote the error response
        return
      domains = util.dedupe_urls(
        util.domain_from_link(url)
        for url in mf2[1].get('rels', {}).get('canonical', []))
      if domains:
        self.source = (source_cls.query()
                       .filter(source_cls.domains.IN(domains))
                       .filter(source_cls.features == 'webmention')
                       .filter(source_cls.status == 'enabled')
                       .get())

    if not self.source:
      return self.error(
        'Could not find %s account for %s. Is it registered with Bridgy?' %
        (source_cls.GR_CLASS.NAME, domain))

    # check that the target URL path is supported
    target_path = urlparse.urlparse(self.target_url).path
    if target_path in ('', '/'):
      return self.error('Home page webmentions are not currently supported.',
                        status=202)
    for pattern in self.source.PATH_BLACKLIST:
      if pattern.match(target_path):
        return self.error('%s webmentions are not supported for URL path: %s' %
                          (self.source.GR_CLASS.NAME, target_path), status=202)

    # create BlogWebmention entity
    id = '%s %s' % (self.source_url, self.target_url)
    self.entity = BlogWebmention.get_or_insert(
      id, source=self.source.key, redirected_target_urls=redirected_target_urls)
    if self.entity.status == 'complete':
      # TODO: response message saying update isn't supported
      self.response.write(self.entity.published)
      return
    logging.debug("BlogWebmention entity: '%s'", self.entity.key.urlsafe())

    # fetch source page
    resp = self.fetch_mf2(self.source_url)
    if not resp:
      return
    self.fetched, data = resp

    item = self.find_mention_item(data.get('items', []))
    if not item:
      return self.error('Could not find target URL %s in source page %s' %
                        (self.target_url, self.fetched.url),
                        data=data, log_exception=False)

    # default author to target domain
    author_name = domain
    author_url = 'http://%s/' % domain

    # extract author name and URL from h-card, if any
    props = item['properties']
    author = first_value(props, 'author')
    if author:
      if isinstance(author, basestring):
        author_name = author
      else:
        author_props = author.get('properties', {})
        author_name = first_value(author_props, 'name')
        author_url = first_value(author_props, 'url')

    # if present, u-url overrides source url
    u_url = first_value(props, 'url')
    if u_url:
      self.entity.u_url = u_url

    # generate content
    content = props['content'][0]  # find_mention_item() guaranteed this is here
    text = (content.get('html') or content.get('value')).strip()
    source_url = self.entity.source_url()
    text += ' <br /> <a href="%s">via %s</a>' % (
      source_url, util.domain_from_link(source_url))

    # write comment
    try:
      self.entity.published = self.source.create_comment(
        self.target_url, author_name, author_url, text)
    except Exception as e:
      code, body = util.interpret_http_exception(e)
      msg = 'Error: %s %s; %s' % (code, e, body)
      if code == '401':
        logging.warning('Disabling source due to: %s' % e, exc_info=True)
        self.source.status = 'disabled'
        self.source.put()
        return self.error(msg, status=code, mail=self.source.is_beta_user())
      elif code == '404':
        # post is gone
        return self.error(msg, status=code, mail=False)
      elif util.is_connection_failure(e) or (code and int(code) // 100 == 5):
        return self.error(msg, status=util.ERROR_HTTP_RETURN_CODE, mail=False)
      elif code or body:
        return self.error(msg, status=code, mail=True)
      else:
        raise

    # write results to datastore
    self.entity.status = 'complete'
    self.entity.put()
    self.response.write(json.dumps(self.entity.published))
Example #51
0
    def get(self, type, source_short_name, string_id, *ids):
        source_cls = models.sources.get(source_short_name)
        if not source_cls:
            self.abort(
                400, "Source type '%s' not found. Known sources: %s" %
                (source_short_name, filter(None, models.sources.keys())))

        self.source = source_cls.get_by_id(string_id)
        if not self.source:
            self.abort(
                400, 'Source %s %s not found' % (source_short_name, string_id))
        elif (self.source.status == 'disabled'
              or ('listen' not in self.source.features
                  and 'email' not in self.source.features)):
            self.abort(
                400, 'Source %s is disabled for backfeed' %
                self.source.bridgy_path())

        format = self.request.get('format', 'html')
        if format not in ('html', 'json'):
            self.abort(400,
                       'Invalid format %s, expected html or json' % format)

        for id in ids:
            if not self.VALID_ID.match(id):
                self.abort(404, 'Invalid id %s' % id)

        label = '%s:%s %s %s' % (source_short_name, string_id, type, ids)
        cache_key = 'H ' + label
        obj = memcache.get(cache_key)
        if obj and not appengine_config.DEBUG:
            logging.info('Using cached object for %s', label)
        else:
            logging.info('Fetching %s', label)
            try:
                obj = self.get_item(*ids)
            except models.DisableSource as e:
                self.abort(
                    401,
                    "Bridgy's access to your account has expired. Please visit https://brid.gy/ to refresh it!"
                )
            except ValueError as e:
                self.abort(400,
                           '%s error:\n%s' % (self.source.GR_CLASS.NAME, e))
            except Exception as e:
                # pass through all API HTTP errors if we can identify them
                code, body = util.interpret_http_exception(e)
                # temporary, trying to debug a flaky test failure
                # eg https://circleci.com/gh/snarfed/bridgy/769
                if code:
                    self.response.status_int = int(code)
                    self.response.headers['Content-Type'] = 'text/plain'
                    self.response.write('%s error:\n%s' %
                                        (self.source.GR_CLASS.NAME, body))
                    return
                else:
                    raise
            memcache.set(cache_key, obj, time=CACHE_TIME)

        if not obj:
            self.abort(404, label)

        if self.source.is_blocked(obj):
            self.abort(410, 'That user is currently blocked')

        # use https for profile pictures so we don't cause SSL mixed mode errors
        # when serving over https.
        author = obj.get('author', {})
        image = author.get('image', {})
        url = image.get('url')
        if url:
            image['url'] = util.update_scheme(url, self)

        mf2_json = microformats2.object_to_json(obj, synthesize_content=False)

        # try to include the author's silo profile url
        author = first_props(mf2_json.get('properties', {})).get('author', {})
        author_uid = first_props(author.get('properties', {})).get('uid', '')
        if author_uid:
            parsed = util.parse_tag_uri(author_uid)
            if parsed:
                silo_url = self.source.gr_source.user_url(parsed[1])
                urls = author.get('properties', {}).setdefault('url', [])
                if silo_url not in microformats2.get_string_urls(urls):
                    urls.append(silo_url)

        # write the response!
        self.response.headers['Access-Control-Allow-Origin'] = '*'
        if format == 'html':
            self.response.headers['Content-Type'] = 'text/html; charset=utf-8'
            url = obj.get('url', '')
            self.response.out.write(
                TEMPLATE.substitute({
                    'refresh': ('<meta http-equiv="refresh" content="0;url=%s">' % url
                                if url else ''),
                    'url': url,
                    'body': microformats2.json_to_html(mf2_json),
                    'title': self.get_title(obj),
                }))
        elif format == 'json':
            self.response.headers[
                'Content-Type'] = 'application/json; charset=utf-8'
            self.response.out.write(json.dumps(mf2_json, indent=2))
Example #52
0
    def fetch_mf2(self, url, require_mf2=True, raise_errors=False):
        """Fetches a URL and extracts its mf2 data.

    Side effects: sets :attr:`entity`\ .html on success, calls :attr:`error()`
    on errors.

    Args:
      url: string
      require_mf2: boolean, whether to return error if no mf2 are found
      raise_errors: boolean, whether to let error exceptions propagate up or
        handle them

    Returns:
      (:class:`requests.Response`, mf2 data dict) on success, None on failure
    """
        try:
            fetched = util.requests_get(url)
            fetched.raise_for_status()
        except BaseException as e:
            if raise_errors:
                raise
            util.interpret_http_exception(e)  # log exception
            return self.error('Could not fetch source URL %s' % url)

        if self.entity:
            self.entity.html = fetched.text

        # .text is decoded unicode string, .content is raw bytes. if the HTTP
        # headers didn't specify a charset, pass raw bytes to BeautifulSoup so it
        # can look for a <meta> tag with a charset and decode.
        text = (fetched.text if 'charset' in fetched.headers.get(
            'content-type', '') else fetched.content)
        doc = util.beautifulsoup_parse(text)

        # parse microformats
        data = util.mf2py_parse(doc, fetched.url)

        # special case tumblr's markup: div#content > div.post > div.copy
        # convert to mf2 and re-parse
        if not data.get('items'):
            contents = doc.find_all(id='content')
            if contents:
                post = contents[0].find_next(class_='post')
                if post:
                    post['class'] = 'h-entry'
                    copy = post.find_next(class_='copy')
                    if copy:
                        copy['class'] = 'e-content'
                    photo = post.find_next(class_='photo-wrapper')
                    if photo:
                        img = photo.find_next('img')
                        if img:
                            img['class'] = 'u-photo'
                    doc = unicode(post)
                    data = util.mf2py_parse(doc, fetched.url)

        logging.debug('Parsed microformats2: %s', json.dumps(data, indent=2))
        items = data.get('items', [])
        if require_mf2 and (not items or not items[0]):
            return self.error('No microformats2 data found in ' + fetched.url,
                              data=data,
                              html="""
No <a href="http://microformats.org/get-started">microformats</a> or
<a href="http://microformats.org/wiki/microformats2">microformats2</a> found in
<a href="%s">%s</a>! See <a href="http://indiewebify.me/">indiewebify.me</a>
for details (skip to level 2, <em>Publishing on the IndieWeb</em>).
""" % (fetched.url, util.pretty_link(fetched.url)))

        return fetched, data
Example #53
0
  def get(self, type, source_short_name, string_id, *ids):
    source_cls = models.sources.get(source_short_name)
    if not source_cls:
      self.abort(400, "Source type '%s' not found. Known sources: %s" %
                 (source_short_name, filter(None, models.sources.keys())))

    self.source = source_cls.get_by_id(string_id)
    if not self.source:
      self.abort(400, 'Source %s %s not found' % (source_short_name, string_id))
    elif self.source.status == 'disabled' or 'listen' not in self.source.features:
      self.abort(400, 'Source %s is disabled for backfeed' % self.source.bridgy_path())

    format = self.request.get('format', 'html')
    if format not in ('html', 'json'):
      self.abort(400, 'Invalid format %s, expected html or json' % format)

    for id in ids:
      if not self.VALID_ID.match(id):
        self.abort(404, 'Invalid id %s' % id)

    label = '%s:%s %s %s' % (source_short_name, string_id, type, ids)
    cache_key = 'H ' + label
    obj = memcache.get(cache_key)
    if obj:
      logging.info('Using cached object for %s', label)
    else:
      logging.info('Fetching %s', label)
      try:
        obj = self.get_item(*ids)
      except models.DisableSource as e:
        self.abort(401, "Bridgy's access to your account has expired. Please visit https://brid.gy/ to refresh it!")
      except Exception as e:
        # pass through all API HTTP errors if we can identify them
        code, body = util.interpret_http_exception(e)
        if not code and util.is_connection_failure(e):
          code = 503
          body = str(e)
        if code:
          self.response.status_int = int(code)
          self.response.headers['Content-Type'] = 'text/plain'
          self.response.write('%s error:\n%s' % (self.source.GR_CLASS.NAME, body))
          return
        else:
          raise
      memcache.set(cache_key, obj, time=CACHE_TIME)

    if not obj:
      self.abort(404, label)

    # use https for profile pictures so we don't cause SSL mixed mode errors
    # when serving over https.
    author = obj.get('author', {})
    image = author.get('image', {})
    url = image.get('url')
    if url:
      image['url'] = util.update_scheme(url, self)

    mf2_json = microformats2.object_to_json(obj, synthesize_content=False)

    # try to include the author's silo profile url
    author = first_props(mf2_json.get('properties', {})).get('author', {})
    author_uid = first_props(author.get('properties', {})).get('uid', '')
    if author_uid:
      parsed = util.parse_tag_uri(author_uid)
      if parsed:
        silo_url = self.source.gr_source.user_url(parsed[1])
        urls = author.get('properties', {}).setdefault('url', [])
        if silo_url not in microformats2.get_string_urls(urls):
          urls.append(silo_url)

    # write the response!
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    if format == 'html':
      self.response.headers['Content-Type'] = 'text/html; charset=utf-8'
      self.response.out.write(TEMPLATE.substitute({
            'url': obj.get('url', ''),
            'body': microformats2.json_to_html(mf2_json),
            'title': self.get_title(obj),
            }))
    elif format == 'json':
      self.response.headers['Content-Type'] = 'application/json; charset=utf-8'
      self.response.out.write(json.dumps(mf2_json, indent=2))
Example #54
0
  def post(self, source_short_name):
    logging.info('Params: %s', self.request.params.items())
    # strip fragments from source and target url
    self.source_url = urlparse.urldefrag(util.get_required_param(self, 'source'))[0]
    self.target_url = urlparse.urldefrag(util.get_required_param(self, 'target'))[0]

    # follow target url through any redirects, strip utm_* query params
    resp = util.follow_redirects(self.target_url)
    redirected_target_urls = [r.url for r in resp.history]
    self.target_url = util.clean_url(resp.url)

    # parse and validate target URL
    domain = util.domain_from_link(self.target_url)
    if not domain:
      return self.error('Could not parse target URL %s' % self.target_url)

    # look up source by domain
    source_cls = models.sources[source_short_name]
    domain = domain.lower()
    self.source = (source_cls.query()
                   .filter(source_cls.domains == domain)
                   .filter(source_cls.features == 'webmention')
                   .filter(source_cls.status == 'enabled')
                   .get())
    if not self.source:
      return self.error(
        'Could not find %s account for %s. Is it registered with Bridgy?' %
        (source_cls.GR_CLASS.NAME, domain))

    if urlparse.urlparse(self.target_url).path in ('', '/'):
      return self.error('Home page webmentions are not currently supported.')

    # create BlogWebmention entity
    id = u'%s %s' % (self.source_url, self.target_url)
    self.entity = BlogWebmention.get_or_insert(
      id, source=self.source.key, redirected_target_urls=redirected_target_urls)
    if self.entity.status == 'complete':
      # TODO: response message saying update isn't supported
      self.response.write(self.entity.published)
      return
    logging.debug('BlogWebmention entity: %s', self.entity.key.urlsafe())

    # fetch source page
    resp = self.fetch_mf2(self.source_url)
    if not resp:
      return
    self.fetched, data = resp

    item = self.find_mention_item(data)
    if not item:
      return self.error('Could not find target URL %s in source page %s' %
                        (self.target_url, self.fetched.url),
                        data=data, log_exception=False)

    # default author to target domain
    author_name = domain
    author_url = 'http://%s/' % domain

    # extract author name and URL from h-card, if any
    props = item['properties']
    author = first_value(props, 'author')
    if author:
      if isinstance(author, basestring):
        author_name = author
      else:
        author_props = author.get('properties', {})
        author_name = first_value(author_props, 'name')
        author_url = first_value(author_props, 'url')

    # if present, u-url overrides source url
    u_url = first_value(props, 'url')
    if u_url:
      self.entity.u_url = u_url

    # generate content
    content = props['content'][0]  # find_mention_item() guaranteed this is here
    text = (content.get('html') or content.get('value')).strip()
    source_url = self.entity.source_url()
    text += ' <br /> <a href="%s">via %s</a>' % (
      source_url, util.domain_from_link(source_url))

    # write comment
    try:
      self.entity.published = self.source.create_comment(
        self.target_url, author_name, author_url, text)
    except Exception, e:
      code, body = util.interpret_http_exception(e)
      msg = 'Error: %s %s; %s' % (code, e, body)
      if code == '401':
        logging.warning('Disabling source!')
        self.source.status = 'disabled'
        self.source.put()
        return self.error(msg, status=code, mail=False)
      elif code == '404':
        # post is gone
        return self.error(msg, status=code, mail=False)
      elif code or body:
        return self.error(msg, status=code, mail=True)
      else:
        raise
Example #55
0
  def _run(self):
    """Returns CreationResult on success, None otherwise."""
    logging.info('Params: %s', self.request.params.items())
    assert self.PREVIEW in (True, False)

    # parse and validate target URL
    try:
      parsed = urlparse.urlparse(self.target_url())
    except BaseException:
      return self.error('Could not parse target URL %s' % self.target_url())

    domain = parsed.netloc
    path_parts = parsed.path.rsplit('/', 1)
    source_cls = SOURCE_NAMES.get(path_parts[-1])
    if (domain not in ('brid.gy', 'www.brid.gy', 'localhost:8080') or
        len(path_parts) != 2 or path_parts[0] != '/publish' or not source_cls):
      return self.error(
        'Target must be brid.gy/publish/{facebook,flickr,twitter,instagram}')
    elif source_cls == GooglePlusPage:
      return self.error('Sorry, %s is not yet supported.' %
                        source_cls.GR_CLASS.NAME)

    # resolve source URL
    url, domain, ok = util.get_webmention_target(
      self.source_url(), replace_test_domains=False)
    # show nice error message if they're trying to publish a silo post
    if domain in SOURCE_DOMAINS:
      return self.error(
        "Looks like that's a %s URL. Try one from your web site instead!" %
        SOURCE_DOMAINS[domain].GR_CLASS.NAME)
    elif not ok:
      return self.error('Unsupported source URL %s' % url)
    elif not domain:
      return self.error('Could not parse source URL %s' % url)

    # look up source by domain
    domain = domain.lower()
    sources = source_cls.query().filter(source_cls.domains == domain).fetch(100)
    if not sources:
      return self.error("Could not find <b>%(type)s</b> account for <b>%(domain)s</b>. Check that your %(type)s profile has %(domain)s in its <em>web site</em> or <em>link</em> field, then try signing up again." %
        {'type': source_cls.GR_CLASS.NAME, 'domain': domain})

    for source in sources:
      logging.info('Source: %s , features %s, status %s, poll status %s',
                   source.bridgy_url(self), source.features, source.status,
                   source.poll_status)
      if source.status != 'disabled' and 'publish' in source.features:
        self.source = source
        break
    else:
      return self.error(
        'Publish is not enabled for your account(s). Please visit %s and sign up!' %
        ' or '.join(s.bridgy_url(self) for s in sources))

    content_param = 'bridgy_%s_content' % self.source.SHORT_NAME
    if content_param in self.request.params:
      return self.error('The %s parameter is not supported' % content_param)

    # show nice error message if they're trying to publish their home page
    for domain_url in self.source.domain_urls:
      domain_url_parts = urlparse.urlparse(domain_url)
      source_url_parts = urlparse.urlparse(self.source_url())
      if (source_url_parts.netloc == domain_url_parts.netloc and
          source_url_parts.path.strip('/') == domain_url_parts.path.strip('/') and
          not source_url_parts.query):
        return self.error(
          "Looks like that's your home page. Try one of your posts instead!")

    # done with the sanity checks, ready to fetch the source url. create the
    # Publish entity so we can store the result.
    entity = self.get_or_add_publish_entity(url)
    if (entity.status == 'complete' and entity.type != 'preview' and
        not self.PREVIEW and not appengine_config.DEBUG):
      return self.error("Sorry, you've already published that page, and Bridgy Publish doesn't yet support updating or deleting existing posts. Ping Ryan if you want that feature!")
    self.entity = entity

    # fetch source page
    resp = self.fetch_mf2(url)
    if not resp:
      return
    self.fetched, data = resp

    # find rel-shortlink, if any
    # http://microformats.org/wiki/rel-shortlink
    # https://github.com/snarfed/bridgy/issues/173
    soup = BeautifulSoup(self.fetched.text)
    shortlinks = (soup.find_all('link', rel='shortlink') +
                  soup.find_all('a', rel='shortlink') +
                  soup.find_all('a', class_='shortlink'))
    if shortlinks:
      self.shortlink = shortlinks[0]['href']

    # loop through each item and its children and try to preview/create it. if
    # it fails, try the next one. break after the first one that works.
    resp = None
    types = set()
    queue = collections.deque(data.get('items', []))
    while queue:
      item = queue.popleft()
      item_types = set(item.get('type'))
      if 'h-feed' in item_types and 'h-entry' not in item_types:
        queue.extend(item.get('children', []))
        continue
      elif not item_types & PUBLISHABLE_TYPES:
        continue

      try:
        result = self.attempt_single_item(item)
        if self.entity.published:
          break
        if result.abort:
          if result.error_plain:
            self.error(result.error_plain, html=result.error_html, data=item)
          return
        # try the next item
        for embedded in ('rsvp', 'invitee', 'repost', 'repost-of', 'like',
                         'like-of', 'in-reply-to'):
          if embedded in item.get('properties', []):
            item_types.add(embedded)
        logging.info(
          'Object type(s) %s not supported; error=%s; trying next.',
          item_types, result.error_plain)
        types = types.union(item_types)
        queue.extend(item.get('children', []))
      except BaseException, e:
        code, body = util.interpret_http_exception(e)
        return self.error('Error: %s %s' % (body or '', e), status=code or 500,
                          mail=True)
Example #56
0
File: publish.py Project: snarfed/bridgy
  def _run(self):
    """Returns CreationResult on success, None otherwise."""
    logging.info('Params: %s', self.request.params.items())
    assert self.PREVIEW in (True, False)

    # parse and validate target URL
    try:
      parsed = urlparse.urlparse(self.target_url())
    except BaseException:
      return self.error('Could not parse target URL %s' % self.target_url())

    domain = parsed.netloc
    path_parts = parsed.path.rsplit('/', 1)
    source_cls = SOURCE_NAMES.get(path_parts[-1])
    if (domain not in ('brid.gy', 'www.brid.gy', 'localhost:8080') or
        len(path_parts) != 2 or path_parts[0] != '/publish' or not source_cls):
      return self.error(
        'Target must be brid.gy/publish/{facebook,flickr,github,twitter}')
    elif source_cls == Instagram:
      return self.error('Sorry, %s is not supported.' %
                        source_cls.GR_CLASS.NAME)

    # resolve source URL
    url, domain, ok = util.get_webmention_target(
      self.source_url(), replace_test_domains=False)
    # show nice error message if they're trying to publish a silo post
    if domain in SOURCE_DOMAINS:
      return self.error(
        "Looks like that's a %s URL. Try one from your web site instead!" %
        SOURCE_DOMAINS[domain].GR_CLASS.NAME)
    elif not ok:
      return self.error('Unsupported source URL %s' % url)
    elif not domain:
      return self.error('Could not parse source URL %s' % url)

    # look up source by domain
    self.source = self._find_source(source_cls, url, domain)
    if not self.source:
      return  # _find_source rendered the error

    content_param = 'bridgy_%s_content' % self.source.SHORT_NAME
    if content_param in self.request.params:
      return self.error('The %s parameter is not supported' % content_param)

    # show nice error message if they're trying to publish their home page
    for domain_url in self.source.domain_urls:
      domain_url_parts = urlparse.urlparse(domain_url)
      for source_url in url, self.source_url():
        parts = urlparse.urlparse(source_url)
        if (parts.netloc == domain_url_parts.netloc and
            parts.path.strip('/') == domain_url_parts.path.strip('/') and
            not parts.query):
          return self.error(
            "Looks like that's your home page. Try one of your posts instead!")

    # done with the sanity checks, ready to fetch the source url. create the
    # Publish entity so we can store the result.
    self.entity = self.get_or_add_publish_entity(url)
    try:
      resp = self.fetch_mf2(url, raise_errors=True)
    except BaseException as e:
      status, body = util.interpret_http_exception(e)
      if status == '410':
        return self.delete(url)
      return self.error('Could not fetch source URL %s' % url)

    if not resp:
      return
    self.fetched, data = resp

    # create the Publish entity so we can store the result.
    if (self.entity.status == 'complete' and self.entity.type != 'preview' and
        not self.PREVIEW and not appengine_config.DEBUG):
      return self.error("Sorry, you've already published that page, and Bridgy Publish doesn't yet support updating or deleting existing posts. Details: https://github.com/snarfed/bridgy/issues/84")

    # find rel-shortlink, if any
    # http://microformats.org/wiki/rel-shortlink
    # https://github.com/snarfed/bridgy/issues/173
    soup = util.beautifulsoup_parse(self.fetched.text)
    shortlinks = (soup.find_all('link', rel='shortlink') +
                  soup.find_all('a', rel='shortlink') +
                  soup.find_all('a', class_='shortlink'))
    if shortlinks:
      self.shortlink = shortlinks[0]['href']

    # loop through each item and its children and try to preview/create it. if
    # it fails, try the next one. break after the first one that works.
    result = None
    types = set()
    queue = collections.deque(data.get('items', []))
    while queue:
      item = queue.popleft()
      item_types = set(item.get('type'))
      if 'h-feed' in item_types and 'h-entry' not in item_types:
        queue.extend(item.get('children', []))
        continue
      elif not item_types & PUBLISHABLE_TYPES:
        types = types.union(item_types)
        continue

      try:
        result = self.attempt_single_item(item)
        if self.entity.published:
          break
        if result.abort:
          if result.error_plain:
            self.error(result.error_plain, html=result.error_html, data=item)
          return
        # try the next item
        for embedded in ('rsvp', 'invitee', 'repost', 'repost-of', 'like',
                         'like-of', 'in-reply-to'):
          if embedded in item.get('properties', []):
            item_types.add(embedded)
        logging.info(
          'Object type(s) %s not supported; error=%s; trying next.',
          item_types, result.error_plain)
        types = types.union(item_types)
        queue.extend(item.get('children', []))
      except BaseException, e:
        code, body = util.interpret_http_exception(e)
        if code in self.source.DISABLE_HTTP_CODES or isinstance(e, models.DisableSource):
          # the user deauthorized the bridgy app, or the token expired, so
          # disable this source.
          logging.warning('Disabling source due to: %s' % e, exc_info=True)
          self.source.status = 'disabled'
          self.source.put()
          # TODO: eventually drop this to just if source.is_beta_user(). leaving
          # for everyone right now for initial monitoring.
          util.email_me(subject='Bridgy Publish: disabled %s' % self.source.label(),
                        body=body)
        if isinstance(e, (NotImplementedError, ValueError, urllib2.URLError)):
          code = '400'
        elif not code:
          raise
        msg = 'Error: %s %s' % (body or '', e)
        return self.error(msg, status=code, mail=code not in ('400', '404', '502', '503', '504'))
Example #57
0
  def _run(self):
    """Returns CreationResult on success, None otherwise."""
    logging.info('Params: %s', list(self.request.params.items()))
    assert self.PREVIEW in (True, False)

    # parse and validate target URL
    try:
      parsed = urllib.parse.urlparse(self.target_url())
    except BaseException:
      return self.error('Could not parse target URL %s' % self.target_url())

    domain = parsed.netloc
    path_parts = parsed.path.rsplit('/', 1)
    source_cls = SOURCE_NAMES.get(path_parts[-1])
    if (domain not in util.DOMAINS or
        len(path_parts) != 2 or path_parts[0] != '/publish' or not source_cls):
      return self.error(
        'Target must be brid.gy/publish/{flickr,github,mastodon,twitter}')
    elif source_cls == Instagram:
      return self.error('Sorry, %s is not supported.' %
                        source_cls.GR_CLASS.NAME)

    # resolve source URL
    url, domain, ok = util.get_webmention_target(
      self.source_url(), replace_test_domains=False)
    # show nice error message if they're trying to publish a silo post
    if domain in SOURCE_DOMAINS:
      return self.error(
        "Looks like that's a %s URL. Try one from your web site instead!" %
        SOURCE_DOMAINS[domain].GR_CLASS.NAME)
    elif not ok:
      return self.error('Unsupported source URL %s' % url)
    elif not domain:
      return self.error('Could not parse source URL %s' % url)

    # look up source by domain
    self.source = self._find_source(source_cls, url, domain)
    if not self.source:
      return  # _find_source rendered the error

    content_param = 'bridgy_%s_content' % self.source.SHORT_NAME
    if content_param in self.request.params:
      return self.error('The %s parameter is not supported' % content_param)

    # show nice error message if they're trying to publish their home page
    for domain_url in self.source.domain_urls:
      domain_url_parts = urllib.parse.urlparse(domain_url)
      for source_url in url, self.source_url():
        parts = urllib.parse.urlparse(source_url)
        if (parts.netloc == domain_url_parts.netloc and
            parts.path.strip('/') == domain_url_parts.path.strip('/') and
            not parts.query):
          return self.error(
            "Looks like that's your home page. Try one of your posts instead!")

    # done with the sanity checks, ready to fetch the source url. create the
    # Publish entity so we can store the result.
    self.entity = self.get_or_add_publish_entity(url)
    try:
      resp = self.fetch_mf2(url, raise_errors=True)
    except BaseException as e:
      status, body = util.interpret_http_exception(e)
      if status == '410':
        return self.delete(url)
      return self.error('Could not fetch source URL %s' % url)

    if not resp:
      return
    self.fetched, mf2 = resp

    # create the Publish entity so we can store the result.
    if (self.entity.status == 'complete' and self.entity.type != 'preview' and
        not self.PREVIEW and not appengine_info.LOCAL):
      return self.error("Sorry, you've already published that page, and Bridgy Publish doesn't support updating existing posts. Details: https://github.com/snarfed/bridgy/issues/84",
                        extra_json={'original': self.entity.published})

    # find rel-shortlink, if any
    # http://microformats.org/wiki/rel-shortlink
    # https://github.com/snarfed/bridgy/issues/173
    shortlinks = mf2['rels'].get('shortlink')
    if shortlinks:
      self.shortlink = urllib.parse.urljoin(url, shortlinks[0])

    # loop through each item and its children and try to preview/create it. if
    # it fails, try the next one. break after the first one that works.
    result = None
    types = set()
    queue = collections.deque(mf2.get('items', []))
    while queue:
      item = queue.popleft()
      item_types = set(item.get('type'))
      if 'h-feed' in item_types and 'h-entry' not in item_types:
        queue.extend(item.get('children', []))
        continue
      elif not item_types & PUBLISHABLE_TYPES:
        types = types.union(item_types)
        continue

      try:
        result = self.attempt_single_item(item)
        if self.entity.published:
          break
        if result.abort:
          if result.error_plain:
            self.error(result.error_plain, html=result.error_html, data=item)
          return
        # try the next item
        for embedded in ('rsvp', 'invitee', 'repost', 'repost-of', 'like',
                         'like-of', 'in-reply-to'):
          if embedded in item.get('properties', {}):
            item_types.add(embedded)
        logging.info(
          'Object type(s) %s not supported; error=%s; trying next.',
          item_types, result.error_plain)
        types = types.union(item_types)
        queue.extend(item.get('children', []))
      except BaseException as e:
        code, body = util.interpret_http_exception(e)
        if code in self.source.DISABLE_HTTP_CODES or isinstance(e, models.DisableSource):
          # the user deauthorized the bridgy app, or the token expired, so
          # disable this source.
          logging.warning('Disabling source due to: %s' % e, stack_info=True)
          self.source.status = 'disabled'
          self.source.put()
          # util.email_me(subject='Bridgy Publish: disabled %s' % self.source.label(),
          #               body=body)
        if isinstance(e, (NotImplementedError, ValueError, urllib.error.URLError)):
          code = '400'
        elif not code:
          raise
        msg = 'Error: %s %s' % (body or '', e)
      return self.error(msg, status=code,
                        report=code not in ('400', '404', '502', '503', '504'))

    if not self.entity.published:  # tried all the items
      types.discard('h-entry')
      types.discard('h-note')
      if types:
        msg = ("%s doesn't support type(s) %s, or no content was found." %
               (source_cls.GR_CLASS.NAME, ' + '.join(types)))
      else:
        msg = 'Could not find content in <a href="http://microformats.org/wiki/h-entry">h-entry</a> or any other element!'
      return self.error(msg, data=mf2)

    # write results to datastore, but don't overwrite a previous publish with a
    # preview.
    if not (self.PREVIEW and self.entity.type != 'preview'):
      self.entity.status = 'complete'
      self.entity.put()

    return result
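
The loop above walks the parsed microformats2 items breadth-first: h-feed wrappers are expanded into their children, and only items whose types intersect PUBLISHABLE_TYPES are attempted. Below is a minimal standalone sketch of that traversal; the PUBLISHABLE_TYPES value here is an assumption for illustration, not the handler's actual constant.

import collections

PUBLISHABLE_TYPES = {'h-entry', 'h-event'}  # assumed for this sketch

def candidate_items(mf2):
  """Yields mf2 items in the order the publish loop above would try them."""
  queue = collections.deque(mf2.get('items', []))
  while queue:
    item = queue.popleft()
    types = set(item.get('type', []))
    if 'h-feed' in types and 'h-entry' not in types:
      # a feed wrapper: descend into its children instead of publishing it
      queue.extend(item.get('children', []))
    elif types & PUBLISHABLE_TYPES:
      yield item
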
예제 #58
0
    def poll(self, source):
        """Actually runs the poll.

    Stores property names and values to update in source.updates.
    """
        if source.last_activities_etag or source.last_activity_id:
            logging.debug('Using ETag %s, last activity id %s',
                          source.last_activities_etag, source.last_activity_id)

        #
        # Step 1: fetch activities:
        # * posts by the user
        # * search all posts for the user's domain URLs to find links
        #
        cache = util.CacheDict()
        if source.last_activities_cache_json:
            cache.update(json.loads(source.last_activities_cache_json))

        try:
            # search for links first so that the user's activities and responses
            # override them if they overlap
            links = source.search_for_links()

            # this user's own activities (and user mentions)
            resp = source.get_activities_response(
                fetch_replies=True,
                fetch_likes=True,
                fetch_shares=True,
                fetch_mentions=True,
                count=50,
                etag=source.last_activities_etag,
                min_id=source.last_activity_id,
                cache=cache)
            etag = resp.get('etag')  # used later
            user_activities = resp.get('items', [])

            # these map ids to AS objects
            responses = {a['id']: a for a in links}
            activities = {a['id']: a for a in links + user_activities}

        except Exception as e:
            code, body = util.interpret_http_exception(e)
            if code == '401':
                msg = 'Unauthorized error: %s' % e
                logging.warning(msg, exc_info=True)
                source.updates['poll_status'] = 'ok'
                raise models.DisableSource(msg)
            elif code in util.HTTP_RATE_LIMIT_CODES:
                logging.warning(
                    'Rate limited. Marking as error and finishing. %s', e)
                source.updates.update({
                    'poll_status': 'error',
                    'rate_limited': True
                })
                return
            elif ((code and int(code) // 100 == 5)
                  or util.is_connection_failure(e)):
                logging.error(
                    'API call failed. Marking as error and finishing. %s: %s\n%s',
                    code, body, e)
                self.abort(ERROR_HTTP_RETURN_CODE)
            else:
                raise
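
The except block above triages API failures by HTTP status: 401 disables the source, rate-limit codes mark the poll as errored and stop, 5xx or connection failures abort so the task can retry, and anything else is re-raised. A rough standalone sketch of that triage follows; the rate-limit code set is an assumed stand-in for util.HTTP_RATE_LIMIT_CODES.

RATE_LIMIT_CODES = {'403', '429', '503'}  # assumed stand-in for util.HTTP_RATE_LIMIT_CODES

def triage_poll_error(code, connection_failure=False):
    """Returns 'disable', 'rate_limited', 'retry', or 'reraise' for a failed API call."""
    if code == '401':
        return 'disable'       # token revoked or expired; stop polling this source
    if code in RATE_LIMIT_CODES:
        return 'rate_limited'  # back off; mark the poll as errored for now
    if (code and int(code) // 100 == 5) or connection_failure:
        return 'retry'         # transient server or network problem; let the task retry
    return 'reraise'           # unexpected; propagate so it gets reported
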
예제 #59
0
    def post(self, source_short_name):
        logging.info('Params: %s', list(self.request.params.items()))
        # strip fragments from source and target url
        self.source_url = urllib.parse.urldefrag(
            util.get_required_param(self, 'source'))[0]
        self.target_url = urllib.parse.urldefrag(
            util.get_required_param(self, 'target'))[0]

        # follow target url through any redirects, strip utm_* query params
        resp = util.follow_redirects(self.target_url)
        redirected_target_urls = [r.url for r in resp.history]
        self.target_url = util.clean_url(resp.url)

        # parse and validate target URL
        domain = util.domain_from_link(self.target_url)
        if not domain:
            return self.error('Could not parse target URL %s' %
                              self.target_url)

        # look up source by domain
        source_cls = models.sources[source_short_name]
        domain = domain.lower()
        self.source = (source_cls.query().filter(
            source_cls.domains == domain).filter(
                source_cls.features == 'webmention').filter(
                    source_cls.status == 'enabled').get())
        if not self.source:
            # check for a rel-canonical link. Blogger uses these when it serves a post
            # from multiple domains, e.g. country TLDs like epeus.blogspot.co.uk vs
            # epeus.blogspot.com.
            # https://github.com/snarfed/bridgy/issues/805
            mf2 = self.fetch_mf2(self.target_url, require_mf2=False)
            if not mf2:
                # fetch_mf2() already wrote the error response
                return
            domains = util.dedupe_urls(
                util.domain_from_link(url)
                for url in mf2[1]['rels'].get('canonical', []))
            if domains:
                self.source = (source_cls.query().filter(
                    source_cls.domains.IN(domains)).filter(
                        source_cls.features == 'webmention').filter(
                            source_cls.status == 'enabled').get())

        if not self.source:
            return self.error(
                'Could not find %s account for %s. Is it registered with Bridgy?'
                % (source_cls.GR_CLASS.NAME, domain))

        # check that the target URL path is supported
        target_path = urllib.parse.urlparse(self.target_url).path
        if target_path in ('', '/'):
            return self.error(
                'Home page webmentions are not currently supported.',
                status=202)
        for pattern in self.source.PATH_BLOCKLIST:
            if pattern.match(target_path):
                return self.error(
                    '%s webmentions are not supported for URL path: %s' %
                    (self.source.GR_CLASS.NAME, target_path),
                    status=202)

        # create BlogWebmention entity
        id = '%s %s' % (self.source_url, self.target_url)
        self.entity = BlogWebmention.get_or_insert(
            id,
            source=self.source.key,
            redirected_target_urls=redirected_target_urls)
        if self.entity.status == 'complete':
            # TODO: response message saying update isn't supported
            self.response.write(self.entity.published)
            return
        logging.debug("BlogWebmention entity: '%s'",
                      self.entity.key.urlsafe().decode())

        # fetch source page
        fetched = self.fetch_mf2(self.source_url)
        if not fetched:
            return
        resp, mf2 = fetched

        item = self.find_mention_item(mf2.get('items', []))
        if not item:
            return self.error(
                'Could not find target URL %s in source page %s' %
                (self.target_url, resp.url),
                data=mf2,
                log_exception=False)

        # default author to target domain
        author_name = domain
        author_url = 'http://%s/' % domain

        # extract author name and URL from h-card, if any
        props = item['properties']
        author = first_value(props, 'author')
        if author:
            if isinstance(author, str):
                author_name = author
            else:
                author_props = author.get('properties', {})
                author_name = first_value(author_props, 'name')
                author_url = first_value(author_props, 'url')

        # if present, u-url overrides source url
        u_url = first_value(props, 'url')
        if u_url:
            self.entity.u_url = u_url

        # generate content
        # find_mention_item() guaranteed this is here
        content = props['content'][0]
        text = (content.get('html') or content.get('value')).strip()
        source_url = self.entity.source_url()
        text += ' <br /> <a href="%s">via %s</a>' % (
            source_url, util.domain_from_link(source_url))

        # write comment
        try:
            self.entity.published = self.source.create_comment(
                self.target_url, author_name, author_url, text)
        except Exception as e:
            code, body = util.interpret_http_exception(e)
            msg = 'Error: %s %s; %s' % (code, e, body)
            if code == '401':
                logging.warning('Disabling source due to: %s' % e,
                                stack_info=True)
                self.source.status = 'disabled'
                self.source.put()
                return self.error(msg,
                                  status=code,
                                  report=self.source.is_beta_user())
            elif code == '404':
                # post is gone
                return self.error(msg, status=code, report=False)
            elif (util.is_connection_failure(e)
                  or (code and int(code) // 100 == 5)):
                return self.error(msg,
                                  status=util.ERROR_HTTP_RETURN_CODE,
                                  report=False)
            elif code or body:
                return self.error(msg, status=code, report=True)
            else:
                raise

        # write results to datastore
        self.entity.status = 'complete'
        self.entity.put()
        self.response.write(json_dumps(self.entity.published))
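
The author handling above defaults to the target domain and then overrides the name and URL from the item's author property, which may be a plain string or an embedded h-card. The sketch below captures that extraction; this first_value() is an assumed equivalent of the helper the example uses, not its exact implementation.

def first_value(props, name):
    """Assumed equivalent of the helper above: first value of an mf2 property, or None."""
    values = props.get(name) or []
    return values[0] if values else None

def extract_author(item, default_name, default_url):
    """Returns (author_name, author_url), falling back to the target-domain defaults."""
    props = item.get('properties', {})
    author = first_value(props, 'author')
    if not author:
        return default_name, default_url
    if isinstance(author, str):
        # plain-text author name with no embedded h-card
        return author, default_url
    author_props = author.get('properties', {})
    return (first_value(author_props, 'name') or default_name,
            first_value(author_props, 'url') or default_url)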