Exemplo n.º 1
0
  def get(self):
    """Serves the Instagram feed for the given session cookie as Atom.

    Reads the required `sessionid` query parameter, scrapes the friends feed
    with it, and writes the resulting activities as an Atom document.

    Error handling when the fetch raises:
      * 401/403 from Instagram: writes a one-entry Atom feed telling the user
        to regenerate their feed
      * any other HTTP status: passed through, except 5xx which becomes 502
      * connection failure: 504
      * anything else: logged and reported as 500
    """
    session_id = util.get_required_param(self, 'sessionid')
    cookie = 'sessionid=%s' % urllib.quote(session_id.encode('utf-8'))
    # NOTE(review): this writes the session cookie to the logs - confirm that
    # is acceptable.
    logging.info('Fetching with Cookie: %s', cookie)

    host_url = self.request.host_url + '/'
    ig = instagram.Instagram()
    try:
      resp = ig.get_activities_response(
        group_id=source.FRIENDS, scrape=True, cookie=cookie)
    except Exception as e:
      status, text = util.interpret_http_exception(e)

      if status in ('401', '403'):
        # The cookie was rejected. Tell the user inside the feed itself.
        self.response.headers['Content-Type'] = 'application/atom+xml'
        notice = {
          'object': {
            'url': self.request.url,
            'content': 'Your instagram-atom cookie isn\'t working. <a href="%s">Click here to regenerate your feed!</a>' % host_url,
          },
        }
        self.response.out.write(atom.activities_to_atom(
          [notice], {}, title='instagram-atom', host_url=host_url,
          request_url=self.request.path_url))
        return

      if status:
        # An upstream server error is reported as a bad gateway.
        if int(status) // 100 == 5:
          self.response.status = 502
        else:
          self.response.status = status
      elif util.is_connection_failure(e):
        self.response.status = 504  # HTTP 504 Gateway Timeout
      else:
        logging.exception('oops!')
        self.response.status = 500

      # response.text is assigned unicode, so decode byte strings first.
      if isinstance(text, str):
        text = text.decode('utf-8')
      self.response.text = text or u'Unknown error.'
      return

    actor = resp.get('actor')
    if not actor:
      logging.warning("Couldn't determine Instagram user!")
    else:
      logging.info('Logged in as %s (%s)',
                   actor.get('username'), actor.get('displayName'))

    title = 'instagram-atom feed for %s' % ig.actor_name(actor)
    self.response.headers['Content-Type'] = 'application/atom+xml'
    feed = atom.activities_to_atom(
      resp.get('items', []), actor, title=title, host_url=host_url,
      request_url=self.request.path_url, xml_base='https://www.instagram.com/')
    self.response.out.write(feed)
Exemplo n.º 2
0
    def get(self):
        """Serves the Instagram feed for the given session cookie as Atom.

        Reads the required `sessionid` query parameter, scrapes the friends
        feed with it, and writes the resulting activities as an Atom document.

        Error handling when the fetch raises:
          * HTTP status from upstream: passed through, except 5xx which
            becomes 502
          * connection failure: 504
          * anything else: logged and reported as 500
        """
        cookie = 'sessionid=%s' % urllib.quote(
            util.get_required_param(self, 'sessionid').encode('utf-8'))
        logging.info('Fetching with Cookie: %s', cookie)

        ig = instagram.Instagram()
        try:
            resp = ig.get_activities_response(group_id=source.FRIENDS,
                                              scrape=True,
                                              cookie=cookie)
        except Exception as e:
            status, text = util.interpret_http_exception(e)
            if status:
                # Compare numerically: `status == 500` never matches when the
                # status is a string such as '500', so upstream server errors
                # would not be translated to 502 Bad Gateway.
                is_upstream_error = int(status) // 100 == 5
                self.response.status = 502 if is_upstream_error else status
            elif util.is_connection_failure(e):
                self.response.status = 504  # HTTP 504 Gateway Timeout
            else:
                logging.exception('oops!')
                self.response.status = 500

            # response.text is assigned unicode, so decode byte strings first.
            if isinstance(text, str):
                text = text.decode('utf-8')
            self.response.text = text or u'Unknown error.'
            return

        actor = resp.get('actor')
        if actor:
            logging.info('Logged in as %s (%s)', actor.get('username'),
                         actor.get('displayName'))
        else:
            logging.warning("Couldn't determine Instagram user!")

        title = 'instagram-atom feed for %s' % ig.actor_name(actor)
        self.response.headers['Content-Type'] = 'application/atom+xml'
        self.response.out.write(
            atom.activities_to_atom(resp.get('items', []),
                                    actor,
                                    title=title,
                                    host_url=self.request.host_url + '/',
                                    request_url=self.request.path_url,
                                    xml_base='https://www.instagram.com/'))
Exemplo n.º 3
0
def background_handle_exception(handler, e, debug):
  """Common exception handler for background tasks.

  Catches failed outbound HTTP requests and aborts the request with
  ERROR_HTTP_RETURN_CODE. An exception counts as such a failure when its HTTP
  status is 5xx, when its status is one of the handler's or source's transient
  or rate limit codes, or when it is a connection failure. Any other exception
  is re-raised.

  Install with eg:

  class MyHandler(webapp2.RequestHandler):
    handle_exception = util.background_handle_exception
    ...

  Args:
    handler: the request handler. Its optional TRANSIENT_ERROR_HTTP_CODES and
      source attributes are consulted.
    e: the exception being handled
    debug: unused here; part of the handle_exception signature
  """
  # Copy into a new tuple. With `+=` directly on the attribute value, a list
  # valued TRANSIENT_ERROR_HTTP_CODES would be extended in place and so grow
  # on every handled exception.
  transients = tuple(getattr(handler, 'TRANSIENT_ERROR_HTTP_CODES', ()))
  source = getattr(handler, 'source', None)
  if source:
    transients += (tuple(source.RATE_LIMIT_HTTP_CODES) +
                   tuple(source.TRANSIENT_ERROR_HTTP_CODES))

  code, body = util.interpret_http_exception(e)
  if ((code and int(code) // 100 == 5) or code in transients or
      util.is_connection_failure(e)):
    logging.error('Marking as error and finishing. %s: %s\n%s', code, body, e)
    handler.abort(ERROR_HTTP_RETURN_CODE)
  else:
    # Bare raise: relies on being called while e is still being handled.
    raise
Exemplo n.º 4
0
  def get(self):
    """Handles an API GET.

    Request path is of the form /site/user_id/group_id/app_id/activity_id ,
    where each element except site is an optional string object id.

    Builds the source for the requested site, fetches its activities, and
    writes them with write_response(). The response is cached in memcache
    unless the request has cache=false.

    Aborts with 400 if the source raises NotImplementedError and with 504 on
    a connection failure.

    Raises:
      exc.HTTPNotFound: if the path has too many elements or the site is
        unknown
      exc.HTTPBadRequest: if a tag URI id's domain doesn't match the source
    """
    # parse path
    args = urllib.unquote(self.request.path).strip('/').split('/')
    if not args or len(args) > MAX_PATH_LEN:
      raise exc.HTTPNotFound('Expected 1-%d path elements; found %d' %
                             (MAX_PATH_LEN, len(args)))

    # make source instance
    site = args.pop(0)
    if site == 'twitter':
      src = twitter.Twitter(
        access_token_key=util.get_required_param(self, 'access_token_key'),
        access_token_secret=util.get_required_param(self, 'access_token_secret'))
    elif site == 'facebook':
      src = facebook.Facebook(
        access_token=util.get_required_param(self, 'access_token'))
    elif site == 'flickr':
      src = flickr.Flickr(
        access_token_key=util.get_required_param(self, 'access_token_key'),
        access_token_secret=util.get_required_param(self, 'access_token_secret'))
    elif site == 'instagram':
      src = instagram.Instagram(scrape=True)
    elif site == 'google+':
      auth_entity = util.get_required_param(self, 'auth_entity')
      src = googleplus.GooglePlus(auth_entity=ndb.Key(urlsafe=auth_entity).get())
    else:
      src_cls = source.sources.get(site)
      if not src_cls:
        raise exc.HTTPNotFound('Unknown site %r' % site)
      src = src_cls(**self.request.params)

    # decode tag URI ids
    for i, arg in enumerate(args):
      parsed = util.parse_tag_uri(arg)
      if parsed:
        domain, obj_id = parsed
        if domain != src.DOMAIN:
          raise exc.HTTPBadRequest('Expected domain %s in tag URI %s, found %s' %
                                   (src.DOMAIN, arg, domain))
        args[i] = obj_id

    # check if request is cached
    # NOTE(review): the cache key is only the request path, so query
    # parameters are not part of it - confirm that is intended.
    cache = self.request.get('cache', '').lower() != 'false'
    if cache:
      cache_key = 'R %s' % self.request.path
      cached = memcache.get(cache_key)
      if cached:
        logging.info('Serving cached response %r', cache_key)
        self.write_response(cached['response'], actor=cached['actor'],
                            url=src.BASE_URL)
        return

    # handle default path elements: a default value is passed on as None
    args = [None if a in defaults else a
            for a, defaults in zip(args, PATH_DEFAULTS)]
    user_id = args[0] if args else None

    # get activities
    try:
      response = src.get_activities_response(*args, **self.get_kwargs(src))
    except NotImplementedError as e:
      self.abort(400, str(e))
    except Exception as e:
      if util.is_connection_failure(e):
        # HTTP 504 Gateway Timeout
        self.abort(504, str(e))
      raise

    # fetch actor if necessary. args and user_id were already normalized
    # above, so they are reused here rather than recomputed.
    actor = response.get('actor')
    if not actor and self.request.get('format') == 'atom':
      # atom needs actor
      actor = src.get_actor(user_id) if src else {}

    self.write_response(response, actor=actor, url=src.BASE_URL)

    # cache response
    if cache:
      logging.info('Caching response in %r', cache_key)
      memcache.set(cache_key, {'response': response, 'actor': actor},
                   src.RESPONSE_CACHE_TIME)
Exemplo n.º 5
0
    def get(self):
        """Fetches a URL and converts its contents to an activities response.

        Required query parameters:
          input: format of the fetched document, one of 'activitystreams',
            'html' or 'json-mf2'
          url: the document to fetch

        The fetched URL and body are cached in memcache unless the request has
        cache=false. Aborts with 400 if the URL is invalid and with 502 on a
        connection failure.

        Raises:
          exc.HTTPBadRequest: if input is not one of the expected values
        """
        expected_inputs = ('activitystreams', 'html', 'json-mf2')
        input_format = util.get_required_param(self, 'input')
        if input_format not in expected_inputs:
            raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                                     (input_format, expected_inputs))
        url = util.get_required_param(self, 'url')

        # check if request is cached
        cache = self.request.get('cache', '').lower() != 'false'
        cache_key = 'U %s' % url
        cached = memcache.get(cache_key) if cache else None

        if cached:
            logging.info('Serving cached response %r', cache_key)
            url = cached['url']
            body = cached['body']
        else:
            # fetch url
            try:
                resp = util.urlopen(url)
            except (ValueError, httplib.InvalidURL) as e:
                self.abort(400, str(e))
            except Exception as e:
                if util.is_connection_failure(e):
                    self.abort(502, str(e))
                raise

            # use the final URL after redirects from here on
            if url != resp.geturl():
                url = resp.geturl()
                logging.info('Redirected to %s', url)
            body = resp.read()

            if cache:
                logging.info('Caching response in %r', cache_key)
                memcache.set(cache_key, {
                    'url': url,
                    'body': body
                }, URL_CACHE_TIME)

        # decode data
        mf2 = None
        if input_format == 'activitystreams':
            activities = json.loads(body)
        elif input_format == 'html':
            activities = microformats2.html_to_activities(body, url)
            mf2 = mf2py.parse(doc=body, url=url)
        elif input_format == 'json-mf2':
            mf2 = json.loads(body)
            # mf2util expects rels. Only add an empty one when it is missing,
            # so rels supplied in the input are not discarded.
            mf2.setdefault('rels', {})
            activities = [
                microformats2.json_to_object(item)
                for item in mf2.get('items', [])
            ]

        # author and feed title are only available from microformats2 input
        author = None
        title = None
        if mf2:
            author = microformats2.find_author(mf2)
            title = mf2util.interpret_feed(mf2, url).get('name')

        self.write_response(
            source.Source.make_activities_base_response(activities),
            url=url,
            actor=author,
            title=title)
Exemplo n.º 6
0
  def get(self):
    """Fetches a URL and converts its contents to an activities response.

    Required query parameters:
      input: format of the fetched document, one of 'activitystreams', 'html'
        or 'json-mf2'
      url: the document to fetch

    The fetched URL and body are cached in memcache unless the request has
    cache=false. Aborts with 400 if the URL is invalid and with 504 on a
    connection failure.

    Raises:
      exc.HTTPBadRequest: if input is not one of the expected values
    """
    supported = ('activitystreams', 'html', 'json-mf2')
    fmt = util.get_required_param(self, 'input')
    if fmt not in supported:
      raise exc.HTTPBadRequest('Invalid input: %s, expected one of %r' %
                               (fmt, supported))
    url = util.get_required_param(self, 'url')

    # look for a cached copy unless the caller opted out
    use_cache = self.request.get('cache', '').lower() != 'false'
    cache_key = 'U %s' % url
    cached = None
    if use_cache:
      cached = memcache.get(cache_key)

    if cached:
      logging.info('Serving cached response %r', cache_key)
      url, body = cached['url'], cached['body']
    else:
      try:
        resp = util.urlopen(url)
      except (ValueError, httplib.InvalidURL) as e:
        self.abort(400, str(e))
      except Exception as e:
        if util.is_connection_failure(e):
          # HTTP 504 Gateway Timeout
          self.abort(504, str(e))
        raise

      # continue with the final URL if the fetch was redirected
      final_url = resp.geturl()
      if final_url != url:
        url = final_url
        logging.info('Redirected to %s', url)
      body = resp.read()

      if use_cache:
        logging.info('Caching response in %r', cache_key)
        memcache.set(cache_key, {'url': url, 'body': body}, URL_CACHE_TIME)

    # parse microformats2, if that's what we were given
    mf2 = None
    if fmt == 'html':
      mf2 = mf2py.parse(doc=body, url=url)
    elif fmt == 'json-mf2':
      mf2 = json.loads(body)
      mf2.setdefault('rels', {})  # mf2util expects rels

    # author and feed title are only available from microformats2 input
    actor = title = None
    if mf2:
      def fetch_mf2(url):
        return mf2py.parse(url=url)

      actor = microformats2.find_author(mf2, fetch_mf2_func=fetch_mf2)
      title = mf2util.interpret_feed(mf2, url).get('name')

    # convert to activities
    if fmt == 'html':
      activities = microformats2.html_to_activities(body, url, actor)
    elif fmt == 'json-mf2':
      activities = [microformats2.json_to_object(item, actor=actor)
                    for item in mf2.get('items', [])]
    else:
      # fmt was validated above, so this is 'activitystreams'
      activities = json.loads(body)

    base_response = source.Source.make_activities_base_response(activities)
    self.write_response(base_response, url=url, actor=actor, title=title)
Exemplo n.º 7
0
    def get(self):
        """Handles an API GET.

        Request path is of the form
        /site/user_id/group_id/app_id/activity_id , where each element except
        site is an optional string object id.

        Builds the source for the requested site, fetches its activities, and
        writes them with write_response(). The response is cached in memcache
        unless the request has cache=false.

        Aborts with 400 if the source raises NotImplementedError and with 502
        on a connection failure.

        Raises:
          exc.HTTPNotFound: if the path has too many elements or the site is
            unknown
          exc.HTTPBadRequest: if a tag URI id's domain doesn't match the source
        """
        # parse path
        args = urllib.unquote(self.request.path).strip('/').split('/')
        if not args or len(args) > MAX_PATH_LEN:
            raise exc.HTTPNotFound('Expected 1-%d path elements; found %d' %
                                   (MAX_PATH_LEN, len(args)))

        # make source instance
        site = args.pop(0)
        if site == 'twitter':
            src = twitter.Twitter(
                access_token_key=util.get_required_param(
                    self, 'access_token_key'),
                access_token_secret=util.get_required_param(
                    self, 'access_token_secret'))
        elif site == 'facebook':
            src = facebook.Facebook(
                access_token=util.get_required_param(self, 'access_token'))
        elif site == 'flickr':
            src = flickr.Flickr(
                access_token_key=util.get_required_param(
                    self, 'access_token_key'),
                access_token_secret=util.get_required_param(
                    self, 'access_token_secret'))
        elif site == 'instagram':
            src = instagram.Instagram(scrape=True)
        elif site == 'google+':
            auth_entity = util.get_required_param(self, 'auth_entity')
            src = googleplus.GooglePlus(auth_entity=ndb.Key(
                urlsafe=auth_entity).get())
        else:
            src_cls = source.sources.get(site)
            if not src_cls:
                raise exc.HTTPNotFound('Unknown site %r' % site)
            src = src_cls(**self.request.params)

        # decode tag URI ids
        for i, arg in enumerate(args):
            parsed = util.parse_tag_uri(arg)
            if parsed:
                domain, obj_id = parsed
                if domain != src.DOMAIN:
                    raise exc.HTTPBadRequest(
                        'Expected domain %s in tag URI %s, found %s' %
                        (src.DOMAIN, arg, domain))
                args[i] = obj_id

        # check if request is cached
        # NOTE(review): the cache key is only the request path, so query
        # parameters are not part of it - confirm that is intended.
        cache = self.request.get('cache', '').lower() != 'false'
        if cache:
            cache_key = 'R %s' % self.request.path
            cached = memcache.get(cache_key)
            if cached:
                logging.info('Serving cached response %r', cache_key)
                self.write_response(cached['response'],
                                    actor=cached['actor'],
                                    url=src.BASE_URL)
                return

        # handle default path elements: a default value is passed on as None
        args = [
            None if a in defaults else a
            for a, defaults in zip(args, PATH_DEFAULTS)
        ]
        user_id = args[0] if args else None

        # get activities
        try:
            response = src.get_activities_response(*args,
                                                   **self.get_kwargs(src))
        except NotImplementedError as e:
            self.abort(400, str(e))
        except Exception as e:
            if util.is_connection_failure(e):
                self.abort(502, str(e))
            raise

        # fetch actor if necessary. args and user_id were already normalized
        # above, so they are reused here rather than recomputed.
        actor = response.get('actor')
        if not actor and self.request.get('format') == 'atom':
            # atom needs actor
            actor = src.get_actor(user_id) if src else {}

        self.write_response(response, actor=actor, url=src.BASE_URL)

        # cache response
        if cache:
            logging.info('Caching response in %r', cache_key)
            memcache.set(cache_key, {
                'response': response,
                'actor': actor
            }, src.RESPONSE_CACHE_TIME)