class Instagram(browser.BrowserSource):
    """A single Instagram account.

    Entities are keyed by username. Instagram usernames may contain ASCII
    letters (case insensitive), numbers, periods, and underscores:
    https://stackoverflow.com/questions/15470180
    """

    GR_CLASS = gr_instagram.Instagram
    SHORT_NAME = 'instagram'
    OAUTH_START_HANDLER = oauth_instagram.StartHandler

    # No reject regexp is configured: non-private Instagram post URLs just 404.
    URL_CANONICALIZER = util.UrlCanonicalizer(
        domain=GR_CLASS.DOMAIN,
        subdomain='www',
        approve=r'https://www.instagram.com/p/[^/?]+/$',
        trailing_slash=True,
        headers=util.REQUEST_HEADERS,
    )

    # Blank granary Instagram object, shared across all instances.
    gr_source = gr_instagram.Instagram()

    @classmethod
    def key_id_from_actor(cls, actor):
        """Returns the key id for this entity: the actor's ``username`` field."""
        return actor['username']

    def silo_url(self):
        """Returns the Instagram account URL, e.g. https://instagram.com/foo."""
        username = self.key.id()
        return self.gr_source.user_url(username)

    def label_name(self):
        """Returns the username."""
        return self.key_id()
def test_live(self):
    """Live check of scraping USERNAME's own posts with a session cookie.

    Fetches up to 10 activities with replies and likes, then checks that:

    * the actor has all of its basic fields, including an image URL;
    * no more than the requested 10 items came back;
    * every activity has an actor and every object has its required fields;
    * across all items, content, replies, tags and likes each show up at
      least once.
    """
    ig = instagram.Instagram(cookie=INSTAGRAM_SESSIONID_COOKIE)
    resp = ig.get_activities_response(
        user_id=USERNAME, group_id=SELF, scrape=True,
        fetch_replies=True, fetch_likes=True, count=10)

    for field in 'username', 'displayName', 'url', 'image', 'id':
        self.assertTrue(resp['actor'][field], field)
    self.assertTrue(resp['actor']['image']['url'])

    items = resp['items']
    self.assertGreaterEqual(10, len(items))

    found = set()
    for a in items:
        self.assertTrue(a['actor'])
        obj = a['object']

        for field in 'id', 'url', 'attachments', 'author', 'image':
            # Check the object's value. The previous assertTrue([field], ...)
            # tested a non-empty list literal and so could never fail.
            self.assertTrue(obj.get(field), field)

        for field in 'content', 'replies', 'tags':
            if obj.get(field):
                found.add(field)

        likes = [t for t in obj.get('tags', []) if t.get('verb') == 'like']
        if likes:
            found.add('likes')

    for field in 'content', 'replies', 'tags', 'likes':
        self.assertIn(field, found)
def test_live(self):
    """Live check of scraping USERNAME's own posts without credentials.

    Expects exactly 12 items, each with an actor and the required object
    fields, and expects content, replies and tags to each appear on at least
    one item.
    """
    required_fields = ('id', 'url', 'attachments', 'author', 'image')
    optional_fields = ('content', 'replies', 'tags')

    ig = instagram.Instagram()
    resp = ig.get_activities_response(
        user_id=USERNAME,
        group_id=SELF,
        scrape=True,
        fetch_replies=True,
        fetch_likes=True,
    )

    for name in ('username', 'displayName', 'url', 'image', 'id'):
        self.assertTrue(resp['actor'][name], name)
    self.assertTrue(resp['actor']['image']['url'])

    activities = resp['items']
    self.assertEqual(12, len(activities))

    seen = set()
    for activity in activities:
        self.assertTrue(activity['actor'])
        for name in required_fields:
            self.assertTrue(activity['object'][name], name)
        seen.update(
            name for name in optional_fields if activity['object'].get(name))

    for name in optional_fields:
        self.assertIn(name, seen)
def test_get_activities_passes_through_access_token(self):
    """The access token given to the constructor ends up in the feed URL."""
    feed_url = 'https://api.instagram.com/v1/users/self/feed?access_token=asdf'
    empty_feed = json.dumps({'meta': {'code': 200}, 'data': []})

    self.expect_urlopen(feed_url, empty_feed)
    self.mox.ReplayAll()

    self.instagram = instagram.Instagram(access_token='asdf')
    self.instagram.get_activities()
def finish(self, auth_entity, state=None):
    """Validates the user's Instagram profile, then adds or deletes the source.

    When auth_entity is truthy, this:

    1. finds an Instagram URL among the ``rel-me`` links in the auth
       entity's user JSON and takes its path as the username;
    2. scrapes that profile's actor, showing an apology if Instagram answers
       with one of Instagram.RATE_LIMIT_HTTP_CODES;
    3. requires the user's web site (the auth entity's key id) to be among
       the actor's URLs;
    4. requires the account to be public.

    Any failed check adds a message and redirects to '/'.

    Args:
      auth_entity: auth entity exposing ``user_json`` and ``key``, or a
        falsy value. NOTE(review): presumably falsy means the user declined
        the auth prompt - confirm against the caller.
      state: passed through to maybe_add_or_delete_source().

    Returns:
      The result of self.redirect('/') when a check fails, otherwise None.

    Raises:
      Exception: anything get_actor() raises that is not a rate limit code.
    """
    # Bind actor up front so the final call works when auth_entity is falsy.
    # Previously that path read an unbound local.
    actor = None

    if auth_entity:
        user_json = json.loads(auth_entity.user_json)

        # find instagram profile URL
        urls = user_json.get('rel-me', [])
        logging.info('rel-mes: %s', urls)
        for url in util.trim_nulls(urls):
            if util.domain_from_link(url) == gr_instagram.Instagram.DOMAIN:
                username = urllib.parse.urlparse(url).path.strip('/')
                break
        else:
            # loop finished without break: no rel-me link points at Instagram
            self.messages.add(
                'No Instagram profile found. Please <a href="https://indieauth.com/setup">add an Instagram rel-me link</a>, then try again.'
            )
            return self.redirect('/')

        # check that instagram profile links to web site
        try:
            actor = gr_instagram.Instagram(scrape=True).get_actor(
                username, ignore_rate_limit=True)
        except Exception as e:
            code, _ = util.interpret_http_exception(e)
            if code in Instagram.RATE_LIMIT_HTTP_CODES:
                self.messages.add(
                    '<a href="https://github.com/snarfed/bridgy/issues/665#issuecomment-524977427">Apologies, Instagram is temporarily blocking us.</a> Please try again later!'
                )
                return self.redirect('/')
            raise

        if not actor:
            self.messages.add(
                "Couldn't find Instagram user '%s'. Please check your site's rel-me link and your Instagram account."
                % username)
            return self.redirect('/')

        canonicalize = util.UrlCanonicalizer(redirects=False)
        website = canonicalize(auth_entity.key.id())
        urls = [canonicalize(u) for u in microformats2.object_urls(actor)]
        logging.info('Looking for %s in %s', website, urls)
        if website not in urls:
            self.messages.add(
                "Please add %s to your Instagram profile's website or bio field and try again."
                % website)
            return self.redirect('/')

        # check that the instagram account is public
        if not gr_source.Source.is_public(actor):
            self.messages.add(
                'Your Instagram account is private. Bridgy only supports public accounts.'
            )
            return self.redirect('/')

    self.maybe_add_or_delete_source(Instagram, auth_entity, state, actor=actor)
def test_get_comment_scrape(self):
    """get_comment() in scrape mode returns the first reply of the scraped post."""
    post_url = instagram.HTML_BASE_URL + 'p/BDG6Ms_J0vQ/'
    self.expect_requests_get(post_url, HTML_VIDEO_COMPLETE,
                             allow_redirects=False)
    self.mox.ReplayAll()

    ig = instagram.Instagram(scrape=True)
    expected = HTML_VIDEO_ACTIVITY_FULL['object']['replies']['items'][0]
    actual = ig.get_comment('789', activity_id='1208909509631101904_942513')
    self.assert_equals(expected, actual)
def test_get_like_scrape(self):
    """get_like() in scrape mode returns the matching like from the scraped post."""
    post_url = instagram.HTML_BASE_URL + 'p/BDG6Ms_J0vQ/'
    self.expect_requests_get(post_url, HTML_PHOTO_COMPLETE,
                             allow_redirects=False)
    self.mox.ReplayAll()

    ig = instagram.Instagram(scrape=True)
    expected = LIKE_OBJS[0]
    actual = ig.get_like('456', '1208909509631101904_942513', '8')
    self.assert_equals(expected, actual)
def store_feed():
    """Stores the posted Instagram feed HTML under the request's token.

    Reads ``token`` from the query args and the HTML from the request body,
    converts the HTML to AS1 activities plus actor (without fetching
    extras), and stores the HTML and both JSON blobs in a Feed keyed by the
    token.

    Returns:
      The string 'OK'.
    """
    token = request.args['token']
    html = request.get_data(as_text=True)

    scraper = instagram.Instagram()
    as1, actor = scraper.scraped_to_activities(html, fetch_extras=False)

    Feed.get_or_insert(
        token,
        html=html,
        as1_json=json_dumps(as1),
        actor_json=json_dumps(actor),
    )
    return 'OK'
def get(self):
    """Handles an API GET.

    Request path is of the form /site/user_id/group_id/app_id/activity_id ,
    where each element except site is an optional string object id.

    Raises:
      exc.HTTPNotFound: if the path has too many elements or the site is
        unknown.
    """
    # parse path
    args = urllib.unquote(self.request.path).strip('/').split('/')
    if not args or len(args) > MAX_PATH_LEN:
        raise exc.HTTPNotFound('Expected 1-%d path elements; found %d' %
                               (MAX_PATH_LEN, len(args)))

    # make source instance
    site = args.pop(0)
    if site == 'twitter':
        src = twitter.Twitter(
            access_token_key=util.get_required_param(self, 'access_token_key'),
            access_token_secret=util.get_required_param(
                self, 'access_token_secret'))
    elif site == 'facebook':
        src = facebook.Facebook(
            access_token=util.get_required_param(self, 'access_token'))
    elif site == 'flickr':
        src = flickr.Flickr(
            access_token_key=util.get_required_param(self, 'access_token_key'),
            access_token_secret=util.get_required_param(
                self, 'access_token_secret'))
    elif site == 'instagram':
        src = instagram.Instagram(
            access_token=util.get_required_param(self, 'access_token'))
    elif site == 'google+':
        auth_entity = util.get_required_param(self, 'auth_entity')
        src = googleplus.GooglePlus(
            auth_entity=ndb.Key(urlsafe=auth_entity).get())
    else:
        src_cls = source.sources.get(site)
        if not src_cls:
            raise exc.HTTPNotFound('Unknown site %r' % site)
        # NOTE(review): every request parameter becomes a constructor kwarg.
        src = src_cls(**self.request.params)

    # handle default path elements: a path element that is one of its
    # position's defaults becomes None
    args = [None if a in defaults else a
            for a, defaults in zip(args, PATH_DEFAULTS)]
    user_id = args[0] if args else None

    # fetch actor if necessary
    actor = None
    if self.request.get('format') == 'atom':
        # atom needs actor. args and user_id are already normalized above;
        # the second identical normalization that used to run here was an
        # idempotent duplicate and has been removed.
        actor = src.get_actor(user_id) if src else {}

    # get activities and write response
    response = src.get_activities_response(*args, **self.get_kwargs())
    self.write_response(response, actor=actor)
def test_preview_comment(self):
    """Previewing a comment describes the action and includes the comment text."""
    # A comment object has no url before it is published.
    to_publish = copy.deepcopy(COMMENT_OBJS[0])
    del to_publish['url']

    self.mox.ReplayAll()
    ig = instagram.Instagram(allow_comment_creation=True)
    preview = ig.preview_create(to_publish)

    for snippet in ('comment', 'this post'):
        self.assertIn(snippet, preview.description)
    self.assertIn('very cute', preview.content)
def test_create_comment_unauthorized(self):
    """Creating a comment raises HTTPError when the API answers 400.

    A more realistic test: this is what happens when you try to create
    comments with the API using an unapproved app.
    """
    error_body = '{"meta": {"code": 400, "error_type": u"OAuthPermissionsException", "error_message": "This request requires scope=comments, but this access token is not authorized with this scope. The user must re-authorize your application with scope=comments to be granted write permissions."}}'
    post_data = urllib.urlencode({'access_token': self.instagram.access_token,
                                  'text': COMMENTS[0]['text']})

    self.expect_urlopen(
        'https://api.instagram.com/v1/media/123_456/comments',
        data=post_data,
        response=error_body,
        status=400)
    self.mox.ReplayAll()

    to_publish = copy.deepcopy(COMMENT_OBJS[0])
    del to_publish['url']

    ig = instagram.Instagram(allow_comment_creation=True)
    self.assertRaises(urllib2.HTTPError, ig.create, to_publish)
def finish(self, auth_entity, state=None):
    """Validates the user's Instagram profile, then adds or deletes the source.

    When auth_entity is truthy, this:

    1. finds an Instagram URL among the ``rel-me`` links in the auth
       entity's user JSON and takes its path as the username;
    2. scrapes that profile's actor;
    3. requires the user's web site (the auth entity's key id) to be among
       the actor's URLs;
    4. requires the account to be public.

    Any failed check adds a message and redirects to '/'.

    Args:
      auth_entity: auth entity exposing ``user_json`` and ``key``, or a
        falsy value. NOTE(review): presumably falsy means the user declined
        the auth prompt - confirm against the caller.
      state: passed through to maybe_add_or_delete_source().

    Returns:
      The result of self.redirect('/') when a check fails, otherwise None.
    """
    # Bind actor up front so the final call works when auth_entity is falsy.
    # Previously that path read an unbound local.
    actor = None

    if auth_entity:
        user_json = json.loads(auth_entity.user_json)

        # find instagram profile URL
        urls = user_json.get('rel-me', [])
        logging.info('rel-mes: %s', urls)
        for url in util.trim_nulls(urls):
            if util.domain_from_link(url) == gr_instagram.Instagram.DOMAIN:
                username = urlparse.urlparse(url).path.strip('/')
                break
        else:
            # loop finished without break: no rel-me link points at Instagram
            self.messages.add(
                'No Instagram profile found. Please <a href="https://indieauth.com/setup">'
                'add an Instagram rel-me link</a>, then try again.')
            return self.redirect('/')

        # check that instagram profile links to web site
        actor = gr_instagram.Instagram(scrape=True).get_actor(
            username, ignore_rate_limit=True)
        if not actor:
            self.messages.add(
                "Couldn't find Instagram user '%s'. Please check your site's rel-me "
                "link and your Instagram account." % username)
            return self.redirect('/')

        canonicalize = util.UrlCanonicalizer(redirects=False)
        website = canonicalize(auth_entity.key.id())
        urls = [canonicalize(u) for u in microformats2.object_urls(actor)]
        logging.info('Looking for %s in %s', website, urls)
        if website not in urls:
            self.messages.add(
                "Please add %s to your Instagram profile's website or "
                'bio field and try again.' % website)
            return self.redirect('/')

        # check that the instagram account is public
        if not gr_source.Source.is_public(actor):
            self.messages.add('Your Instagram account is private. '
                              'Bridgy only supports public accounts.')
            return self.redirect('/')

    self.maybe_add_or_delete_source(Instagram, auth_entity, state, actor=actor)
def test_create_comment(self):
    """Creating a comment posts to the comments endpoint and reports success."""
    post_data = urllib.urlencode({'access_token': self.instagram.access_token,
                                  'text': COMMENTS[0]['text']})
    self.expect_urlopen(
        'https://api.instagram.com/v1/media/123_456/comments',
        '{"meta":{"status":200}}',
        data=post_data)
    self.mox.ReplayAll()

    to_publish = copy.deepcopy(COMMENT_OBJS[0])
    del to_publish['url']

    ig = instagram.Instagram(allow_comment_creation=True)
    result = ig.create(to_publish)

    # TODO instagram does not give back a comment object; not sure how to
    # get the comment id. for now, just check that creation was successful
    # self.assert_equals(source.creation_result(COMMENT_OBJS[0]),
    #                    self.instagram.create(to_publish))
    self.assertTrue(result.content)
    self.assertFalse(result.abort)
def cookie():
    """Fetches the Instagram friends feed using the caller's session cookie.

    The ``sessionid`` query arg is URL-quoted into a ``sessionid=...`` cookie
    and passed to a scraping fetch of the FRIENDS group.

    Returns:
      On success, the result of render() for the fetched activities and
      actor. On failure, depending on the interpreted HTTP status:
        * '403': an Atom feed with one entry telling the user to regenerate
          their feed, with an Atom content type header;
        * '401': a rate limiting message with status 429;
        * any other status: the error text with that status;
        * no status: an empty body with status 500, after logging.
    """
    sessionid = request.args['sessionid'].encode('utf-8')
    cookie = 'sessionid=%s' % urllib.parse.quote(sessionid)
    logging.info('Fetching with Cookie: %s', cookie)

    ig = instagram.Instagram()
    try:
        resp = ig.get_activities_response(
            group_id=source.FRIENDS, scrape=True, cookie=cookie)
    except Exception as e:
        status, text = util.interpret_http_exception(e)

        if status in ('403', ):
            regenerate_entry = {
                'object': {
                    'url': request.url,
                    'content': 'Your instagram-atom cookie isn\'t working. <a href="%s">Click here to regenerate your feed!</a>' % request.host_url,
                },
            }
            data = atom.activities_to_atom(
                [regenerate_entry], {},
                title='instagram-atom',
                host_url=request.host_url,
                request_url=request.url)
            return data, {'Content-Type': 'application/atom+xml'}

        if status == '401':
            # IG returns 401 sometimes as a form of rate limiting or bot
            # detection
            return 'Sorry, Instagram is rate limiting us', 429

        if status:
            return text, status

        logging.exception('oops!')
        return '', 500

    actor = resp.get('actor')
    if not actor:
        logging.warning("Couldn't determine Instagram user!")
    else:
        logging.info('Logged in as %s (%s)',
                     actor.get('username'), actor.get('displayName'))

    activities = resp.get('items', [])
    return render(activities, actor=actor)
def get(self):
    """Serves the caller's Instagram friends feed as Atom.

    The required ``sessionid`` request parameter is URL-quoted into a
    ``sessionid=...`` cookie for a scraping fetch of the FRIENDS group. If
    the fetch fails, the response status is set from the interpreted HTTP
    status (500 is reported as 502), 504 for connection failures, or 500
    otherwise, and the error text is written as the body.
    """
    sessionid = util.get_required_param(self, 'sessionid')
    cookie = 'sessionid=%s' % urllib.quote(sessionid.encode('utf-8'))
    logging.info('Fetching with Cookie: %s', cookie)

    ig = instagram.Instagram()
    try:
        resp = ig.get_activities_response(
            group_id=source.FRIENDS, scrape=True, cookie=cookie)
    except Exception as e:
        status, text = util.interpret_http_exception(e)
        if status:
            self.response.status = 502 if status == 500 else status
        elif util.is_connection_failure(e):
            self.response.status = 504  # HTTP 504 Gateway Timeout
        else:
            logging.exception('oops!')
            self.response.status = 500

        if isinstance(text, str):
            text = text.decode('utf-8')
        self.response.text = text or u'Unknown error.'
        return

    actor = resp.get('actor')
    if not actor:
        logging.warning("Couldn't determine Instagram user!")
    else:
        logging.info('Logged in as %s (%s)',
                     actor.get('username'), actor.get('displayName'))

    title = 'instagram-atom feed for %s' % ig.actor_name(actor)
    self.response.headers['Content-Type'] = 'application/atom+xml'
    feed = atom.activities_to_atom(
        resp.get('items', []), actor,
        title=title,
        host_url=self.request.host_url + '/',
        request_url=self.request.path_url,
        xml_base='https://www.instagram.com/')
    self.response.out.write(feed)
def get(self):
    """Handles an API GET.

    Request path is of the form /site/user_id/group_id/app_id/activity_id ,
    where each element except site is an optional string object id.
    """
    def required(name):
        # Shorthand for fetching a required request parameter.
        return util.get_required_param(self, name)

    # parse path
    args = urllib.parse.unquote(self.request.path).strip('/').split('/')
    if not args or len(args) > MAX_PATH_LEN:
        raise exc.HTTPNotFound('Expected 1-%d path elements; found %d' %
                               (MAX_PATH_LEN, len(args)))

    if len(args) > 1 and args[1] == 'nederland20':
        return self.abort(
            401,
            'To protect our users from spam and other malicious activity, this account is temporarily locked. Please log in to https://twitter.com to unlock your account.'
        )

    # make source instance
    site = args.pop(0)
    if site == 'twitter':
        src = twitter.Twitter(
            access_token_key=required('access_token_key'),
            access_token_secret=required('access_token_secret'))
    elif site == 'facebook':
        self.abort(
            400,
            'Sorry, Facebook is no longer available in the REST API. Try the library instead!'
        )
    elif site == 'flickr':
        src = flickr.Flickr(
            access_token_key=required('access_token_key'),
            access_token_secret=required('access_token_secret'))
    elif site == 'github':
        src = github.GitHub(access_token=required('access_token'))
    elif site == 'instagram':
        if self.request.get('interactive').lower() == 'true':
            src = instagram.Instagram(scrape=True)
        else:
            self.abort(
                400,
                'Sorry, Instagram is not currently available in the REST API. Try https://instagram-atom.appspot.com/ instead!'
            )
    elif site == 'mastodon':
        src = mastodon.Mastodon(
            instance=required('instance'),
            access_token=required('access_token'),
            user_id=required('user_id'))
    elif site == 'meetup':
        src = meetup.Meetup(
            access_token_key=required('access_token_key'),
            access_token_secret=required('access_token_secret'))
    elif site == 'pixelfed':
        src = pixelfed.Pixelfed(
            instance=required('instance'),
            access_token=required('access_token'),
            user_id=required('user_id'))
    elif site == 'reddit':
        # the refresh_token should be returned but is not appearing
        src = reddit.Reddit(refresh_token=required('refresh_token'))
    else:
        src_cls = source.sources.get(site)
        if not src_cls:
            raise exc.HTTPNotFound('Unknown site %r' % site)
        src = src_cls(**self.request.params)

    # decode tag URI ids
    for index, arg in enumerate(args):
        parsed = util.parse_tag_uri(arg)
        if not parsed:
            continue
        domain, object_id = parsed
        if domain != src.DOMAIN:
            raise exc.HTTPBadRequest(
                'Expected domain %s in tag URI %s, found %s' %
                (src.DOMAIN, arg, domain))
        args[index] = object_id

    # handle default path elements
    args = [None if a in defaults else a
            for a, defaults in zip(args, PATH_DEFAULTS)]
    user_id = args[0] if args else None

    # get activities (etc)
    try:
        if len(args) >= 2 and args[1] == '@blocks':
            try:
                response = {'items': src.get_blocklist()}
            except source.RateLimited as e:
                if not e.partial:
                    self.abort(429, str(e))
                response = {'items': e.partial}
        else:
            response = src.get_activities_response(*args,
                                                   **self.get_kwargs())
    except (NotImplementedError, ValueError) as e:
        self.abort(400, str(e))
        # other exceptions are handled by
        # webutil.handlers.handle_exception(), which uses
        # interpret_http_exception(), etc.

    # fetch actor if necessary
    actor = response.get('actor')
    if not actor and self.request.get('format') == 'atom':
        # atom needs actor
        actor = src.get_actor(user_id) if src else {}

    self.write_response(response, actor=actor, url=src.BASE_URL)
def test_get_actor_scrape(self):
    """get_actor() in scrape mode returns the viewer parsed from the profile page."""
    profile_url = instagram.HTML_BASE_URL + 'foo/'
    self.expect_requests_get(profile_url, HTML_PROFILE_COMPLETE,
                             allow_redirects=False)
    self.mox.ReplayAll()

    ig = instagram.Instagram(scrape=True)
    self.assert_equals(HTML_VIEWER, ig.get_actor('foo'))
def get(self):
    """Handles an API GET.

    Request path is of the form /site/user_id/group_id/app_id/activity_id ,
    where each element except site is an optional string object id.

    Responses are cached in memcache for src.RESPONSE_CACHE_TIME unless the
    ``cache`` request parameter is ``false`` (case insensitive).

    Raises:
      exc.HTTPNotFound: if the path has too many elements or the site is
        unknown.
      exc.HTTPBadRequest: if a tag URI's domain doesn't match the source's.
    """
    # parse path
    args = urllib.unquote(self.request.path).strip('/').split('/')
    if not args or len(args) > MAX_PATH_LEN:
        raise exc.HTTPNotFound('Expected 1-%d path elements; found %d' %
                               (MAX_PATH_LEN, len(args)))

    # make source instance
    site = args.pop(0)
    if site == 'twitter':
        src = twitter.Twitter(
            access_token_key=util.get_required_param(self, 'access_token_key'),
            access_token_secret=util.get_required_param(
                self, 'access_token_secret'))
    elif site == 'facebook':
        src = facebook.Facebook(
            access_token=util.get_required_param(self, 'access_token'))
    elif site == 'flickr':
        src = flickr.Flickr(
            access_token_key=util.get_required_param(self, 'access_token_key'),
            access_token_secret=util.get_required_param(
                self, 'access_token_secret'))
    elif site == 'instagram':
        src = instagram.Instagram(scrape=True)
    elif site == 'google+':
        auth_entity = util.get_required_param(self, 'auth_entity')
        src = googleplus.GooglePlus(
            auth_entity=ndb.Key(urlsafe=auth_entity).get())
    else:
        src_cls = source.sources.get(site)
        if not src_cls:
            raise exc.HTTPNotFound('Unknown site %r' % site)
        src = src_cls(**self.request.params)

    # decode tag URI ids
    for i, arg in enumerate(args):
        parsed = util.parse_tag_uri(arg)
        if parsed:
            domain, id = parsed
            if domain != src.DOMAIN:
                raise exc.HTTPBadRequest(
                    'Expected domain %s in tag URI %s, found %s' %
                    (src.DOMAIN, arg, domain))
            args[i] = id

    # check if request is cached
    cache = self.request.get('cache', '').lower() != 'false'
    if cache:
        # NOTE(review): the key is built from the path only, so requests
        # that differ only in query parameters (e.g. credentials) share a
        # cache entry. Confirm that is intended.
        cache_key = 'R %s' % self.request.path
        cached = memcache.get(cache_key)
        if cached:
            logging.info('Serving cached response %r', cache_key)
            self.write_response(cached['response'], actor=cached['actor'],
                                url=src.BASE_URL)
            return

    # handle default path elements: a path element that is one of its
    # position's defaults becomes None
    args = [None if a in defaults else a
            for a, defaults in zip(args, PATH_DEFAULTS)]
    user_id = args[0] if args else None

    # get activities (etc)
    try:
        if len(args) >= 2 and args[1] == '@blocks':
            response = {'items': src.get_blocklist()}
        else:
            response = src.get_activities_response(
                *args, **self.get_kwargs(src))
    except (NotImplementedError, ValueError) as e:
        self.abort(400, str(e))
        # other exceptions are handled by
        # webutil.handlers.handle_exception(), which uses
        # interpret_http_exception(), etc.

    # fetch actor if necessary
    actor = response.get('actor')
    if not actor and self.request.get('format') == 'atom':
        # atom needs actor. args and user_id are already normalized above;
        # the second identical normalization that used to run here was an
        # idempotent duplicate and has been removed.
        actor = src.get_actor(user_id) if src else {}

    self.write_response(response, actor=actor, url=src.BASE_URL)

    # cache response
    if cache:
        logging.info('Caching response in %r', cache_key)
        memcache.set(cache_key, {
            'response': response,
            'actor': actor,
        }, src.RESPONSE_CACHE_TIME)
def setUp(self):
    """Runs the base class setup, then creates a default Instagram source."""
    base = super(InstagramTest, self)
    base.setUp()
    self.instagram = instagram.Instagram()
def get(self):
    """Handles an API GET.

    Request path is of the form /site/user_id/group_id/app_id/activity_id ,
    where each element except site is an optional string object id.

    Raises:
      exc.HTTPNotFound: if the path has too many elements or the site is
        unknown.
      exc.HTTPBadRequest: if a tag URI's domain doesn't match the source's.
    """
    # parse path
    args = urllib.unquote(self.request.path).strip('/').split('/')
    if not args or len(args) > MAX_PATH_LEN:
        raise exc.HTTPNotFound('Expected 1-%d path elements; found %d' %
                               (MAX_PATH_LEN, len(args)))

    # make source instance
    site = args.pop(0)
    if site == 'twitter':
        src = twitter.Twitter(
            access_token_key=util.get_required_param(self, 'access_token_key'),
            access_token_secret=util.get_required_param(
                self, 'access_token_secret'))
    elif site == 'facebook':
        src = facebook.Facebook(
            access_token=util.get_required_param(self, 'access_token'))
    elif site == 'flickr':
        src = flickr.Flickr(
            access_token_key=util.get_required_param(self, 'access_token_key'),
            access_token_secret=util.get_required_param(
                self, 'access_token_secret'))
    elif site == 'github':
        src = github.GitHub(
            access_token=util.get_required_param(self, 'access_token'))
    elif site == 'instagram':
        src = instagram.Instagram(scrape=True)
    else:
        src_cls = source.sources.get(site)
        if not src_cls:
            raise exc.HTTPNotFound('Unknown site %r' % site)
        src = src_cls(**self.request.params)

    # decode tag URI ids
    for i, arg in enumerate(args):
        parsed = util.parse_tag_uri(arg)
        if parsed:
            domain, id = parsed
            if domain != src.DOMAIN:
                raise exc.HTTPBadRequest(
                    'Expected domain %s in tag URI %s, found %s' %
                    (src.DOMAIN, arg, domain))
            args[i] = id

    # handle default path elements: a path element that is one of its
    # position's defaults becomes None
    args = [None if a in defaults else a
            for a, defaults in zip(args, PATH_DEFAULTS)]
    user_id = args[0] if args else None

    # get activities (etc)
    try:
        if len(args) >= 2 and args[1] == '@blocks':
            try:
                response = {'items': src.get_blocklist()}
            except source.RateLimited as e:
                # serve whatever was fetched before the rate limit hit, if
                # anything
                if not e.partial:
                    self.abort(429, str(e))
                response = {'items': e.partial}
        else:
            response = src.get_activities_response(*args,
                                                   **self.get_kwargs())
    except (NotImplementedError, ValueError) as e:
        self.abort(400, str(e))
        # other exceptions are handled by
        # webutil.handlers.handle_exception(), which uses
        # interpret_http_exception(), etc.

    # fetch actor if necessary
    actor = response.get('actor')
    if not actor and self.request.get('format') == 'atom':
        # atom needs actor. The reassignment of args that used to sit here
        # was never read afterwards, so it has been removed.
        actor = src.get_actor(user_id) if src else {}

    self.write_response(response, actor=actor, url=src.BASE_URL)
def get(self):
    """Serves the caller's Instagram friends feed as Atom or HTML.

    The required ``sessionid`` request parameter is URL-quoted into a
    ``sessionid=...`` cookie for a scraping fetch of the FRIENDS group. The
    ``format`` request parameter picks the output: ``atom`` (the default) or
    ``html``. Any other value aborts with 400.

    If the fetch fails with an interpreted status of '403', an Atom feed
    with one entry telling the user to regenerate their feed is written.
    Otherwise the response status is set ('401' is reported as '429', an
    unknown failure as 500) and the error text is written as the body.
    """
    sessionid = util.get_required_param(self, 'sessionid')
    cookie = 'sessionid=%s' % urllib.parse.quote(sessionid.encode('utf-8'))
    logging.info('Fetching with Cookie: %s', cookie)

    host_url = self.request.host_url + '/'
    ig = instagram.Instagram()
    try:
        resp = ig.get_activities_response(
            group_id=source.FRIENDS, scrape=True, cookie=cookie)
    except Exception as e:
        status, text = util.interpret_http_exception(e)

        if status in ('403', ):
            regenerate_entry = {
                'object': {
                    'url': self.request.url,
                    'content': 'Your instagram-atom cookie isn\'t working. <a href="%s">Click here to regenerate your feed!</a>' % host_url,
                },
            }
            self.response.headers['Content-Type'] = 'application/atom+xml'
            self.response.out.write(
                atom.activities_to_atom(
                    [regenerate_entry], {},
                    title='instagram-atom',
                    host_url=host_url,
                    request_url=self.request.path_url))
            return

        if status == '401':
            # IG returns 401 sometimes as a form of rate limiting or bot
            # detection
            self.response.status = '429'
        elif status:
            self.response.status = status
        else:
            logging.exception('oops!')
            self.response.status = 500

        self.response.text = text or 'Unknown error.'
        return

    actor = resp.get('actor')
    if not actor:
        logging.warning("Couldn't determine Instagram user!")
    else:
        logging.info('Logged in as %s (%s)',
                     actor.get('username'), actor.get('displayName'))

    activities = resp.get('items', [])
    fmt = self.request.get('format', 'atom')

    if fmt == 'atom':
        title = 'instagram-atom feed for %s' % ig.actor_name(actor)
        self.response.headers['Content-Type'] = 'application/atom+xml'
        self.response.out.write(
            atom.activities_to_atom(
                activities, actor,
                title=title,
                host_url=host_url,
                request_url=self.request.path_url,
                xml_base='https://www.instagram.com/'))
    elif fmt == 'html':
        self.response.headers['Content-Type'] = 'text/html'
        self.response.out.write(microformats2.activities_to_html(activities))
    else:
        self.abort(400, 'format must be either atom or html; got %s' % fmt)