def get_posts_by_username(username, num=None, path=None):
    """Fetch an account's posts and dump them to a JSON file.

    Args:
        username: Instagram username whose media is fetched.
        num: Maximum number of posts to fetch. When None, the account's
            ``media_count`` is used.
        path: Output directory. When None, ``./data/<username>`` is used.
            The directory is created if missing.

    Returns:
        The set of ``Media`` objects that were collected.

    Side effects:
        Prints a progress line per media item and the final JSON document,
        and writes ``<path>/<username>__last_posts.json``.
    """
    agent = Agent()
    account = Account(username)
    agent.update(account)

    media = set()
    pointer = None
    media_count = account.media_count if num is None else num

    limit = 50  # page size requested from the agent
    batch_num = math.ceil(media_count / limit)
    for i in range(batch_num):
        # The last batch only asks for the remainder.
        if i == batch_num - 1:
            count = media_count - limit * (batch_num - 1)
        else:
            count = limit
        batch_media, pointer = agent.get_media(account, pointer=pointer, count=count)
        for j, item in enumerate(batch_media):
            print("Getting media: " + str(i * limit + j + 1) + " / " + str(media_count))
            media.add(Media(item.code))
        # No cursor means there is nothing left to page through; asking again
        # with pointer=None would start over from the first page.
        if pointer is None:
            break

    media_posts = {}
    for i, item in enumerate(media):
        post_info = copy.copy(item)
        post_info.owner = username
        # NOTE(review): presumably converts entity collections to plain dicts
        # so json.dumps can serialize them -- confirm against Media.
        post_info.likes = dict(post_info.likes)
        post_info.comments = dict(post_info.comments)
        media_posts[i] = post_info.__dict__

    media_dict = {"posts": media_posts}
    media_json = json.dumps(media_dict, indent=2)
    print(media_json)

    out_dir = pathlib.Path('./data/' + username) if path is None else pathlib.Path(path)
    out_dir.mkdir(parents=True, exist_ok=True)
    filename = out_dir / (username + '__last_posts.json')
    with open(filename, 'w', newline='', encoding='utf8') as f:
        f.write(media_json)

    return media
def get_likes(self, media, pointer=None, count=20, settings={}, limit=50):
    """Collect accounts that liked `media`, paging through GraphQL results.

    Args:
        media: ``Media`` whose likes are requested.
        pointer: Cursor to resume from; falsy means start at the first page.
        count: Number of likes wanted.
        settings: Request settings forwarded to ``update`` and
            ``_graphql_request``.
        limit: Maximum page size per request.

    Returns:
        ``(likes, pointer)`` -- the list of ``Account`` objects gathered and
        the cursor for the next page (None when the response reports no
        further page).

    Raises:
        TypeError: An argument has the wrong type.
        UnexpectedResponse: The response lacks expected keys or is not JSON.
    """
    validations = (
        (media, Media, "'media' must be Media type"),
        (count, int, "'count' must be int type"),
        (settings, dict, "'settings' must be dict type"),
        (limit, int, "'limit' must be int type"),
    )
    for value, expected_type, message in validations:
        if not isinstance(value, expected_type):
            raise TypeError(message)

    self.update(media, settings)

    with_cursor = '{{"shortcode":"{shortcode}","first":{first},"after":"{after}"}}'
    without_cursor = '{{"shortcode":"{shortcode}","first":{first}}}'
    template = with_cursor if pointer else without_cursor

    collected = []
    while True:
        variables = {"shortcode": media.code, "first": min(limit, count)}
        if pointer:
            variables["after"] = pointer

        response = self._graphql_request(
            query_hash="1cb6ec562846122743b61e492c85999f",
            variables=template.format(**variables),
            settings=settings,
        )

        try:
            liked_by = response.json()["data"]["shortcode_media"]["edge_liked_by"]
            edges = liked_by["edges"]
            page_info = liked_by["page_info"]
            media.likes_count = liked_by["count"]

            # Take at most `count` entries from this page.
            for edge in edges[:max(count, 0)]:
                node = edge["node"]
                liker = Account(node["username"])
                liker.id = node["id"]
                liker.profile_pic_url = node["profile_pic_url"]
                liker.is_verified = node["is_verified"]
                liker.full_name = node["full_name"]
                media.likes.add(liker)
                collected.append(liker)

            has_next = page_info["has_next_page"]
            pointer = page_info["end_cursor"] if has_next else None

            if not (len(edges) < count and has_next):
                return collected, pointer

            # More is wanted and more is available: continue from the cursor.
            count -= len(edges)
            template = with_cursor
        except (ValueError, KeyError) as exception:
            raise UnexpectedResponse(exception, response.url, response.text)
def get_likes(self, media, settings={}):
    """Return the likes embedded in the media's own update payload.

    Args:
        media: ``Media`` to inspect.
        settings: Request settings forwarded to ``update``.

    Returns:
        ``(likes, None)`` -- the list of ``Account`` objects read from
        ``edge_media_preview_like``; the second element is always None.

    Raises:
        TypeError: An argument has the wrong type.
        UnexpectedResponse: The payload lacks an expected key.
    """
    # Validate arguments
    if not isinstance(settings, dict):
        raise TypeError("'settings' must be dict type")
    if not isinstance(media, Media):
        raise TypeError("'media' must be Media type")

    payload = self.update(media, settings)
    likers = []

    try:
        for edge in payload["edge_media_preview_like"]["edges"]:
            node = edge["node"]
            liker = Account(node["username"])
            liker.id = node["id"]
            liker.profile_pic_url = node["profile_pic_url"]
            # These two fields are copied only when the node provides them.
            for optional_field in ("is_verified", "full_name"):
                if optional_field in node:
                    setattr(liker, optional_field, node[optional_field])
            media.likes.add(liker)
            likers.append(liker)
    except (ValueError, KeyError):
        raise UnexpectedResponse()

    return likers, None
def test_clear_cache_story():
    """Story.clear_cache() must leave the Story cache empty."""
    # Keep references alive for the duration of the test.
    cached_account = Account("test")
    cached_story = Story("test")

    Story.clear_cache()

    assert Story._cache == {}
def profile_exists(username):
    """Report whether the profile for `username` can be loaded.

    Builds an anonymous ``Agent`` and asks it to update the ``Account``.

    Returns:
        True when the update completes, False when it raises any ordinary
        exception.
    """
    try:
        agent = Agent()
        account = Account(username)
        agent.update(account)
    except Exception:
        # Best-effort probe: any failure counts as "does not exist".
        # `Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return False
    return True
def test_follow_unfollow(login, password, username):
    """An authenticated agent can follow and then unfollow `username`."""
    session = AgentAccount(login, password)
    target = Account(username)

    assert session.follow(target)
    assert session.unfollow(target)

    Account.clear_cache()
def test_get_media_account(agent, delay, settings, count, username):
    """get_media returns min(media_count, count) items and a matching cursor."""
    target = Account(username)

    items, cursor = agent.get_media(
        target,
        count=count,
        delay=delay,
        settings=settings,
    )

    expected_total = min(target.media_count, count)
    assert expected_total == len(items)

    # The cursor is None exactly when everything fit in one request.
    everything_fetched = target.media_count <= count
    assert (cursor is None) == everything_fetched
def test_update_account(login, password, username):
    """Updating an account through an authenticated agent returns data."""
    session = AgentAccount(login, password)
    target = Account(username)

    result = session.update(target)

    assert result is not None
    Account.clear_cache()
def test_get_followers_long(login, password, count, username):
    """get_followers returns min(followers_count, count) accounts."""
    session = AgentAccount(login, password)
    target = Account(username)

    followers, cursor = session.get_followers(target, count=count)

    expected_total = min(target.followers_count, count)
    assert expected_total == len(followers)

    # The cursor is None exactly when all followers were returned.
    everything_fetched = target.followers_count <= count
    assert (cursor is None) == everything_fetched

    Account.clear_cache()
def test_clear_cache_comment(id):
    """Comment.clear_cache() empties only the Comment cache."""
    owner = Account("test")
    post = Media("test")
    comment = Comment(id, media=post, owner=owner, text="test", created_at=0)
    assert Comment.cache == {id: comment}

    Comment.clear_cache()

    assert Comment.cache == {}
    # The other entity caches are untouched.
    assert Media.cache == {"test": post}
    assert Account.cache == {"test": owner}
def test_get_media_account_pointer(agent_account, delay, settings, count, username):
    """Page through media `count` times, threading the cursor through."""
    target = Account(username)
    cursor = None
    collected = []

    for _ in range(count):
        page, cursor = agent_account.get_media(
            target,
            pointer=cursor,
            settings=settings,
        )
        sleep(delay)
        collected.extend(page)

    all_collected = target.media_count == len(collected)
    assert (cursor is None) == all_collected
def test_get_media_account(count, username):
    """An anonymous agent returns min(media_count, count) media items."""
    visitor = Agent()
    target = Account(username)

    items, cursor = visitor.get_media(target, count=count)

    expected_total = min(target.media_count, count)
    assert expected_total == len(items)

    everything_fetched = target.media_count <= count
    assert (cursor is None) == everything_fetched

    Account.clear_cache()
    Media.clear_cache()
def test_get_followers_pointer(agent_account, delay, settings, count, username):
    """Page through followers `count` times, threading the cursor through."""
    target = Account(username)
    cursor = None
    collected = []

    for _ in range(count):
        page, cursor = agent_account.get_followers(
            target,
            pointer=cursor,
            settings=settings,
        )
        sleep(delay)
        collected.extend(page)

    few_followers = target.followers_count <= count
    assert (cursor is None) == few_followers
def test_get_media_account(login, password, count, username):
    """An authenticated agent returns min(media_count, count) media items."""
    session = AgentAccount(login, password)
    target = Account(username)

    items, cursor = session.get_media(target, count=count)

    expected_total = min(target.media_count, count)
    assert expected_total == len(items)

    everything_fetched = target.media_count <= count
    assert (cursor is None) == everything_fetched

    Account.clear_cache()
    Media.clear_cache()
def test_clear_cache_comment():
    """Clearing the Media and Comment caches leaves both empty."""
    owner = Account("test")
    post = Media("test")
    comment = Comment(1488, media=post, owner=owner, text="test", created_at=0)

    Media.clear_cache()
    Comment.clear_cache()

    assert Comment._cache == {}
    assert Media._cache == {}
async def test_async_get_followers(async_agent_account, delay, settings, count, username):
    """Async get_followers returns min(followers_count, count) accounts."""
    target = Account(username)

    followers, cursor = await async_agent_account.get_followers(
        target,
        count=count,
        delay=delay,
        settings=settings,
    )

    expected_total = min(target.followers_count, count)
    assert expected_total == len(followers)

    # The cursor is None exactly when all followers were returned.
    everything_fetched = target.followers_count <= count
    assert (cursor is None) == everything_fetched
def test_get_followers_pointer(login, password, count, username):
    """Page through followers `count` times with an authenticated agent."""
    session = AgentAccount(login, password)
    target = Account(username)
    cursor = None
    collected = []

    for _ in range(count):
        page, cursor = session.get_followers(target, pointer=cursor)
        collected.extend(page)

    few_followers = target.followers_count <= count
    assert (cursor is None) == few_followers

    Account.clear_cache()
def test_follow_unfollow(agent_account, delay, settings, username):
    """Following bumps follows_count by one; unfollowing restores it."""
    target = Account(username)

    def refreshed_follows_count():
        # Re-read the agent's own profile and report its follows counter.
        agent_account.update(settings=settings)
        return agent_account.follows_count

    baseline = refreshed_follows_count()

    assert agent_account.follow(target, settings=settings)
    sleep(delay)
    after_follow = refreshed_follows_count()
    assert after_follow == baseline + 1

    assert agent_account.unfollow(target, settings=settings)
    sleep(delay)
    after_unfollow = refreshed_follows_count()
    assert after_unfollow == baseline
async def test_async_follow_unfollow(async_agent_account, delay, settings, username):
    """Async variant: following bumps follows_count, unfollowing restores it."""
    target = Account(username)

    async def refreshed_follows_count():
        # Re-read the agent's own profile and report its follows counter.
        await async_agent_account.update(settings=settings)
        return async_agent_account.follows_count

    baseline = await refreshed_follows_count()

    assert await async_agent_account.follow(target, settings=settings)
    await asyncio.sleep(delay)
    after_follow = await refreshed_follows_count()
    assert after_follow == baseline + 1

    assert await async_agent_account.unfollow(target, settings=settings)
    await asyncio.sleep(delay)
    after_unfollow = await refreshed_follows_count()
    assert after_unfollow == baseline
def get_account_info(username, path=None):
    """Fetch an account's profile data and dump it to a JSON file.

    Args:
        username: Instagram username to look up.
        path: Output directory. When None, ``./data/<username>`` is used.
            The directory is created if missing.

    Returns:
        The updated ``Account`` object.

    Side effects:
        Writes ``<path>/<username>__account_info.json``.
    """
    agent = Agent()
    account = Account(username)
    agent.update(account)

    # Serialize a shallow copy so the returned account keeps its own fields.
    account_info = copy.copy(account)
    # NOTE(review): presumably converts entity collections to plain dicts so
    # json.dumps can serialize them -- confirm against Account.
    account_info.media = dict(account_info.media)
    account_info.follows = dict(account_info.follows)
    account_info.followers = dict(account_info.followers)

    account_dict = {"account": account_info.__dict__}
    account_json = json.dumps(account_dict, indent=2)

    out_dir = pathlib.Path('./data/' + username) if path is None else pathlib.Path(path)
    out_dir.mkdir(parents=True, exist_ok=True)
    filename = out_dir / (username + '__account_info.json')
    with open(filename, 'w', newline='', encoding='utf8') as f:
        f.write(account_json)

    return account
def test_get_media_account_pointer(count, username):
    """Page through media `count` times with an anonymous agent."""
    visitor = Agent()
    target = Account(username)
    cursor = None
    collected = []

    for _ in range(count):
        page, cursor = visitor.get_media(target, pointer=cursor)
        collected.extend(page)

    few_media = target.media_count <= count
    assert (cursor is None) == few_media

    Account.clear_cache()
    Media.clear_cache()
async def test_async_update_account(async_agent, settings, username):
    """After an async update every profile field of the account is set."""
    target = Account(username)

    result = await async_agent.update(target, settings=settings)
    assert result is not None

    populated_fields = (
        "id",
        "full_name",
        "profile_pic_url",
        "profile_pic_url_hd",
        "biography",
        "follows_count",
        "followers_count",
        "media_count",
        "is_private",
        "is_verified",
        "country_block",
    )
    for field_name in populated_fields:
        assert getattr(target, field_name) is not None
def test_update_account(username):
    """After an anonymous update every profile field of the account is set."""
    visitor = Agent()
    target = Account(username)

    result = visitor.update(target)
    assert result is not None

    populated_fields = (
        "id",
        "full_name",
        "profile_pic_url",
        "profile_pic_url_hd",
        "biography",
        "follows_count",
        "followers_count",
        "media_count",
        "is_private",
        "is_verified",
        "country_block",
    )
    for field_name in populated_fields:
        assert getattr(target, field_name) is not None

    Account.clear_cache()
def get_likes(self, media, pointer=None, count=20, settings={}, limit=50):
    """Collect accounts that liked `media`, paging through GraphQL results.

    Args:
        media: ``Media`` whose likes are requested.
        pointer: Cursor to resume from; falsy means start at the first page.
        count: Number of likes wanted.
        settings: Request settings. Forwarded to ``update`` and, merged with
            the GraphQL params/headers, to ``_get_request``. The dict passed
            in is not modified by this method.
        limit: Maximum page size per request.

    Returns:
        ``(likes, pointer)`` -- the list of ``Account`` objects gathered and
        the cursor for the next page (None when the response reports no
        further page).

    Raises:
        TypeError: An argument has the wrong type.
        UnexpectedResponse: The response lacks expected keys or is not JSON.
    """
    if not isinstance(media, Media):
        raise TypeError("'media' must be Media type")
    if not isinstance(count, int):
        raise TypeError("'count' must be int type")
    if not isinstance(settings, dict):
        raise TypeError("'settings' must be dict type")
    if not isinstance(limit, int):
        raise TypeError("'limit' must be int type")

    self.update(media, settings)

    query_hash = "1cb6ec562846122743b61e492c85999f"
    with_cursor = '{{"shortcode":"{shortcode}","first":{first},"after":"{after}"}}'
    if pointer:
        variables_string = with_cursor
    else:
        variables_string = '{{"shortcode":"{shortcode}","first":{first}}}'
    likes = []

    # Work on private copies: writing params/headers into the caller's dict
    # (or into the shared default `{}`) would leak this query's parameters
    # into later, unrelated requests.
    settings = dict(settings)
    settings["params"] = dict(settings.get("params", {}))
    settings["params"]["query_hash"] = query_hash
    settings["headers"] = dict(settings.get("headers", {}))
    settings["headers"]["X-Requested-With"] = "XMLHttpRequest"

    while True:
        data = {"shortcode": media.code, "first": min(limit, count)}
        if pointer:
            data["after"] = pointer

        variables = variables_string.format(**data)
        settings["params"]["variables"] = variables
        # The GIS header is the MD5 hex digest of "<rhx_gis>:<variables>".
        signature = "%s:%s" % (self._rhx_gis, variables)
        settings["headers"]["X-Instagram-GIS"] = \
            hashlib.md5(signature.encode("utf-8")).hexdigest()

        response = self._get_request("https://www.instagram.com/graphql/query/", **settings)

        try:
            data = response.json()["data"]["shortcode_media"]["edge_liked_by"]
            edges = data["edges"]
            page_info = data["page_info"]
            media.likes_count = data["count"]

            # Take at most `count` entries from this page.
            for edge in edges[:max(count, 0)]:
                node = edge["node"]
                account = Account(node["username"])
                account.id = node["id"]
                account.profile_pic_url = node["profile_pic_url"]
                account.is_verified = node["is_verified"]
                account.full_name = node["full_name"]
                media.likes.add(account)
                likes.append(account)

            if page_info["has_next_page"]:
                pointer = page_info["end_cursor"]
            else:
                pointer = None

            if len(edges) < count and page_info["has_next_page"]:
                # More is wanted and available: continue from the cursor.
                count = count - len(edges)
                variables_string = with_cursor
            else:
                return likes, pointer
        except (ValueError, KeyError):
            raise UnexpectedResponse(response.url, response.text)
def test_clear_cache_account():
    """Account.clear_cache() must leave the Account cache empty."""
    # Keep a reference alive for the duration of the test.
    cached_account = Account("test")

    Account.clear_cache()

    assert Account._cache == {}
def get_comments(self, media, pointer=None, count=35, settings={}, limit=50):
    """Collect comments on `media`.

    When `pointer` is None the first comments are read from the media's own
    update payload; any remainder is paged through GraphQL requests.

    Args:
        media: ``Media`` whose comments are requested.
        pointer: Cursor to resume from, or None to start from the beginning.
        count: Number of comments wanted.
        settings: Request settings. Forwarded to ``update`` and, merged with
            the GraphQL params/headers, to ``_get_request``. The dict passed
            in is not modified by this method.
        limit: Maximum page size per GraphQL request.

    Returns:
        ``(comments, pointer)`` -- the list of ``Comment`` objects gathered
        and the cursor for the next page (None when no further page is
        reported).

    Raises:
        TypeError: An argument has the wrong type.
        UnexpectedResponse: A payload lacks expected keys or is not JSON.
    """
    if not isinstance(settings, dict):
        raise TypeError("'settings' must be dict type")
    if not isinstance(count, int):
        raise TypeError("'count' must be int type")
    if not isinstance(media, Media):
        raise TypeError("'media' must be Media type")
    if not isinstance(limit, int):
        raise TypeError("'limit' must be int type")

    data = self.update(media, settings)

    query_hash = "33ba35852cb50da46f5b5e889df7d159"
    variables_string = '{{"shortcode":"{code}","first":{first},"after":"{after}"}}'
    comments = []

    if pointer is None:
        try:
            data = data["edge_media_to_comment"]
            edges = data["edges"]
            page_info = data["page_info"]

            # Take at most `count` entries from the embedded page.
            for edge in edges[:max(count, 0)]:
                node = edge["node"]
                c = Comment(node["id"], media=media,
                            owner=Account(node["owner"]["username"]),
                            text=node["text"],
                            created_at=node["created_at"])
                media.comments.add(c)
                comments.append(c)

            if page_info["has_next_page"]:
                pointer = page_info["end_cursor"]

            if len(edges) < count and pointer is not None:
                count = count - len(edges)
            else:
                return comments, pointer
        except (ValueError, KeyError):
            raise UnexpectedResponse(media)

    # Work on private copies: writing params/headers into the caller's dict
    # (or into the shared default `{}`) would leak this query's parameters
    # into later, unrelated requests.
    settings = dict(settings)
    settings["params"] = dict(settings.get("params", {}))
    settings["params"]["query_hash"] = query_hash
    settings["headers"] = dict(settings.get("headers", {}))
    settings["headers"]["X-Requested-With"] = "XMLHttpRequest"

    while True:
        data = {"after": pointer, "code": media.code, "first": min(limit, count)}

        variables = variables_string.format(**data)
        settings["params"]["variables"] = variables
        # The GIS header is the MD5 hex digest of "<rhx_gis>:<variables>".
        signature = "%s:%s" % (self._rhx_gis, variables)
        settings["headers"]["X-Instagram-GIS"] = \
            hashlib.md5(signature.encode("utf-8")).hexdigest()

        response = self._get_request("https://www.instagram.com/graphql/query/", **settings)

        try:
            data = response.json()["data"]["shortcode_media"]["edge_media_to_comment"]
            media.comments_count = data["count"]
            edges = data["edges"]
            page_info = data["page_info"]

            for edge in edges[:max(count, 0)]:
                node = edge["node"]
                c = Comment(node["id"], media=media,
                            owner=Account(node["owner"]["username"]),
                            text=node["text"],
                            created_at=node["created_at"])
                media.comments.add(c)
                comments.append(c)

            if page_info["has_next_page"]:
                pointer = page_info["end_cursor"]
            else:
                pointer = None

            if len(edges) < count and page_info["has_next_page"]:
                count = count - len(edges)
            else:
                return comments, pointer
        except (ValueError, KeyError):
            raise UnexpectedResponse(response.url, response.text)
def test_clear_cache_account(id):
    """An Account is cached under its key until clear_cache() is called."""
    cached_account = Account(id)
    assert Account.cache == {id: cached_account}

    Account.clear_cache()

    assert Account.cache == {}
def get_followers(self, account=None, pointer=None, count=20, settings={}, limit=50):
    """Collect followers of `account`, paging through GraphQL results.

    Args:
        account: ``Account`` whose followers are requested; defaults to
            ``self`` when None.
        pointer: Cursor to resume from, or None to start at the first page.
        count: Number of followers wanted.
        settings: Request settings forwarded to ``update`` and
            ``_graphql_request``.
        limit: Maximum page size per request.

    Returns:
        ``(followers, pointer)`` -- the list of ``Account`` objects gathered
        and the cursor for the next page (None when the response reports no
        further page).

    Raises:
        TypeError: An argument has the wrong type.
        UnexpectedResponse: The response lacks expected keys or is not JSON.
    """
    if account is None:
        account = self

    validations = (
        (settings, dict, "'settings' must be dict type"),
        (count, int, "'count' must be int type"),
        (account, Account, "'account' must be Account type"),
        (limit, int, "'limit' must be int type"),
    )
    for value, expected_type, message in validations:
        if not isinstance(value, expected_type):
            raise TypeError(message)

    self.update(account, settings)

    with_cursor = '{{"id":"{id}","first":{first},"after":"{after}"}}'
    without_cursor = '{{"id":"{id}","first":{first}}}'
    template = without_cursor if pointer is None else with_cursor

    collected = []
    while True:
        variables = {"first": min(limit, count), "id": account.id}
        if pointer is not None:
            variables["after"] = pointer

        response = self._graphql_request(
            query_hash="37479f2b8209594dde7facb0d904896a",
            variables=template.format(**variables),
            settings=settings,
        )

        try:
            followed_by = response.json()["data"]["user"]["edge_followed_by"]
            edges = followed_by["edges"]
            page_info = followed_by["page_info"]
            account.followers_count = followed_by["count"]

            # Take at most `count` entries from this page.
            for edge in edges[:max(count, 0)]:
                node = edge["node"]
                follower = Account(node["username"])
                follower.id = node["id"]
                follower.profile_pic_url = node["profile_pic_url"]
                follower.is_verified = node["is_verified"]
                follower.full_name = node["full_name"]
                account.followers.add(follower)
                collected.append(follower)

            has_next = page_info["has_next_page"]
            pointer = page_info["end_cursor"] if has_next else None

            if not (len(edges) < count and has_next):
                return collected, pointer

            # More is wanted and more is available: continue from the cursor.
            count -= len(edges)
            template = with_cursor
        except (ValueError, KeyError) as exception:
            raise UnexpectedResponse(exception, response.url, response.text)
def get_comments(self, media, pointer=None, count=35, settings={}, limit=50):
    """Collect comments on `media`.

    When `pointer` is None the first comments are read from the media's own
    update payload; any remainder is paged through GraphQL requests.

    Args:
        media: ``Media`` whose comments are requested.
        pointer: Cursor to resume from, or None to start from the beginning.
        count: Number of comments wanted.
        settings: Request settings forwarded to ``update`` and
            ``_graphql_request``.
        limit: Maximum page size per GraphQL request.

    Returns:
        ``(comments, pointer)`` -- the list of ``Comment`` objects gathered
        and the cursor for the next page (None when no further page is
        reported).

    Raises:
        TypeError: An argument has the wrong type.
        UnexpectedResponse: A payload lacks expected keys or is not JSON.
    """
    validations = (
        (settings, dict, "'settings' must be dict type"),
        (count, int, "'count' must be int type"),
        (media, Media, "'media' must be Media type"),
        (limit, int, "'limit' must be int type"),
    )
    for value, expected_type, message in validations:
        if not isinstance(value, expected_type):
            raise TypeError(message)

    media_payload = self.update(media, settings)
    collected = []

    def store(edges, quota):
        # Register at most `quota` comments from `edges` on the media and in
        # the result list.
        for edge in edges[:max(quota, 0)]:
            node = edge["node"]
            comment = Comment(
                node["id"],
                media=media,
                owner=Account(node["owner"]["username"]),
                text=node["text"],
                created_at=node["created_at"],
            )
            media.comments.add(comment)
            collected.append(comment)

    if pointer is None:
        try:
            embedded = media_payload["edge_media_to_comment"]
            edges = embedded["edges"]
            page_info = embedded["page_info"]
            store(edges, count)

            if page_info["has_next_page"]:
                pointer = page_info["end_cursor"]

            # Done when the quota is met or there is no cursor to follow.
            if len(edges) >= count or pointer is None:
                return collected, pointer
            count -= len(edges)
        except (ValueError, KeyError) as exception:
            raise UnexpectedResponse(exception, media)

    template = '{{"shortcode":"{code}","first":{first},"after":"{after}"}}'
    while True:
        variables = template.format(
            after=pointer,
            code=media.code,
            first=min(limit, count),
        )
        response = self._graphql_request(
            query_hash="33ba35852cb50da46f5b5e889df7d159",
            variables=variables,
            settings=settings,
        )

        try:
            page = response.json()["data"]["shortcode_media"]["edge_media_to_comment"]
            media.comments_count = page["count"]
            edges = page["edges"]
            page_info = page["page_info"]
            store(edges, count)

            has_next = page_info["has_next_page"]
            pointer = page_info["end_cursor"] if has_next else None

            if not (len(edges) < count and has_next):
                return collected, pointer
            count -= len(edges)
        except (ValueError, KeyError) as exception:
            raise UnexpectedResponse(exception, response.url, response.text)
def get_followers(self, account=None, pointer=None, count=20, settings={}, limit=50):
    """Collect followers of `account`, paging through GraphQL results.

    Args:
        account: ``Account`` whose followers are requested; defaults to
            ``self`` when None.
        pointer: Cursor to resume from, or None to start at the first page.
        count: Number of followers wanted.
        settings: Request settings. Forwarded to ``update`` and, merged with
            the GraphQL params/headers, to ``_get_request``. The dict passed
            in is not modified by this method.
        limit: Maximum page size per request.

    Returns:
        ``(followers, pointer)`` -- the list of ``Account`` objects gathered
        and the cursor for the next page (None when the response reports no
        further page).

    Raises:
        TypeError: An argument has the wrong type.
        UnexpectedResponse: The response lacks expected keys or is not JSON.
    """
    if account is None:
        account = self

    if not isinstance(settings, dict):
        raise TypeError("'settings' must be dict type")
    if not isinstance(count, int):
        raise TypeError("'count' must be int type")
    if not isinstance(account, Account):
        raise TypeError("'account' must be Account type")
    if not isinstance(limit, int):
        raise TypeError("'limit' must be int type")

    self.update(account, settings)

    query_hash = "37479f2b8209594dde7facb0d904896a"
    with_cursor = '{{"id":"{id}","first":{first},"after":"{after}"}}'
    if pointer is None:
        variables_string = '{{"id":"{id}","first":{first}}}'
    else:
        variables_string = with_cursor
    followers = []

    # Work on private copies: writing params/headers into the caller's dict
    # (or into the shared default `{}`) would leak this query's parameters
    # into later, unrelated requests.
    settings = dict(settings)
    settings["params"] = dict(settings.get("params", {}))
    settings["params"]["query_hash"] = query_hash
    settings["headers"] = dict(settings.get("headers", {}))
    settings["headers"]["X-Requested-With"] = "XMLHttpRequest"

    while True:
        data = {"first": min(limit, count), "id": account.id}
        if pointer is not None:
            data["after"] = pointer

        variables = variables_string.format(**data)
        settings["params"]["variables"] = variables
        # The GIS header is the MD5 hex digest of "<rhx_gis>:<variables>".
        signature = "%s:%s" % (self._rhx_gis, variables)
        settings["headers"]["X-Instagram-GIS"] = \
            hashlib.md5(signature.encode("utf-8")).hexdigest()

        response = self._get_request("https://www.instagram.com/graphql/query/", **settings)

        try:
            data = response.json()["data"]["user"]["edge_followed_by"]
            edges = data["edges"]
            page_info = data["page_info"]
            account.followers_count = data["count"]

            # Take at most `count` entries from this page.
            for edge in edges[:max(count, 0)]:
                node = edge["node"]
                a = Account(node["username"])
                a.id = node["id"]
                a.profile_pic_url = node["profile_pic_url"]
                a.is_verified = node["is_verified"]
                a.full_name = node["full_name"]
                account.followers.add(a)
                followers.append(a)

            if page_info["has_next_page"]:
                pointer = page_info["end_cursor"]
            else:
                pointer = None

            if len(edges) < count and page_info["has_next_page"]:
                count = count - len(edges)
                # Follow-up requests must carry the cursor; the template has
                # to be switched here or the first page is fetched again.
                variables_string = with_cursor
            else:
                return followers, pointer
        except (ValueError, KeyError):
            raise UnexpectedResponse(response.url, response.text)