def test_update_location(id):
    """Update a ``Location`` built from *id* with a default-constructed agent.

    The test passes when ``Agent.update`` returns without raising.
    ``Location.clear_cache()`` is called afterwards.
    """
    agent = Agent()
    target = Location(id)
    agent.update(target)  # the returned payload is not inspected
    Location.clear_cache()
def test_update_tag(name):
    """Update a ``Tag`` built from *name* with a default-constructed agent.

    The test passes when ``Agent.update`` returns without raising.
    ``Tag.clear_cache()`` is called afterwards.
    """
    agent = Agent()
    target = Tag(name)
    agent.update(target)  # the returned payload is not inspected
    Tag.clear_cache()
def test_update():
    """Check that an argument-less ``Agent.update()`` sets two private fields.

    After the call, ``_rhx_gis`` and ``_csrf_token`` must both exist on the
    agent and must not be ``None``.
    """
    agent = Agent()
    agent.update()
    for attr_name in ("_rhx_gis", "_csrf_token"):
        assert getattr(agent, attr_name, None) is not None
def profile_exists(username):
    """Return whether the account *username* can be loaded.

    Args:
        username: Login handed to ``Account``.

    Returns:
        ``True`` when ``Agent.update`` succeeds for the account, ``False``
        when building the agent/account or the update raises an
        ``Exception``.
    """
    try:
        agent = Agent()
        account = Account(username)
        agent.update(account)
    except Exception:
        # Any library or network failure counts as "does not exist".  Unlike a
        # bare ``except`` this lets KeyboardInterrupt / SystemExit propagate.
        return False
    return True
def test_get_likes(shortcode):
    """Fetch one batch of likes for the media identified by *shortcode*.

    The number of returned likes must not exceed ``media.likes_count``.
    ``Media.clear_cache()`` is called afterwards.
    """
    agent = Agent()
    post = Media(shortcode)
    likes, _next_pointer = agent.get_likes(post)
    assert len(likes) <= post.likes_count
    Media.clear_cache()
def test_update_photo_set(shortcode):
    """Update the media identified by *shortcode* and expect a non-video.

    ``Media.clear_cache()`` is called afterwards.
    """
    agent = Agent()
    album = Media(shortcode)
    agent.update(album)  # the returned payload is not inspected
    assert not album.is_video
    Media.clear_cache()
def test_update_video(shortcode):
    """Update the media identified by *shortcode* and expect a video.

    ``Media.clear_cache()`` is called afterwards.
    """
    agent = Agent()
    clip = Media(shortcode)
    agent.update(clip)  # the returned payload is not inspected
    assert clip.is_video
    Media.clear_cache()
def test_get_comments_long(count, shortcode):
    """Request *count* comments of a media in a single call.

    Args:
        count: Number of comments to request.
        shortcode: Identifier passed to ``Media``.

    Checks that exactly ``min(media.comments_count, count)`` comments come
    back, and that the returned pointer is ``None`` exactly when the media
    has no more than *count* comments.
    """
    anon = Agent()
    media = Media(shortcode)
    data, pointer = anon.get_comments(media, count=count)
    assert min(media.comments_count, count) == len(data)
    # Pagination of comments depends on the comment total; the previous
    # comparison against ``likes_count`` was a copy-paste slip.
    assert (pointer is None) == (media.comments_count <= count)
    Media.clear_cache()
def test_get_media_account(count, username):
    """Request *count* media items of the account *username* in one call.

    Checks that ``min(account.media_count, count)`` items come back and that
    the returned pointer is ``None`` exactly when the account has no more than
    *count* media.  The ``Account`` and ``Media`` caches are cleared at the end.
    """
    agent = Agent()
    owner = Account(username)
    items, next_pointer = agent.get_media(owner, count=count)

    expected_len = min(owner.media_count, count)
    assert expected_len == len(items)

    everything_fetched = owner.media_count <= count
    assert (next_pointer is None) == everything_fetched

    Account.clear_cache()
    Media.clear_cache()
def test_get_media_location_long(count, id):
    """Request *count* media items of the location *id* in one call.

    Checks that ``min(location.media_count, count)`` items come back and that
    the returned pointer is ``None`` exactly when the location has no more
    than *count* media.  The ``Location`` and ``Media`` caches are cleared at
    the end.
    """
    agent = Agent()
    place = Location(id)
    items, next_pointer = agent.get_media(place, count=count)

    expected_len = min(place.media_count, count)
    assert expected_len == len(items)

    everything_fetched = place.media_count <= count
    assert (next_pointer is None) == everything_fetched

    Location.clear_cache()
    Media.clear_cache()
def test_get_media_tag_long(count, name):
    """Request *count* media items of the tag *name* in one call.

    Checks that ``min(tag.media_count, count)`` items come back and that the
    returned pointer is ``None`` exactly when the tag has no more than *count*
    media.  The ``Tag`` and ``Media`` caches are cleared at the end.
    """
    agent = Agent()
    hashtag = Tag(name)
    items, next_pointer = agent.get_media(hashtag, count=count)

    expected_len = min(hashtag.media_count, count)
    assert expected_len == len(items)

    everything_fetched = hashtag.media_count <= count
    assert (next_pointer is None) == everything_fetched

    Tag.clear_cache()
    Media.clear_cache()
def test_get_comments_pointer(count, shortcode):
    """Page through the comments of a media by feeding the pointer back.

    Args:
        count: Number of ``get_comments`` calls to make.
        shortcode: Identifier passed to ``Media``.

    Each call passes the pointer returned by the previous one.  Afterwards
    the pointer must be ``None`` exactly when ``media.comments_count`` is
    not greater than *count*.
    """
    anon = Agent()
    media = Media(shortcode)
    pointer = None
    data = []
    for _ in range(count):
        tmp, pointer = anon.get_comments(media, pointer=pointer)
        data.extend(tmp)
    # Comments are being paged, so the comment total is the relevant figure;
    # the previous comparison against ``likes_count`` was a copy-paste slip.
    # NOTE(review): ``count`` is the number of requests, not the number of
    # comments collected in ``data`` - confirm that is the intended bound.
    assert (pointer is None) == (media.comments_count <= count)
    Media.clear_cache()
def test_get_media_location_pointer(count, id):
    """Page through the media of location *id* by feeding the pointer back.

    ``get_media`` is called *count* times, each time with the pointer returned
    by the previous call.  Afterwards the pointer must be ``None`` exactly
    when ``location.media_count <= count``.  The ``Location`` and ``Media``
    caches are cleared at the end.
    """
    agent = Agent()
    place = Location(id)
    cursor = None
    collected = []
    for _ in range(count):
        page, cursor = agent.get_media(place, pointer=cursor)
        collected += page
    # NOTE(review): ``count`` is the number of requests made above, yet it is
    # compared with a media total - confirm this is intended.
    everything_fetched = place.media_count <= count
    assert (cursor is None) == everything_fetched
    Location.clear_cache()
    Media.clear_cache()
def test_get_media_tag_pointer(count, name):
    """Page through the media of tag *name* by feeding the pointer back.

    ``get_media`` is called *count* times, each time with the pointer returned
    by the previous call.  Afterwards the pointer must be ``None`` exactly
    when ``tag.media_count <= count``.  The ``Tag`` and ``Media`` caches are
    cleared at the end.
    """
    agent = Agent()
    hashtag = Tag(name)
    cursor = None
    collected = []
    for _ in range(count):
        page, cursor = agent.get_media(hashtag, pointer=cursor)
        collected += page
    # NOTE(review): ``count`` is the number of requests made above, yet it is
    # compared with a media total - confirm this is intended.
    everything_fetched = hashtag.media_count <= count
    assert (cursor is None) == everything_fetched
    Tag.clear_cache()
    Media.clear_cache()
def test_get_media_account_pointer(count, username):
    """Page through the media of account *username* by feeding the pointer back.

    ``get_media`` is called *count* times, each time with the pointer returned
    by the previous call.  Afterwards the pointer must be ``None`` exactly
    when ``account.media_count <= count``.  The ``Account`` and ``Media``
    caches are cleared at the end.
    """
    agent = Agent()
    owner = Account(username)
    cursor = None
    collected = []
    for _ in range(count):
        page, cursor = agent.get_media(owner, pointer=cursor)
        collected += page
    # NOTE(review): ``count`` is the number of requests made above, yet it is
    # compared with a media total - confirm this is intended.
    everything_fetched = owner.media_count <= count
    assert (cursor is None) == everything_fetched
    Account.clear_cache()
    Media.clear_cache()
def get_post_comments(post, num=None, path=None):
    """Fetch comments of *post*, dump them to a JSON file and return them.

    Args:
        post: Media entity; ``comments_count`` and ``code`` are read from it.
        num: Number of comments to request.  ``None`` means
            ``int(post.comments_count)``.
        path: Base output directory.  ``None`` means ``'./data'``.

    Returns:
        A set of ``Comment`` objects built from the ids of the fetched items.

    Side effects:
        Creates ``<path>/comments`` when missing and writes
        ``<path>/comments/<post.code>__last_comments.json``.
    """
    agent = Agent()
    comments = set()
    pointer = None
    comments_count = int(post.comments_count) if num is None else num

    limit = 50  # comments requested per call
    batch_num = math.ceil(comments_count / limit)
    for i in range(batch_num):
        # The final batch only asks for the remainder.
        if i == batch_num - 1:
            count = comments_count - limit * (batch_num - 1)
        else:
            count = limit
        batch_comments, pointer = agent.get_comments(
            post, pointer=pointer, count=count)
        comments.update(Comment(item.id) for item in batch_comments)
        if pointer is None:
            # Nothing left to page through; another call with ``pointer=None``
            # would start over from the first page.
            break

    comments_info = {}
    for i, item in enumerate(comments):
        # Work on a shallow copy so the entity itself keeps its object fields.
        comment_info = copy.copy(item)
        comment_info.media = str(comment_info.media)
        comment_info.owner = str(comment_info.owner)
        comments_info[i] = comment_info.__dict__
    comments_json = json.dumps({"comments": comments_info}, indent=2)

    if path is None:
        path = './data'
    out_dir = pathlib.Path(path) / 'comments'
    out_dir.mkdir(parents=True, exist_ok=True)
    filename = out_dir / (post.code + '__last_comments.json')
    with open(filename, 'w', newline='', encoding='utf8') as f:
        f.write(comments_json)
    return comments
def get_wave_warned(posts, users_profiles):
    """Merge the results of ``InstaHelper.get_warned_by_post`` over *posts*.

    One ``Agent`` is created and reused for every post.  Each per-post mapping
    is folded into a single dict with ``dict.update``, so later posts
    overwrite earlier entries with the same key.

    Returns:
        The merged dict.
    """
    agent = Agent()
    merged = {}
    for post in posts:
        per_post = InstaHelper.get_warned_by_post(agent, post, users_profiles)
        merged.update(per_post)
    return merged
def get_posts_by_tag_name(tagname, num=None, path=None):
    """Fetch posts for a tag, dump them to a JSON file and return them.

    Args:
        tagname: Name passed to ``Tag``.
        num: Number of posts to request.  ``None`` means ``tag.media_count``.
        path: Output directory.  ``None`` means ``'./data/tag__<tagname>'``.

    Returns:
        A set of updated ``Media`` objects.

    Side effects:
        Prints progress and the resulting JSON, creates *path* when missing
        and writes ``<path>/tag__<tagname>__last_posts.json``.
    """
    agent = Agent()
    # Update and read the very same instance instead of relying on two
    # ``Tag(tagname)`` calls handing back one shared object.
    tag = Tag(tagname)
    agent.update(tag)

    media = set()
    pointer = None
    media_count = tag.media_count if num is None else num

    limit = 50  # posts requested per call
    batch_num = math.ceil(media_count / limit)
    for i in range(batch_num):
        # The final batch only asks for the remainder.
        if i == batch_num - 1:
            count = media_count - limit * (batch_num - 1)
        else:
            count = limit
        batch_media, pointer = agent.get_media(tag, pointer=pointer, count=count)
        for j, item in enumerate(batch_media):
            print("Getting media: " + str(i * limit + j + 1) + " / " + str(media_count))
            post = Media(item.code)
            agent.update(post)
            media.add(post)
        if pointer is None:
            # Nothing left to page through; another call with ``pointer=None``
            # would start over from the first page.
            break

    media_posts = {}
    for i, item in enumerate(media):
        # Work on a shallow copy so the entity itself keeps its object fields.
        post_info = copy.copy(item)
        post_info.likes = dict(post_info.likes)
        post_info.comments = dict(post_info.comments)
        post_info.location = str(post_info.location)
        media_posts[i] = post_info.__dict__
    media_json = json.dumps({"posts": media_posts}, indent=2)
    print(media_json)

    if path is None:
        path = './data/tag__' + tagname
    out_dir = pathlib.Path(path)
    out_dir.mkdir(parents=True, exist_ok=True)
    filename = out_dir / ('tag__' + tagname + '__last_posts.json')
    with open(filename, 'w', newline='', encoding='utf8') as f:
        f.write(media_json)
    return media
def get_posts_for_wave(usernames):
    """Collect the last post and its link for every user in *usernames*.

    ``InstaHelper._get_last_post_link`` is called once per user with a shared
    ``Agent``.

    Returns:
        A ``(posts, links)`` pair: the posts converted with ``str`` and the
        links as returned, both in the order of *usernames*.
    """
    agent = Agent()
    found_posts, found_links = [], []
    lookups = (InstaHelper._get_last_post_link(agent, user) for user in usernames)
    for post, link in lookups:
        found_posts.append(post)
        found_links.append(link)
    # String conversion happens only after every lookup has finished.
    return list(map(str, found_posts)), found_links
def get_picture(post, path):
    """Download the display image of *post* into ``<path>/pictures``.

    The file is named ``<owner login>__<post code>.jpg``; the directory is
    created when missing.

    Returns:
        ``post.display_url``, the URL the image was fetched from.
    """
    # Not used below; kept because constructing an Agent may have side
    # effects - TODO confirm and drop if it does not.
    agent = Agent()
    owner_login = post.owner.login
    image_url = post.display_url
    pictures_dir = path + '/pictures'
    pathlib.Path(pictures_dir).mkdir(parents=True, exist_ok=True)
    target = pictures_dir + '/' + owner_login + '__' + post.code + '.jpg'
    urllib.request.urlretrieve(image_url, target)
    return image_url
def get_tag_info(tagname, path=None):
    """Update a tag, dump its attributes to a JSON file and return it.

    Args:
        tagname: Name passed to ``Tag``.
        path: Output directory.  ``None`` means ``'./data/tag__<tagname>'``.

    Returns:
        The updated ``Tag`` object.

    Side effects:
        Creates *path* when missing and writes
        ``<path>/tag__<tagname>__tag_info.json``.
    """
    agent = Agent()
    # Update and read the very same instance instead of relying on two
    # ``Tag(tagname)`` calls handing back one shared object.
    tag = Tag(tagname)
    agent.update(tag)

    # Work on a shallow copy so the entity itself keeps its original ``media``.
    tag_info = copy.copy(tag)
    tag_info.media = dict(tag_info.media)
    tag_json = json.dumps({"tag": tag_info.__dict__}, indent=2)

    if path is None:
        path = './data/tag__' + tagname
    out_dir = pathlib.Path(path)
    out_dir.mkdir(parents=True, exist_ok=True)
    filename = out_dir / ('tag__' + tagname + '__tag_info.json')
    with open(filename, 'w', newline='', encoding='utf8') as f:
        f.write(tag_json)
    return tag
def test_update_account(username):
    """Update the account *username* and check its fields are populated.

    The value returned by ``Agent.update`` and each listed account attribute
    must not be ``None``.  ``Account.clear_cache()`` is called afterwards.
    """
    agent = Agent()
    account = Account(username)
    payload = agent.update(account)
    assert payload is not None

    required_fields = (
        "id",
        "full_name",
        "profile_pic_url",
        "profile_pic_url_hd",
        "biography",
        "follows_count",
        "followers_count",
        "media_count",
        "is_private",
        "is_verified",
        "country_block",
    )
    for field_name in required_fields:
        assert getattr(account, field_name) is not None

    Account.clear_cache()
def get_account_info(username, path=None):
    """Update an account, dump its attributes to a JSON file and return it.

    Args:
        username: Login passed to ``Account``.
        path: Output directory.  ``None`` means ``'./data/<username>'``.

    Returns:
        The updated ``Account`` object.

    Side effects:
        Creates *path* when missing and writes
        ``<path>/<username>__account_info.json``.
    """
    agent = Agent()
    # Update and read the very same instance instead of relying on two
    # ``Account(username)`` calls handing back one shared object.
    account = Account(username)
    agent.update(account)

    # Work on a shallow copy so the entity itself keeps its original fields.
    account_info = copy.copy(account)
    account_info.media = dict(account_info.media)
    account_info.follows = dict(account_info.follows)
    account_info.followers = dict(account_info.followers)
    account_json = json.dumps({"account": account_info.__dict__}, indent=2)

    if path is None:
        path = './data/' + username
    out_dir = pathlib.Path(path)
    out_dir.mkdir(parents=True, exist_ok=True)
    filename = out_dir / (username + '__account_info.json')
    with open(filename, 'w', newline='', encoding='utf8') as f:
        f.write(account_json)
    return account
def agent(settings):
    """Build an ``Agent`` with *settings* passed as its ``settings`` keyword."""
    configured = Agent(settings=settings)
    return configured
import os
import re

import instaparser
from instaparser.agents import Agent
from instaparser.entities import Tag, Media


def preprocess_text(text):
    """Normalise a comment string.

    Steps, in order: lower-case and map "ё" to "е"; replace URLs, @mentions
    and #hashtags with the placeholders ``URL``, ``USER`` and ``TAG``;
    collapse every run of other characters into one space; blank out runs of
    lower-case Latin letters; strip leading/trailing whitespace.

    Args:
        text: Input string.

    Returns:
        The cleaned string.
    """
    text = text.lower().replace("ё", "е")
    # Raw strings keep the patterns identical while avoiding the
    # invalid-escape warnings that '\.' and '\s' trigger in plain literals.
    text = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', 'URL', text)
    text = re.sub(r'@[^\s]+', 'USER', text)
    text = re.sub(r'#[^\s]+', 'TAG', text)
    # NOTE(review): the digit range is 1-9, so "0" is treated as a separator -
    # confirm this is intended.
    text = re.sub(r'[^a-zA-Zа-яА-Я1-9]+', ' ', text)
    # The input was lower-cased first, so only the upper-case placeholders
    # inserted above survive this step among the Latin letters.
    text = re.sub(r'[a-z]+', ' ', text)
    text = re.sub('\n', ' ', text)
    return text.strip()


agent = Agent()


def get_comments_by_tag(word, numberOfPost):
    """Walk the comments of posts found for the tag *word*.

    Args:
        word: Tag name; also used as the name of a directory that is created
            in the current working directory.
        numberOfPost: Number of posts requested from ``agent.get_media``.

    NOTE(review): the visible part of this function ends after the comment
    text is cleaned; ``comments`` and ``counter`` are not used in it.
    """
    try:
        os.mkdir("{}".format(word))
    except FileExistsError:
        pass
    tag = Tag(word)
    # (items, pointer) tuple; only the items are used below.
    medias = agent.get_media(obj=tag, count=numberOfPost)
    counter = 0
    for media in medias[0]:
        comments = ""
        post = agent.update(obj=Media(media))  # post payload including comments
        for t in post["edge_media_to_comment"]["edges"]:
            comment = str(t["node"]["text"])  # comment text as a string
            comment = preprocess_text(comment)
def has_post(username):
    """Return ``True`` when the first media batch of *username* is non-empty.

    One ``Agent.get_media`` call is made with default arguments; the returned
    pointer is ignored.
    """
    agent = Agent()
    owner = Account(username)
    first_batch, _next_pointer = agent.get_media(owner)
    return len(first_batch) != 0
def is_profile_private(username):
    """Return the ``is_private`` attribute of the account *username*.

    The account is refreshed with ``Agent.update`` before the attribute is
    read.
    """
    profile = Account(username)
    Agent().update(profile)
    return profile.is_private
from random import random from time import sleep from openpyxl import Workbook from collections import namedtuple from datetime import datetime #VIPS = ['pryanya93','ksenia_lykina_tennis','golubovskaya_polina','pribylova96','lera.solovyeva','pivovarovaofficial', # 'anastasia__frolova','kasatkina','polinaleykina'] VIPS = [ 'pivovarovaofficial', 'anastasia__frolova', 'kasatkina', 'polinaleykina' ] #VIP = 'mariasharapova' #VIP = 'katemakarova1' FRIENDS = 20 # Количество друзей с наибольшей перепиской agent = Agent() #exception_manager.repeats = 10 # Количество повторов при ошибке - поставил в библиотеке for vip in VIPS: print(datetime.now().strftime("%H:%M:%S"), vip, '------------------------------') account = Account(vip) all_medias_codes = [] all_medias = [] try: medias, pointer = agent.get_media(account) except Exception as e: print(datetime.now().strftime("%H:%M:%S"), 'Сбой') medias, pointer = agent.get_media(account) for media in medias: