def mem():
    """Scrape caption/like/comment counts from one Instagram account into mem.json.

    Pages through the "cosmopolitan_russia" media feed (50 posts per page,
    up to 198 pages) and appends the collected records, as one JSON array,
    to mem.json.

    NOTE(review): appending a new JSON array per run means mem.json as a
    whole is not a single valid JSON document — kept for compatibility.
    """
    agent = WebAgent()
    account = Account("cosmopolitan_russia")
    media1, pointer = agent.get_media(account)
    records = []  # collected post records
    for _ in range(198):  # hard page cap to bound runtime
        try:
            media2, pointer = agent.get_media(account, pointer=pointer,
                                              count=50, delay=0.4)
            for post in media2:
                records.append({
                    'text': post.caption,
                    'likes': post.likes_count,
                    'comments': post.comments_count,
                })
        except Exception:
            # Best-effort: skip a failed page but keep what we already have.
            # (Was a bare except, which also swallowed KeyboardInterrupt.)
            pass
        print(1)  # progress marker, one per page attempt
    dmp = json.dumps(records)
    with open('mem.json', 'a') as f:
        print(dmp, file=f)
def get_store_items(store_id, sale_hashtag, remove_hashtags=True):
    """Re-import all posts of an Instagram store account into the Item table.

    Args:
        store_id: Instagram username of the store account.
        sale_hashtag: if truthy, only posts whose caption contains this
            hashtag are imported.
        remove_hashtags: forwarded to _get_item (whether to strip hashtags).

    Returns:
        The list of item dicts built from the matching posts.

    Raises:
        ValueError: if the account is private and cannot be scraped.
    """
    agent = WebAgent()
    account = Account(store_id)
    agent.update(account)
    # `assert` is stripped under `python -O`; validate explicitly instead.
    if account.is_private:
        raise ValueError('Account is private!')
    new_acc, created = InstagramAccount.objects.get_or_create(name=store_id)
    if not created:
        # Re-importing an existing account: drop its stale items first.
        Item.objects.filter(account=new_acc).delete()
    media, pointer = agent.get_media(account, count=account.media_count)
    items = []
    for post in media:
        description = post.caption or ''
        if sale_hashtag and sale_hashtag not in description:
            continue
        items.append(_get_item(post, remove_hashtags))
    for item in items:
        # Only posts that produced at least one image become DB rows.
        if item.get('images'):
            Item.objects.create(account=new_acc,
                                description=item.get('description'),
                                image_url=item.get('images')[0])
    return items
def instagram(userLogin):
    """Collect profile and engagement statistics for one Instagram user.

    Returns a dict mapping human-readable labels to values. If the media
    feed cannot be read (private account, scraping failure), only profile
    fields are returned; if even the profile lookup fails, a placeholder
    avatar and an "Error" marker are returned.
    """
    def _basic_fields(acc):
        # Fields available even when the media feed is unreadable.
        return {
            "Avatar: ": acc.profile_pic_url_hd,
            "Nickname: ": acc.username,
            "Quantity of follows: ": str(acc.follows_count),
            "Quantity of followers: ": str(acc.followers_count),
            "Is account private: ": str(acc.is_private),
            "Account biography: ": acc.biography,
        }

    agent = WebAgent()
    d = {}
    try:
        account = Account(userLogin)
        try:
            # Last ten publications, then the remainder of the feed.
            firstTenPub, pointer = agent.get_media(account)
            otherPubs, pointer = agent.get_media(account, pointer=pointer,
                                                 count=account.media_count,
                                                 delay=1)
            tenDaysLikes = sum(m.likes_count for m in firstTenPub)
            tenDaysComments = sum(m.comments_count for m in firstTenPub)
            allLikes = tenDaysLikes + sum(m.likes_count for m in otherPubs)
            allComments = tenDaysComments + sum(m.comments_count for m in otherPubs)
            ln = len(otherPubs) + len(firstTenPub)
            d["Avatar: "] = account.profile_pic_url_hd
            d["Average quantity of likes from the last 10 publications: "] = str(
                tenDaysLikes // len(firstTenPub))
            d["Average quantity of comments from the last 10 publications: "] = str(
                tenDaysComments // len(firstTenPub))
            d["Average quantity of likes: "] = str(allLikes // ln)
            d["Average quantity of comments: "] = str(allComments // ln)
            d["Nickname: "] = account.username
            d["Quantity of posts: "] = str(account.media_count)
            d["Full Name: "] = account.full_name
            d["Quantity of follows: "] = str(account.follows_count)
            d["Quantity of followers: "] = str(account.followers_count)
            d["Is account private: "] = str(account.is_private)
            d["Account biography: "] = account.biography
        except Exception:
            # Feed unreadable (private account, division by zero on empty
            # feed, scraping error): fall back to profile-only data.
            # (Was a bare except, which also swallowed KeyboardInterrupt.)
            d.update(_basic_fields(account))
    except Exception:
        # Even the profile lookup failed: return a placeholder.
        d["Avatar: "] = "https://avatars.mds.yandex.net/get-zen_doc/125920/pub_5bf184d0e9397500ab3a1aec_5bf18854297efb00aaff9147/scale_600"
        d["Error: "] = "404"
    return d
def mem(account="ccacc_ount"):
    """Poll an Instagram account and print each new latest post's shortcode.

    Refreshes the feed every 5 seconds and prints the shortcode whenever
    the most recent post changes. Runs forever.

    Args:
        account: Instagram username to watch.
    """
    last = None
    agent = WebAgent()
    account = Account(account)
    media1 = agent.get_media(account, count=1)
    tm = time.time()
    while True:
        if time.time() > tm + 5:
            media1 = agent.get_media(account)
            tm = time.time()
        # media1[0] is assumed to be the newest post — TODO confirm
        # get_media's return shape for this library version.
        if last != media1[0].code:
            last = media1[0].code
            print(last)
        # BUG fixed: the original spun in a busy-wait, burning a full CPU
        # core between polls.
        time.sleep(0.5)
def mem(account=sconfig.our, delay=sconfig.delay):
    """Poll an Instagram account and print each new latest post's shortcode.

    Refreshes the feed every `delay` seconds and prints the shortcode
    whenever the most recent post changes. Runs forever.

    Args:
        account: Instagram username to watch (default from sconfig).
        delay: seconds between feed refreshes (default from sconfig).
    """
    last = None
    agent = WebAgent()
    account = Account(account)
    media1 = agent.get_media(account, count=1)
    tm = time.time()
    while True:
        if time.time() > tm + delay:
            media1 = agent.get_media(account)
            tm = time.time()
        # media1[0] is assumed to be the newest post — TODO confirm
        # get_media's return shape for this library version.
        if last != media1[0].code:
            last = media1[0].code
            print(last)
        # BUG fixed: the original spun in a busy-wait, burning a full CPU
        # core between polls.
        time.sleep(0.5)
def get_last_inst(account=sconfig.our, cnt=5):
    """Return the latest `cnt` posts of an account as feed-entry dicts.

    Each entry carries the post URL, timestamp, caption, the network tag
    'inst', and the post owner's id.
    """
    agent = WebAgent()
    acc = Account(account)
    fetched = agent.get_media(acc, count=cnt)
    # fetched[0] holds the list of media objects.
    return [
        {
            'url': 'https://www.instagram.com/p/' + post.code + '/',
            'time': post.date,
            'text': post.caption,
            'network': 'inst',
            'id': post.owner,
        }
        for post in fetched[0]
    ]
def count(name):
    """Build a Counter over the likers of an account's recent posts.

    Fetches ~100 posts for `name`, pulls (at most the first) liker of each,
    and counts string occurrences across the collected data. Sets the
    module-level global `counter` as a side effect; on known scraping /
    network errors `counter` is set to the string "0" and the loop exits.
    """
    # Retry forever until the scrape either succeeds or hits a known error.
    while True:
        try:
            global counter
            agent = WebAgent()
            acc_name = name
            print("Вычисляем...")
            account = Account(acc_name)
            media1, pointer = agent.get_media(account)
            media2, pointer = agent.get_media(account, pointer=pointer, count=100, delay=1)
            posts = []
            for i in media2:
                try:
                    # Likers of this post (paged; high limit, slow delay).
                    media = agent.get_likes(i, pointer=None, count=50, limit=5000, delay=10, settings=None)
                    # Pair each post with a one-element slice of its likes data.
                    postlike = ([i, media[:1]])
                    posts.append(postlike)
                except:
                    continue
            # [post, likes] pairs become a {post: likes} mapping.
            posts = dict(posts)
            counter = collections.Counter()
            string = []
            # Flatten every value's nested elements into their str() forms.
            for key, value in posts.items():
                for i in value:
                    for x in i:
                        string.append(str(x))
            for word in string:
                counter[word] += 1
            return counter
        except instagram.exceptions.InternetException:
            logging.error("error: {}".format(sys.exc_info()[0]))
            counter = "0"
            break
        except instagram.exceptions.UnexpectedResponse:
            logging.error("error: {}".format(sys.exc_info()))
            counter = "0"
            break
        except ValueError:
            logging.error("error: {}".format(sys.exc_info()[0]))
            counter = "0"
            break
def __init__(self, resources):
    """Set up the Instagram scraping service: agents, storage, proxies, and the stream directory."""
    self.config = parse_config('instagram')
    self.resources = resources
    # Anonymous (not logged-in) web agent.
    self.anon_agent = WebAgent()
    self.agent = {'agent': self.anon_agent, 'set_time': time.time()}
    # Authenticated agent is disabled; fall back to the anonymous one.
    # self.logged_agent = WebAgentAccount(self.config['LOGIN'])
    # self.logged_agent.auth(self.config['PASSWORD'])
    self.logged_agent = self.anon_agent
    # NOTE(review): this overwrites the self.agent dict assigned above;
    # with the login disabled both dicts wrap the same anonymous agent.
    self.agent = {'agent': self.logged_agent, 'set_time': time.time()}
    self.mdb = MongoDBStorage()
    self.downloader = Downloader('https://www.instagram.com')
    self.proxy_helper = self.downloader.proxy_helper
    self.use_proxy = False
    self.date_limit = False
    # Oldest post date worth scraping, per the configured day limit
    # (config value is a string, hence the literal_eval).
    self.old_datetime = datetime.datetime.now() - datetime.timedelta(
        days=ast.literal_eval(self.config['scraping_date_limit']))
    if not os.path.exists('../stream/instagram'):
        os.makedirs('../stream/instagram')
def Download(name):
    """Download all photos from a public Instagram account into ./_<name>/.

    Saves every non-video post via download_Photo, writes the comments file,
    and always restores the original working directory.

    Args:
        name: Instagram username of the account to download.

    Returns:
        0 if the account is private, otherwise None.
    """
    dir_home = os.getcwd()
    print("Start work with {}".format(name))
    try:
        print("\tExcess with open ")
        agent = WebAgent()
        account = Account(name)
        # BUG fixed: update() must run BEFORE the is_private check — a
        # freshly constructed Account has no profile data yet.
        agent.update(account)
        if account.is_private:
            print("\tAccount {} is private".format(account))
            return 0
        print("\tAccount {} is not private".format(account))
        # (Removed a duplicate get_media() call whose result was discarded.)
        medias, point = agent.get_media(account, count=account.media_count)
        # makedirs(exist_ok=True) replaces the try-mkdir/except-chdir dance.
        os.makedirs("_{}".format(name), exist_ok=True)
        os.chdir("_{}".format(name))
        print("\t##Into _{} directory".format(name))
        for i, media in enumerate(medias):
            if not media.is_video:
                download_Photo(media.display_url, i)
        write_comment_file(medias, account)
    except Exception:
        # Keep the best-effort semantics, but stop swallowing
        # SystemExit/KeyboardInterrupt (was a bare except).
        print("Haven't excess with open - {}".format(name))
    finally:
        # Always restore the caller's working directory.
        os.chdir(dir_home)
        print("\t##Go to {}".format(dir_home))
def parse_list(massive):
    """Run add_posts for every account object in `massive`.

    Args:
        massive: iterable of objects exposing a `.username` attribute.
    """
    # Hoisted out of the loop: one WebAgent is reused for all accounts
    # instead of constructing a new one per iteration.
    agent = WebAgent()
    for acc in massive:
        add_posts(acc.username, agent)
import time import instagram from instagram import Account, Media, WebAgent, Story, Location, Tag, Comment agent = WebAgent() account = Account("acc") posts = agent.get_media(account, pointer=None, count=5) res = list(list(posts)[0]) i = 0 #counts of posts likes = [] likes_prev = [] comments = [] comments_prev = [] import requests while True: media = Media(res[i]) likes.append(media.likes_count) comments.append(media.comments_count) i += 1 if i == len(res): likes_prev = likes likes = [] comments_prev = comments comments = [] time.sleep(3) try: for i in range(len(res)): media = Media(res[i]) agent.update(media)
from instagram import Account, Media, WebAgent
from datetime import datetime

# BUG fixed: the datetime import and `ts` were commented out while the
# print below still used them, raising NameError at import time.
ts = int("1284101485")
print(datetime.utcfromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S'))

agent = WebAgent()
account = Account("islambek.temirbek")  # username
d = {}  # information about user
media1, pointer = agent.get_media(account)  # last ten publications
media2, pointer = agent.get_media(account, pointer=pointer,
                                  count=account.media_count,
                                  delay=1)  # the rest of the feed
places = []  # will be next time
cntfrslks = 0       # likes over the first (latest) page
cntfrscmnt = 0      # comments over the first (latest) page
cntforlikes = 0     # likes over all posts
cntforcomments = 0  # comments over all posts
for s in media1:
    cntforlikes += s.likes_count
    cntforcomments += s.comments_count
    # BUG fixed: original used "=+" (plain assignment to a unary +x),
    # which kept only the last post's counts instead of accumulating.
    cntfrslks += s.likes_count
    cntfrscmnt += s.comments_count
    print(s.caption)
for i in media2:
    cntforlikes += i.likes_count
    cntforcomments += i.comments_count
ln = len(media2) + len(media1)
d["avgfstpublclks"] = cntfrslks // len(media1)
d["avgfstpublccomments"] = cntfrscmnt // len(media1)
def get_anna_nails_content():
    """Fetch recent photos (with their non-advert comments) from the
    anna_nails_ani Instagram account and persist them via save_my_work.

    Paging stops once posts older than (newest stored work - 14 days) are
    reached, so repeated runs only pick up recent content.
    """
    # Previous semaphore-based locking was disabled:
    # locker = BoundedSemaphore(value=1)
    photos = []
    try:
        agent = WebAgent()
        account = Account("anna_nails_ani")
        agent.update(account)
        count_download_photos = 1000  # upper bound on posts to fetch
        # Cut-off date: 14 days before the most recent stored work (if any
        # exists — first run scans everything).
        date_to_stop_search = MyWork.query.order_by(MyWork.published.desc()).first()
        if date_to_stop_search:
            date_to_stop_search = date_to_stop_search.published - timedelta(days=14)
        media = agent.get_media(account, count=count_download_photos)
        photos = []
        # Comment text substrings / owners treated as advertising and
        # skipped (runtime data — kept verbatim).
        reklama_pattern_list = {'реклам', 'клам', 'услуг', 'предлагаю', 'пиар'}
        reclama_owner_list = {'master_and_model123'}
        count_photo = 0
        # NOTE(review): get_media appears to return (posts, pointer); the
        # outer loop iterates that tuple, the inner one the post list —
        # confirm against the library version in use.
        for med in media:
            for m in med:
                if m != None and not m.is_video:
                    photo_date = datetime.fromtimestamp(m.date)
                    count_photo = count_photo + 1
                    # Stop once content is older than the cut-off date.
                    if date_to_stop_search and photo_date <= date_to_stop_search:
                        break
                    photo_date = photo_date.strftime('%Y-%m-%d %H:%M:%S')
                    comment = agent.get_comments(media=m, count=30)
                    comments_for_photo = []
                    if comment[0]:
                        for c in comment[0]:
                            # Keep only comments that are not adverts by
                            # owner or by text pattern.
                            if is_reklam(f'{c.owner}', reclama_owner_list) == False:
                                if is_reklam(c.text, reklama_pattern_list) == False:
                                    comment_date = datetime.fromtimestamp(c.created_at)
                                    comment_date = comment_date.strftime('%Y-%m-%d %H:%M:%S')
                                    comments = {'id' : f'{c.id}',
                                                'media': f'{c.media}',
                                                'owner' : f'{c.owner}',
                                                'text' : f'{c.text}',
                                                'date' : comment_date}
                                    comments_for_photo.append(comments)
                    item = {'id' : f'{m.id}',
                            'caption' : f'{m.caption}',
                            'code' : f'{m.code}',
                            'date' : photo_date,
                            'url' : f'{m.display_url}',
                            'owner' : f'{m.owner}',
                            'likes' : f'{m.likes_count}',
                            'comments' : comments_for_photo}
                    photos.append(item)
    except Exception as e:
        pass
        print(f"Error: Type None! {e}")
    except(AttributeError):
        # NOTE(review): unreachable — `except Exception` above already
        # catches AttributeError.
        print("Atribute Error!")
    save_my_work(photos)
from telebot import TeleBot import schedule, time, datetime from instagram import Account, WebAgent bot = TeleBot('1228395330:AAEPH5rF1oNLXiuFBSZ26aosz-g_n3AiFfk') with open("posted_photos.txt") as file: data = [row.strip() for row in file] agent = WebAgent() account = Account("erotic_model_girls") agent.update(account) media = agent.get_media(account, count=9999)[0] count = 1 now = datetime.datetime.now() def job(): global count global media while True: m = media[-count] count += 1 if m.id in data or m.is_video: continue else: data.append(m.display_url) with open("posted_photos.txt", "a") as a_file: