def mem():
    agent = WebAgent()
    account = Account("cosmopolitan_russia")
    media1, pointer = agent.get_media(account)
    count = 0  # counter of completed fetches
    kek = []  # accumulated post data
    for k in range(198):
        try:
            media2, pointer = agent.get_media(account, pointer=pointer,
                                              count=50, delay=0.4)
            for i in media2:
                kek.append({
                    'text': i.caption,
                    'likes': i.likes_count,
                    'comments': i.comments_count
                })
            count += 1
        except Exception:  # was a bare except; keep fetch failures non-fatal
            pass
        print(1)
    dmp = json.dumps(kek)
    with open('mem.json', 'a') as f:
        print(dmp, file=f)
def get_store_items(store_id, sale_hashtag, remove_hashtags=True):
    agent = WebAgent()
    account = Account(store_id)
    agent.update(account)
    assert not account.is_private, 'Account is private!'
    new_acc, created = InstagramAccount.objects.get_or_create(name=store_id)
    if not created:
        old_items = Item.objects.filter(account=new_acc)
        old_items.delete()
    media, pointer = agent.get_media(account, count=account.media_count)
    items = []
    for post in media:
        description = post.caption or ''
        if sale_hashtag:
            # post_contain_hashtag = any([hashtag in description for hashtag in sale_hashtags])
            post_contain_hashtag = sale_hashtag in description
            if not post_contain_hashtag:
                continue
        items.append(_get_item(post, remove_hashtags))
    for item in items:
        if item.get('images'):
            Item.objects.create(account=new_acc,
                                description=item.get('description'),
                                image_url=item.get('images')[0])
    return items
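# A minimal usage sketch, assuming the Django models (InstagramAccount, Item)
# and the _get_item helper are defined elsewhere in this project; the store id
# and hashtag below are hypothetical.
items = get_store_items('some_store_account', '#sale')
for item in items:
    print(item.get('description'), item.get('images'))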
def instagram(userLogin):
    agent = WebAgent()
    d = {}  # information about the user
    try:
        account = Account(userLogin)  # username
        try:
            firstTenPub, pointer = agent.get_media(account)  # last ten publications
            otherPubs, pointer = agent.get_media(account, pointer=pointer,
                                                 count=account.media_count,
                                                 delay=1)  # the rest
            places = []  # will be used next time
            tenDaysLikes = 0
            tenDaysComments = 0
            allLikes = 0
            allComments = 0
            for i in firstTenPub:
                allLikes = allLikes + i.likes_count
                allComments = allComments + i.comments_count
                tenDaysLikes += i.likes_count
                tenDaysComments += i.comments_count
            for i in otherPubs:
                allLikes = allLikes + i.likes_count
                allComments = allComments + i.comments_count
            ln = len(otherPubs) + len(firstTenPub)
            d["Avatar: "] = account.profile_pic_url_hd
            d["Average quantity of likes from the last 10 publications: "] = str(
                tenDaysLikes // len(firstTenPub))
            d["Average quantity of comments from the last 10 publications: "] = str(
                tenDaysComments // len(firstTenPub))
            d["Average quantity of likes: "] = str(allLikes // ln)
            d["Average quantity of comments: "] = str(allComments // ln)
            d["Nickname: "] = account.username
            d["Quantity of posts: "] = str(account.media_count)
            d["Full Name: "] = account.full_name
            d["Quantity of follows: "] = str(account.follows_count)
            d["Quantity of followers: "] = str(account.followers_count)
            d["Is account private: "] = str(account.is_private)
            d["Account biography: "] = account.biography
        except Exception:  # media unavailable (e.g. private account): profile fields only
            d["Avatar: "] = account.profile_pic_url_hd
            d["Nickname: "] = account.username
            d["Quantity of follows: "] = str(account.follows_count)
            d["Quantity of followers: "] = str(account.followers_count)
            d["Is account private: "] = str(account.is_private)
            d["Account biography: "] = account.biography
    except Exception:  # account could not be loaded at all
        d["Avatar: "] = "https://avatars.mds.yandex.net/get-zen_doc/125920/pub_5bf184d0e9397500ab3a1aec_5bf18854297efb00aaff9147/scale_600"
        d["Error: "] = "404"
    return d
def mem(account="ccacc_ount"): last = None agent = WebAgent() account = Account(account) media1 = agent.get_media(account, count=1) tm = time.time() while (1): if (time.time() > tm + 5): media1 = agent.get_media(account) tm = time.time() if last != media1[0].code: last = media1[0].code print(last)
def mem(account=sconfig.our, delay=sconfig.delay):
    last = None
    agent = WebAgent()
    account = Account(account)
    media1 = agent.get_media(account, count=1)
    tm = time.time()
    while True:
        if time.time() > tm + delay:
            # get_media returns a (media_list, pointer) tuple
            media1 = agent.get_media(account)
            tm = time.time()
            if last != media1[0][0].code:
                last = media1[0][0].code
                print(last)
def get_last_inst(account=sconfig.our, cnt=5):
    result = []
    agent = WebAgent()
    account = Account(account)
    media1 = agent.get_media(account, count=cnt)
    for i in media1[0]:  # media1 is a (media_list, pointer) tuple
        result.append({
            'url': 'https://www.instagram.com/p/' + i.code + '/',
            'time': i.date,
            'text': i.caption,
            'network': 'inst',
            'id': i.owner
        })
    return result
def createAclist(agent, agentpass, accountName):
    agent = WebAgentAccount(agent)
    agent.auth(agentpass)
    account = Account(accountName)
    agent.update()
    print("please wait...")
    f, pointer = agent.get_followers(account)
    f2, pointer = agent.get_followers(account, pointer=pointer,
                                      count=account.followers_count, delay=1)
    followers = f + f2
    with open('saves/' + accountName, 'w') as file1:
        for el in followers:
            print(el)
            file1.write(str(el) + '\n')
def count(name):
    while True:
        try:
            global counter
            agent = WebAgent()
            acc_name = name
            print("Computing...")
            account = Account(acc_name)
            media1, pointer = agent.get_media(account)
            media2, pointer = agent.get_media(account, pointer=pointer,
                                              count=100, delay=1)
            posts = []
            for i in media2:
                try:
                    media = agent.get_likes(i, pointer=None, count=50,
                                            limit=5000, delay=10, settings=None)
                    postlike = [i, media[:1]]  # (post, [list of likers])
                    posts.append(postlike)
                except Exception:  # was a bare except; skip posts whose likes fail to load
                    continue
            posts = dict(posts)
            counter = collections.Counter()
            string = []
            for key, value in posts.items():
                for i in value:
                    for x in i:
                        string.append(str(x))
            for word in string:
                counter[word] += 1
            return counter
        except instagram.exceptions.InternetException:
            logging.error("error: {}".format(sys.exc_info()[0]))
            counter = "0"
            break
        except instagram.exceptions.UnexpectedResponse:
            logging.error("error: {}".format(sys.exc_info()))
            counter = "0"
            break
        except ValueError:
            logging.error("error: {}".format(sys.exc_info()[0]))
            counter = "0"
            break
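# A minimal usage sketch: count() builds a collections.Counter over the users
# who liked the account's recent posts, so most_common() ranks the most
# frequent likers. The username below is hypothetical.
top_likers = count("some_account").most_common(10)
print(top_likers)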
def add_posts(account, agent):
    account = Account(account)
    media = agent.get_media(account, pointer=None, count=50, limit=200, delay=0)
    user, created = Akk.objects.get_or_create(username=account.username,
                                              user_id=account.id,
                                              full_name=account.full_name)
    print(media)
    for i in media[0]:
        if i.is_video:
            pass
            # Post.objects.update_or_create(
            #     media=i.video_url,
            #     text=i.caption,
            #     date=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(i.date)),
            #     post_url=BASE_INST_URL + i.base_url + i.code,
            #     post_id=i.id,
            #     account=user
            # )
        elif i.is_album:
            # print('\nThis is an album\n')
            album = []
            for j in i.album:
                if j.is_video:
                    album.append(j.video_url)
                elif j.is_ad:
                    pass
                elif j.is_album:
                    pass
                else:
                    album.append(j.display_url)
            # Post.objects.update_or_create(
            #     media=album,
            #     text=i.caption,
            #     date=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(i.date)),
            #     post_url=BASE_INST_URL + i.base_url + i.code,
            #     post_id=i.id,
            #     account=user,
            # )
            print(album)
        elif i.is_ad:
            pass
        else:
            pass
def Download(name):
    """Download all media from an account.

    Takes the login (username) of the account's owner.
    """
    dir_home = os.getcwd()
    print("Start work with {}".format(name))
    try:
        print("\tHave access, opening")
        agent = WebAgent()
        account = Account(name)
        if account.is_private:
            print("\tAccount {} is private".format(account))
            return 0
        else:
            print("\tAccount {} is not private".format(account))
        agent.update(account)
        agent.get_media(account)
        medias, point = agent.get_media(account, count=account.media_count)
        try:
            os.mkdir("_{}".format(name))
            print("\t##Make _{} directory".format(name))
            os.chdir("_{}".format(name))
            print("\t##Into _{} directory".format(name))
        except OSError:  # directory already exists
            os.chdir("_{}".format(name))
            print("\t##Into _{} directory".format(name))
        # TODO: add a proper check here
        for i, media in enumerate(medias):
            if not media.is_video:
                download_Photo(media.display_url, i)
        write_comment_file(medias, account)
    except Exception:
        print("No access to open - {}".format(name))
    os.chdir(dir_home)
    print("\t##Go to {}".format(dir_home))
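# A minimal usage sketch for Download(); it relies on the download_Photo and
# write_comment_file helpers defined elsewhere in this script, and the
# username below is hypothetical. Photos land in a ./_<name> directory.
Download("some_public_account")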
def fl():
    global fp_name
    global first_profile
    for i in ac[0]:
        if i == fp_name:
            first_profile = False
        if fp_name == "":
            fp_name = i
        a = Account(i)
        agent.update(a)
        bio = a.biography
        if ("CEO" in bio or "Founder" in bio or "founder" in bio
                or "owner" in bio or "Owner" in bio
                or "entrepreneur" in bio or "Entrepreneur" in bio):
            link = f"https://www.instagram.com/{a}/"
            requests.post(API_LINKS, data={"link": link, "source": INSTA_ACC})
            print(link)
    ab = agent.get_followers(account=base_acc, pointer=ac[1],
                             count=5, limit=2, delay=5)
    return ab
import instagram
from instagram.agents import WebAgent
from instagram import Account, Media, Location

agent = WebAgent()
account = Account("zuck")
loc = Location(17326249)
agent.update(account)
media = agent.get_media(loc, count=50)
print(media)
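# A hedged note: elsewhere in these snippets get_media is unpacked as a
# (media_list, pointer) tuple, so print(media) above likely prints that tuple.
# A minimal sketch of iterating the first page under that assumption:
media_list, pointer = agent.get_media(loc, count=50)
for m in media_list:
    print(m.code, m.likes_count)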
import time
import instagram
from instagram import Account, Media, WebAgent, Story, Location, Tag, Comment

agent = WebAgent()
account = Account("acc")
posts = agent.get_media(account, pointer=None, count=5)
res = list(list(posts)[0])
i = 0  # count of posts
likes = []
likes_prev = []
comments = []
comments_prev = []
import requests

while True:
    media = Media(res[i])
    likes.append(media.likes_count)
    comments.append(media.comments_count)
    i += 1
    if i == len(res):
        likes_prev = likes
        likes = []
        comments_prev = comments
        comments = []
        time.sleep(3)
        try:
            for i in range(len(res)):
                media = Media(res[i])
                agent.update(media)
from instagram import Account, Media, WebAgent
# from datetime import datetime
# ts = int("1284101485")
# print(datetime.utcfromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S'))

agent = WebAgent()
account = Account("islambek.temirbek")  # username
d = {}  # information about the user
media1, pointer = agent.get_media(account)  # last ten publications
media2, pointer = agent.get_media(account, pointer=pointer,
                                  count=account.media_count, delay=1)  # the rest
places = []  # will be used next time
cntfrslks = 0
cntfrscmnt = 0
cntforlikes = 0
cntforcomments = 0
for s in media1:
    cntforlikes = cntforlikes + s.likes_count
    cntforcomments = cntforcomments + s.comments_count
    cntfrslks += s.likes_count  # was `= +s.likes_count`, a typo for `+=`
    cntfrscmnt += s.comments_count
    print(s.caption)
for i in media2:
    cntforlikes = cntforlikes + i.likes_count
    cntforcomments = cntforcomments + i.comments_count
ln = len(media2) + len(media1)
d["avgfstpublclks"] = cntfrslks // len(media1)
d["avgfstpublccomments"] = cntfrscmnt // len(media1)
def get_anna_nails_content():
    '''Visit the target Instagram account and collect its photos together
    with the comments on them.'''
    # global locker
    # if locker == 1:
    #     return False
    # locker = BoundedSemaphore(value=1)
    # with locker:
    photos = []
    try:
        agent = WebAgent()
        account = Account("anna_nails_ani")
        agent.update(account)
        count_download_photos = 1000
        # If the DB holds fewer than 50 records, read the maximum number of
        # records, otherwise 10:
        # if MyWork.query.count() < 50:
        #     count_download_photos = 300
        # Compute the date at which to stop and ignore older content,
        # unless this is the first load
        date_to_stop_search = MyWork.query.order_by(MyWork.published.desc()).first()
        if date_to_stop_search:
            date_to_stop_search = date_to_stop_search.published - timedelta(days=14)
        # Read the account's media through the agent;
        # get_media returns a (media_list, pointer) tuple
        media = agent.get_media(account, count=count_download_photos)
        # List of works to add to the DB
        photos = []
        # Comment patterns and owners whose comments should not be saved (ads/spam)
        reklama_pattern_list = {'реклам', 'клам', 'услуг', 'предлагаю', 'пиар'}
        reclama_owner_list = {'master_and_model123'}
        count_photo = 0
        for med in media:
            for m in med:
                if m is not None and not m.is_video:
                    # print(f'Media code: {m.code}')
                    photo_date = datetime.fromtimestamp(m.date)
                    count_photo = count_photo + 1
                    # Stop searching once the cutoff date is reached
                    if date_to_stop_search and photo_date <= date_to_stop_search:
                        # print('Reached the maximum search date')
                        break
                    photo_date = photo_date.strftime('%Y-%m-%d %H:%M:%S')
                    # print(f'Photos read: {count_photo}, id_site: {m.id}')
                    comment = agent.get_comments(media=m, count=30)
                    comments_for_photo = []
                    if comment[0]:
                        for c in comment[0]:
                            if not is_reklam(f'{c.owner}', reclama_owner_list):
                                if not is_reklam(c.text, reklama_pattern_list):
                                    comment_date = datetime.fromtimestamp(c.created_at)
                                    comment_date = comment_date.strftime('%Y-%m-%d %H:%M:%S')
                                    comments = {'id': f'{c.id}',
                                                'media': f'{c.media}',
                                                'owner': f'{c.owner}',
                                                'text': f'{c.text}',
                                                'date': comment_date}
                                    comments_for_photo.append(comments)
                    item = {'id': f'{m.id}',
                            'caption': f'{m.caption}',
                            'code': f'{m.code}',
                            'date': photo_date,
                            'url': f'{m.display_url}',
                            'owner': f'{m.owner}',
                            'likes': f'{m.likes_count}',
                            'comments': comments_for_photo}
                    photos.append(item)
    except AttributeError:  # must precede the generic handler or it is unreachable
        print("Attribute Error!")
    except Exception as e:
        print(f"Error: Type None! {e}")
    save_my_work(photos)
login = Account(USERNAME)
pw = PASSWORD
base_acc = Account(INSTA_ACC)
settings = {}
agent = WebAgentAccount(login)
agent_url = ""
agent_code = ""
auth_failed = True
try:
    agent.auth(pw)
except CheckpointException as e:
    is_fail = True
    agent_url = e.checkpoint_url
class InstagramParser:
    def __init__(self, resources):
        self.config = parse_config('instagram')
        self.resources = resources
        self.anon_agent = WebAgent()
        self.agent = {'agent': self.anon_agent, 'set_time': time.time()}
        # self.logged_agent = WebAgentAccount(self.config['LOGIN'])
        # self.logged_agent.auth(self.config['PASSWORD'])
        self.logged_agent = self.anon_agent
        self.agent = {'agent': self.logged_agent, 'set_time': time.time()}
        self.mdb = MongoDBStorage()
        self.downloader = Downloader('https://www.instagram.com')
        self.proxy_helper = self.downloader.proxy_helper
        self.use_proxy = False
        # initialize before first use in get_settings (it was previously set
        # only inside insta_request, risking AttributeError on the first call)
        self.previous_local_request = 0
        self.date_limit = False
        self.old_datetime = datetime.datetime.now() - datetime.timedelta(
            days=ast.literal_eval(self.config['scraping_date_limit']))
        if not os.path.exists('../stream/instagram'):
            os.makedirs('../stream/instagram')

    def get_settings(self, request_start_time, proxy_only=False):
        if proxy_only or (self.use_proxy and
                          time.time() - self.previous_local_request < 11 * 60):
            chosen_proxy = self.proxy_helper.get_proxy()
        else:
            chosen_proxy = None
        if self.use_proxy and self.agent['set_time'] < request_start_time:
            with self.proxy_helper.lock:
                self.agent = {'agent': self.anon_agent, 'set_time': time.time()}
                self.use_proxy = False
                logging.info('stopped using proxies')
        settings = {
            "proxies": {
                "http": chosen_proxy,
                "https": chosen_proxy,
            },
            'timeout': 30
        }
        return chosen_proxy, settings

    def swap_agent(self):
        # compare the active agent, not the {'agent': ..., 'set_time': ...} dict
        if self.agent['agent'] is self.anon_agent:
            self.agent = {'agent': self.logged_agent, 'set_time': time.time()}
        else:
            self.agent = {'agent': self.anon_agent, 'set_time': time.time()}

    def insta_request(self, pointer=None, data=None, proxy_only=False):
        @self.proxy_helper.exception_decorator
        def request_to_instagram(proxy, setting, pointer=None, posts=None):
            if data:
                self.agent['agent'].update(data, settings=setting)
            else:
                posts, pointer = self.agent['agent'].get_media(
                    self.resource, pointer=pointer, settings=setting, delay=1)
            return posts, pointer

        request_start_time = time.time()
        while True:
            chosen_proxy, settings = self.get_settings(
                proxy_only=proxy_only, request_start_time=request_start_time)
            request_start_time = time.time()
            try:
                posts, pointer = request_to_instagram(proxy=chosen_proxy,
                                                      setting=settings,
                                                      pointer=pointer)
                break
            except (InternetException, UnexpectedResponse) as e:
                with self.proxy_helper.lock:
                    if self.agent['set_time'] < request_start_time:
                        if isinstance(e, UnexpectedResponse):
                            self.swap_agent()
                        if not chosen_proxy:
                            self.use_proxy = True
                            self.previous_local_request = time.time()
                            logging.info('started using proxies')
                            self.agent = {'agent': self.logged_agent,
                                          'set_time': time.time()}
        if posts:
            return posts, pointer

    def get_new_posts(self, resource):
        self.resource = Account(resource)
        stored_posts = self.get_parsed_posts()
        new_posts = []
        pointer = None
        self.insta_request(data=self.resource)
        posts_count = self.resource.media_count
        posts_scraped = 0
        while posts_count > posts_scraped:
            try:
                posts, pointer = self.insta_request(pointer=pointer)
                posts_scraped += len(posts)
                logging.info(f'scraped {posts_scraped} posts')
                for post in posts:
                    if post.__str__() not in stored_posts:
                        new_posts.append(post)
                    else:
                        raise StopIteration
            except StopIteration:
                break
        return new_posts

    def get_parsed_posts(self):
        return self.mdb.get_instagram_posts(self.resource.__str__())

    def parse_album(self, album):
        album_data = dict()
        album_pages = []
        for album_page in album.album:
            self.insta_request(data=album_page)
            album_pages.append(album_page.resources[-1])
        album_data['album_pages'] = album_pages
        return album_data

    def parse_video(self, video):
        video_data = dict()
        video_data['video_url'] = self.downloader.download_file(
            video.video_url, 'mp4')
        return video_data

    def parse_single_post(self, single_post):
        single_post_data = dict()
        return single_post_data

    def get_mandatory_post_data(self, post):
        post_data = dict()
        post_data['_id'] = post.__str__()
        post_data['preview_image'] = self.downloader.download_file(
            post.resources[-1], 'jpg') if post.is_video else post.resources[-1]
        post_data['description'] = post.caption
        post_data['date'] = post.date
        post_data['likes_count'] = post.likes_count
        post_data['link'] = 'https://www.instagram.com/p/{post_id}'.format(
            post_id=post_data['_id'])
        post_data['resource'] = self.resource.__str__()
        post_data['icon'] = self.resource.profile_pic_url
        post_data['is_album'] = post.is_album
        post_data['is_video'] = post.is_video
        return post_data

    def parse_post(self, post):
        self.insta_request(data=post)
        post_data = self.get_mandatory_post_data(post)
        if post_data['date'] < self.old_datetime.timestamp():
            self.date_limit = True
        if post.is_album:
            post_data.update(self.parse_album(post))
        elif post.is_video:
            post_data.update(self.parse_video(post))
        else:
            post_data.update(self.parse_single_post(post))
        return post_data

    def process_posts(self, posts):
        pool_size = ast.literal_eval(self.config['pool_size'])
        chunks = list(chunkify(posts, pool_size))
        for n, chunk in enumerate(chunks):
            logging.info('processing {}/{} posts batch'.format(n + 1, len(chunks)))
            chunk = chunk[::-1]
            pool = ThreadPool(pool_size)
            parsed_posts = [
                post for post in pool.map(self.parse_post, chunk)
                if post['date'] > self.old_datetime.timestamp()
            ]
            pool.close()
            self.mdb.add_new_posts(parsed_posts)
            if self.date_limit:
                logging.info('instagram resource {} reached the scraping date limit'.format(
                    self.resource))
                return

    def run(self):
        for resource in self.resources:
            logging.info(f'starting to parse instagram resource {resource}')
            self.resource = Account(resource)
            # pass the resource explicitly: get_new_posts requires it
            new_posts = self.get_new_posts(resource)
            self.process_posts(new_posts)
            logging.info(f'instagram resource {resource} is parsed')
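# A minimal usage sketch, assuming the surrounding project provides
# parse_config, MongoDBStorage, Downloader, chunkify and the 'instagram'
# config section; the resource names below are hypothetical.
if __name__ == '__main__':
    parser = InstagramParser(['zuck', 'instagram'])
    parser.run()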
from telebot import TeleBot
import schedule, time, datetime
from instagram import Account, WebAgent

bot = TeleBot('1228395330:AAEPH5rF1oNLXiuFBSZ26aosz-g_n3AiFfk')

with open("posted_photos.txt") as file:
    data = [row.strip() for row in file]

agent = WebAgent()
account = Account("erotic_model_girls")
agent.update(account)
media = agent.get_media(account, count=9999)[0]  # first element is the media list
count = 1
now = datetime.datetime.now()

def job():
    global count
    global media
    while True:
        m = media[-count]
        count += 1
        if m.id in data or m.is_video:
            continue
        else:
            data.append(m.display_url)
            with open("posted_photos.txt", "a") as a_file: