def test_pr_122(self):
    """Downloaded media carry their Instagram date as file times.

    Feature implemented by @susundberg: the access time and the
    modification time of a downloaded file are expected to equal the
    ``taken_at_timestamp`` of the media. Checked for a single post, for
    the first picture of a profile and for the first video of a profile.
    """
    looter = ProfileLooter('franz_ferdinand', template='{code}', session=self.session)
    info = looter.get_post_info('BY77tSfBnRm')

    def check_times(filename, media):
        # Both timestamps reported by the filesystem must match the media date.
        stat = self.destfs.getdetails(filename)
        self.assertEqual(stat.raw["details"]["accessed"], media['taken_at_timestamp'])
        self.assertEqual(stat.raw["details"]["modified"], media['taken_at_timestamp'])

    # Test download_post
    single = PostLooter('BY77tSfBnRm', session=self.session, template='{code}')
    single.download(self.destfs)
    check_times('BY77tSfBnRm.jpg', info)

    # Test download_pictures
    first_picture = next(m for m in looter.medias() if not m['is_video'])
    looter.download_pictures(self.destfs, media_count=1)
    check_times('{}.jpg'.format(first_picture['shortcode']), first_picture)

    # Test download_videos
    first_video = next(m for m in looter.medias() if m['is_video'])
    looter.download_videos(self.destfs, media_count=1)
    check_times('{}.mp4'.format(first_video['shortcode']), first_video)
def instagram_p(url):
    """Download the post at *url* and return its picture opened for reading.

    The post is downloaded into ``./pictures/`` and the file
    ``./pictures/<post id>.jpg`` is then opened in binary mode.
    The caller owns the returned file object and must close it.
    """
    post = PostLooter(url)
    # Downloading
    media_id = post.info['id']
    post.download('./pictures/')
    return open('./pictures/{}.jpg'.format(media_id), 'rb')
def instagram_v(url):
    """Download the post at *url* and return its video opened for reading.

    The videos of the post are downloaded into ``./videos/`` and the file
    ``./videos/<post id>.mp4`` is then opened in binary mode.
    The caller owns the returned file object and must close it.
    """
    post = PostLooter(url)
    # Downloading
    media_id = post.info['id']
    post.download_videos('./videos/')
    return open('./videos/{}.mp4'.format(media_id), 'rb')
def get_insta(request):
    """Download the post named in a submitted form into ``~/Downloads``.

    On a POST request the ``links`` field is read and, depending on whether
    ``get_video`` or ``get_photo`` is present in the form data, the videos
    or the media of that post are downloaded. Any other request downloads
    nothing. The result of ``redir(request)`` is returned in every case.
    """
    dirs = os.path.expanduser("~") + '/Downloads'
    if request.method == 'POST':
        # 'get_video' takes precedence when both buttons are present.
        if 'get_video' in request.POST:
            link = request.POST.get('links')
            PostLooter(link).download_videos(dirs)
        elif 'get_photo' in request.POST:
            link = request.POST.get('links')
            PostLooter(link).download(dirs)
    return redir(request)
def get_post(post_id):
    """Return a small summary dictionary for the post *post_id*.

    The result has the keys ``id`` (the post shortcode), ``user_name``
    (the owner's username) and ``image_url`` (the post's display URL).
    A trace line is printed before the post is looked up.
    """
    print("get post : %s" % post_id)
    info = PostLooter(post_id).get_post_info(post_id)
    return {
        "id": info["shortcode"],
        "user_name": info["owner"]["username"],
        "image_url": info["display_url"],
    }
def test_issue_094(self):
    """Captions end up in the metadata of pictures downloaded by post code.

    Thanks to @jeanmarctst for raising this issue. Downloads a post with
    ``add_metadata=True`` and checks that the EXIF ``UserComment`` field
    of the resulting file is not empty.
    """
    post = PostLooter(
        "BY77tSfBnRm",
        add_metadata=True,
        template='{code}',
        session=self.session,
    )
    post.download(self.destfs)
    raw_picture = self.destfs.getbytes("BY77tSfBnRm.jpg")
    exif = piexif.load(raw_picture, True)
    self.assertTrue(exif['Exif']['UserComment'])
def download_instagram_vid(post_id):
    """Attempt to download the videos of the post *post_id*.

    If ``PostLooter`` rejects the post with ``ValueError`` a message is
    printed and the process exits with status 1.

    Returns a tuple ``(post id, owner username, download result)`` where
    the last item is whatever ``download_videos`` returns. The target
    directory is read from the name ``destination``, which is not defined
    in this function (presumably a module-level variable; verify).
    """
    try:
        looter = PostLooter(post_id)
    except ValueError:
        print("Couldn't get video from the link. The user's profile may be private.")
        sys.exit(1)

    post = looter.get_post_info(post_id)
    video = post['id']
    username = post['owner']['username']
    downloaded = looter.download_videos(destination)
    return video, username, downloaded
def test_pr_122_download_post(self):
    """A downloaded post carries its Instagram date as file times.

    Feature implemented by @susundberg: the access time and the
    modification time of the downloaded file are expected to equal the
    ``taken_at_timestamp`` of the post.
    """
    code = 'BY77tSfBnRm'
    looter = PostLooter(code, session=self.session, template='{code}')
    info = looter.get_post_info(code)
    looter.download(self.destfs)

    stat = self.destfs.getdetails('{}.jpg'.format(code))
    for field in ("accessed", "modified"):
        self.assertEqual(stat.raw["details"][field], info['taken_at_timestamp'])
def instalooterw():
    """Scratch function: build a ``PostLooter`` from a full post URL.

    The looter is only constructed; nothing is downloaded or returned.
    """
    # Earlier experiments, left disabled:
    # looter = ProfileLooter(username="******")
    # # looter.download(destination='~/Pictures', media_count=1, timeframe=(datetime.now(), datetime.now()-timedelta(5)))
    # looter.download_videos('~/Pictures', media_count=1)
    # f = open("/home/smkh_l/projects/instagram_helper/text.txt", "r")
    # f1 = f.read()
    # count = 0
    # f1 = f1.split()
    # for i in f1:
    #     if 'text' in i:
    #         count += 1
    # print(count)
    # users = set()
    # # for media in looter.medias():
    # #     post_info = looter.get_post_info(media['shortcode'])
    # #     break
    # post_info = looter.get_post_info('CGSDCgLhNbq')
    # # for comment in post_info['edge_media_to_preview_comment']['edges']:
    #     user = comment['node']['owner']['username']
    #     users.add(user)
    # print(users)
    post_url = "https://www.instagram.com/p/CGW8ClsFRip/?utm_source=ig_web_copy_link"
    post_looter = PostLooter(code=post_url)
def test_issue_184(self, _):
    """A post can be addressed directly by its URL.

    Feature request by @ghost. The looter built from a full post URL must
    expose the shortcode found in that URL. The second argument is unused
    (presumably a mock injected by a decorator outside this view).
    """
    url = "https://www.instagram.com/p/BJlIB9WhdRn/?taken-by=2k"
    self.assertEqual(PostLooter(url).code, "BJlIB9WhdRn")
def test_issue_039(self):
    """Every picture of a multi-picture post is downloaded.

    Feature request by @verafide. After downloading the post, the root of
    the destination filesystem must contain exactly the five expected
    files.
    """
    PostLooter("BRHecUuFhPl", session=self.session).download(self.destfs)
    downloaded = set(self.destfs.listdir("/"))
    expected = {
        "1461270165803344956.jpg",
        "1461270167497776767.jpg",
        "1461270174435133336.jpg",
        "1461270172581471925.jpg",
        "1461270181565655668.jpg",
    }
    self.assertEqual(downloaded, expected)
def test_issue_042(self):
    """A multi-video post is downloaded completely.

    Thanks to @MohamedIM for reporting this bug (seen with the CLI `post`
    option). After downloading with ``get_videos=True``, the root of the
    destination filesystem must contain exactly the four expected files.
    """
    PostLooter('BRW-j_dBI6F', get_videos=True, session=self.session).download(self.destfs)
    downloaded = set(self.destfs.listdir("/"))
    expected = {
        '1465633492745668095.mp4',
        '1465633517836005761.mp4',
        '1465633541559037966.mp4',
        '1465633561523918792.mp4',
    }
    self.assertEqual(downloaded, expected)
def test_issue_026(self):
    """Downloaded pictures are not resized.

    Feature request by @verafide. Downloads a known post and checks the
    pixel dimensions of the resulting file.
    """
    PostLooter("BO0XpEshejh", session=self.session).download(self.destfs)
    picture_path = self.destfs.getsyspath("1419863760138791137.jpg")
    # Open the image in a ``with`` block so the underlying file handle is
    # released as soon as the size has been checked, instead of staying
    # open until garbage collection.
    with PIL.Image.open(picture_path) as pic:
        self.assertEqual(pic.size, (525, 612))
def test_post_url(self, _):
    """The shortcode is extracted from every supported post URL form.

    Covers ``instagram.com`` and ``instagr.am`` hosts, each with an
    ``http://`` scheme, an ``https://`` scheme and no scheme at all.
    The second argument is unused (presumably a mock injected by a
    decorator outside this view).
    """
    expected_code = "BJlIB9WhdRn"
    candidates = (
        "http://www.instagram.com/p/BJlIB9WhdRn/?taken-by=2k",
        "https://www.instagram.com/p/BJlIB9WhdRn/?taken-by=2k",
        "www.instagram.com/p/BJlIB9WhdRn/?taken-by=2k",
        "http://instagr.am/p/BJlIB9WhdRn/?taken-by=2k",
        "https://instagr.am/p/BJlIB9WhdRn/?taken-by=2k",
        "instagr.am/p/BJlIB9WhdRn/?taken-by=2k",
    )
    for candidate in candidates:
        self.assertEqual(PostLooter(candidate).code, expected_code)
def send_text(message):
    """Answer a text message, sending back the media of an Instagram post.

    Behaviour by message content:

    * text containing ``instagram.com/p/``: the post is looked up; a single
      image or a single video is downloaded and sent back, while a
      multi-media post (``GraphSidecar``) is answered with an apology;
    * text containing ``private``: the text from the eighth character on is
      forwarded to a fixed chat id;
    * anything else: a short usage hint is sent.

    The downloaded file is opened in a ``with`` block so the handle is
    closed once the upload call returns (the original never closed it).

    Args:
        message: the incoming message object; ``message.text`` and
            ``message.chat.id`` are read.
    """
    log(strftime("%Y-%m-%d %H:%M:%S", gmtime()), message)
    if 'instagram.com/p/' in message.text:
        path = message.text
        looter = PostLooter(path)
        if looter.info['__typename'] == 'GraphImage':
            picture_id = looter.info['id']
            looter.download('./pictures/')
            with open('./pictures/{}.jpg'.format(picture_id), 'rb') as picture:
                bot.send_photo(message.chat.id,
                               picture,
                               caption='🤖 Downloaded with @instsave_bot')
        elif looter.info['__typename'] == 'GraphVideo':
            video_id = looter.info['id']
            looter.download_videos('./videos/')
            with open('./videos/{}.mp4'.format(video_id), 'rb') as video:
                bot.send_video(message.chat.id,
                               video,
                               caption='🤖 Downloaded with @instsave_bot')
        elif looter.info['__typename'] == 'GraphSidecar':
            bot.send_message(
                message.chat.id,
                'Sorry, I can\'t send you post with more than 1 photo\n\nPlease try again'
            )
    elif 'private' in message.text:
        # NOTE(review): hard-coded recipient chat id; the slice drops the
        # first seven characters of the text (presumably a command prefix).
        bot.send_message(436264579, message.text[7:])
    else:
        bot.send_message(
            message.chat.id,
            'Please, send link or username\n\nNeed more help?\nJust tap: /help'
        )
async def send_media(message: types.Message):
    """Reply with the media of the post named in *message*, then its caption.

    The message text is handed to ``PostLooter``. If that raises
    ``ValueError``, or the caption edges cannot be read (``KeyError``), the
    error message is sent and nothing else happens. Otherwise every link
    yielded by ``get_links`` is attached to one media group (as a video
    when the link contains ``.mp4``, as a photo otherwise) and the group is
    sent. Finally the first caption is sent, or a fixed placeholder text
    when the post has no caption.
    """
    try:
        looter = PostLooter(message.text, get_videos=True)
        edges = looter.info['edge_media_to_caption']['edges']
    except (ValueError, KeyError):
        await message.answer(emojize(ERROR_MESSAGE))
        return

    album = types.MediaGroup()
    for item in looter.medias():
        for link in get_links(item, looter):
            attach = album.attach_video if '.mp4' in link else album.attach_photo
            attach(link)
    await message.answer_media_group(media=album)

    try:
        # An empty edge list means the post has no caption.
        await message.answer(edges[0]['node']['text'])
    except IndexError:
        await message.answer('<i>Описание отсутствует.</i>',
                             parse_mode=types.ParseMode.HTML)
def test_invalid_post_code(self, _):
    """Constructing a looter from an invalid post code raises ``ValueError``.

    The second argument is unused (presumably a mock injected by a
    decorator outside this view).
    """
    with self.assertRaises(ValueError):
        PostLooter("instagram")  # invalid code
def scrape_post_from_link(given_link):
    """Scrape the post behind *given_link* into a list of flat dictionaries.

    For every media yielded by the looter one dictionary is produced with:
    caption text plus the hashtags and mentions found in it, comment
    authors and texts (raw and run through ``process_text``), owner
    details, counters, tagged users, and the text recognised in the
    downloaded picture. Picture download and text recognition are best
    effort: on any failure the four image-related fields are set to empty
    strings.

    Returns:
        list of dict, one entry per media.
    """
    looter = PostLooter(given_link)
    post_list = []
    for post_info in looter.medias():
        # Comment authors and texts, collected in one pass over the edges.
        comment_users = []
        comment_texts = []
        comment_users_verified = []
        comment_texts_processed = []
        for edge in post_info['edge_media_to_parent_comment']['edges']:
            comment_users.append(edge['node']['owner']['username'])
            comment_texts.append(edge['node']['text'])
            comment_texts_processed.append(process_text(edge['node']['text']))
            comment_users_verified.append(edge['node']['owner']['is_verified'])

        # A post without caption (or with an unexpected layout) yields "".
        try:
            caption = post_info['edge_media_to_caption']['edges'][0]['node']['text']
        except Exception:
            caption = ""

        # Keys are listed in the order the result has always used.
        post_dict = {
            'hashtags_cap': re.findall(r"#(\w+)", caption) if caption else [],
            'mentions_cap': re.findall(r"@(\w+)", caption) if caption else [],
            'shortcode': post_info['shortcode'],
            'photo_url': post_info['display_url'],
            'comment_users': comment_users,
            'comment_texts': comment_texts,
            'comment_users_verified': comment_users_verified,
            'comment_texts_processed': comment_texts_processed,
            'caption': caption,
            'post_url': _baseurl + str(post_info['shortcode']),
            'post_id': post_info['id'],
            'user_name': post_info['owner']['username'],
            'user_id': post_info['owner']['id'],
            'user_full_name': post_info['owner']['full_name'],
            'user_verified': post_info['owner']['is_verified'],
            'user_private': post_info['owner']['is_private'],
            'user_profile_pic_url': post_info['owner']['profile_pic_url'],
            'user_post_count': post_info['owner']['edge_owner_to_timeline_media']['count'],
            'is_ad': post_info['is_ad'],
            'is_video': post_info['is_video'],
            'location': post_info['location'],
            'timestamp': post_info['taken_at_timestamp'],
            'datetime': datetime.fromtimestamp(post_info['taken_at_timestamp']),
            'comments_disabled': post_info['comments_disabled'],
            'likes': post_info['edge_media_preview_like']['count'],
            'comments': post_info['edge_media_to_parent_comment']['count'],
        }

        # Users tagged in the media.
        tagged_usernames = []
        tagged_user_full_name = []
        tagged_user_verified = []
        for edge in post_info['edge_media_to_tagged_user']['edges']:
            tagged_usernames.append(edge['node']['user']['username'])
            tagged_user_full_name.append(edge['node']['user']['full_name'])
            tagged_user_verified.append(edge['node']['user']['is_verified'])
        post_dict['tagged_usernames'] = tagged_usernames
        post_dict['tagged_user_full_name'] = tagged_user_full_name
        post_dict['tagged_user_verified'] = tagged_user_verified

        # Best effort: download the picture and read the text printed on it.
        try:
            ploot = PostLooter(post_dict['post_url'])
            ploot.download('instaLooter_images/temp/')
            img = cv2.imread('instaLooter_images/temp/' + post_info['id'] + ".jpg")
            text = pytesseract.image_to_string(img)
            post_dict['image_text'] = text
            post_dict['hashtags_img'] = re.findall(r"#(\w+)", text)
            post_dict['mentions_img'] = re.findall(r"@(\w+)", text)
            post_dict['image_text_processed'] = process_text(text)
        except Exception:
            # Any failure resets all four image fields, including ones
            # already filled in above.
            post_dict['image_text'] = ""
            post_dict['hashtags_img'] = ""
            post_dict['mentions_img'] = ""
            post_dict['image_text_processed'] = ""

        post_list.append(post_dict)
    return post_list
def application(url, path):
    """Download the post at *url* into ``media/<path>``.

    Returns whatever ``PostLooter.download`` returns.
    """
    destination = f"media/{path}"
    return PostLooter(url).download(destination)
async def update(tg_chatid, ig_profile):
    """Forward the not-yet-sent posts of an Instagram profile to a chat.

    The ids of posts already forwarded are read from the JSON file at
    ``sent_fp``. Every media of the profile whose id is not in that list is
    downloaded into an in-memory filesystem and sent to ``tg_chatid`` as a
    video, a photo or an album, with the (truncated, HTML-escaped) caption
    and a link to the original post. After each successful send the id is
    appended to the list and the file is rewritten.

    Changes against the previous version:

    * the result of ``markdown.quote_html`` is now actually used, so the
      caption is escaped before being sent with HTML parse mode;
    * the handle used to rewrite ``sent_fp`` no longer reuses the name that
      held the opened media files, so those files are the ones that get
      closed, and they are closed on every path via ``finally``.

    Args:
        tg_chatid: id of the Telegram chat to send to.
        ig_profile: Instagram profile name to check.

    Returns:
        ``False`` when the profile could not be loaded; otherwise the value
        of the ``sent_something`` flag (see the note where it is set).
    """
    write(f"\033[2K\rchecking @{ig_profile}…")
    await bot.send_chat_action(tg_chatid, types.ChatActions.TYPING)
    try:
        pl = ProfileLooter(ig_profile)
    except Exception:
        write(f"\033[2K\r\033[31munable to get profile @{ig_profile}\033[0m\n")
        print(tb.format_exc())
        return False
    with open(sent_fp, "r") as f:
        sent = json.load(f)
    sent_something = False
    for j, media in enumerate(pl.medias()):
        i = media["id"]
        sc = media["shortcode"]
        write(f"\033[2K\rchecking @{ig_profile} ({j}|{i}|{sc})")
        if i not in sent:
            write(": \033[sgetting post…")
            _pl = PostLooter(sc)
            try:
                info = _pl.get_post_info(sc)
            except Exception:
                # because the library I use can randomly throw errors while getting stuff…
                write("\033[u\033[0K\033[31munable to get post\033[0m\n")
                print(tb.format_exc())
                continue
            caption = "\n".join(
                edge["node"]["text"]
                for edge in info["edge_media_to_caption"]["edges"])
            with MemoryFS() as fs:
                # Download the media and pick the matching send function.
                if media["is_video"]:
                    await bot.send_chat_action(tg_chatid, types.ChatActions.RECORD_VIDEO)
                    _pl.download_videos(fs, media_count=1)
                    func = bot.send_video
                    fn = fs.listdir("./")[0]
                    await bot.send_chat_action(tg_chatid, types.ChatActions.UPLOAD_VIDEO)
                elif media["__typename"].lower() == "graphimage":
                    await bot.send_chat_action(tg_chatid, types.ChatActions.UPLOAD_PHOTO)
                    _pl.download_pictures(fs, media_count=1)
                    func = bot.send_photo
                    fn = fs.listdir("./")[0]
                elif media["__typename"].lower() == "graphsidecar":
                    await bot.send_chat_action(tg_chatid, types.ChatActions.UPLOAD_PHOTO)
                    _pl.download_pictures(fs)
                    # A tuple marks "album"; a single file is sent as a photo.
                    fn = tuple(fs.listdir("./"))
                    if len(fn) == 1:
                        func = bot.send_photo
                        fn = fn[0]
                    else:
                        func = bot.send_media_group
                else:
                    await bot.send_message(
                        tg_chatid,
                        f"Oh-oh. I've encountered a new post type!\nPlease tell my developer, so he can tell me what I should do with a {media}."
                    )
                    print("\n\033[31mUNKNOWN MEDIA TYPE AAAAA\033[0m", media)
                    break

                # Original author's note: telegram media captions have a
                # character limit of 200 chars & I want to have a buffer.
                if len(caption) > 100:
                    caption = caption[:100] + "[…]"
                # Escape after truncating so no HTML entity is cut in half.
                caption = markdown.quote_html(caption)
                text = f"{caption}\n→<a href=\"https://www.instagram.com/p/{sc}\">original post</a>"

                # Open the downloaded file(s); ``handles`` is what gets
                # closed afterwards, whatever happens while sending.
                if isinstance(fn, tuple):
                    write("\033[u\033[0Ksending album…")
                    handles = [fs.openbin(_fn) for _fn in fn]
                    _media = types.input_media.MediaGroup()
                    for _f in handles:
                        _media.attach_photo(_f)
                else:
                    write("\033[u\033[0Ksending file…")
                    _media = fs.openbin(fn)
                    handles = [_media]

                try:
                    if isinstance(fn, tuple):
                        # Albums cannot carry the text; reply to the last
                        # album message with it instead.
                        msg_id = (await func(tg_chatid, _media))[-1]["message_id"]
                        await bot.send_message(tg_chatid,
                                               text,
                                               reply_to_message_id=msg_id,
                                               parse_mode=types.ParseMode.HTML)
                    else:
                        await func(tg_chatid,
                                   _media,
                                   caption=text,
                                   parse_mode=types.ParseMode.HTML)
                except exceptions.BadRequest:
                    write(
                        "\033[u\033[0K\033[31mskipped\033[0m\nGot Bad Request while trying to send message.\n"
                    )
                except exceptions.RetryAfter:
                    write(
                        "\nMEEP MEEP FLOOD CONTROL - YOU'RE FLOODING TELEGRAM\nstopping sending messages & waiting for next cycle…\n"
                    )
                    break
                else:
                    sent.append(i)
                    write("\033[u\033[0Ksaving sent messages…\033[0m")
                    with open(sent_fp, "w+") as sent_file:
                        json.dump(sent, sent_file)
                    write("\033[u\033[0K\033[32msent\033[0m\n")
                finally:
                    for _f in handles:
                        _f.close()
                # NOTE(review): as reconstructed, the flag is also set when
                # the post was skipped on BadRequest - confirm intended.
                sent_something = True
            # sometimes the page has to be reloaded, which would prolong the time the checking post…
            # message would be displayed if I didn't do this
            write(f"\033[2K\rchecking @{ig_profile}…")
    return sent_something