def scrape_images(profile, num, startdate, enddate, out_dir):
    """
    Function to scrape images from Instagram using Instalooter

    Parameters:
    ----------
    profile: name of the Instagram profile.
    num: number of images to download (converted with ``int``).
    startdate: Most recent date from which to start downloading.
    enddate: Date that closes the timeframe.
    out_dir: destination handed to ``ProfileLooter.download``.

    Returns:
    --------
    output: The path of the output directory (``out_dir``, unchanged).
    """
    # The destination used to be an ``output`` variable that was only ever
    # assigned in commented-out code, so every call failed with NameError
    # while ``out_dir`` went unused. Download into ``out_dir`` instead.
    looter = ProfileLooter(profile)
    looter.download(
        out_dir,
        media_count=int(num),
        timeframe=(startdate, enddate),
    )
    return out_dir
def test_issue_006(self):
    """Ensure a private profile fails fast instead of iterating forever.

    Building a looter for ``tldr`` and requesting its first media must
    raise ``RuntimeError``.
    """
    with self.assertRaises(RuntimeError):
        private_looter = ProfileLooter("tldr", session=self.session)
        media_iterator = private_looter.medias()
        next(media_iterator)
def post(self, target):
    """Record the username of every commenter on ``target``'s medias.

    For each media of the profile, the post info is fetched by shortcode
    and the owner of each comment edge is added to ``users_comment``.

    NOTE(review): ``users_comment`` is not defined in this block; it is
    presumably a set living in an enclosing scope -- confirm.

    Returns the tuple ``("ok", 201)``.
    """
    looter = ProfileLooter(target)
    for media in looter.medias():
        details = looter.get_post_info(media['shortcode'])
        comment_edges = details['edge_media_to_comment']['edges']
        for edge in comment_edges:
            users_comment.add(edge['node']['owner']['username'])
    return "ok", 201
def _profile_images(username, destination='.instagram'):
    """Download new medias of ``username`` and list the destination's entries.

    Logs in first when ``InstaLooter._logged_in()`` reports no session,
    then downloads with ``new_only=True`` into ``destination``.

    NOTE(review): ``password`` is not defined in this block; it is
    presumably provided by an enclosing scope -- confirm.

    Returns a list of ``pathlib.Path`` objects matching ``*`` directly
    under ``destination``.
    """
    if not InstaLooter._logged_in():
        InstaLooter._login(username, password)
    ProfileLooter(username).download(destination=destination, new_only=True)
    return list(pathlib.Path(destination).glob('*'))
def test_issue_012(self):
    """Feature request by @paramjitrohit.

    Medias can be restricted to a timeframe: exactly two medias are
    expected for ``slotfaceofficial`` on 2017-02-18.
    """
    target_day = datetime.date(2017, 2, 18)
    looter = ProfileLooter("slotfaceofficial", session=self.session)
    matching = list(looter.medias(timeframe=[target_day, target_day]))
    self.assertEqual(len(matching), 2)
def test_issue_041(self):
    """Feature request by @liorlior.

    With ``videos_only=True`` the download for 2017-03-10 must leave the
    expected ``.mp4`` file in the destination filesystem.
    """
    target_day = datetime.date(2017, 3, 10)
    video_looter = ProfileLooter(
        "nintendo", videos_only=True, session=self.session)
    video_looter.download(self.destfs, timeframe=[target_day, target_day])
    expected_present = self.destfs.isfile("1467639884243493431.mp4")
    self.assertTrue(expected_present)
def test_issue_006(self):
    """Ensure a private profile fails fast instead of iterating forever.

    The profile named by the ``IG_USERNAME`` environment variable is
    looted after logging out; fetching its first media must raise
    ``RuntimeError``.
    """
    with self.assertRaises(RuntimeError):
        looter = ProfileLooter(os.getenv("IG_USERNAME"), session=self.session)
        looter.logout()
        media_iterator = looter.medias()
        next(media_iterator)
def test_pr_122(self):
    """Feature implemented by @susundberg.

    A downloaded media must carry access and modification times equal to
    the media's ``taken_at_timestamp``. Checked for a single post, for a
    picture download and for a video download.
    """

    def check_times(filename, media):
        # Both timestamps of the stored file must match the media's date.
        stat = self.destfs.getdetails(filename)
        self.assertEqual(
            stat.raw["details"]["accessed"], media['taken_at_timestamp'])
        self.assertEqual(
            stat.raw["details"]["modified"], media['taken_at_timestamp'])

    looter = ProfileLooter(
        'franz_ferdinand', template='{code}', session=self.session)
    info = looter.get_post_info('BY77tSfBnRm')

    # Single post download
    single_post = PostLooter(
        'BY77tSfBnRm', session=self.session, template='{code}')
    single_post.download(self.destfs)
    check_times('BY77tSfBnRm.jpg', info)

    # Picture download: first media that is not a video
    pic = next(m for m in looter.medias() if not m['is_video'])
    looter.download_pictures(self.destfs, media_count=1)
    check_times('{}.jpg'.format(pic['shortcode']), pic)

    # Video download: first media that is a video
    vid = next(m for m in looter.medias() if m['is_video'])
    looter.download_videos(self.destfs, media_count=1)
    check_times('{}.mp4'.format(vid['shortcode']), vid)
async def instagram_dl(igdl):
    """Download up to five medias of an Instagram account and upload them.

    The account name is taken from the first group of
    ``igdl.pattern_match``. Medias are downloaded into
    ``TEMP_DOWNLOAD_DIRECTORY``. Every file found there that does not end
    in ``.mp4`` is then sent to ``igdl.chat_id`` as a document, and each
    processed file is deleted.

    ``ValueError`` and ``RuntimeError`` from the download are reported to
    the user by editing the message, and the handler returns early.
    """
    uname = igdl.pattern_match.group(1)
    input_str = TEMP_DOWNLOAD_DIRECTORY
    os.makedirs(input_str, exist_ok=True)
    try:
        await igdl.edit("`Getting info.....`")
        looter = ProfileLooter(uname)
        # Download into the configured directory. The original passed the
        # literal string 'TEMP_DOWNLOAD_DIRECTORY', so the files landed in
        # a different folder than the one scanned below.
        looter.download(input_str, media_count=5)
    except ValueError:
        await igdl.edit(
            f"**Account {uname} Not Found.**\nPlease enter correct username.")
        return
    except RuntimeError:
        await igdl.edit(f"**Can't Catch Media.**\nAccount {uname} is Private.")
        return
    await igdl.edit("Processing ...")

    # Collect regular files only: directories can neither be uploaded nor
    # deleted with os.remove, so they no longer end up in the list.
    lst_of_files = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(input_str)
        for name in names
    ]
    LOGS.info(lst_of_files)

    if not lst_of_files:
        await igdl.edit(
            "**No Media Found**\nSorry this account doesn't have any content")
        return

    await igdl.edit(
        "Found {} files. Uploading will start soon. Please wait!".format(
            len(lst_of_files)))
    for single_file in lst_of_files:
        if not os.path.exists(single_file):
            continue
        # https://stackoverflow.com/a/678242/4723940
        caption_rts = os.path.basename(single_file)
        c_time = time.time()
        if not caption_rts.lower().endswith(".mp4"):
            await igdl.client.send_file(
                igdl.chat_id,
                single_file,
                caption=f"[{uname}](https://instagram.com/{uname})",
                force_document=True,
                allow_cache=False,
                progress_callback=lambda d, t: asyncio.get_event_loop(
                ).create_task(
                    progress(d, t, igdl, c_time, "Uploading...", single_file)))
        # NOTE(review): assumes the original removed every processed file,
        # including skipped .mp4 files -- the collapsed source does not show
        # the nesting; confirm.
        os.remove(single_file)
def test_issue_012(self):
    """Feature request by @paramjitrohit.

    Medias can be restricted to a timeframe: with the looter's ``pages``
    replaced by ``MockPages('nintendo')``, exactly two medias are expected
    on 2018-03-16.
    """
    target_day = datetime.date(2018, 3, 16)
    looter = ProfileLooter("nintendo", session=self.session)
    pages_patch = mock.patch.object(looter, 'pages', MockPages('nintendo'))
    with contexter.Contexter() as ctx:
        ctx << pages_patch
        matching = list(looter.medias(timeframe=[target_day, target_day]))
    self.assertEqual(len(matching), 2)
def test_issue_194(self):
    """Feature request by @raphaelbernardino

    Looting a user that does not exist must fail with a ``ValueError``
    whose message names the missing user, not with a cryptic error.
    """
    username = "******"
    expected_message = "user not found: '{}'".format(username)
    looter = ProfileLooter(username)
    with self.assertRaises(ValueError) as ctx:
        next(looter.medias())
    self.assertEqual(str(ctx.exception), expected_message)
def test_issue_041(self):
    """Feature request by @liorlior

    With ``videos_only=True`` and mocked pages, the download for
    2017-03-10 must leave exactly one expected ``.mp4`` file at the root
    of the destination filesystem.
    """
    target_day = datetime.date(2017, 3, 10)
    video_looter = ProfileLooter(
        "nintendo", videos_only=True, session=self.session)
    pages_patch = mock.patch.object(
        video_looter, 'pages', MockPages('nintendo'))
    with contexter.Contexter() as ctx:
        ctx << pages_patch
        video_looter.download(
            self.destfs, timeframe=[target_day, target_day])
    root_listing = self.destfs.listdir("/")
    self.assertEqual(root_listing, ["1467639884243493431.mp4"])
def test_issue_014(self):
    """Feature request by @JFLarsen.

    Filenames follow a ``.format()``-style template: with
    ``"{username}.{id}"`` every downloaded entry must start with
    ``nintendo.``.
    """
    looter = ProfileLooter(
        "nintendo", template="{username}.{id}", session=self.session)
    pages_patch = mock.patch.object(looter, 'pages', MockPages('nintendo'))
    with contexter.Contexter() as ctx:
        ctx << pages_patch
        looter.download(self.destfs, media_count=5)
    for entry in self.destfs.scandir("/"):
        has_prefix = entry.name.startswith('nintendo.')
        self.assertTrue(has_prefix)
def test_issue_009(self):
    """Thanks to @kurtmaia for reporting this bug.

    With ``add_metadata=True`` every downloaded file must carry non-empty
    ``Exif`` and ``0th`` EXIF sections.
    """
    looter = ProfileLooter(
        "fluoxetine", add_metadata=True, session=self.session)
    looter.download(self.destfs, media_count=10)
    for filename in self.destfs.listdir("/"):
        raw_image = self.destfs.getbytes(filename)
        metadata = piexif.load(raw_image)
        # Date & Caption
        self.assertTrue(metadata['Exif'])
        # Image creator
        self.assertTrue(metadata['0th'])
def test_issue_022(self):
    """Thanks to @kuchenmitsahne for reporting this bug.

    Checks that using ``{datetime}`` in the template does not put a
    Windows forbidden character in the filename.
    """
    # The backslash is escaped explicitly: the former '\|' was an invalid
    # escape sequence (a warning today, an error in a future Python). The
    # resulting character set is unchanged.
    FORBIDDEN = set('<>:"/\\|?*')
    looter = ProfileLooter(
        "mysteryjets", template="{datetime}", session=self.session)
    looter.download(self.destfs, media_count=5)
    for f in self.destfs.scandir("/"):
        self.assertFalse(FORBIDDEN.intersection(f.name))
def test_issue_022(self):
    """Thanks to @kuchenmitsahne for reporting this bug.

    Checks that using ``{datetime}`` in the template does not put a
    Windows forbidden character in the filename. The looter's ``pages``
    are replaced by ``MockPages('nintendo')`` during the download.
    """
    # The backslash is escaped explicitly: the former '\|' was an invalid
    # escape sequence (a warning today, an error in a future Python). The
    # resulting character set is unchanged.
    FORBIDDEN = set('<>:"/\\|?*')
    looter = ProfileLooter(
        "nintendo", template="{datetime}", session=self.session)
    with contexter.Contexter() as ctx:
        ctx << mock.patch.object(looter, 'pages', MockPages('nintendo'))
        looter.download(self.destfs, media_count=5)
    for f in self.destfs.scandir("/"):
        self.assertFalse(FORBIDDEN.intersection(f.name))
def test_issue_014(self):
    """Feature request by @JFLarsen.

    Filenames follow a ``.format()``-style template: with
    ``"{username}.{id}"`` every downloaded entry must start with
    ``mysteryjets``.
    """
    templated_looter = ProfileLooter(
        "mysteryjets", template="{username}.{id}", session=self.session)
    templated_looter.download(self.destfs, media_count=5)
    for entry in self.destfs.scandir("/"):
        has_prefix = entry.name.startswith('mysteryjets')
        self.assertTrue(has_prefix)
def get_media_links():
    """Return up to three media descriptors, refreshing the cache periodically.

    Every call increments ``InstagramScraper.num_hits``. The profile is
    queried again when the counter modulo 20 equals 1; all other calls
    return ``InstagramScraper.cache`` unchanged.

    Returns a list of dicts with the keys ``src``, ``is_video`` and
    ``shortcode``.
    """
    from itertools import islice  # local import keeps the block self-contained

    InstagramScraper.num_hits += 1
    if InstagramScraper.num_hits % 20 != 1:
        return InstagramScraper.cache

    # Restart the counter at 1, not 0. Resetting to 0 made the very next
    # call see 1 again, so every call refetched and the cache was never
    # actually served.
    InstagramScraper.num_hits = 1

    looter = ProfileLooter('embracingtheenemy')
    # Only the first three medias are kept, so stop iterating after three
    # instead of walking the whole profile and slicing afterwards.
    InstagramScraper.cache = [
        {
            'src': media['thumbnail_src'],
            'is_video': media['is_video'],
            'shortcode': media['shortcode'],
        }
        for media in islice(looter.medias(), 3)
    ]
    return InstagramScraper.cache
def test_issue_066(self):
    """Thanks to @douglasrizzo for reporting this bug.

    ``{likescount}`` and ``{commentscount}`` must be usable in the
    filename template: every downloaded name has to match the expected
    ``id-likes-comments`` pattern.
    """
    name_pattern = '[a-zA-Z0-9]*-[0-9]*-[0-9]*.(jpg|mp4)'
    looter = ProfileLooter(
        "zuck",
        get_videos=True,
        add_metadata=True,
        template='{id}-{likescount}-{commentscount}',
        session=self.session,
    )
    looter.download(self.destfs, media_count=10)
    for filename in self.destfs.listdir("/"):
        self.assertRegex(filename, name_pattern)
def test_issue_084(self):
    """Thanks to @raphaelbernardino for reporting this bug.

    A private profile with less than a page of pictures must still emit
    the "profile is private" warning; the first recorded warning is
    compared against the expected text.
    """
    expected = u"Profile raphaelbernardino is private, retry after logging in."
    with warnings.catch_warnings(record=True) as registry:
        warnings.simplefilter('always')
        looter = ProfileLooter("raphaelbernardino", session=self.session)
        list(looter.medias())
    first_message = six.text_type(registry[0].message)
    self.assertEqual(first_message, expected)
def test_issue_084(self):
    """Thanks to @raphaelbernardino for reporting this bug.

    A private profile with less than a page of pictures must make
    ``medias()`` raise ``RuntimeError``.
    """
    looter = ProfileLooter("raphaelbernardino", session=self.session)
    with self.assertRaises(RuntimeError):
        looter.medias()
def scrape(self, username: str):
    """Dump every media of ``username`` to JSON files under ``self.scrape_dir``.

    Each media is written to ``<scrape_dir>/<username>/temp/`` as
    ``{id}_{shortcode}.json``; once the iteration ends, the full list is
    written to ``<scrape_dir>/<username>/<username>.json``.

    Any ``Exception`` is caught and reported through the return value
    instead of being raised.

    Returns a tuple ``(medias, status, error)``: the medias collected so
    far, ``True``/``False`` for success, and the error text (empty on
    success).
    """
    collected = []
    try:
        print("Username: `{}`".format(username))

        user_dir = self.scrape_dir.joinpath(username)
        user_dir.mkdir(parents=True, exist_ok=True)
        temp_dir = user_dir.joinpath("temp")
        temp_dir.mkdir(parents=True, exist_ok=True)
        print("Output Dir: `{}`".format(user_dir.absolute()))

        medias = ProfileLooter(username).medias()
        total = medias.__length_hint__()
        print("Total Media Count: {}".format(total))

        for position, media in enumerate(medias, start=1):
            print("Scraped Count: {}/{}".format(position, total), end="\r")
            collected.append(media)
            media_path = temp_dir.joinpath(
                "{id}_{shortcode}.json".format_map(media))
            with open(media_path, "w", encoding='utf-8') as handle:
                json.dump(media, handle, indent=2)

        summary_path = user_dir.joinpath("{}.json".format(username))
        with open(summary_path, "w", encoding="utf-8") as handle:
            json.dump(collected, handle, indent=2)

        status, error = True, ""
    except Exception as err:
        status, error = False, str(err)
    finally:
        # End the carriage-return progress line and flush it out.
        print()
        stdout.flush()
    return (collected, status, error)
def scrapeVideos(username="", password="", output_folder="", days=1):
    """Download recent videos from every account ``username`` follows.

    Logs in through ``instaloader`` to list the followees. For each one,
    a video-only ``ProfileLooter`` downloads at most 30 medias from the
    last ``days`` days into ``output_folder``. Progress is printed.
    """
    print("Starting Scraping")

    # Login or load session for loader
    loader = instaloader.Instaloader()
    loader.login(username, password)

    own_profile = instaloader.Profile.from_username(loader.context, username)
    following = own_profile.get_followees()
    print(following)

    for followee in following:
        acc = followee.username
        looter = ProfileLooter(
            acc,
            videos_only=True,
            template="{id}-{username}-{width}-{height}",
        )
        if not looter.logged_in():
            looter.login(username, password)
        print("Scraping From Account: " + acc)

        # Timeframe runs from today back to ``days`` days ago.
        today = datetime.date.today()
        oldest = today - dateutil.relativedelta.relativedelta(days=days)
        numDowloaded = looter.download(
            output_folder, media_count=30, timeframe=(today, oldest))
        print("Downloaded " + str(numDowloaded) + " videos successfully")
        print("")
def update(event, context):
    """Classify new posts and post the top image to the channel if needed.

    Medias of ``davesfreshpasta`` between now and the most recent stored
    image date are handed to ``classify_posts``. The top image for the
    configured confidence is then posted when it has not been posted yet,
    or when reposting is enabled.

    Environment variables:
        REPOST_SOUP: reposting is enabled unless the value is unset, empty
            or one of ``0``, ``false``, ``no``, ``off`` (case-insensitive).
        CONFIDENCE: threshold passed to ``top_soup`` as a float
            (default 0.8).

    ``event`` and ``context`` are accepted but not used.
    """
    start_time = datetime.now()
    print("Starting soup")
    queries = Queries()
    print("configuring looter")
    # https://github.com/althonos/InstaLooter/issues/173
    ProfileLooter._cachefs = fs.open_fs("osfs:///tmp/")
    looter = ProfileLooter("davesfreshpasta")
    print("configured looter")
    print("finished setup")

    last_date = queries.most_recent_image_date()
    timeframe = (start_time.date(), last_date)
    print(f"last post date: {last_date}. Timeframe is {timeframe}")
    posts = looter.medias(timeframe=timeframe)
    if posts:
        classify_posts(posts)

    # Environment values are strings: "false" or "0" used to count as
    # truthy, and a configured CONFIDENCE reached top_soup as a string
    # while the default was a float. Normalise both here.
    repost_raw = os.environ.get("REPOST_SOUP", "")
    repost_soup = repost_raw.strip().lower() not in ("", "0", "false", "no", "off")
    confidence = float(os.environ.get("CONFIDENCE", 0.8))

    # TODO: move posting into post iteration loop
    top_image = queries.top_soup(confidence)
    if (not top_image.posted) or repost_soup:
        messenger = Messenger()
        messenger.post_message_to_channel(top_image)
    queries.commit()  # messenger will mark image as posted
    elapsed_time = datetime.now() - start_time
    print(f"Finished soup. Elapsed time {elapsed_time}")
def test_issue_015(self):
    """Feature request by @MohamedIM.

    A video already present in the destination must not be downloaded
    again: its ``accessed`` detail has to be unchanged after a second
    ``download_videos`` call.
    """
    looter = ProfileLooter("instagram", session=self.session)
    looter.download_videos(self.destfs, media_count=1)

    video_file = next(self.destfs.filterdir("/", ["*.mp4"]))
    accessed_before = self.destfs.getdetails(video_file.name).accessed

    looter.download_videos(self.destfs, media_count=1)
    accessed_after = self.destfs.getdetails(video_file.name).accessed
    self.assertEqual(accessed_before, accessed_after)
def test_issue_015(self):
    """Feature request by @MohamedIM.

    A video already present in the destination must not be downloaded
    again: with mocked pages, its ``accessed`` detail has to be unchanged
    after a second ``download_videos`` call.
    """
    looter = ProfileLooter("nintendo", session=self.session)
    pages_patch = mock.patch.object(looter, 'pages', MockPages('nintendo'))
    with contexter.Contexter() as ctx:
        ctx << pages_patch
        looter.download_videos(self.destfs, media_count=1)

        video_file = next(self.destfs.filterdir("/", ["*.mp4"]))
        accessed_before = self.destfs.getdetails(video_file.name).accessed

        looter.download_videos(self.destfs, media_count=1)
        accessed_after = self.destfs.getdetails(video_file.name).accessed
        self.assertEqual(accessed_before, accessed_after)
def download(username: str, image_dir: str) -> None:
    """Download the medias of ``username`` into ``image_dir``.

    Files are named with the ``insta-{datetime}-{id}`` template; the start
    and the end of the download are logged at INFO level.
    """
    logging.info("Downloading...")
    ProfileLooter(username, template="insta-{datetime}-{id}").download(image_dir)
    logging.info("Done downloading.")
def setUp(self):
    """Create an in-memory destination and a looter for ``USERNAME``."""
    self.destfs = fs.memoryfs.MemoryFS()
    self.looter = ProfileLooter(USERNAME, template="test")
class TestLogin(unittest.TestCase):
    """Login, logout and authenticated download checks for ``ProfileLooter``."""

    def setUp(self):
        """Create an in-memory destination and a looter for ``USERNAME``."""
        self.destfs = fs.memoryfs.MemoryFS()
        self.looter = ProfileLooter(USERNAME, template="test")

    def tearDown(self):
        """Release the in-memory filesystem."""
        self.destfs.close()

    def _cookie_exists(self):
        # Whether the looter's cookie file is present in its cache filesystem.
        return self.looter._cachefs.exists(self.looter._COOKIE_FILE)

    def test_login(self):
        """Logging in creates the cookie file and logging out removes it."""
        self.assertFalse(self.looter.logged_in())
        with self.assertRaises(RuntimeError):
            self.looter.medias()
        self.assertFalse(self._cookie_exists())

        try:
            self.looter.login(USERNAME, PASSWORD)
            self.assertTrue(self.looter.logged_in())
            self.assertTrue(self._cookie_exists())
            first_media = next(self.looter.medias())
            self.assertTrue(first_media)
        finally:
            self.looter.logout()
            self.assertFalse(self._cookie_exists())

    def test_download(self):
        """A logged-in download stores ``test.jpg`` with a JFIF marker."""
        try:
            self.looter.login(USERNAME, PASSWORD)
            self.looter.download(self.destfs)
            self.assertTrue(self.destfs.exists('test.jpg'))
            marker = self.destfs.getbytes('test.jpg')[6:10]
            self.assertEqual(marker, b'JFIF')
        finally:
            self.looter.logout()
def test_profile(self, profile, **kwargs):
    """Download ``MEDIA_COUNT`` medias of ``profile`` and check the file count.

    Extra keyword arguments are forwarded to ``ProfileLooter``. At least
    ``self.MEDIA_COUNT`` entries must exist at the root of the destination
    filesystem afterwards.
    """
    looter = ProfileLooter(profile, session=self.session, **kwargs)
    looter.download(self.destfs, media_count=self.MEDIA_COUNT)
    downloaded = self.destfs.listdir("/")
    self.assertGreaterEqual(len(downloaded), self.MEDIA_COUNT)