def scrape_images(profile, num, startdate, enddate, out_dir):
    """
    Function to scrape images from Instagram using Instalooter

    Parameters:
    ----------
    profile: name of the Instagram profile.
    num: number of images to download (converted with ``int``).
    startdate: Most recent date from which you wanna start downloading.
    enddate: Date to specify the end of timeframe.
    out_dir: destination handed to ``ProfileLooter.download``.

    Returns:
    --------
    output: The path of the output directory (``out_dir``, unchanged).
    """
    looter = ProfileLooter(profile)
    # `out_dir` is the download destination; the function previously read an
    # undefined `output` name and failed with NameError before downloading.
    looter.download(
        out_dir,
        media_count=int(num),
        timeframe=(startdate, enddate),
    )
    return out_dir
class TestLogin(unittest.TestCase):
    """Exercise login, logout and an authenticated download of a looter."""

    def setUp(self):
        # One looter and one in-memory destination filesystem per test.
        self.looter = ProfileLooter(USERNAME, template="test")
        self.destfs = fs.memoryfs.MemoryFS()

    def tearDown(self):
        self.destfs.close()

    def test_login(self):
        """Logging in creates the cookie file; logging out removes it."""
        looter = self.looter

        def cookie_saved():
            # Looked up on every call so state changes are always observed.
            return looter._cachefs.exists(looter._COOKIE_FILE)

        self.assertFalse(looter.logged_in())
        with self.assertRaises(RuntimeError):
            looter.medias()
        self.assertFalse(cookie_saved())

        try:
            looter.login(USERNAME, PASSWORD)
            self.assertTrue(looter.logged_in())
            self.assertTrue(cookie_saved())
            self.assertTrue(next(looter.medias()))
        finally:
            looter.logout()
            self.assertFalse(cookie_saved())

    def test_download(self):
        """A logged-in download stores ``test.jpg`` with a JFIF marker."""
        try:
            self.looter.login(USERNAME, PASSWORD)
            self.looter.download(self.destfs)
            self.assertTrue(self.destfs.exists('test.jpg'))
            payload = self.destfs.getbytes('test.jpg')
            self.assertEqual(payload[6:10], b'JFIF')
        finally:
            self.looter.logout()
def test_issue_041(self):
    """Feature request by @liorlior.

    Allow downloading only videos.
    """
    single_day = datetime.date(2017, 3, 10)
    looter = ProfileLooter(
        "nintendo",
        videos_only=True,
        session=self.session,
    )
    # Start and end of the timeframe are the same day.
    looter.download(self.destfs, timeframe=[single_day, single_day])
    self.assertTrue(self.destfs.isfile("1467639884243493431.mp4"))
async def instagram_dl(igdl):
    """Download media from an Instagram account and upload it to the chat.

    The account name is read from the first group of ``igdl.pattern_match``.
    Media is downloaded with ``ProfileLooter`` (``media_count=5``) into
    ``TEMP_DOWNLOAD_DIRECTORY``; every file found there that does not end in
    ``.mp4`` is sent to ``igdl.chat_id`` as a document, and processed files
    are deleted afterwards. Status messages are written by editing ``igdl``.

    ``ValueError`` and ``RuntimeError`` raised while fetching are reported to
    the user as "account not found" and "account is private" respectively.
    """
    uname = igdl.pattern_match.group(1)
    input_str = TEMP_DOWNLOAD_DIRECTORY
    os.makedirs(input_str, exist_ok=True)

    try:
        await igdl.edit("`Getting info.....`")
        looter = ProfileLooter(uname)
        # Download into the configured directory itself, not into a folder
        # literally named "TEMP_DOWNLOAD_DIRECTORY"; otherwise the walk below
        # never sees the downloaded files.
        looter.download(input_str, media_count=5)
    except ValueError:
        await igdl.edit(
            f"**Account {uname} Not Found.**\nPlease enter correct username.")
        return
    except RuntimeError:
        await igdl.edit(f"**Can't Catch Media.**\nAccount {uname} is Private.")
        return

    await igdl.edit("Processing ...")

    # Collect regular files only: directories can neither be uploaded nor
    # deleted with os.remove().
    lst_of_files = []
    for root, _dirs, names in os.walk(input_str):
        lst_of_files.extend(os.path.join(root, name) for name in names)
    LOGS.info(lst_of_files)

    if not lst_of_files:
        await igdl.edit(
            "**No Media Found**\nSorry this account doesn't have any content")
        return

    await igdl.edit(
        "Found {} files. Uploading will start soon. Please wait!".format(
            len(lst_of_files)))

    for single_file in lst_of_files:
        if not os.path.exists(single_file):
            continue
        # https://stackoverflow.com/a/678242/4723940
        caption_rts = os.path.basename(single_file)
        c_time = time.time()
        if not caption_rts.lower().endswith(".mp4"):
            await igdl.client.send_file(
                igdl.chat_id,
                single_file,
                caption=f"[{uname}](https://instagram.com/{uname})",
                force_document=True,
                allow_cache=False,
                # Invoked while send_file is awaited, so c_time and
                # single_file still refer to the current iteration.
                progress_callback=lambda d, t: asyncio.get_event_loop(
                ).create_task(
                    progress(d, t, igdl, c_time, "Uploading...", single_file)))
        # NOTE(review): every processed file is removed so the temporary
        # directory does not accumulate stale media between runs; .mp4 files
        # are removed without being uploaded -- confirm this is intended.
        os.remove(single_file)
def test_issue_041(self):
    """Feature request by @liorlior.

    Allow downloading only videos.
    """
    expected_files = ["1467639884243493431.mp4"]
    single_day = datetime.date(2017, 3, 10)
    looter = ProfileLooter(
        "nintendo",
        videos_only=True,
        session=self.session,
    )

    with contexter.Contexter() as stack:
        # Replace the looter's ``pages`` with the mock for the download.
        stack << mock.patch.object(looter, 'pages', MockPages('nintendo'))
        looter.download(self.destfs, timeframe=[single_day, single_day])

    self.assertEqual(self.destfs.listdir("/"), expected_files)
def test_issue_014(self):
    """Feature request by @JFLarsen.

    Allows customizing filenames using a template following Python
    `.format()` minilanguage.
    """
    looter = ProfileLooter(
        "nintendo",
        template="{username}.{id}",
        session=self.session,
    )

    with contexter.Contexter() as stack:
        # Replace the looter's ``pages`` with the mock for the download.
        stack << mock.patch.object(looter, 'pages', MockPages('nintendo'))
        looter.download(self.destfs, media_count=5)

    # Every downloaded entry must carry the username prefix of the template.
    for entry in self.destfs.scandir("/"):
        self.assertTrue(entry.name.startswith('nintendo.'))
def test_issue_022(self):
    """
    Thanks to @kuchenmitsahne for reporting this bug.

    Checks that using ``{datetime}`` in the template does not put
    a Windows forbidden character in the filename.
    """
    # The backslash is escaped explicitly: a bare backslash followed by "|"
    # is an invalid escape sequence that newer Python versions warn about.
    # The resulting set of characters is unchanged.
    FORBIDDEN = set('<>:"/\\|?*')
    looter = ProfileLooter(
        "mysteryjets", template="{datetime}", session=self.session)
    looter.download(self.destfs, media_count=5)
    for f in self.destfs.scandir("/"):
        # An empty intersection means no forbidden character in the name.
        self.assertFalse(FORBIDDEN.intersection(f.name))
def test_issue_022(self):
    """
    Thanks to @kuchenmitsahne for reporting this bug.

    Checks that using ``{datetime}`` in the template does not put
    a Windows forbidden character in the filename.
    """
    # The backslash is escaped explicitly: a bare backslash followed by "|"
    # is an invalid escape sequence that newer Python versions warn about.
    # The resulting set of characters is unchanged.
    FORBIDDEN = set('<>:"/\\|?*')
    looter = ProfileLooter("nintendo", template="{datetime}",
                           session=self.session)
    with contexter.Contexter() as ctx:
        # Replace the looter's ``pages`` with the mock for the download.
        ctx << mock.patch.object(looter, 'pages', MockPages('nintendo'))
        looter.download(self.destfs, media_count=5)
    for f in self.destfs.scandir("/"):
        # An empty intersection means no forbidden character in the name.
        self.assertFalse(FORBIDDEN.intersection(f.name))
def test_issue_014(self):
    """Feature request by @JFLarsen.

    Allows customizing filenames using a template following Python
    `.format()` minilanguage.
    """
    looter = ProfileLooter(
        "mysteryjets",
        template="{username}.{id}",
        session=self.session,
    )
    looter.download(self.destfs, media_count=5)

    # Every downloaded entry must start with the profile name.
    for entry in self.destfs.scandir("/"):
        self.assertTrue(entry.name.startswith('mysteryjets'))
def test_issue_009(self):
    """Thanks to @kurtmaia for reporting this bug.

    Checks that adding metadata to downloaded pictures works as well.
    """
    looter = ProfileLooter(
        "fluoxetine",
        add_metadata=True,
        session=self.session,
    )
    looter.download(self.destfs, media_count=10)

    for filename in self.destfs.listdir("/"):
        raw_image = self.destfs.getbytes(filename)
        exif = piexif.load(raw_image)
        self.assertTrue(exif['Exif'])  # Date & Caption
        self.assertTrue(exif['0th'])  # Image creator
def test_issue_066(self):
    """
    Thanks to @douglasrizzo for reporting this bug.

    Check that likescount and commentscount can be used
    in filename templates without causing the program to
    crash.
    """
    looter = ProfileLooter(
        "zuck",
        get_videos=True,
        add_metadata=True,
        template='{id}-{likescount}-{commentscount}',
        session=self.session)
    looter.download(self.destfs, media_count=10)
    # Raw string with an escaped dot, so the extension separator must be a
    # literal "." rather than any character.
    pattern = r'[a-zA-Z0-9]*-[0-9]*-[0-9]*\.(jpg|mp4)'
    for image in self.destfs.listdir("/"):
        self.assertRegex(image, pattern)
def scrapeVideos(username="", password="", output_folder="", days=1):
    """Download recent videos from every account that *username* follows.

    Logs in through instaloader to list the followees of *username*, then
    runs a videos-only ``ProfileLooter`` per followee, downloading into
    *output_folder* with ``media_count=30`` and a timeframe running from
    today back *days* days. Progress is printed to stdout.
    """
    print("Starting Scraping")

    # Login or load session for loader
    loader = instaloader.Instaloader()
    loader.login(username, password)

    own_profile = instaloader.Profile.from_username(loader.context, username)
    following = own_profile.get_followees()
    print(following)

    for followee in following:
        acc = followee.username
        looter = ProfileLooter(
            acc,
            videos_only=True,
            template="{id}-{username}-{width}-{height}",
        )
        if not looter.logged_in():
            looter.login(username, password)

        print(f"Scraping From Account: {acc}")

        # Computed per account, exactly as many times as accounts are visited.
        today = datetime.date.today()
        span = dateutil.relativedelta.relativedelta(days=days)
        timeframe = (today, today - span)

        downloaded = looter.download(
            output_folder,
            media_count=30,
            timeframe=timeframe,
        )
        print(f"Downloaded {downloaded} videos successfully")
        print("")
def _profile_images(username, destination='.instagram'):
    """Download *username*'s media and list the entries of *destination*.

    Returns a list of ``pathlib.Path`` objects, one per entry matched by
    ``*`` directly inside *destination*.
    """
    # NOTE(review): `password` is not defined in this function; presumably a
    # module-level name -- verify against the rest of the file.
    if not InstaLooter._logged_in():
        InstaLooter._login(username, password)

    ProfileLooter(username).download(destination=destination, new_only=True)

    return [*pathlib.Path(destination).glob('*')]
class TestLogin(unittest.TestCase):
    """Exercise login, logout and an authenticated download of a looter."""

    @classmethod
    def setUpClass(cls):
        # Share one HTTP session and expose its User-Agent on InstaLooter.
        session = requests.Session()
        cls.session = session
        InstaLooter._user_agent = session.headers["User-Agent"]

    @classmethod
    def tearDownClass(cls):
        cls.session.close()
        del InstaLooter._user_agent

    def setUp(self):
        # One looter and one in-memory destination filesystem per test.
        self.looter = ProfileLooter(USERNAME, template="test")
        self.destfs = fs.memoryfs.MemoryFS()

    def tearDown(self):
        self.destfs.close()

    def test_login(self):
        """Logging in creates the cookie file; logging out removes it."""
        looter = self.looter

        def cookie_saved():
            # Looked up on every call so state changes are always observed.
            return looter._cachefs.exists(looter._COOKIE_FILE)

        self.assertFalse(looter.logged_in())
        with self.assertRaises(RuntimeError):
            looter.medias()
        self.assertFalse(cookie_saved())

        try:
            looter.login(USERNAME, PASSWORD)
            self.assertTrue(looter.logged_in())
            self.assertTrue(cookie_saved())
            self.assertTrue(next(looter.medias()))
        finally:
            looter.logout()
            self.assertFalse(cookie_saved())

    def test_download(self):
        """A logged-in download stores ``test.jpg`` with a JFIF marker."""
        try:
            self.looter.login(USERNAME, PASSWORD)
            self.looter.download(self.destfs)
            self.assertTrue(self.destfs.exists('test.jpg'))
            payload = self.destfs.getbytes('test.jpg')
            self.assertEqual(payload[6:10], b'JFIF')
        finally:
            self.looter.logout()
def post(self, target, number_media):
    """Download media of profile *target* into ``Pictures/<target>``.

    *number_media* is converted with ``int`` and passed as ``media_count``.
    Returns the tuple ``("ok", 201)``.
    """
    looter = ProfileLooter(target)
    destination = 'Pictures/' + target
    wanted = int(number_media)
    looter.download(destination, media_count=wanted)
    return "ok", 201
def download(username: str, image_dir: str) -> None:
    """Download the media of *username* into *image_dir*.

    Files are named with the ``insta-{datetime}-{id}`` template; an info
    message is logged before and after the download.
    """
    logging.info("Downloading...")
    ProfileLooter(
        username,
        template="insta-{datetime}-{id}",
    ).download(image_dir)
    logging.info("Done downloading.")
from instalooter.looters import ProfileLooter
import datetime
import dateutil.relativedelta

# instalooter_test: download videos posted by "daquan" within a timeframe
# running from today back 28 days.

# Instantiate a videos-only looter with a custom filename template.
looter = ProfileLooter(
    "daquan",
    videos_only=True,
    template="{id}-{username}-{width}-{height}",
)
looter.login("", "")

today = datetime.date.today()
four_weeks = dateutil.relativedelta.relativedelta(days=28)
thismonth = (today, today - four_weeks)

looter.download(
    './Memes_December_4',
    media_count=50,
    timeframe=thismonth,
)
def test_profile(self, profile, **kwargs):
    """Download from *profile* and check enough entries were stored.

    Extra keyword arguments are forwarded to ``ProfileLooter``.
    """
    wanted = self.MEDIA_COUNT
    looter = ProfileLooter(profile, session=self.session, **kwargs)
    looter.download(self.destfs, media_count=wanted)
    stored = self.destfs.listdir("/")
    self.assertGreaterEqual(len(stored), self.MEDIA_COUNT)
from instalooter.looters import ProfileLooter

instagram_account_name = "eleanor3069"

# Grab the newest Instagram posts of the account (media_count=5) into
# the ./subjectimg/ directory.
looter = ProfileLooter(instagram_account_name)
looter.download(
    './subjectimg/',
    media_count=5,
)

print("Finished downloading photos")
from instalooter.looters import ProfileLooter
from os import listdir
import pandas as pd

# Script: fetch data for the newest post of an account and report which
# files in the record directory are not listed in the "processed" CSV.

instagram_account_name = "iampaintingrobot"

# Grab newest instagram post data
photorecord_dir = './instabotimagerecords/'
# Despite the "_dir" suffix this is the path of the CSV file itself.
photorecord_csv_dir = photorecord_dir + 'procdimagelist.csv'
# NOTE(review): the directory is listed BEFORE the download below, so anything
# the download adds is not part of this list -- confirm this is intended.
total_photo_list = listdir(photorecord_dir)
# dump_only=True presumably makes the looter store post metadata only --
# TODO confirm against the instalooter documentation.
looter = ProfileLooter(instagram_account_name, dump_only=True)
looter.download(photorecord_dir, media_count=1)
print("Finished downloading photo data")
csvdf = pd.read_csv(photorecord_csv_dir)
# Keep the file names that are not a substring of any value in the CSV's
# 'name' column.
# NOTE(review): the listing also contains the CSV file itself, if it lives in
# this directory as the path above suggests.
non_matching_file_list = [
    file_name for file_name in total_photo_list
    if not any(file_name in proc_name for proc_name in csvdf['name'])
]
print("Non matching files: ")
print(non_matching_file_list)
if len(non_matching_file_list) == 1:
    # Move the file if necessary
    # NOTE(review): the body of this branch is not visible in this excerpt.
def checkInsta(domain, foldername):
    """Download the media of Instagram profile *domain* into *foldername*."""
    ProfileLooter(domain).download(foldername)