Exemplo n.º 1
0
def scrape_images(profile, num, startdate, enddate, out_dir):
    """
    Scrape images from an Instagram profile using Instalooter.

    Parameters:
    ----------
        profile:    name of the Instagram profile.
        num:        number of images to download (converted with ``int``).
        startdate:  most recent date from which to start downloading.
        enddate:    date marking the end of the timeframe.
        out_dir:    destination handed to ``ProfileLooter.download``.

    Returns:
    --------
        output:     the path of the output directory (``out_dir``).

    """
    # The previous code referenced an ``output`` name whose assignment had been
    # commented out, raising NameError; the caller-supplied ``out_dir`` is the
    # destination.
    output = out_dir
    looter = ProfileLooter(profile)
    looter.download(output, media_count=int(num), timeframe=(startdate, enddate))

    return output
Exemplo n.º 2
0
 def test_issue_006(self):
     """
     A private profile must make the media iterator fail fast with
     ``RuntimeError`` instead of iterating forever.
     """
     with self.assertRaises(RuntimeError):
         medias = ProfileLooter("tldr", session=self.session).medias()
         next(medias)
Exemplo n.º 3
0
    def post(self, target):
        """Collect the usernames of commenters on every media of ``target``.

        For each media yielded by ``ProfileLooter(target).medias()`` the post
        info is fetched and the username of each comment owner is added to
        ``users_comment``.

        NOTE(review): ``users_comment`` is not defined in this block; it is
        presumably a module-level set -- confirm.

        Returns:
            The tuple ``("ok", 201)``.
        """
        looter = ProfileLooter(target)

        for media in looter.medias():
            post_info = looter.get_post_info(media['shortcode'])
            for comment in post_info['edge_media_to_comment']['edges']:
                users_comment.add(comment['node']['owner']['username'])

        # Respond once the loop is finished: returning from inside the loop
        # stopped after the first media and returned None for an empty profile.
        return "ok", 201
Exemplo n.º 4
0
def _profile_images(username, destination='.instagram'):
    """Download new media of ``username`` and list the destination contents.

    Parameters:
        username:    Instagram profile name; also used as the login name.
        destination: directory passed to ``ProfileLooter.download`` and then
                     globbed for its entries.

    Returns:
        A list of ``pathlib.Path`` objects, one per entry directly inside
        ``destination``.
    """
    if not InstaLooter._logged_in():
        # NOTE(review): ``password`` is not defined in this block; presumably
        # a module-level name -- confirm.
        InstaLooter._login(username, password)

    looter = ProfileLooter(username)
    # The return value of download() was stored in an unused local; dropped.
    looter.download(destination=destination, new_only=True)

    return list(pathlib.Path(destination).glob('*'))
Exemplo n.º 5
0
    def test_issue_012(self):
        """
        Feature request by @paramjitrohit.

        Medias can be restricted to a timeframe: exactly two are expected
        for ``slotfaceofficial`` when both bounds are 2017-02-18.
        """
        target_day = datetime.date(2017, 2, 18)
        looter = ProfileLooter("slotfaceofficial", session=self.session)
        window = [target_day, target_day]
        matching = [media for media in looter.medias(timeframe=window)]
        self.assertEqual(len(matching), 2)
Exemplo n.º 6
0
    def test_issue_041(self):
        """
        Feature request by @liorlior

        With ``videos_only=True`` the download for 2017-03-10 must produce
        the expected ``.mp4`` file in the destination filesystem.
        """
        expected_video = "1467639884243493431.mp4"
        looter = ProfileLooter("nintendo", videos_only=True, session=self.session)
        release_day = datetime.date(2017, 3, 10)
        window = [release_day, release_day]
        looter.download(self.destfs, timeframe=window)
        self.assertTrue(self.destfs.isfile(expected_video))
Exemplo n.º 7
0
 def test_issue_006(self):
     """
     After logging out, asking for the first media of the profile named by
     the ``IG_USERNAME`` environment variable must raise ``RuntimeError``
     (instalooter must not iterate forever on a private profile).
     """
     with self.assertRaises(RuntimeError):
         looter = ProfileLooter(os.getenv("IG_USERNAME"), session=self.session)
         looter.logout()
         medias = looter.medias()
         next(medias)
Exemplo n.º 8
0
    def test_pr_122(self):
        """
        Feature implemented by @susundberg.

        The access and modification times of a downloaded media must equal
        the ``taken_at_timestamp`` of that media.
        """

        def check_times(filename, media):
            # Both timestamps of the stored file must match the media's date.
            details = self.destfs.getdetails(filename).raw["details"]
            self.assertEqual(details["accessed"], media['taken_at_timestamp'])
            self.assertEqual(details["modified"], media['taken_at_timestamp'])

        looter = ProfileLooter(
            'franz_ferdinand', template='{code}', session=self.session)
        info = looter.get_post_info('BY77tSfBnRm')

        # Single post downloaded through PostLooter
        post_looter = PostLooter(
            'BY77tSfBnRm', session=self.session, template='{code}')
        post_looter.download(self.destfs)
        check_times('BY77tSfBnRm.jpg', info)

        # First picture of the profile, downloaded with download_pictures
        first_picture = next(
            media for media in looter.medias() if not media['is_video'])
        looter.download_pictures(self.destfs, media_count=1)
        check_times('{}.jpg'.format(first_picture['shortcode']), first_picture)

        # First video of the profile, downloaded with download_videos
        first_video = next(
            media for media in looter.medias() if media['is_video'])
        looter.download_videos(self.destfs, media_count=1)
        check_times('{}.mp4'.format(first_video['shortcode']), first_video)
Exemplo n.º 9
0
async def instagram_dl(igdl):
    """Download photos from an Instagram account and upload them to the chat.

    The account name is taken from the first group of ``igdl.pattern_match``.
    Up to five medias are downloaded into ``TEMP_DOWNLOAD_DIRECTORY``; every
    entry found there whose name does not end in ``.mp4`` is sent to
    ``igdl.chat_id`` as a document and then removed from disk.

    The status message is edited to report progress. ``ValueError`` from the
    download is reported as "account not found" and ``RuntimeError`` as
    "account is private"; in both cases the function returns early.
    """
    uname = igdl.pattern_match.group(1)
    input_str = TEMP_DOWNLOAD_DIRECTORY
    os.makedirs(input_str, exist_ok=True)
    try:
        await igdl.edit("`Getting info.....`")
        looter = ProfileLooter(uname)
        # Download into the configured directory. The previous code passed
        # the literal string 'TEMP_DOWNLOAD_DIRECTORY', so the files landed in
        # a folder of that name and the walk below never saw them.
        looter.download(input_str, media_count=5)

    except ValueError:
        await igdl.edit(
            f"**Account {uname} Not Found.**\nPlease enter correct username.")
        return

    except RuntimeError:
        await igdl.edit(f"**Can't Catch Media.**\nAccount {uname} is Private.")
        return

    await igdl.edit("Processing ...")
    lst_of_files = []
    for r, d, f in os.walk(input_str):
        for file in f:
            lst_of_files.append(os.path.join(r, file))
        for file in d:
            lst_of_files.append(os.path.join(r, file))
    LOGS.info(lst_of_files)

    if not lst_of_files:
        await igdl.edit(
            "**No Media Found**\nSorry this account doesn't have any content")
        return

    await igdl.edit(
        "Found {} files. Uploading will start soon. Please wait!".format(
            len(lst_of_files)))
    for single_file in lst_of_files:
        if not os.path.exists(single_file):
            continue
        # https://stackoverflow.com/a/678242/4723940
        caption_rts = os.path.basename(single_file)
        c_time = time.time()
        if not caption_rts.lower().endswith(".mp4"):
            await igdl.client.send_file(
                igdl.chat_id,
                single_file,
                caption=f"[{uname}](https://instagram.com/{uname})",
                force_document=True,
                allow_cache=False,
                progress_callback=lambda d, t: asyncio.get_event_loop(
                ).create_task(
                    progress(d, t, igdl, c_time, "Uploading...",
                             single_file)))
            os.remove(single_file)
Exemplo n.º 10
0
    def test_issue_012(self):
        """Feature request by @paramjitrohit.

        With the looter's ``pages`` patched by ``MockPages('nintendo')``,
        restricting medias to 2018-03-16 must yield exactly two entries.
        """
        target_day = datetime.date(2018, 3, 16)
        looter = ProfileLooter("nintendo", session=self.session)
        window = [target_day, target_day]
        with contexter.Contexter() as ctx:
            pages_patch = mock.patch.object(looter, 'pages', MockPages('nintendo'))
            ctx << pages_patch
            matching = [media for media in looter.medias(timeframe=window)]
        self.assertEqual(len(matching), 2)
Exemplo n.º 11
0
    def test_issue_194(self):
        """Feature request by @raphaelbernardino

        Requesting medias of a non-existing user must raise ``ValueError``
        carrying a meaningful "user not found" message.
        """
        username = "******"
        expected_message = "user not found: '{}'".format(username)
        looter = ProfileLooter(username)
        with self.assertRaises(ValueError) as raised:
            next(looter.medias())
        self.assertEqual(str(raised.exception), expected_message)
Exemplo n.º 12
0
    def test_issue_041(self):
        """Feature request by @liorlior

        With ``videos_only=True`` and mocked pages, the download for
        2017-03-10 must leave exactly one ``.mp4`` file in the destination.
        """
        expected_listing = ["1467639884243493431.mp4"]
        looter = ProfileLooter("nintendo", videos_only=True, session=self.session)
        release_day = datetime.date(2017, 3, 10)
        window = [release_day, release_day]
        with contexter.Contexter() as ctx:
            pages_patch = mock.patch.object(looter, 'pages', MockPages('nintendo'))
            ctx << pages_patch
            looter.download(self.destfs, timeframe=window)
        self.assertEqual(self.destfs.listdir("/"), expected_listing)
Exemplo n.º 13
0
    def test_issue_014(self):
        """Feature request by @JFLarsen.

        Filenames follow the ``template`` given in Python `.format()`
        minilanguage: with ``{username}.{id}`` every downloaded file must
        start with ``nintendo.``.
        """
        looter = ProfileLooter(
            "nintendo", template="{username}.{id}", session=self.session)
        with contexter.Contexter() as ctx:
            pages_patch = mock.patch.object(looter, 'pages', MockPages('nintendo'))
            ctx << pages_patch
            looter.download(self.destfs, media_count=5)
        for entry in self.destfs.scandir("/"):
            has_prefix = entry.name.startswith('nintendo.')
            self.assertTrue(has_prefix)
Exemplo n.º 14
0
    def test_issue_009(self):
        """
        Thanks to @kurtmaia for reporting this bug.

        With ``add_metadata=True`` every downloaded file must carry
        non-empty ``Exif`` and ``0th`` EXIF sections.
        """
        looter = ProfileLooter("fluoxetine", add_metadata=True, session=self.session)
        looter.download(self.destfs, media_count=10)

        # 'Exif' holds date & caption, '0th' holds the image creator.
        required_sections = ('Exif', '0th')
        for filename in self.destfs.listdir("/"):
            exif = piexif.load(self.destfs.getbytes(filename))
            for section in required_sections:
                self.assertTrue(exif[section])
Exemplo n.º 15
0
    def test_issue_022(self):
        """
        Thanks to @kuchenmitsahne for reporting this bug.

        Checks that using ``{datetime}`` in the template does not put
        a Windows forbidden character in the filename.
        """
        # Raw string: the previous literal contained the invalid escape
        # sequence ``\|``, which newer Python versions warn about. The
        # resulting character set is unchanged.
        FORBIDDEN = set(r'<>:"/\|?*')

        looter = ProfileLooter(
            "mysteryjets", template="{datetime}", session=self.session)
        looter.download(self.destfs, media_count=5)
        for f in self.destfs.scandir("/"):
            self.assertFalse(FORBIDDEN.intersection(f.name))
Exemplo n.º 16
0
    def test_issue_022(self):
        """
        Thanks to @kuchenmitsahne for reporting this bug.

        Checks that using ``{datetime}`` in the template does not put
        a Windows forbidden character in the filename.
        """
        # Raw string: the previous literal contained the invalid escape
        # sequence ``\|``, which newer Python versions warn about. The
        # resulting character set is unchanged.
        FORBIDDEN = set(r'<>:"/\|?*')
        looter = ProfileLooter("nintendo", template="{datetime}", session=self.session)
        with contexter.Contexter() as ctx:
            ctx << mock.patch.object(looter, 'pages', MockPages('nintendo'))
            looter.download(self.destfs, media_count=5)
        for f in self.destfs.scandir("/"):
            self.assertFalse(FORBIDDEN.intersection(f.name))
Exemplo n.º 17
0
    def test_issue_014(self):
        """
        Feature request by @JFLarsen.

        Filenames follow the ``template`` given in Python `.format()`
        minilanguage: with ``{username}.{id}`` every downloaded file must
        start with ``mysteryjets``.
        """
        profile_looter = ProfileLooter(
            "mysteryjets", template="{username}.{id}", session=self.session)
        profile_looter.download(self.destfs, media_count=5)

        for entry in self.destfs.scandir("/"):
            has_prefix = entry.name.startswith('mysteryjets')
            self.assertTrue(has_prefix)
Exemplo n.º 18
0
    def get_media_links():
        """Return descriptors of up to three medias, refreshed every 20 calls.

        Each descriptor is a dict with the keys ``src`` (the media's
        ``thumbnail_src``), ``is_video`` and ``shortcode``. The list is stored
        in ``InstagramScraper.cache`` and served from there between refreshes.

        NOTE(review): assumes ``InstagramScraper.num_hits`` starts at 0 so
        that the very first call populates the cache -- confirm.
        """
        from itertools import islice

        InstagramScraper.num_hits += 1
        # Refresh on calls 1, 21, 41, ... The previous code reset the counter
        # to 0 on each refresh, so the next call saw 1 again and the cache
        # was never served.
        if InstagramScraper.num_hits % 20 != 1:
            return InstagramScraper.cache

        looter = ProfileLooter('embracingtheenemy')
        # Only three medias are kept, so stop pulling from the iterator after
        # three instead of materialising the whole feed first.
        InstagramScraper.cache = [{
            'src': media['thumbnail_src'],
            'is_video': media['is_video'],
            'shortcode': media['shortcode']
        } for media in islice(looter.medias(), 3)]
        return InstagramScraper.cache
Exemplo n.º 19
0
    def test_issue_066(self):
        """
        Thanks to @douglasrizzo for reporting this bug.

        Check that likescount and commentscount can be used
        in filename templates without causing the program to
        crash.
        """
        looter = ProfileLooter(
            "zuck", get_videos=True, add_metadata=True,
            template='{id}-{likescount}-{commentscount}',
            session=self.session)
        looter.download(self.destfs, media_count=10)
        # Raw string with an escaped dot: the unescaped ``.`` accepted any
        # character in place of the extension separator.
        filename_pattern = r'[a-zA-Z0-9]*-[0-9]*-[0-9]*\.(jpg|mp4)'
        for image in self.destfs.listdir("/"):
            self.assertRegex(image, filename_pattern)
Exemplo n.º 20
0
    def test_issue_084(self):
        """
        Thanks to @raphaelbernardino for reporting this bug.

        Private profiles with few pictures (less than a page worth) must
        still emit the "profile is private" warning as the first recorded
        warning.
        """
        expected_message = (
            u"Profile raphaelbernardino is private, retry after logging in."
        )

        with warnings.catch_warnings(record=True) as recorded:
            warnings.simplefilter('always')
            looter = ProfileLooter("raphaelbernardino", session=self.session)
            # Exhaust the iterator so that any warning gets emitted.
            list(looter.medias())

        first_warning = recorded[0]
        self.assertEqual(six.text_type(first_warning.message), expected_message)
Exemplo n.º 21
0
    def test_issue_084(self):
        """Thanks to @raphaelbernardino for reporting this bug.

        Private profiles with few pictures (less than a page worth) must
        raise the private error (``RuntimeError``) when ``medias`` is called.
        """
        looter = ProfileLooter("raphaelbernardino", session=self.session)
        with self.assertRaises(RuntimeError):
            looter.medias()
Exemplo n.º 22
0
    def scrape(self, username: str):
        """Dump the media metadata of ``username`` to JSON files.

        Creates ``<scrape_dir>/<username>/temp``; writes one
        ``{id}_{shortcode}.json`` file per media into ``temp`` and a combined
        ``<username>.json`` into the user directory. Progress is printed to
        standard output.

        Returns:
            A tuple ``(all_medias, status, error)``: the list of medias
            collected so far, ``True``/``False`` for success/failure, and the
            error message (empty string on success).

        Any ``Exception`` is reported through ``status``/``error`` rather
        than raised. Other ``BaseException`` subclasses (e.g.
        ``KeyboardInterrupt``) propagate.
        """
        all_medias = list()

        try:
            print("Username: `{}`".format(username))

            user_dir = self.scrape_dir.joinpath(username)
            user_dir.mkdir(parents=True, exist_ok=True)
            temp_dir = user_dir.joinpath("temp")
            temp_dir.mkdir(parents=True, exist_ok=True)

            print("Output Dir: `{}`".format(user_dir.absolute()))

            medias = ProfileLooter(username).medias()
            total = medias.__length_hint__()
            print("Total Media Count: {}".format(total))

            for count, media in enumerate(medias, start=1):
                # "\r" keeps the counter on a single, continuously updated line.
                print("Scraped Count: {}/{}".format(count, total), end="\r")
                all_medias.append(media)
                temp_filename = "{id}_{shortcode}.json".format_map(media)
                temp_filepath = temp_dir.joinpath(temp_filename)
                with open(temp_filepath, "w", encoding='utf-8') as file:
                    json.dump(media, file, indent=2)

            all_medias_filename = "{}.json".format(username)
            all_medias_filepath = user_dir.joinpath(all_medias_filename)
            with open(all_medias_filepath, "w", encoding="utf-8") as file:
                json.dump(all_medias, file, indent=2)

            status = True
            error = ""
        except Exception as err:
            status = False
            error = str(err)
        finally:
            # Terminate the "\r" progress line whatever happened.
            print()
            stdout.flush()

        # Returning from inside ``finally`` discarded every in-flight
        # exception, including KeyboardInterrupt and SystemExit; return here
        # instead.
        return (all_medias, status, error)
def scrapeVideos(username = "",
                 password = "",
                 output_folder = "",
                 days = 1):
    """Download recent videos from every account followed by ``username``.

    Logs in through instaloader to list the followees of ``username``, then
    for each followee uses a videos-only ``ProfileLooter`` to download up to
    30 medias into ``output_folder``, restricted to the timeframe running
    from today back ``days`` days. Progress is printed to standard output.
    """
    print("Starting Scraping")

    loader = instaloader.Instaloader()

    # Authenticate the loader, then resolve the accounts being followed
    loader.login(username, password)
    own_profile = instaloader.Profile.from_username(loader.context, username)
    following = own_profile.get_followees()
    print(following)

    filename_template = "{id}-{username}-{width}-{height}"
    for followee in following:
        acc = followee.username
        looter = ProfileLooter(acc, videos_only=True, template=filename_template)
        if not looter.logged_in():
            looter.login(username, password)
        print("Scraping From Account: " + acc)

        newest = datetime.date.today()
        oldest = newest - dateutil.relativedelta.relativedelta(days=days)
        downloaded = looter.download(
            output_folder, media_count=30, timeframe=(newest, oldest))
        print("Downloaded " + str(downloaded) + " videos successfully")
        print("")
Exemplo n.º 24
0
def update(event, context):
    """Classify new posts of ``davesfreshpasta`` and post the top image.

    Fetches the medias published between the most recent stored image date
    and now, hands them to ``classify_posts``, then asks ``Queries`` for the
    top image and posts it through ``Messenger`` unless it was already posted
    (or reposting is enabled).

    Environment variables:
        REPOST_SOUP: when set to any non-empty string, the top image is
                     posted even if it is already marked as posted.
        CONFIDENCE:  threshold passed to ``Queries.top_soup``; defaults
                     to 0.8.

    ``event`` and ``context`` are accepted but not used here.
    """
    start_time = datetime.now()

    print("Starting soup")

    queries = Queries()

    print("configuring looter")
    # https://github.com/althonos/InstaLooter/issues/173
    ProfileLooter._cachefs = fs.open_fs("osfs:///tmp/")
    looter = ProfileLooter("davesfreshpasta")
    print("configured looter")

    print("finished setup")

    last_date = queries.most_recent_image_date()
    timeframe = (start_time.date(), last_date)
    print(f"last post date: {last_date}. Timeframe is {timeframe}")

    posts = looter.medias(timeframe=timeframe)

    # NOTE(review): if ``medias`` returns an iterator object this check is
    # always true -- confirm.
    if posts:
        classify_posts(posts)

    repost_soup = os.environ.get("REPOST_SOUP", default=False)
    # Environment values are strings; convert so ``top_soup`` always receives
    # a float, matching the type of the default.
    confidence = float(os.environ.get("CONFIDENCE", default=0.8))

    # TODO: move posting into post iteration loop
    top_image = queries.top_soup(confidence)

    if (not top_image.posted) or repost_soup:
        messenger = Messenger()
        messenger.post_message_to_channel(top_image)
        queries.commit()  # messenger will mark image as posted

    elapsed_time = datetime.now() - start_time
    print(f"Finished soup. Elapsed time {elapsed_time}")
Exemplo n.º 25
0
    def test_issue_015(self):
        """
        Feature request by @MohamedIM.

        A video already present in the destination must not be downloaded
        again: its ``accessed`` time must be unchanged after a second
        ``download_videos`` call.
        """
        looter = ProfileLooter("instagram", session=self.session)

        looter.download_videos(self.destfs, media_count=1)
        first_video = next(self.destfs.filterdir("/", ["*.mp4"]))
        accessed_before = self.destfs.getdetails(first_video.name).accessed

        looter.download_videos(self.destfs, media_count=1)
        accessed_after = self.destfs.getdetails(first_video.name).accessed
        self.assertEqual(accessed_before, accessed_after)
Exemplo n.º 26
0
    def test_issue_015(self):
        """
        Feature request by @MohamedIM.

        A video already present in the destination must not be downloaded
        again: with mocked pages, its ``accessed`` time must be unchanged
        after a second ``download_videos`` call.
        """
        looter = ProfileLooter("nintendo", session=self.session)

        with contexter.Contexter() as ctx:
            pages_patch = mock.patch.object(looter, 'pages', MockPages('nintendo'))
            ctx << pages_patch

            looter.download_videos(self.destfs, media_count=1)
            first_video = next(self.destfs.filterdir("/", ["*.mp4"]))
            accessed_before = self.destfs.getdetails(first_video.name).accessed

            looter.download_videos(self.destfs, media_count=1)
            accessed_after = self.destfs.getdetails(first_video.name).accessed
            self.assertEqual(accessed_before, accessed_after)
Exemplo n.º 27
0
def download(username: str, image_dir: str) -> None:
    """Download the media of Instagram profile *username* into *image_dir*.

    Files are named with the template ``insta-{datetime}-{id}``; start and
    end of the operation are logged at INFO level.
    """
    filename_template = "insta-{datetime}-{id}"
    logging.info("Downloading...")
    ProfileLooter(username, template=filename_template).download(image_dir)
    logging.info("Done downloading.")
Exemplo n.º 28
0
 def setUp(self):
     """Create a looter for ``USERNAME`` and an in-memory destination filesystem."""
     # Every download is named with the fixed template "test".
     self.looter = ProfileLooter(USERNAME, template="test")
     self.destfs = fs.memoryfs.MemoryFS()
Exemplo n.º 29
0
class TestLogin(unittest.TestCase):
    """Login / logout behaviour of ``ProfileLooter`` with real credentials."""

    def setUp(self):
        """Create a looter for ``USERNAME`` and an in-memory destination."""
        self.looter = ProfileLooter(USERNAME, template="test")
        self.destfs = fs.memoryfs.MemoryFS()

    def tearDown(self):
        """Release the in-memory filesystem."""
        self.destfs.close()

    def test_login(self):
        """Logging in creates the cookie file; logging out removes it."""
        looter = self.looter

        def cookie_exists():
            # Whether the cookie file is currently present in the cache fs.
            return looter._cachefs.exists(looter._COOKIE_FILE)

        # Before login: not logged in, medias unavailable, no cookie stored.
        self.assertFalse(looter.logged_in())
        with self.assertRaises(RuntimeError):
            looter.medias()
        self.assertFalse(cookie_exists())

        try:
            looter.login(USERNAME, PASSWORD)
            self.assertTrue(looter.logged_in())
            self.assertTrue(cookie_exists())
            first_media = next(looter.medias())
            self.assertTrue(first_media)
        finally:
            looter.logout()
            self.assertFalse(cookie_exists())

    def test_download(self):
        """A download while logged in yields a JFIF image named ``test.jpg``."""
        looter, destination = self.looter, self.destfs

        try:
            looter.login(USERNAME, PASSWORD)
            looter.download(destination)
            self.assertTrue(destination.exists('test.jpg'))
            jfif_marker = destination.getbytes('test.jpg')[6:10]
            self.assertEqual(jfif_marker, b'JFIF')
        finally:
            looter.logout()
Exemplo n.º 30
0
 def test_profile(self, profile, **kwargs):
     """
     Download ``MEDIA_COUNT`` medias from ``profile`` and check that at
     least that many entries end up in the destination filesystem. Extra
     keyword arguments are forwarded to ``ProfileLooter``.
     """
     profile_looter = ProfileLooter(profile, session=self.session, **kwargs)
     profile_looter.download(self.destfs, media_count=self.MEDIA_COUNT)
     downloaded = self.destfs.listdir("/")
     self.assertGreaterEqual(len(downloaded), self.MEDIA_COUNT)