Exemplo n.º 1
0
    def test_pr_122(self):
        """Check that downloaded media carry their Instagram timestamp.

        Feature implemented by @susundberg.

        The access time and modification time of every downloaded file
        must equal the ``taken_at_timestamp`` of the media it came from.
        This is checked for a single post, for pictures and for videos.
        """

        def check_times(filename, media):
            # Both timestamps of the stored file must match the media date.
            details = self.destfs.getdetails(filename).raw["details"]
            self.assertEqual(details["accessed"], media['taken_at_timestamp'])
            self.assertEqual(details["modified"], media['taken_at_timestamp'])

        code = 'BY77tSfBnRm'
        profile = ProfileLooter(
            'franz_ferdinand', template='{code}', session=self.session)
        post_info = profile.get_post_info(code)

        # Single post downloaded through a PostLooter
        single = PostLooter(code, session=self.session, template='{code}')
        single.download(self.destfs)
        check_times('BY77tSfBnRm.jpg', post_info)

        # First picture of the profile, via download_pictures
        picture = next(
            filter(lambda media: not media['is_video'], profile.medias()))
        profile.download_pictures(self.destfs, media_count=1)
        check_times('{}.jpg'.format(picture['shortcode']), picture)

        # First video of the profile, via download_videos
        video = next(
            filter(lambda media: media['is_video'], profile.medias()))
        profile.download_videos(self.destfs, media_count=1)
        check_times('{}.mp4'.format(video['shortcode']), video)
def send_text(message):
    """Handle an incoming text message and answer it through ``bot``.

    The message is logged first, then dispatched on its text:

    * text containing ``instagram.com/p/`` is treated as a post link; a
      single image or video is downloaded and sent back to the chat, and
      a multi-media post ("GraphSidecar") gets an apology message;
    * text containing ``private`` is forwarded, minus its first seven
      characters, to a fixed chat id;
    * anything else gets a short usage hint.

    Args:
        message: incoming message object; ``message.text`` and
            ``message.chat.id`` are read.
    """
    log(strftime("%Y-%m-%d %H:%M:%S", gmtime()), message)

    if 'instagram.com/p/' in message.text:
        path = message.text
        looter = PostLooter(path)
        # Read the media type once instead of once per branch.
        media_type = looter.info['__typename']
        if media_type == 'GraphImage':
            picture_id = looter.info['id']
            looter.download('./pictures/')
            # Context manager so the file handle is released once the
            # upload call returns (it used to stay open).
            with open('./pictures/{}.jpg'.format(picture_id), 'rb') as photo:
                bot.send_photo(message.chat.id,
                               photo,
                               caption='🤖 Downloaded with @instsave_bot')
        elif media_type == 'GraphVideo':
            video_id = looter.info['id']
            looter.download_videos('./videos/')
            with open('./videos/{}.mp4'.format(video_id), 'rb') as video:
                bot.send_video(message.chat.id,
                               video,
                               caption='🤖 Downloaded with @instsave_bot')
        elif media_type == 'GraphSidecar':
            bot.send_message(
                message.chat.id,
                'Sorry, I can\'t send you post with more than 1 photo\n\nPlease try again'
            )
    elif 'private' in message.text:
        # NOTE(review): hard-coded recipient chat id; the [7:] slice drops
        # the first seven characters wherever 'private' occurs in the text
        # -- presumably the message is expected to start with it; confirm.
        bot.send_message(436264579, message.text[7:])
    else:
        bot.send_message(
            message.chat.id,
            'Please, send link or username\n\nNeed more help?\nJust tap: /help'
        )
Exemplo n.º 3
0
def instagram_p(url):
    """Download the post at *url* and return its picture opened for reading.

    The post is downloaded into ``./pictures/`` and the file named after
    the post id is opened in binary mode. The caller receives the open
    file object and is responsible for closing it.
    """
    post = PostLooter(url)
    media_id = post.info['id']

    # Download
    post.download('./pictures/')

    return open('./pictures/{}.jpg'.format(media_id), 'rb')
Exemplo n.º 4
0
    def test_issue_094(self):
        """Check that the caption ends up in the picture's EXIF metadata.

        Thanks to @jeanmarctst for raising this issue.

        A post downloaded by its code with ``add_metadata=True`` must
        produce an image whose ``Exif`` / ``UserComment`` entry is
        non-empty.
        """
        post = PostLooter(
            "BY77tSfBnRm",
            session=self.session,
            template='{code}',
            add_metadata=True,
        )
        post.download(self.destfs)

        image_bytes = self.destfs.getbytes("BY77tSfBnRm.jpg")
        exif = piexif.load(image_bytes, True)
        self.assertTrue(exif['Exif']['UserComment'])
Exemplo n.º 5
0
    def test_pr_122_download_post(self):
        """Check the timestamps of a file downloaded from a single post.

        Feature implemented by @susundberg.

        The access time and modification time of the downloaded media
        must both equal the post's ``taken_at_timestamp``.
        """
        code = 'BY77tSfBnRm'
        looter = PostLooter(code, template='{code}', session=self.session)
        post = looter.get_post_info(code)
        looter.download(self.destfs)

        details = self.destfs.getdetails('{}.jpg'.format(code)).raw["details"]
        for field in ("accessed", "modified"):
            self.assertEqual(details[field], post['taken_at_timestamp'])
Exemplo n.º 6
0
    def test_issue_039(self):
        """Check that every picture of a multi-picture post is downloaded.

        Feature request by @verafide.
        """
        expected_files = {
            "1461270165803344956.jpg",
            "1461270167497776767.jpg",
            "1461270174435133336.jpg",
            "1461270172581471925.jpg",
            "1461270181565655668.jpg",
        }

        post = PostLooter("BRHecUuFhPl", session=self.session)
        post.download(self.destfs)

        downloaded = set(self.destfs.listdir("/"))
        self.assertEqual(downloaded, expected_files)
Exemplo n.º 7
0
    def test_issue_042(self):
        """Check that a multi-video post is fully downloaded.

        Thanks to @MohamedIM for reporting this bug.

        Mirrors the CLI `post` option: with ``get_videos=True`` all four
        videos of the post must end up in the destination filesystem.
        """
        expected_files = {
            '1465633492745668095.mp4',
            '1465633517836005761.mp4',
            '1465633541559037966.mp4',
            '1465633561523918792.mp4',
        }

        post = PostLooter(
            'BRW-j_dBI6F', session=self.session, get_videos=True)
        post.download(self.destfs)

        downloaded = set(self.destfs.listdir("/"))
        self.assertEqual(downloaded, expected_files)
def application(url, path):
    """Download the post at *url* into ``media/<path>``.

    Returns whatever ``PostLooter.download`` returns for that destination.
    """
    return PostLooter(url).download(f"media/{path}")
Exemplo n.º 9
0
def scrape_post_from_link(given_link):  # scrape post from a given link
    """Collect metadata, comments, tags and OCR text for a post link.

    For every media yielded by ``PostLooter(given_link).medias()`` a dict
    is built holding the caption (with its hashtags and mentions), the
    comments, owner information, counters, tagged users and the text
    recognised in the downloaded image.

    The image is downloaded to ``instaLooter_images/temp/`` and read back
    from ``<post id>.jpg``. If any step of download or OCR fails, the four
    image-related fields are set to empty strings instead of raising.

    Relies on module-level names not defined here: ``_baseurl``,
    ``process_text``, ``cv2`` and ``pytesseract``.

    Args:
        given_link: post link (or code) passed to ``PostLooter``.

    Returns:
        list of dict: one entry per media, in iteration order.
    """
    looter = PostLooter(given_link)
    post_list = []
    for post_info in looter.medias():
        comment_nodes = [
            edge['node']
            for edge in post_info['edge_media_to_parent_comment']['edges']
        ]
        comment_users = [node['owner']['username'] for node in comment_nodes]
        comment_texts = [node['text'] for node in comment_nodes]
        comment_texts_processed = [
            process_text(node['text']) for node in comment_nodes
        ]
        comment_users_verified = [
            node['owner']['is_verified'] for node in comment_nodes
        ]

        # A post without caption has no caption edge (or no such key);
        # only lookup failures are treated as "no caption".
        try:
            caption = post_info['edge_media_to_caption']['edges'][0]['node'][
                'text']
        except (KeyError, IndexError, TypeError):
            caption = ""

        post_dict = {}
        if caption:
            post_dict['hashtags_cap'] = re.findall(r"#(\w+)", caption)
            post_dict['mentions_cap'] = re.findall(r"@(\w+)", caption)
        else:
            post_dict['hashtags_cap'] = []
            post_dict['mentions_cap'] = []

        owner = post_info['owner']
        # Key order is kept as before in case callers depend on it.
        post_dict.update({
            'shortcode': post_info['shortcode'],
            'photo_url': post_info['display_url'],
            'comment_users': comment_users,
            'comment_texts': comment_texts,
            'comment_users_verified': comment_users_verified,
            'comment_texts_processed': comment_texts_processed,
            'caption': caption,
            'post_url': _baseurl + str(post_info['shortcode']),
            'post_id': post_info['id'],
            'user_name': owner['username'],
            'user_id': owner['id'],
            'user_full_name': owner['full_name'],
            'user_verified': owner['is_verified'],
            'user_private': owner['is_private'],
            'user_profile_pic_url': owner['profile_pic_url'],
            'user_post_count': owner['edge_owner_to_timeline_media']['count'],
            'is_ad': post_info['is_ad'],
            'is_video': post_info['is_video'],
            'location': post_info['location'],
            'timestamp': post_info['taken_at_timestamp'],
            'datetime': datetime.fromtimestamp(
                post_info['taken_at_timestamp']),
            'comments_disabled': post_info['comments_disabled'],
            'likes': post_info['edge_media_preview_like']['count'],
            'comments': post_info['edge_media_to_parent_comment']['count'],
        })

        tagged_users = [
            edge['node']['user']
            for edge in post_info['edge_media_to_tagged_user']['edges']
        ]
        post_dict['tagged_usernames'] = [u['username'] for u in tagged_users]
        post_dict['tagged_user_full_name'] = [
            u['full_name'] for u in tagged_users
        ]
        post_dict['tagged_user_verified'] = [
            u['is_verified'] for u in tagged_users
        ]

        # Best effort: download, image decoding and OCR can each fail, and
        # the post is still reported, with empty image fields.
        try:
            ploot = PostLooter(post_dict['post_url'])
            ploot.download('instaLooter_images/temp/')
            img = cv2.imread('instaLooter_images/temp/' + post_info['id'] +
                             ".jpg")
            text = pytesseract.image_to_string(img)
            post_dict['image_text'] = text
            post_dict['hashtags_img'] = re.findall(r"#(\w+)", text)
            post_dict['mentions_img'] = re.findall(r"@(\w+)", text)
            post_dict['image_text_processed'] = process_text(text)
        except Exception:
            # NOTE(review): the fallbacks are empty strings, whereas the
            # success path stores lists for hashtags/mentions; left as is
            # so existing consumers see the same values.
            post_dict['image_text'] = ""
            post_dict['hashtags_img'] = ""
            post_dict['mentions_img'] = ""
            post_dict['image_text_processed'] = ""

        post_list.append(post_dict)
    return post_list