Example #1
def ig_login(currentUser):

    device_id = None
    try:
        settings_file = os.path.join(
            current_app.root_path, 'cookies', currentUser['username'])
        if not os.path.isfile(settings_file):
            # settings file does not exist

            # login new
            api = Client(
                currentUser['ig-username'], currentUser['ig-password'],
                on_login=lambda x: onlogin_callback(x, settings_file))
        else:
            with open(settings_file) as file_data:
                cached_settings = json.load(file_data, object_hook=from_json)

            device_id = cached_settings.get('device_id')
            # reuse auth settings
            api = Client(
                currentUser['ig-username'], currentUser['ig-password'],
                settings=cached_settings)

    except (ClientCookieExpiredError, ClientLoginRequiredError) as e:
        print(
            'ClientCookieExpiredError/ClientLoginRequiredError: {0!s}'.format(e))

        # Login expired
        # Do relogin but use default ua, keys and such
        api = Client(
            currentUser['ig-username'], currentUser['ig-password'],
            device_id=device_id,
            on_login=lambda x: onlogin_callback(x, settings_file))

    except ClientLoginError as e:
        print('ClientLoginError {0!s}'.format(e))
        exit(9)
    except ClientError as e:
        print('ClientError {0!s} (Code: {1:d}, Response: {2!s})'.format(
            e.msg, e.code, e.error_response))
        exit(9)
    except Exception as e:
        print('Unexpected Exception: {0!s}'.format(e))
        exit(99)

    # Call the api
    lst = []
    posts = api.feed_timeline()
    items = [item for item in posts.get('feed_items', [])
             if item.get('media_or_ad')]
    tz = pytz.timezone('America/New_York')
    for item in items:
        ClientCompatPatch.media(item['media_or_ad'])
        ig_post = {'Platform': 'Instagram',
                   'Date': convert_time(tz, item['media_or_ad']['taken_at']),
                   'Link': 'https://www.instagram.com/p/' + str(item['media_or_ad']['code'])}
        ig_post_copy = ig_post.copy()
        lst.append(ig_post_copy)
    return lst
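
Example #1 relies on three helpers defined outside the snippet: onlogin_callback, from_json, and the matching to_json encoder. A minimal sketch of them, following the settings-serialization pattern from the instagram_private_api examples; byte values in the cached settings are base64-encoded so they survive the JSON round trip:

import codecs
import json

def to_json(python_object):
    # Encode bytes values (cached cookies/keys) as base64 strings for JSON.
    if isinstance(python_object, bytes):
        return {'__class__': 'bytes',
                '__value__': codecs.encode(python_object, 'base64').decode()}
    raise TypeError(repr(python_object) + ' is not JSON serializable')

def from_json(json_object):
    # Reverse of to_json: restore base64-encoded bytes values.
    if json_object.get('__class__') == 'bytes':
        return codecs.decode(json_object['__value__'].encode(), 'base64')
    return json_object

def onlogin_callback(api, settings_file):
    # Persist the authenticated client's settings so later runs can reuse them.
    with open(settings_file, 'w') as outfile:
        json.dump(api.settings, outfile, default=to_json)
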
Example #2
def login(username, password):
    try:
        api = Client(username, password)
        results = api.feed_timeline()
        items = results.get('items', [])
        for item in items:
            print(item)
            media = ClientCompatPatch.media(item)
            print(media['code'])
    except Exception:
        messagebox.showinfo(
            "Error",
            "Instagram login failed; please check your username and password."
        )
Example #3
    def instagram_login():
        # 'username' and the loads() progress helper come from the surrounding script.
        try:
            with open('data', 'r') as f:
                passwd = f.read()
        except IOError as err:
            return err

        delay = 2
        try:
            api = Client(username, passwd)
            api.feed_timeline()
            loads('Finding Password ...')
            sleep(delay * 7)
        except Exception:
            loads('Finding Password ...')
            sleep(delay * 7)
            print('Password not found! Try again')
        else:
            print('\nlogin as', username, 'successfully')
            print('Password =>', passwd)
Example #4
    def scrape(self):
        api = Client(self.username, self.password)
        results = api.feed_timeline()

        comments_vector = []

        items = [
            item for item in results.get('feed_items', [])
            if item.get('media_or_ad')
        ]
        for item in items:
            # Manually patch the entity to match the public api as closely as possible, optional
            # To automatically patch entities, initialise the Client with auto_patch=True
            ClientCompatPatch.media(item['media_or_ad'])

            # Skip sponsored posts; they carry ad_metadata.
            if 'ad_metadata' not in item['media_or_ad']:
                caption = item['media_or_ad']['caption']
                if caption is None:
                    continue  # uncaptioned posts carry no media_id/text here

                comments = api.media_comments(caption['media_id'])

                # Pair each comment with the caption of its parent post.
                for comment in comments['comments']:
                    comments_vector.append([caption['text'], comment['text']])

        return comments_vector
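
The comments in scrape() mention auto_patch as an alternative to calling ClientCompatPatch by hand. A minimal standalone sketch of that option (placeholder credentials, same instagram_private_api Client):

api = Client('username', 'password', auto_patch=True)
results = api.feed_timeline()
for item in results.get('feed_items', []):
    media = item.get('media_or_ad')
    if media:
        print(media['code'])  # entities arrive already patched to the public-API shape
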
Example #5
api = Client(user_name, password)

# user_feed_info = api.user_feed('329452045', count=1)
# print(user_feed_info)
# for post in user_feed_info:
#     print("post",post)
# print(f"{post['link']}, {post['user']['username']}")

see = api.autocomplete_user_list()

print(see)
# following = api.user_following('123456','1')
# for user in following:
#     print(user['username'])

results = api.feed_timeline()
items = [
    item for item in results.get('feed_items', []) if item.get('media_or_ad')
]
for item in items:
    # Manually patch the entity to match the public api as closely as possible, optional
    # To automatically patch entities, initialise the Client with auto_patch=True
    ClientCompatPatch.media(item['media_or_ad'])
    print(item['media_or_ad']['code'])

# from instagram_web_api import Client, ClientCompatPatch, ClientError, ClientLoginError
#
# # Without any authentication
# web_api = Client(auto_patch=True, drop_incompat_keys=False)
# user_feed_info = web_api.user_feed('329452045', count=10)
# for post in user_feed_info:
Example #6
try:
    api = Client(user_name, password)
except ClientError as e:
    print('ClientError {0!s} (Code: {1:d}, Response: {2!s})'.format(
        e.msg, e.code, e.error_response))
    exit(9)
except Exception as e:
    print('Unexpected Exception: {0!s}'.format(e))
    exit(99)

cookie_expiry = api.cookie_jar.auth_expires
print('Cookie Expiry: {0!s}'.format(
    datetime.datetime.fromtimestamp(cookie_expiry).strftime(
        '%Y-%m-%dT%H:%M:%SZ')))

rank_token = Client.generate_uuid()
resultsForTimeline = api.feed_timeline()
resultsForLikedFeeds = api.feed_liked()  # liked-media feed, fetched alongside the timeline

# Collect the caption text of every timeline item that has one.
stringList = []
for feed_item in resultsForTimeline.get('feed_items', []):
    try:
        stringList.append(feed_item['media_or_ad']['caption']['text'])
    except (KeyError, TypeError):
        continue
Example #7
api = Client(user_name, password)

posts_counter = 0
ads_counter = 0
verified_counter = 0
next_max_id = ''
for i in range(1, 10):
    # results = api.feed_timeline(seen_posts=watched[:-1])
    results = api.feed_timeline(max_id=next_max_id)
    next_max_id = results['next_max_id']
    items = [item for item in results.get('feed_items', []) if item.get('media_or_ad')]
    for item in items:
        # Manually patch the entity to match the public api as closely as possible, optional
        # To automatically patch entities, initialise the Client with auto_patch=True
        posts_counter += 1
        ClientCompatPatch.media(item['media_or_ad'])
        if not item['media_or_ad']['user']['friendship_status']['following']:
            ads_counter += 1
        elif item['media_or_ad']['user']['is_verified']:
            verified_counter += 1
        # print(item['media_or_ad']['user']['username'])
print('stats: {0} posts total; {1} are ads or verified ({2} verified, {3} ads) ~ {4:.1f}%'.format(
    posts_counter, verified_counter + ads_counter, verified_counter, ads_counter,
    100 * (verified_counter + ads_counter) / posts_counter))
Example #8
class InstagramScrape(object):
    def __init__(self, username, password):
        print("setting up instagram scraper...")
        self.username = username
        self.password = password
        self.api = Client(username=username, password=password)
        self.user_id = self.api.username_info(self.username)['user']['pk']
        print("instagram scraper successfully initialized!")

    def get_timeline(self):
        return self.api.feed_timeline()

    def get_self_feed(self):
        result = []
        feed = self.api.self_feed()['items']

        for entry in feed:
            if 'carousel_media_count' in entry:
                media = entry['carousel_media']
            else:
                media = [entry]
            carousel_parent_id = entry['id']

            if entry['caption'] is not None:
                text = entry['caption']['text']
            else:
                text = ''

            # Caption may be None, so take the author id from the user object.
            user_id = entry['user']['pk']
            username = entry['user']['username']
            full_name = entry['user']['full_name']
            for medium in media:
                media_id = medium['id']
                images = medium['image_versions2']['candidates']
                # Keep every other size candidate.
                for candidate in images[::2]:
                    image = {
                        "carousel_parent_id": carousel_parent_id,
                        "id": user_id,
                        "username": username,
                        "full_name": full_name,
                        "media_id": media_id,
                        "url": candidate["url"],
                        "text": text
                    }
                    result.append(image)
        return result

    def get_own_stories(self):
        result = []
        tray = self.api.reels_tray()['tray'][0]
        user_id = tray['id']
        username = tray['user']['username']
        full_name = tray['user']['full_name']
        stories = tray['items']

        for story in stories:
            url = story['image_versions2']['candidates'][0]['url']
            entry = {
                "user_id": user_id,
                "username": username,
                "full_name": full_name,
                "url": url
            }
            result.append(entry)

        return result

    def search_results(self, query):
        result = []
        search_results = self.api.search_users(query)['users']

        for user in search_results:
            pk = user['pk']
            username = user['username']
            full_name = user['full_name']
            info = {"id": pk, "username": username, "full_name": full_name}
            result.append(info)

        return result

    def get_user_feed(self, username, user_id):
        result = []

        try:
            feed = self.api.username_feed(username)['items']
        except ClientError as err:
            return err

        for entry in feed:
            if 'carousel_media_count' in entry:
                media = entry['carousel_media']
            else:
                media = [entry]
            carousel_parent_id = entry['id']

            if entry['caption'] is not None:
                text = entry['caption']['text']
            else:
                text = ''

            username = entry['user']['username']
            full_name = entry['user']['full_name']
            for medium in media:
                media_id = medium['id']
                images = medium['image_versions2']['candidates']
                # Keep every other size candidate.
                for candidate in images[::2]:
                    image = {
                        "carousel_parent_id": carousel_parent_id,
                        "user_id": user_id,
                        "username": username,
                        "full_name": full_name,
                        "media_id": media_id,
                        "url": candidate["url"],
                        "text": text
                    }
                    result.append(image)
        return result

    def gather_media_comments(self, media_id):
        results = []
        comments = self.api.media_comments(media_id)["comments"]

        for comment in comments:
            pk = comment['user']['pk']
            username = comment['user']['username']
            text = comment['text']
            result = {
                'pk': pk,
                'username': username,
                'media_id': media_id,
                'comment_id': comment['pk'],
                'text': text
            }
            results.append(result)

        return results

    def get_comment_replies(self, media_id, comment_id):
        results = []
        replies = self.api.comment_replies(media_id,
                                           comment_id)['child_comments']

        for reply in replies:
            user_id = reply['user_id']
            comment = reply['text']
            replier = reply['user']['username']
            replier_id = reply['user']['pk']
            result = {
                'id': user_id,
                'comment': comment,
                'from': replier,
                'from_id': replier_id,
                'media_id': media_id,
                'comment_id': comment_id
            }
            results.append(result)

        return results

    def get_explore_results(self):
        return self.api.explore()

    def get_story_archive(self):
        return self.api.highlight_user_feed(self.user_id)

    def get_top_search(self):
        return self.api.top_search()

    def scrape(self, mental_illnesses):
        instagram_results = {}

        for mental_illness in mental_illnesses:
            instagram_results[mental_illness] = []

            print("currently handling Instagram information:")
            ig_result = self.search_results(mental_illness)
            handle_feed_lst = {}

            for handle in ig_result:
                handle_feed_lst[handle['id']] = []
                handle_feed = self.get_user_feed(handle['username'],
                                                 handle['id'])

                if not isinstance(handle_feed, ClientError):
                    handle_feed_lst[handle['id']].extend(handle_feed)

            users = list(handle_feed_lst.keys())
            carousel_ids = []

            for user in users:
                posts = handle_feed_lst[user]
                for post in posts:
                    # print(post)
                    if post['carousel_parent_id'] in carousel_ids:
                        continue
                    # print(post['carousel_parent_id'])
                    carousel_ids.append(post['carousel_parent_id'])

                    search_result = {
                        'id': user,
                        'text': post['text'],
                        'media': post['url']
                    }
                    instagram_results[mental_illness].append(search_result)

            print(instagram_results[mental_illness])

        return instagram_results
    # filename = 'instagram_' + mental_illness + '.json'
    # with open(filename, 'w', encoding='utf-8') as f:
    #     json.dump(instagram_results, f)
    #
    # print("complete handling Instagram information.")

    def scrape_self(self):
        results = self.get_self_feed()
        return results

    def save_self_scraped_information(self):
        results = self.scrape_self()

        filename = 'instagram_' + self.username + '.json'
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(results, f)


# Do this:
# 1) Detect post.
# 2) Caption and picture.
# 3) Get the image out.
# 4) Process the image

# For user posts
# 1st run ==> Get last 50 posts if possible.
# 2nd run onwards ==> Get last 3 posts.
# Run weekly

# For user likes
# Get liked posts once every 4 hours.
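
A hypothetical usage sketch for the class above (credentials and search terms are placeholders):

scraper = InstagramScrape('my_username', 'my_password')
scraper.save_self_scraped_information()   # writes instagram_my_username.json
results = scraper.scrape(['anxiety'])     # search users, collect their posts by query
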
Example #9
class Instagram:
    """ Class for collect photo from instagram """
    def __init__(self, login, password):
        self.Source = "Instagram"
        self.API = Client(login, password)

    def get_user_video(self, user_id):
        """
        Get all user videos

        :param
            - user_id: Account id in instagram
        """
        _first = True
        count_of_loaded_video = 0
        next_max_id = None
        while next_max_id or _first:
            try:
                _first = False
                results = self.API.user_feed(user_id=user_id,
                                             max_id=next_max_id)
                for item in results.get('items', []):
                    try:
                        source_id = item["id"]
                        date = item["caption"]["created_at"]
                        video_url = item['video_versions'][0]['url']
                        print('Found a video')
                    except (KeyError, IndexError, TypeError):
                        continue  # not a video post, or it has no caption
                    if self._save_photo(video_url, source_id, date,
                                        extension='mp4'):
                        print('Saved a video')
                        db_utils.insert_photo({
                            "source_id": source_id,
                            "source": self.Source,
                            "date": date
                        })
                        count_of_loaded_video += 1
                        if count_of_loaded_video % db_utils.COMMIT_COUNT == 0:
                            db_utils.commit()
                next_max_id = results.get('next_max_id')
            except Exception:
                utils.print_message(traceback.format_exc())
                break  # do not retry the same page forever
        db_utils.commit()
        return count_of_loaded_video

    def get_user_photo(self, user_id):
        """
        Get all user photos

        :param
            - user_id: Account id in instagram
        """
        _first = True
        count_of_loaded_photo = 0
        next_max_id = None
        while next_max_id or _first:
            try:
                _first = False
                results = self.API.user_feed(user_id=user_id,
                                             max_id=next_max_id)
                for item in results.get('items', []):
                    try:
                        source_id = item["id"]
                        date = item["caption"]["created_at"]
                        photo_url = item["image_versions2"]["candidates"][0][
                            "url"]
                    except (KeyError, IndexError, TypeError):
                        continue  # no caption or no image candidates
                    if self._save_photo(photo_url, source_id, date):
                        db_utils.insert_photo({
                            "source_id": source_id,
                            "source": self.Source,
                            "date": date
                        })
                        count_of_loaded_photo += 1
                        if count_of_loaded_photo % db_utils.COMMIT_COUNT == 0:
                            db_utils.commit()
                next_max_id = results.get('next_max_id')
            except Exception:
                utils.print_message(traceback.format_exc())
                break  # do not retry the same page forever
        db_utils.commit()
        return count_of_loaded_photo

    def get_timeline(self, K):
        """
        Get K photos from the feed timeline

        :param
            - K: Number of posts to take from the timeline
        """
        _first = True
        count_of_loaded_photo = 0
        next_max_id = None
        counter = K
        while (next_max_id or _first) and counter > 0:
            try:
                _first = False
                results = self.API.feed_timeline(max_id=next_max_id)
                for item in results.get('feed_items', []):
                    try:
                        source_id = item["media_or_ad"]["id"]
                        date = item["media_or_ad"]["caption"]["created_at"]
                        photo_url = item["media_or_ad"]["image_versions2"][
                            "candidates"][0]["url"]
                    except (KeyError, IndexError, TypeError):
                        continue
                    if counter <= 0:
                        break  # K posts reached; the while condition ends the loop
                    counter -= 1
                    if self._save_photo(photo_url, source_id, date):
                        db_utils.insert_photo({
                            "source_id": source_id,
                            "source": self.Source,
                            "date": date
                        })
                        count_of_loaded_photo += 1
                        if count_of_loaded_photo % db_utils.COMMIT_COUNT == 0:
                            db_utils.commit()
                next_max_id = results.get('next_max_id')
            except Exception:
                utils.print_message(traceback.format_exc())
                break
        db_utils.commit()
        return count_of_loaded_photo

    def get_followings_accounts(self):
        """ Get followings accounts from current user """
        return self.API.user_following(
            self.API.authenticated_user_id).get('users')

    def load_all_following_photo(self):
        """ Download all photo from each following account """
        count_of_loaded_photo = 0
        for account in self.get_followings_accounts():
            count_of_loaded_photo += self.get_user_photo(account['pk'])
        return count_of_loaded_photo

    def _save_photo(self, url, source_id, source_time, extension='jpg'):
        TRY_COUNTS = 3
        try_counter = TRY_COUNTS
        result = False
        while try_counter > 0:
            try:
                if db_utils.check_exists(source_id, self.Source):
                    break
                # os.path.join keeps the path portable (assumes 'import os').
                filename = os.path.join(self.Source,
                                        '{}.{}'.format(source_id, extension))
                p = requests.get(url, timeout=30)
                if p.status_code == 200:
                    with open(filename, "wb") as f:
                        f.write(p.content)
                    result = True
                    break
            except Exception:
                utils.print_message(traceback.format_exc())
            try_counter -= 1
        return result
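
A hypothetical usage sketch for the collector above (placeholder credentials; assumes an 'Instagram' directory exists for _save_photo output, plus the db_utils/utils modules this example already depends on):

collector = Instagram('my_login', 'my_password')
saved = collector.get_timeline(20)             # up to 20 photos from the timeline
saved += collector.load_all_following_photo()  # then everything from followed accounts
print('{} files saved'.format(saved))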