Example No. 1
 def run(self):
     for resource in self.resources:
         logging.info(f'starting to parse instagram resource {resource}')
         self.resource = Account(resource)
         new_posts = self.get_new_posts(resource)
         self.process_posts(new_posts)
         logging.info(f'instagram resource {resource} is parsed')
Example No. 2
def mem():
    agent = WebAgent()
    account = Account("cosmopolitan_russia")

    media1, pointer = agent.get_media(account)
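    # get_media returns (media_list, pointer); passing the pointer back in pages through older posts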
    count = 0  # counter of completed iterations
    kek = []  # list of collected post data
    for k in range(198):
        try:
            media2, pointer = agent.get_media(account,
                                              pointer=pointer,
                                              count=50,
                                              delay=0.4)
            for i in media2:
                kek.append({
                    'text': i.caption,
                    'likes': i.likes_count,
                    'comments': i.comments_count
                })
            count += 1
        except Exception:
            pass  # skip pages that fail to load and keep paging
        print(1)  # progress marker
    dmp = json.dumps(kek)
    with open('mem.json', 'a') as f:
        print(dmp, file=f)
Example No. 3
def get_store_items(store_id, sale_hashtag, remove_hashtags=True):
    agent = WebAgent()
    account = Account(store_id)
    agent.update(account)

    assert not account.is_private, 'Account is private!'

    new_acc, created = InstagramAccount.objects.get_or_create(name=store_id)
    if not created:
        old_items = Item.objects.filter(account=new_acc)
        old_items.delete()

    media, pointer = agent.get_media(account, count=account.media_count)
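    # agent.update(account) above fills in media_count, so this requests every post in one call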
    items = []
    for post in media:
        description = post.caption or ''
        if sale_hashtag:
            # post_contain_hashtag = any([hashtag in description for hashtag in sale_hashtags])
            post_contain_hashtag = sale_hashtag in description
            if not post_contain_hashtag:
                continue
        items.append(_get_item(post, remove_hashtags))

    for item in items:
        if item.get('images'):
            Item.objects.create(account=new_acc,
                                description=item.get('description'),
                                image_url=item.get('images')[0])
    return items
Example No. 4
def instagram(userLogin):
    agent = WebAgent()
    d = {}
    try:
        account = Account(userLogin)
        try:
            firstTenPub, pointer = agent.get_media(
                account)  # the ten most recent publications
            otherPubs, pointer = agent.get_media(account,
                                                 pointer=pointer,
                                                 count=account.media_count,
                                                 delay=1)  # the rest of the posts
            places = []  # reserved for future use
            tenDaysLikes = 0
            tenDaysComments = 0
            allLikes = 0
            allComments = 0
            for i in firstTenPub:
                allLikes = allLikes + i.likes_count
                allComments = allComments + i.comments_count
                tenDaysLikes += i.likes_count
                tenDaysComments += i.comments_count

            for i in otherPubs:
                allLikes = allLikes + i.likes_count
                allComments = allComments + i.comments_count

            ln = len(otherPubs) + len(firstTenPub)
            d["Avatar: "] = account.profile_pic_url_hd
            d["Average quantity of likes from the last 10 publications: "] = str(
                tenDaysLikes // len(firstTenPub))
            d["Average quantity of comments from the last 10 publications: "] = str(
                tenDaysComments // len(firstTenPub))
            d["Average quantity of likes: "] = str(allLikes // ln)
            d["Average quantity of comments: "] = str(allComments // ln)
            d["Nickname: "] = account.username
            d["Quantity of posts: "] = str(account.media_count)
            d["Full Name: "] = account.full_name
            d["Quantity of follows: "] = str(account.follows_count)
            d["Quantity of followers: "] = str(account.followers_count)
            d["Is account private: "] = str(account.is_private)
            d["Account biography: "] = account.biography
        except Exception:  # media fetch failed; keep only the basic profile fields
            d["Avatar: "] = account.profile_pic_url_hd
            d["Nickname: "] = account.username
            d["Quantity of follows: "] = str(account.follows_count)
            d["Quantity of followers: "] = str(account.followers_count)
            d["Is account private: "] = str(account.is_private)
            d["Account biography: "] = account.biography

    except Exception:  # the account could not be loaded at all
        d["Avatar: "] = "https://avatars.mds.yandex.net/get-zen_doc/125920/pub_5bf184d0e9397500ab3a1aec_5bf18854297efb00aaff9147/scale_600"
        d["Error: "] = "404"

    # d maps human-readable labels to the user's profile information

    return d
Example No. 5
 def get_new_posts(self, resource):
     self.resource = Account(resource)
     stored_posts = self.get_parsed_posts()
     new_posts = []
     pointer = None
     self.insta_request(data=self.resource)
     posts_count = self.resource.media_count
     posts_scraped = 0
     while posts_count > posts_scraped:
         try:
             posts, pointer = self.insta_request(pointer=pointer)
             posts_scraped += len(posts)
             logging.info(f'scraped {posts_scraped} posts')
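             # assuming posts come back newest-first, scraping stops at the first already-stored post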
             for post in posts:
                 if post.__str__() not in stored_posts:
                     new_posts.append(post)
                 else:
                     raise StopIteration
         except StopIteration:
             break
     return new_posts
Example No. 6
def mem(account="ccacc_ount"):
    last = None
    agent = WebAgent()
    account = Account(account)
    media1, _ = agent.get_media(account, count=1)  # get_media returns (media_list, pointer)
    tm = time.time()
    while True:
        if time.time() > tm + 5:  # poll every 5 seconds
            media1, _ = agent.get_media(account)
            tm = time.time()
            if last != media1[0].code:  # a new post appeared
                last = media1[0].code
                print(last)
Example No. 7
def mem(account=sconfig.our, delay=sconfig.delay):
    last = None
    agent = WebAgent()
    account = Account(account)
    media1, _ = agent.get_media(account, count=1)  # get_media returns (media_list, pointer)
    tm = time.time()
    while True:
        if time.time() > tm + delay:  # poll every `delay` seconds
            media1, _ = agent.get_media(account)
            tm = time.time()
            if last != media1[0].code:  # a new post appeared
                last = media1[0].code
                print(last)
Example No. 8
def get_last_inst(account=sconfig.our, cnt=5):
    result = []
    agent = WebAgent()
    account = Account(account)
    media1 = agent.get_media(account, count=cnt)
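    # get_media returns (media_list, pointer); media1[0] below is the list of posts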
    for i in media1[0]:
        result.append({
            'url': 'https://www.instagram.com/p/' + i.code + '/',
            'time': i.date,
            'text': i.caption,
            'network': 'inst',
            'id': i.owner
        })
    return result
Example No. 9
def createAclist(agent, agentpass, accountName):
  agent = WebAgentAccount(agent)
  agent.auth(agentpass)
  account = Account(accountName)
  agent.update()

  print("please wait...")

  f, pointer = agent.get_followers(account)
  f2, pointer = agent.get_followers(account, pointer=pointer, count=account.followers_count, delay=1)
  followers = f + f2
  with open('saves/' + accountName, 'w') as file1:
    for el in followers:
      print(el)
      file1.write(str(el) + '\n')
Example No. 10
def count(name):
    while True:
        try:
            global counter
            agent = WebAgent()
            acc_name = name
            print("Calculating...")

            account = Account(acc_name)

            media1, pointer = agent.get_media(account)
            media2, pointer = agent.get_media(account, pointer=pointer, count=100, delay=1)
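            # the first call returns the first page plus a pointer; the second pages 100 posts further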

            posts = []

            for i in media2:
                try:
                    media = agent.get_likes(i, pointer=None, count=50, limit=5000, delay=10, settings=None)
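                    # get_likes is assumed to return (likes_list, pointer); [:1] below drops the pointer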
                    postlike = ([i, media[:1]])
                    posts.append(postlike)
                except Exception:
                    continue  # skip posts whose likes could not be fetched

            posts = dict(posts)
            counter = collections.Counter()

            string = []
            for key, value in posts.items():
                for i in value:
                    for x in i:
                        string.append(str(x))

            for word in string:
                counter[word] += 1

            return counter
        except instagram.exceptions.InternetException:
            logging.error("error: {}".format(sys.exc_info()[0]))
            counter = "0"
            break
        except instagram.exceptions.UnexpectedResponse:
            logging.error("error: {}".format(sys.exc_info()))
            counter = "0"
            break
        except ValueError:
            logging.error("error: {}".format(sys.exc_info()[0]))
            counter = "0"
            break
Example No. 11
def add_posts(account, agent):
    account = Account(account)
    media = agent.get_media(account,
                            pointer=None,
                            count=50,
                            limit=200,
                            delay=0)
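    # get_media returns (media_list, pointer); media[0] below is the list of posts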
    user, created = Akk.objects.get_or_create(username=account.username,
                                              user_id=account.id,
                                              full_name=account.full_name)
    print(media)
    for i in media[0]:
        if i.is_video:
            pass
            # Post.objects.update_or_create(
            #     media=i.video_url,
            #     text=i.caption,
            #     date=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(i.date)),
            #     post_url=BASE_INST_URL + i.base_url + i.code,
            #     post_id=i.id,
            #     account=user
            # )
        elif i.is_album:
            # print('\nThis is an album\n')
            album = []
            for j in i.album:
                if j.is_video:
                    album.append(j.video_url)
                elif j.is_ad:
                    pass
                elif j.is_album:
                    pass
                else:
                    album.append(j.display_url)
            # Post.objects.update_or_create(
            #     media=album,
            #     text=i.caption,
            #     date=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(i.date)),
            #     post_url=BASE_INST_URL + i.base_url + i.code,
            #     post_id=i.id,
            #     account=user,
            # )
            print(album)
        elif i.is_ad:
            pass
        else:
            pass
Example No. 12
def Download(name):
    """
    Download function.
    Downloads all media from an account.
    Takes the login of the account's user.
    """

    dir_home = os.getcwd()

    print("Start work with {}".format(name))
    try:
        print("\tTrying to open the account")
        agent = WebAgent()
        account = Account(name)
        if account.is_private:
            print("\tAccount {} is private".format(account))
            return 0
        else:
            print("\tAccount {} is not private".format(account))
            agent.update(account)
            agent.get_media(account)
            medias, point = agent.get_media(account, count=account.media_count)
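            # the previous get_media call's result is discarded; this call fetches all media_count posts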
            try:
                os.mkdir("_{}".format(name))
                print("\t##Make _{} directory".format(name))
                os.chdir("_{}".format(name))
                print("\t##Into _{} directory".format(name))
            except:
                os.chdir("_{}".format(name))
                print("\t##Into _{} directory".format(name))

            # TODO: add a sanity check here
            for i, media in enumerate(medias):
                if not media.is_video:
                    download_Photo(media.display_url, i)

            write_comment_file(medias, account)

    except Exception:
        print("Could not open account {}".format(name))

    os.chdir(dir_home)
    print("\t##Go to {}".format(dir_home))
Example No. 13
def fl():
    global fp_name
    global first_profile
    for i in ac[0]:
        if i == fp_name:
            first_profile = False
        if fp_name == "":
            fp_name = i
        a = Account(i)
        agent.update(a)
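        # update() fills in profile fields such as the biography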
        bio = a.biography
        if any(keyword in bio for keyword in ("CEO", "Founder", "founder",
                                              "Owner", "owner",
                                              "Entrepreneur", "entrepreneur")):
            link = f"https://www.instagram.com/{a}/"
            requests.post(API_LINKS, data={"link": link, "source": INSTA_ACC})
            print(link)
    ab = agent.get_followers(account=base_acc,
                             pointer=ac[1],
                             count=5,
                             limit=2,
                             delay=5)
    return ab
Example No. 14
import instagram
from instagram.agents import WebAgent
from instagram import Account, Media, Location
agent = WebAgent()
account = Account("zuck")
loc = Location(17326249)
agent.update(account)
media = agent.get_media(loc, count=50)
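# get_media is called on the Location here and returns (media_list, pointer); print shows the raw tuple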
print(media)
Example No. 15
import time
import instagram
from instagram import Account, Media, WebAgent, Story, Location, Tag, Comment

agent = WebAgent()
account = Account("acc")

posts = agent.get_media(account, pointer=None, count=5)
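# get_media returns (media_list, pointer); the line below keeps only the media list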
res = list(list(posts)[0])
i = 0  # index into the fetched posts
likes = []
likes_prev = []
comments = []
comments_prev = []
import requests
while True:
    media = Media(res[i])
    likes.append(media.likes_count)
    comments.append(media.comments_count)
    i += 1

    if i == len(res):
        likes_prev = likes
        likes = []
        comments_prev = comments
        comments = []
        time.sleep(3)
        try:
            for i in range(len(res)):
                media = Media(res[i])
                agent.update(media)
Example No. 16
from instagram import Account, Media, WebAgent
from datetime import datetime

ts = int("1284101485")
print(datetime.utcfromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S'))
agent = WebAgent()
account = Account("islambek.temirbek")  # username
d = {}  # information about the user
media1, pointer = agent.get_media(account)  # the ten most recent publications
media2, pointer = agent.get_media(account,
                                  pointer=pointer,
                                  count=account.media_count,
                                  delay=1)  # the rest of the posts
places = []  # reserved for future use
cntfrslks = 0
cntfrscmnt = 0
cntforlikes = 0
cntforcomments = 0
for s in media1:
    cntforlikes = cntforlikes + s.likes_count
    cntforcomments = cntforcomments + s.comments_count
    cntfrslks += s.likes_count
    cntfrscmnt += s.comments_count
    print(s.caption)
for i in media2:
    cntforlikes = cntforlikes + i.likes_count
    cntforcomments = cntforcomments + i.comments_count

ln = len(media2) + len(media1)
d["avgfstpublclks"] = cntfrslks // len(media1)
d["avgfstpublccomments"] = cntfrscmnt // len(media1)
Example No. 17
def get_anna_nails_content():
    '''
    Visits a person's Instagram account and collects their photos and the comments on them.
    '''
    # global locker
    # if locker == 1:
    #     return False

    # locker = BoundedSemaphore(value=1)
    # with locker:
    photos = []
    try:
        # locker = 1
        agent = WebAgent()
        account = Account("anna_nails_ani")

        agent.update(account)

        count_download_photos = 1000
        # if the DB has fewer than 50 records, read the maximum number of records, otherwise 10
        # if MyWork.query.count() < 50:
        #     count_download_photos = 300

        # work out the date at which to stop, so older content is skipped when this is not the first load
        date_to_stop_search = MyWork.query.order_by(MyWork.published.desc()).first()
        if date_to_stop_search:
            date_to_stop_search = date_to_stop_search.published - timedelta(days=14)

        # fetch the account's media; get_media returns (media_list, pointer)
        media, _ = agent.get_media(account, count=count_download_photos)

        # list of works to add to the DB
        photos = []
        # comment patterns and authors that should not be saved to the DB (advertising)
        reklama_pattern_list = {'реклам', 'клам', 'услуг', 'предлагаю', 'пиар'}
        reclama_owner_list = {'master_and_model123'}

        count_photo = 0
        for m in media:
            if m is not None and not m.is_video:
                # print(f'Media code: {m.code}')
                photo_date = datetime.fromtimestamp(m.date)

                count_photo = count_photo + 1
                # stop once we reach the date past which nothing more needs to be loaded
                if date_to_stop_search and photo_date <= date_to_stop_search:
                    # print('Reached the maximum search date')
                    break
                photo_date = photo_date.strftime('%Y-%m-%d %H:%M:%S')
                # print(f'Photos read: {count_photo}, id_site: {m.id}')

                comment = agent.get_comments(media=m, count=30)
                comments_for_photo = []
                if comment[0]:
                    for c in comment[0]:
                        if not is_reklam(f'{c.owner}', reclama_owner_list):
                            if not is_reklam(c.text, reklama_pattern_list):
                                comment_date = datetime.fromtimestamp(c.created_at)
                                comment_date = comment_date.strftime('%Y-%m-%d %H:%M:%S')
                                comments = {'id': f'{c.id}', 'media': f'{c.media}', 'owner': f'{c.owner}', 'text': f'{c.text}', 'date': comment_date}
                                comments_for_photo.append(comments)

                item = {'id': f'{m.id}', 'caption': f'{m.caption}', 'code': f'{m.code}', 'date': photo_date, 'url': f'{m.display_url}', 'owner': f'{m.owner}', 'likes': f'{m.likes_count}', 'comments': comments_for_photo}
                photos.append(item)

    except AttributeError:
        print("Attribute Error!")
    except Exception as e:
        print(f"Error: Type None! {e}")

    # locker = 0

    save_my_work(photos)
Example No. 18
        bio = a.biography
        if any(keyword in bio for keyword in ("CEO", "Founder", "founder",
                                              "Owner", "owner",
                                              "Entrepreneur", "entrepreneur")):
            link = f"https://www.instagram.com/{a}/"
            requests.post(API_LINKS, data={"link": link, "source": INSTA_ACC})
            print(link)
    ab = agent.get_followers(account=base_acc,
                             pointer=ac[1],
                             count=5,
                             limit=2,
                             delay=5)
    return ab


login = Account(USERNAME)
pw = PASSWORD
base_acc = Account(INSTA_ACC)
settings = {}

agent = WebAgentAccount(login)
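# authenticated agent; auth() below can raise CheckpointException, which is handled further down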

agent_url = ""
agent_code = ""
auth_failed = True

try:
    agent.auth(pw)
except CheckpointException as e:
    auth_failed = True
    agent_url = e.checkpoint_url
Example No. 19
class InstagramParser:
    def __init__(self, resources):
        self.config = parse_config('instagram')
        self.resources = resources
        self.anon_agent = WebAgent()
        self.agent = {'agent': self.anon_agent, 'set_time': time.time()}
        # self.logged_agent = WebAgentAccount(self.config['LOGIN'])
        # self.logged_agent.auth(self.config['PASSWORD'])
        self.logged_agent = self.anon_agent
        self.agent = {'agent': self.logged_agent, 'set_time': time.time()}
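        # with the account login commented out above, the "logged" agent is just the anonymous WebAgent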

        self.mdb = MongoDBStorage()
        self.downloader = Downloader('https://www.instagram.com')
        self.proxy_helper = self.downloader.proxy_helper
        self.use_proxy = False
        self.date_limit = False
        self.old_datetime = datetime.datetime.now() - datetime.timedelta(
            days=ast.literal_eval(self.config['scraping_date_limit']))
        if not os.path.exists('../stream/instagram'):
            os.makedirs('../stream/instagram')

    def get_settings(self, request_start_time, proxy_only=False):
        if proxy_only or (self.use_proxy and
                          time.time() - self.previous_local_request < 11 * 60):
            chosen_proxy = self.proxy_helper.get_proxy()
        else:
            chosen_proxy = None
            if self.use_proxy and self.agent['set_time'] < request_start_time:
                with self.proxy_helper.lock:
                    self.agent = {
                        'agent': self.anon_agent,
                        'set_time': time.time()
                    }
                    self.use_proxy = False
                    logging.info('stopped using proxies')
        settings = {
            "proxies": {
                "http": chosen_proxy,
                "https": chosen_proxy,
            },
            'timeout': 30
        }
        return chosen_proxy, settings

    def swap_agent(self):
        if self.agent['agent'] is self.anon_agent:
            self.agent = {'agent': self.logged_agent, 'set_time': time.time()}
        else:
            self.agent = {'agent': self.anon_agent, 'set_time': time.time()}

    def insta_request(self, pointer=None, data=None, proxy_only=False):
        @self.proxy_helper.exception_decorator
        def request_to_instagram(proxy, setting, pointer=None, posts=None):
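            # when `data` is set only an update is performed and posts stays None;
            # otherwise a page of media for self.resource is fetched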
            if data:
                self.agent['agent'].update(data, settings=setting)
            else:
                posts, pointer = self.agent['agent'].get_media(
                    self.resource, pointer=pointer, settings=setting, delay=1)
            return posts, pointer

        request_start_time = time.time()
        while True:
            chosen_proxy, settings = self.get_settings(
                proxy_only=proxy_only, request_start_time=request_start_time)
            request_start_time = time.time()
            try:
                posts, pointer = request_to_instagram(proxy=chosen_proxy,
                                                      setting=settings,
                                                      pointer=pointer)
                break
            except (InternetException, UnexpectedResponse) as e:
                with self.proxy_helper.lock:
                    if self.agent['set_time'] < request_start_time:
                        if isinstance(e, UnexpectedResponse):
                            self.swap_agent()
                        if not chosen_proxy:
                            self.use_proxy = True
                            self.previous_local_request = time.time()
                            logging.info('started using proxies')
                            self.agent = {
                                'agent': self.logged_agent,
                                'set_time': time.time()
                            }
        if posts:
            return posts, pointer

    def get_new_posts(self, resource):
        self.resource = Account(resource)
        stored_posts = self.get_parsed_posts()
        new_posts = []
        pointer = None
        self.insta_request(data=self.resource)
        posts_count = self.resource.media_count
        posts_scraped = 0
        while posts_count > posts_scraped:
            try:
                posts, pointer = self.insta_request(pointer=pointer)
                posts_scraped += len(posts)
                logging.info(f'scraped {posts_scraped} posts')
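                # assuming posts come back newest-first, scraping stops at the first already-stored post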
                for post in posts:
                    if post.__str__() not in stored_posts:
                        new_posts.append(post)
                    else:
                        raise StopIteration
            except StopIteration:
                break
        return new_posts

    def get_parsed_posts(self):
        return self.mdb.get_instagram_posts(self.resource.__str__())

    def parse_album(self, album):
        album_data = dict()
        album_pages = []
        for album_page in album.album:
            self.insta_request(data=album_page)
            album_pages.append(album_page.resources[-1])
        album_data['album_pages'] = album_pages
        return album_data

    def parse_video(self, video):
        video_data = dict()
        video_data['video_url'] = self.downloader.download_file(
            video.video_url, 'mp4')
        return video_data

    def parse_single_post(self, single_post):
        single_post_data = dict()
        return single_post_data

    def get_mandatory_post_data(self, post):
        post_data = dict()
        post_data['_id'] = post.__str__()
        post_data['preview_image'] = self.downloader.download_file(
            post.resources[-1], 'jpg') if post.is_video else post.resources[-1]
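        # post.resources[-1] is used here as the preview image (presumably the largest available size)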
        post_data['description'] = post.caption
        post_data['date'] = post.date
        post_data['likes_count'] = post.likes_count
        post_data['link'] = 'https://www.instagram.com/p/{post_id}'.format(
            post_id=post_data['_id'])
        post_data['resource'] = self.resource.__str__()
        post_data['icon'] = self.resource.profile_pic_url
        post_data['is_album'] = post.is_album
        post_data['is_video'] = post.is_video

        return post_data

    def parse_post(self, post):
        self.insta_request(data=post)
        post_data = self.get_mandatory_post_data(post)
        if post_data['date'] < self.old_datetime.timestamp():
            self.date_limit = True
        if post.is_album:
            post_data.update(self.parse_album(post))
        elif post.is_video:
            post_data.update(self.parse_video(post))
        else:
            post_data.update(self.parse_single_post(post))
        return post_data

    def process_posts(self, posts):
        pool_size = ast.literal_eval(self.config['pool_size'])
        chunks = list(chunkify(posts, pool_size))
        for n, chunk in enumerate(chunks):
            logging.info('processing {}/{} posts batch'.format(
                n + 1, len(chunks)))
            chunk = chunk[::-1]
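            # reverse the batch so that, assuming newest-first ordering, older posts are stored first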
            pool = ThreadPool(pool_size)
            parsed_posts = [
                post for post in pool.map(self.parse_post, chunk)
                if post['date'] > self.old_datetime.timestamp()
            ]
            pool.close()
            self.mdb.add_new_posts(parsed_posts)
            if self.date_limit:
                logging.info('instagram resource {} reached date limit'.format(
                    self.resource))
                return

    def run(self):
        for resource in self.resources:
            logging.info(f'starting to parse instagram resource {resource}')
            self.resource = Account(resource)
            new_posts = self.get_new_posts(resource)
            self.process_posts(new_posts)
            logging.info(f'instagram resource {resource} is parsed')
Example No. 20
from telebot import TeleBot
import schedule, time, datetime
from instagram import Account, WebAgent

bot = TeleBot('1228395330:AAEPH5rF1oNLXiuFBSZ26aosz-g_n3AiFfk')

with open("posted_photos.txt") as file:
    data = [row.strip() for row in file]

agent = WebAgent()
account = Account("erotic_model_girls")

agent.update(account)

media = agent.get_media(account, count=9999)[0]
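# get_media returns (media_list, pointer); [0] keeps only the list of posts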
count = 1

now = datetime.datetime.now()


def job():
    global count
    global media
    while True:
        m = media[-count]
        count += 1
        if m.id in data or m.is_video:
            continue
        else:
            data.append(m.display_url)
            with open("posted_photos.txt", "a") as a_file: