Exemplo n.º 1
0
def download_insta(message):
    """Handle the ``/insta`` bot command.

    Two forms are accepted (as stated in the usage reply):
    ``/insta post <post url>`` downloads a single post and sends its picture,
    ``/insta <username>`` downloads the profile and sends it as a zip archive.

    Any failure is caught, printed and reported to the user with a generic
    error reply. The per-request working directory and the archive built from
    it are always removed before returning.

    Args:
        message: incoming bot message; ``message.text`` and
            ``message.chat.id`` are read.
    """
    usage = "L' utilizzo del comando è /insta post urldelpost o /insta nomeutente"
    work_dir = None  # stays None if we fail before the path is known
    try:
        print("download_insta")
        file_identifier = str(message.chat.id) + str(int(time.time()))
        work_dir = bot_path + file_identifier

        # split() without arguments tolerates repeated whitespace.
        parts = message.text.split()
        # Exact match: a substring test would treat usernames such as
        # "postcards" as the "post" sub-command.
        is_post = len(parts) >= 2 and parts[1] == "post"
        if len(parts) < (3 if is_post else 2):
            risposta(message, usage)
            return
        target = shlex.split(parts[2] if is_post else parts[1])[0]

        if not os.path.isdir(work_dir):
            os.makedirs(work_dir)

        if is_post:
            # The post code is taken as the first run starting with an
            # upper-case letter and ending before the next "/".
            post_code = re.search('[A-Z][^/]+', str(target)).group()
            InstaLooter(directory=work_dir).download_post(post_code)

            pictures = sorted(
                name for name in os.listdir(work_dir) if name.endswith(".jpg"))
            if not pictures:
                raise IOError("no picture downloaded for post " + post_code)
            picture_path = os.path.join(work_dir, file_identifier + ".jpg")
            os.rename(os.path.join(work_dir, pictures[0]), picture_path)

            bot.send_chat_action(message.chat.id, 'upload_photo')
            with open(picture_path, "rb") as photo:
                bot.send_photo(message.chat.id, photo)
        else:
            InstaLooter(directory=work_dir, profile=str(target)).download()
            shutil.make_archive(work_dir, 'zip', work_dir)
            bot.send_chat_action(message.chat.id, 'upload_document')
            with open(work_dir + ".zip", "rb") as archive:
                bot.send_document(message.chat.id, archive)
    except Exception as e:
        # Top-level boundary of the command handler: report and carry on.
        print(e)
        risposta(message, "Si è verificato un errore, riprova")
    finally:
        if work_dir is not None:
            shutil.rmtree(work_dir, ignore_errors=True)
            try:
                os.remove(work_dir + ".zip")
            except OSError:
                pass  # archive was never created
Exemplo n.º 2
0
def new_looter(*args, **kwargs):
    """Replace the module-level ``looter`` with a freshly built instance.

    The new looter is logged in when both ``INSTA_USERNAME`` and
    ``INSTA_PASSWORD`` are truthy; otherwise it is left anonymous.

    Args:
        *args: Positional parameters forwarded to
            :class:`instaLooter.core.InstaLooter`
        **kwargs: Keyword parameters forwarded to
            :class:`instaLooter.core.InstaLooter`
    """
    global looter
    looter = InstaLooter(*args, **kwargs)

    have_credentials = INSTA_USERNAME and INSTA_PASSWORD
    if not have_credentials:
        return
    looter.login(INSTA_USERNAME, INSTA_PASSWORD)
Exemplo n.º 3
0
def getInstaLinks(username):
    """Collect image URLs from an Instagram profile, skipping videos.

    At most ``cfg.instaLimit()`` URLs are returned. (The previous ``i > limit``
    check let one extra image through.)

    Args:
        username: profile name handed to ``InstaLooter``.

    Returns:
        List of ``display_src`` values, in the order ``looter.medias()``
        yields them.
    """
    limit = cfg.instaLimit()  # read once instead of on every iteration
    images = []
    if limit <= 0:
        return images

    looter = InstaLooter(profile=username)
    for media in looter.medias():
        if media['is_video']:
            continue
        console.subtask("Got Image: {0}".format(media['display_src'].strip()[:90]))
        images.append(media['display_src'])
        if len(images) >= limit:
            break
    return images
Exemplo n.º 4
0
 def __init__(self, userName, userFolder):
     """Load profile metadata for ``userName`` and immediately run ``scan()``.

     Args:
         userName: Instagram profile name passed to ``InstaLooter``.
         userFolder: stored on the instance as ``self.userFolder``.
     """
     self.donePicturesQueue = queue.Queue()
     self.userName = userName
     self.userFolder = userFolder
     self.looter = InstaLooter(profile=userName)
     # Fetch the metadata once and reuse it; the original asked the looter
     # three times for the same snapshot (presumably one request per call —
     # confirm against the instaLooter version in use).
     metadata = self.looter.get_metadata()
     self.fullName = metadata['full_name']
     self.profilePicture = metadata['profile_pic_url']
     # Same value as the former 'r{}/{}'.format(...)[1:] expression.
     self.profileUrl = '{}/{}'.format(INSTAGRAM_URL, userName)
     self.isPrivate = metadata['is_private']
     self.pics_dic = {}
     self.sum = 0
     self.counter = 0
     self.scan()
Exemplo n.º 5
0
from instaLooter import InstaLooter
import re
import os

# Profile whose posts are inspected.
insgtagram_user = "******"
looter = InstaLooter(profile=insgtagram_user)
post_list = []
_baseurl = "https://www.instagram.com/p"

# Build one dict per post: photo URL, caption, hashtags, post URL and owner.
# NOTE(review): in the visible part of this script neither `post_dict` nor the
# two comment lists are stored anywhere (`post_list` stays empty) — confirm
# whether an append step is missing.
for media in looter.medias(with_pbar=True):
    post_info = looter.get_post_info(media['code'])
    post_dict = {'photo_url': post_info['display_url']}

    comment_edges = post_info['edge_media_to_comment']['edges']
    comment_user = [edge['node']['owner']['username'] for edge in comment_edges]
    comment_text = [edge['node']['text'] for edge in comment_edges]

    try:
        caption = post_info['edge_media_to_caption']['edges'][0]['node']['text']
    except (KeyError, IndexError, TypeError):
        # Post without a caption: keep the original empty-list placeholder.
        caption = []

    post_dict['hashtag'] = re.findall(r"#(\w+)", caption) if caption else []
    post_dict['caption'] = caption
    # URLs always use "/", so do not build them with os.path.join (which
    # would insert a backslash on Windows).
    post_dict['insta_url'] = _baseurl + "/" + post_info['shortcode']
    post_dict['instagram_id'] = post_info['owner']['username']
Exemplo n.º 6
0
conta = sys.argv[1]

if not sys.argv[1]:
	conta = raw_input("Informe a conta do instagram: ")

pasta_imagens = "imagens"
if not os.path.exists(pasta_imagens):
	os.makedirs(pasta_imagens)
pasta_legendas = "legendas"
if not os.path.exists(pasta_legendas):
	os.makedirs(pasta_legendas)



looter = InstaLooter(profile=conta, get_videos=False, url_generator=resizer(640), directory=pasta_imagens)

print "Baixando imagens..."
looter.download()
print "Salvo na pasta %s" % pasta_imagens

print "Baixando legendas..."
counter = 0
for media in looter.medias():
	filename = "%s/caption%d.txt" % (pasta_legendas, counter)
	arq = open(filename, "w")
	try:
		arq.write(media["caption"].encode('utf8', 'ignore'))
	except KeyError:
		pass
	arq.close()
Exemplo n.º 7
0
    def crawlTag(self, tag, goal=0):
        """
        Crawl posts for hashtag ``tag`` that are newer than the last crawl.

        The last crawled timestamp is read from the file named ``tag`` inside
        ``self.timelog_dir``; unparsable content counts as 0. The moment this
        crawl started is stored in ``self.starttime_dict[tag]``.

        Crawling stops at the first post whose date is not newer than the
        stored timestamp, or once ``goal`` rows were collected. With the
        default ``goal=0`` the row count never equals the goal, so there is
        no limit.

        Returns list of dict of posts.
        """
        log_path = os.path.join(self.timelog_dir, tag)
        with open(log_path, 'r') as time_log_file:
            raw_stamp = time_log_file.read().strip()
        try:
            last_crawled_timestamp = float(raw_stamp)
        except ValueError:
            last_crawled_timestamp = 0

        started_at = datetime.now()
        print("{} : {} starts".format(tag, str(started_at)))
        last_crawled_date = datetime.fromtimestamp(last_crawled_timestamp)
        print("Last crawled date : {}\n".format(str(last_crawled_date)))

        self.starttime_dict[tag] = started_at

        rows = []
        looter = InstaLooter(hashtag=tag)

        for media in looter.medias():
            code = media['code']

            # Posts whose details or row cannot be built are skipped.
            try:
                postDict = looter.get_post_info(code)
            except (KeyError, AttributeError):
                continue

            try:
                rowDict = self.makeRowDict(postDict)
            except IndexError:
                continue

            if not rowDict:
                continue

            print("{} / {}".format(postDict['date'], last_crawled_timestamp))
            # Presumably medias() yields newest first, so the first old post
            # ends the crawl — verify against the looter.
            if float(postDict['date']) <= last_crawled_timestamp:
                break

            rows.append(rowDict)
            collected = len(rows)

            if collected % 500 == 0:
                print("{} : {} counts at {}\n".format(tag, str(collected),
                                                      str(datetime.now())))
            # Finish Point
            if collected == goal:
                break

        print("{} : {} ends \n".format(tag, str(datetime.now())))
        looter.__del__()

        return rows
Exemplo n.º 8
0
def image_crawler():
    """Crawl images from the source chosen in the submitted form.

    Form fields read: ``target`` (domain, profile or keyword), ``num``
    (maximum number of images, converted with ``int``) and ``gridRadios``
    (``'Greedy'``, ``'Instagram'`` or ``'Google'``). Each stored image is
    registered in the ``Images`` table under the session's username.

    Returns:
        The rendered ``/imagecrawler/form.html`` template.
    """
    table = dynamodb.Table('Images')
    target = request.form.get('target')
    num = int(request.form.get('num'))
    radio = request.form.get('gridRadios')

    # The three sources are mutually exclusive, hence if/elif.
    if radio == 'Greedy':
        greedy_crawler = GreedyImageCrawler(
            storage={'root_dir': 'downloaded_pictures'})
        greedy_crawler.crawl(domains=str(target),
                             max_num=num,
                             min_size=(200, 200),
                             max_size=None)
        # NOTE(review): `file_names` is not defined inside this function;
        # it must come from module scope — confirm it exists and is filled.
        print(file_names)
        for file_name in file_names:
            table.put_item(Item={
                'username': session['username'],
                'imagename': file_name,
            })

    elif radio == 'Instagram':
        looter = InstaLooter(directory="/tmp/", profile=target)
        looter.download_pictures(media_count=num)
        # One client for the whole loop instead of one per uploaded image.
        s3 = boto3.client('s3')
        counter = 0
        for media in looter.medias():
            print(media)
            if counter >= num:
                break
            if media['is_video']:
                # Videos are skipped and do not count towards `num`.
                continue
            counter += 1
            image_key = media['id'] + '.jpg'
            remote = urlopen(media['display_src'])
            try:
                fp = io.BytesIO(remote.read())
            finally:
                remote.close()  # the response was previously left open
            s3.upload_fileobj(fp, 'ece1779project', image_key)
            table.put_item(
                Item={
                    'username': session['username'],
                    'imagename': image_key,
                })

    elif radio == 'Google':
        google_crawler = GoogleImageCrawler(
            parser_threads=2,
            downloader_threads=4,
            storage={'root_dir': 'downloaded_pictures'})
        google_crawler.crawl(keyword=target,
                             max_num=num,
                             date_min=None,
                             date_max=None,
                             min_size=(200, 200),
                             max_size=None)
        # NOTE(review): same undefined-in-function `file_names` as above.
        for file_name in file_names:
            table.put_item(Item={
                'username': session['username'],
                'imagename': file_name,
            })

    return render_template("/imagecrawler/form.html")
Exemplo n.º 9
0
class Instagram:
    """Download the profile picture and all non-video pictures of a profile.

    Construction does all the work: metadata is read, then ``scan()`` is run.
    Results are exposed through ``pics_dic`` (saved path -> source URL),
    ``donePicturesQueue`` (one ``{path: url}`` dict per saved picture),
    ``counter`` (pictures queued for download) and ``sum`` (pictures written).
    """

    def __init__(self, userName, userFolder):
        """Read profile metadata for ``userName`` and start the scan.

        Args:
            userName: Instagram profile name passed to ``InstaLooter``.
            userFolder: existing directory the pictures are written into.
        """
        self.donePicturesQueue = queue.Queue()
        self.userName = userName
        self.userFolder = userFolder
        self.looter = InstaLooter(profile=userName)
        # Fetch once and reuse; the original requested the same metadata
        # separately for every field.
        metadata = self.looter.get_metadata()
        self.fullName = metadata['full_name']
        self.profilePicture = metadata['profile_pic_url']
        # Same value as the former 'r{}/{}'.format(...)[1:] expression.
        self.profileUrl = '{}/{}'.format(INSTAGRAM_URL, userName)
        self.isPrivate = metadata['is_private']
        # The metadata also carries 'connected_fb_page' (can be None); unused.
        self.pics_dic = {}
        self.sum = 0
        self.counter = 0
        self.scan()

    def scan(self):
        """Save the profile picture, then download every non-video media.

        Downloads run on ``THREAD_COUNT`` worker threads; the method returns
        only after all of them have finished.
        """
        http = urllib3.PoolManager()
        # Write the profile picture
        profile_path = os.path.join(self.userFolder, 'profile.jpg')
        with open(profile_path, 'wb') as profile_picture:
            profile_picture.write(http.request("GET", self.profilePicture).data)

        if self.looter.get_metadata()['media']['count'] <= 0:
            return  # No pictures

        availablePicturesQueue = queue.Queue()
        for media in self.looter.medias():
            if not media['is_video']:
                availablePicturesQueue.put(media)
                self.counter += 1

        threads = []
        for _ in range(THREAD_COUNT):
            worker = threading.Thread(target=self.write_picture,
                                      args=(availablePicturesQueue,))
            worker.start()
            # The workers were never recorded before, so the join loop below
            # waited on nothing.
            threads.append(worker)

        for worker in threads:
            worker.join()

    def write_picture(self, availablePicturesQueue):
        """Worker loop: drain the queue and write each picture to disk.

        Args:
            availablePicturesQueue: ``queue.Queue`` of media dicts providing
                ``'is_video'`` and ``'display_src'``.
        """
        http = urllib3.PoolManager()

        while True:
            # get_nowait() instead of empty()+get(): with several workers the
            # queue can be emptied between the two calls, and a blocking
            # get() would then wait forever.
            try:
                media = availablePicturesQueue.get_nowait()
            except queue.Empty:
                return

            try:
                if media['is_video']:
                    # Videos are not downloaded.
                    continue
                url = media['display_src']

                dataToWrite = http.request("GET", url).data
                fileName = '{}{}.jpg'.format(
                    self.userName,
                    str(threading.current_thread().ident * randint(1, 10000000)))
                picturePath = os.path.join(self.userFolder, fileName)
                if os.path.exists(picturePath):
                    # Random name collision: the existing file gets overwritten.
                    print("if u see this the programmer is an idiot " + picturePath)
                with open(picturePath, 'wb') as picture:
                    picture.write(dataToWrite)
            finally:
                # Exactly one task_done() per item taken, even when the item
                # is skipped or the download fails.
                availablePicturesQueue.task_done()

            self.donePicturesQueue.put({picturePath: url})
            self.pics_dic[picturePath] = url
            # Kept from the original: the entry is marked done as soon as it
            # is queued, not when a consumer has processed it.
            self.donePicturesQueue.task_done()
            self.sum += 1