Example #1
    async def search_imgur(self, ctx: Context, *, query: str):
        """Imgur search."""
        search_id = 0

        await self.bot.send_typing(ctx.message.channel)

        try:
            client_id = self.settings["imgur"]["id"]
            client_secret = self.settings["imgur"]["secret"]
        except KeyError:
            await self.bot.say("Please set imgur id and secret.")
            return

        try:
            search_id = self.settings["imgur"]["search_id"]
        except KeyError:
            self.settings["imgur"]["search_id"] = 0

        # count = 0
        client = ImgurClient(client_id, client_secret)
        results = client.gallery_search(query)

        try:
            result = next(islice(results, search_id, None))
            if result.is_album:
                img = client.get_image(result.cover)
            else:
                img = result
            await self.bot.say(str(img.link))
            search_id += 1
        except StopIteration:
            search_id = 0

        self.settings["imgur"]["search_id"] = search_id
        dataIO.save_json(JSON, self.settings)
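A minimal standalone sketch of the same rotate-through-results pattern, assuming a configured ImgurClient (the credentials and query are placeholders):

from itertools import islice

from imgurpython import ImgurClient


def next_search_result(client, query, search_id=0):
    """Return (link, next_search_id) for the search_id-th gallery result,
    wrapping back to 0 once the results run out."""
    results = client.gallery_search(query)
    try:
        result = next(islice(results, search_id, None))
    except StopIteration:
        return None, 0
    # Gallery albums expose a cover image id rather than a direct link.
    img = client.get_image(result.cover) if result.is_album else result
    return str(img.link), search_id + 1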
Example #2
def get_url(submission):
    def what_is_inside(url):
        header = requests.head(url).headers
        if 'Content-Type' in header:
            return header['Content-Type']
        else:
            return ''

    url = submission.url

    url_content = what_is_inside(url)
    if ('image/jpeg' == url_content or 'image/png' == url_content):
        return 'img', url, url_content.split('/')[1]

    if 'image/gif' in url_content:
        return 'gif', url, 'gif'

    if url.endswith('.gifv'):
        if 'image/gif' in what_is_inside(url[0:-1]):
            return 'gif', url[0:-1], 'gif'

    if submission.is_self is True:
        # Self submission with text
        return 'text', None, None

    if urlparse(url).netloc == 'imgur.com':
        # Imgur
        imgur_config = yaml.safe_load(open('imgur.yml'))  # safe_load avoids arbitrary object construction
        imgur_client = ImgurClient(imgur_config['client_id'],
                                   imgur_config['client_secret'])
        path_parts = urlparse(url).path.split('/')
        if path_parts[1] == 'gallery':
            # TODO: gallery handling
            return 'other', url, None
        elif path_parts[1] == 'topic':
            # TODO: topic handling
            return 'other', url, None
        elif path_parts[1] == 'a':
            # An imgur album
            album = imgur_client.get_album(path_parts[2])
            story = {}
            for num, img in enumerate(album.images):
                number = num + 1
                story[number] = {
                    'link': img['link'],
                    'gif': img['animated'],
                    'type': img['type'].split('/')[1]
                }
            return 'album', story, None
        else:
            # Just imgur img
            img = imgur_client.get_image(path_parts[1].split('.')[0])
            if not img.animated:
                return 'img', img.link, img.type.split('/')[1]
            else:
                return 'gif', img.link, 'gif'

    else:
        return 'other', url, None
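A usage sketch: get_url expects a praw-style submission exposing url and is_self, so a stand-in object works for illustration (SimpleNamespace here is illustrative, not part of the original):

from types import SimpleNamespace

submission = SimpleNamespace(url='https://i.imgur.com/AbC123.jpg', is_self=False)
kind, link, ext = get_url(submission)
# -> ('img', 'https://i.imgur.com/AbC123.jpg', 'jpeg') if the link resolves to a JPEG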
Example #3
def get_imgur_urls(self, url):
    c_id = '45b94b4d0013b7a'
    split = url.split('/')
    if self.has_extension(split[-1]):
        # Already a direct link with a file extension.
        yield url
        return
    client = ImgurClient(client_id=c_id, client_secret=None)
    if split[-2] == 'a':
        # Album link: strip any '#...' fragment to get the album id.
        album_id = split[-1].split('#')[0]
        for img in client.get_album_images(album_id):
            yield img.link
    else:
        yield client.get_image(split[-1]).link
Example #4
  def get_imgur_info(self, url):
    # Returns imgur link info.
    p = re.compile(r'(?:.*)(?:http(?:s|)://(?:www\.|i\.|)imgur\.com/)([A-Za-z0-9]*)(?:(?:\.[jpgt]|\ |$).*)', re.IGNORECASE)  # the Python 2-only 'ur' prefix is dropped
    m = re.search(p, url)
    if m is None:
      return None

    client = ImgurClient('', '')
    img = client.get_image(m.group(1))
    lines = []

    lines.append("Title: " + (img.title or ''))  # title can be None
    return lines
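A quick, self-contained check of what the pattern captures (the URLs are illustrative):

import re

p = re.compile(r'(?:.*)(?:http(?:s|)://(?:www\.|i\.|)imgur\.com/)'
               r'([A-Za-z0-9]*)(?:(?:\.[jpgt]|\ |$).*)', re.IGNORECASE)
for u in ('https://i.imgur.com/AbC123.jpg', 'http://imgur.com/AbC123'):
    m = p.search(u)
    print(m.group(1) if m else None)  # -> AbC123 in both cases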
Example #5
def find(criteria):
    client = ImgurClient(creds.client_id, creds.client_secret)

    q = criteria.replace("'", "")
    search = client.gallery_search(q, advanced=None, sort='top', window='all')
    if search:
        # random.choice covers every index, including 0, so the single-result
        # and multi-result branches collapse into one (the original
        # randrange(1, ...) could never pick the first result).
        item = random.choice(search)
        return client.get_image(item.id).link
    else:
        # use bing if no imgur results
        return risky(q)
Example #6
class Imgur_Uploader(object):
    def __init__(self):
        client_id = 'YOUR-CLIENT-ID'
        client_secret = 'YOUR-CLIENT-SECRET'

        self.client = ImgurClient(client_id, client_secret)
        
    def upload_image(self, filepath):
        return self.client.upload_from_path(filepath)

    def download_image(self, image_id, name):
        image = self.client.get_image(image_id)
        image_link = image.link
        uid = uuid.uuid1().urn
        # uid[9:] strips the 'urn:uuid:' prefix; Python 3 moved urlretrieve
        # into urllib.request.
        urllib.request.urlretrieve(image_link, "IMGURDownloads/" + name[:-4] + uid[9:] + name[-4:])
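A usage sketch, assuming valid credentials in the class and an existing IMGURDownloads/ directory (the class does not create it):

uploader = Imgur_Uploader()
uploaded = uploader.upload_image('cat.jpg')   # returns the new image's metadata
print(uploaded['link'])
uploader.download_image(uploaded['id'], 'cat.jpg')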
        
Example #7
async def imgur(*search_terms):
    """ Fetches images from Imgur based on the given arguments.
        Supports single and multiple search terms.
    """
    client = ImgurClient(imgur_client_id, imgur_client_secret)

    search_terms = " ".join(search_terms)
    images = client.gallery_search(search_terms)
    if images:
        image = random.choice(images)
        if image.is_album:
            await bot.say(client.get_image(image.cover).link)
        else:
            await bot.say(image.link)
    else:
        await bot.say("Ei löytynyt kuvia termillä " + search_terms)  # Finnish: "No images found for the term ..."
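gallery_search returns a mix of plain images and albums; only albums carry a cover id, which is why both this example and the next branch on is_album. A minimal sketch of resolving any gallery item to a direct link:

def gallery_item_link(client, item):
    # Albums have no direct image link of their own; resolve the cover
    # image instead. Plain gallery images already expose .link.
    if item.is_album:
        return client.get_image(item.cover).link
    return item.link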
Example #8
async def imgur(ctx, *search_terms):
    """ Fetches images from Imgur based on the given arguments.
        Supports single and multiple search terms.
    """
    client = ImgurClient(imgur_client_id, imgur_client_secret)

    search_terms = " ".join(search_terms)
    images = client.gallery_search(search_terms)
    if images:
        image = random.choice(images)
        if image.is_album:
            await ctx.send(client.get_image(image.cover).link)
        else:
            await ctx.send(image.link)  # ctx, not the imgur client, does the sending
    else:
        await ctx.send("Couldn't find the picture! " + search_terms)
Example #9
class ImgurExtractor(Extractor):
    def __init__(self, url, user, post_title, subreddit, creation_date,
                 save_path, subreddit_save_method, imgur_client,
                 name_downloads_by):
        """
        A subclass of the Extractor class.  This class interacts exclusively with the imgur website through the imgur
        api via ImgurPython

        :param imgur_client: A tuple of the client id and client secret provided by imgur to access their api.  This
        tuple is supplied to imgurpython to establish an imgur client
        """
        super().__init__(url, user, post_title, subreddit, creation_date,
                         save_path, subreddit_save_method, name_downloads_by)
        try:
            self.client = ImgurClient(imgur_client[0], imgur_client[1])
        except ImgurClientError as e:
            if e.status_code == 500:
                self.over_capacity_error()

    def extract_content(self):
        """Determines what type of page container a link points to, then dispatches to the matching extraction method"""
        try:
            if 'i.imgur' in self.url:
                self.extract_direct_link()

            elif "/a/" in self.url:
                self.extract_album()
            elif '/gallery/' in self.url:
                try:
                    self.extract_album()
                except Exception:
                    pass
            elif self.url.lower().endswith(
                ('.jpg', '.jpeg', '.png', '.gif', '.gifv', '.mp4', '.webm')):
                self.extract_direct_mislinked()
            else:
                self.extract_single()
        except ImgurClientError as e:
            if e.status_code == 403:
                if self.client.credits['ClientRemaining'] is None:
                    self.failed_to_locate_error()
                elif self.client.credits['ClientRemaining'] <= 0:
                    self.no_credit_error()
                else:
                    self.failed_to_locate_error()
            if e.status_code == 429:
                self.rate_limit_exceeded_error()
            if e.status_code == 500:
                self.over_capacity_error()
            if e.status_code == 404:
                self.does_not_exist_error()
        except ImgurClientRateLimitError:
            self.rate_limit_exceeded_error()
        except Exception:
            self.failed_to_locate_error()

    def rate_limit_exceeded_error(self):
        x = Post(self.url, self.user, self.post_title, self.subreddit,
                 self.creation_date)
        self.failed_extracts_to_save.append(x)
        self.failed_extract_messages.append(
            '\nFailed: Imgur rate limit exceeded.  This post has been saved and will be downloaded '
            'the next time the application is run.  Please make sure you have adequate user '
            'credits upon the next run.  User credits can be checked in the help menu\n'
            'Title: %s,  User: %s,  Subreddit: %s' %
            (self.post_title, self.user, self.subreddit))

    def no_credit_error(self):
        x = Post(self.url, self.user, self.post_title, self.subreddit,
                 self.creation_date)
        self.failed_extracts_to_save.append(x)
        self.failed_extract_messages.append(
            '\nFailed: You do not have enough imgur credits left to extract this '
            'content.  This post will be saved and extraction attempted '
            'the next time the program is run.  Please make sure that you '
            'have adequate credits upon next run.\nTitle: %s,  User: %s,  '
            'Subreddit: %s' % (self.post_title, self.user, self.subreddit))

    def over_capacity_error(self):
        x = Post(self.url, self.user, self.post_title, self.subreddit,
                 self.creation_date)
        self.failed_extracts_to_save.append(x)
        self.failed_extract_messages.append(
            '\nFailed: Imgur is currently over capacity.  This post has been saved and '
            'extraction will be attempted the next time the program is run.\nTitle: '
            '%s, User: %s,  Subreddit: %s' %
            (self.post_title, self.user, self.subreddit))

    def does_not_exist_error(self):
        self.failed_extract_messages.append(
            '\nFailed: The content does not exist.  This most likely means that the '
            'image has been deleted on Imgur, but the post still remains on reddit\n'
            'Url: %s,  User: %s,  Subreddit: %s,  Title: %s' %
            (self.url, self.user, self.subreddit, self.post_title))

    def failed_to_locate_error(self):
        self.failed_extract_messages.append(
            '\nFailed to locate the content at %s\nUser: %s  Subreddit: %s  Title: %s'
            '\n' % (self.url, self.user, self.subreddit, self.post_title))

    def extract_direct_link(self):
        url = self.url  # fallback if no known extension is found below
        for ext in ['.jpg', '.jpeg', '.png', '.gif', '.gifv', '.mp4', '.webm']:
            if ext in self.url:
                index = self.url.find(ext)
                url = '%s%s' % (self.url[:index], ext)

        domain, id_with_ext = url.rsplit('/', 1)
        image_id, extension = id_with_ext.rsplit('.', 1)
        file_name = self.post_title if self.name_downloads_by == 'Post Title' else image_id
        if url.endswith('gifv') or url.endswith('gif'):
            picture = self.client.get_image(image_id)
            if picture.type == 'image/gif' and picture.animated:
                url = picture.mp4
                extension = 'mp4'
        x = Content(url, self.user, self.post_title, self.subreddit, file_name,
                    "", '.' + extension, self.save_path,
                    self.subreddit_save_method)
        self.extracted_content.append(x)

    def extract_album(self):
        count = 1
        domain, album_id = self.url.rsplit('/', 1)
        for pic in self.client.get_album_images(album_id):
            url = pic.link
            address, extension = url.rsplit('.', 1)
            file_name = self.post_title if self.name_downloads_by == 'Post Title' else album_id
            if pic.type == 'image/gif' and pic.animated:
                extension = 'mp4'
                url = pic.mp4
            x = Content(url, self.user, self.post_title, self.subreddit,
                        file_name + " ", count, '.' + extension,
                        self.save_path, self.subreddit_save_method)
            count += 1
            self.extracted_content.append(x)

    def extract_single(self):
        domain, image_id = self.url.rsplit('/', 1)
        pic = self.client.get_image(image_id)
        url = pic.link
        address, extension = url.rsplit('.', 1)
        file_name = self.post_title if self.name_downloads_by == 'Post Title' else image_id
        if pic.type == 'image/gif' and pic.animated:
            extension = 'mp4'
            url = pic.mp4
        x = Content(url, self.user, self.post_title, self.subreddit, file_name,
                    "", '.' + extension, self.save_path,
                    self.subreddit_save_method)
        self.extracted_content.append(x)

    def extract_direct_mislinked(self):
        """
        All direct links to imgur content must start with 'https://i.imgur.com/'.  Posted links sometimes lose this
        prefix, so this method rewrites mislinked imgur urls to the correct form and then extracts them as direct
        links
        """
        url = self.url  # fallback if no known extension is found below
        for ext in ['.jpg', '.jpeg', '.png', '.gif', '.gifv', '.mp4', '.webm']:
            if ext in self.url:
                index = self.url.find(ext)
                url = '%s%s' % (self.url[:index], ext)

        domain, id_with_ext = url.rsplit('/', 1)
        domain = 'https://i.imgur.com/'
        url = '%s%s' % (domain, id_with_ext)
        image_id, extension = id_with_ext.rsplit('.', 1)
        file_name = self.post_title if self.name_downloads_by == 'Post Title' else image_id
        if url.endswith('gifv') or url.endswith('gif'):
            picture = self.client.get_image(image_id)
            if picture.type == 'image/gif' and picture.animated:
                url = picture.mp4
                extension = 'mp4'
        x = Content(url, self.user, self.post_title, self.subreddit, file_name,
                    "", '.' + extension, self.save_path,
                    self.subreddit_save_method)
        self.extracted_content.append(x)
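The status-code handling above can be sketched in isolation; a minimal version, assuming imgurpython's documented exception types and a configured client:

from imgurpython import ImgurClient
from imgurpython.helpers.error import (ImgurClientError,
                                       ImgurClientRateLimitError)


def fetch_image(client, image_id):
    """Return the image, or a short failure reason that mirrors the
    status-code dispatch used by ImgurExtractor."""
    try:
        return client.get_image(image_id)
    except ImgurClientRateLimitError:
        return 'rate limit exceeded'
    except ImgurClientError as e:
        reasons = {403: 'no credits or content not located',
                   404: 'content does not exist',
                   429: 'rate limit exceeded',
                   500: 'imgur over capacity'}
        return reasons.get(e.status_code, e.error_message)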
Example #10
                # this is a lie, but eh so what
                path += '.mp4'
                if os.path.exists(path): 
                    continue
                print("   \\_{}".format(url_to_get))

            elif parts.netloc in ['imgur.com','i.imgur.com']:
                noext = os.path.splitext(parts.path)[0]
                pieces = noext.strip('/').split('/')
                try:
                    if pieces[0] == 'a':
                        for x in imgur.get_album_images(pieces[1]):
                            url_to_get = x.link

                    else:
                        obj = imgur.get_image(pieces[0])
                        url_to_get = obj.link

                except Exception:
                    print("   \\_ Unable to get {}".format(entry.url))
                    ignore.add(path)
                    continue

                hasext = os.path.splitext(path)
                if not hasext[1]:
                    ext = os.path.splitext(url_to_get)[1]
                    path += ext

                print("   \\_{}".format(url_to_get))

            elif parts.netloc == 'gfycat.com':
Example #11
def get_media(img_url, post_id):
    if any(s in img_url for s in ('i.redd.it', 'i.reddituploads.com')):
        file_name = os.path.basename(urllib.parse.urlsplit(img_url).path)
        file_extension = os.path.splitext(img_url)[-1].lower()
        # Fix for issue with i.reddituploads.com links not having a file extension in the URL
        if not file_extension:
            file_extension += '.jpg'
            file_name += '.jpg'
            img_url += '.jpg'
        # Grab the GIF versions of .GIFV links
        # When Tweepy adds support for video uploads, we can grab the MP4 versions
        if (file_extension == '.gifv'):
            file_extension = file_extension.replace('.gifv', '.gif')
            file_name = file_name.replace('.gifv', '.gif')
            img_url = img_url.replace('.gifv', '.gif')
        # Download the file
        file_path = IMAGE_DIR + '/' + file_name
        print('[ OK ] Downloading file at URL ' + img_url + ' to ' +
              file_path + ', file type identified as ' + file_extension)
        img = save_file(img_url, file_path)
        return img
    elif ('imgur.com' in img_url):  # Imgur
        try:
            client = ImgurClient(IMGUR_CLIENT, IMGUR_CLIENT_SECRET)
        except BaseException as e:
            print('[EROR] Error while authenticating with Imgur:', str(e))
            return
        # Working demo of regex: https://regex101.com/r/G29uGl/2
        regex = r"(?:.*)imgur\.com(?:\/gallery\/|\/a\/|\/)(.*?)(?:\/.*|\.|$)"
        m = re.search(regex, img_url, flags=0)
        if m:
            # Get the Imgur image/gallery ID
            id = m.group(1)
            if any(s in img_url
                   for s in ('/a/', '/gallery/')):  # Gallery links
                images = client.get_album_images(id)
                # Only the first image in a gallery is used
                imgur_url = images[0].link
            else:  # Single image
                imgur_url = client.get_image(id).link
            # If the URL is a GIFV link, change it to a GIF
            file_extension = os.path.splitext(imgur_url)[-1].lower()
            if (file_extension == '.gifv'):
                file_extension = file_extension.replace('.gifv', '.gif')
                img_url = imgur_url.replace('.gifv', '.gif')
            # Download the image
            file_path = IMAGE_DIR + '/' + id + file_extension
            print('[ OK ] Downloading Imgur image at URL ' + imgur_url +
                  ' to ' + file_path)
            imgur_file = save_file(imgur_url, file_path)
            # Imgur will sometimes return a single-frame thumbnail instead of a GIF, so we need to check for this
            if (file_extension == '.gif'):
                # Open the file using the Pillow library
                img = Image.open(imgur_file)
                # Get the MIME type
                mime = Image.MIME[img.format]
                if (mime == 'image/gif'):
                    # Image is indeed a GIF, so it can be posted
                    img.close()
                    return imgur_file
                else:
                    # Image is not actually a GIF, so don't post it
                    print(
                        '[EROR] Imgur has not processed a GIF version of this link, so it can not be posted'
                    )
                    img.close()
                    # Delete the image
                    try:
                        os.remove(imgur_file)
                    except BaseException as e:
                        print('[EROR] Error while deleting media file:',
                              str(e))
                    return
            else:
                return imgur_file
        else:
            print(
                '[EROR] Could not identify Imgur image/gallery ID in this URL:',
                img_url)
            return
    elif ('gfycat.com' in img_url):  # Gfycat
        gfycat_name = os.path.basename(urllib.parse.urlsplit(img_url).path)
        client = GfycatClient()
        gfycat_info = client.query_gfy(gfycat_name)
        # Download the 2MB version because Tweepy has a 3MB upload limit for GIFs
        gfycat_url = gfycat_info['gfyItem']['max2mbGif']
        file_path = IMAGE_DIR + '/' + gfycat_name + '.gif'
        print('[ OK ] Downloading Gfycat at URL ' + gfycat_url + ' to ' +
              file_path)
        gfycat_file = save_file(gfycat_url, file_path)
        return gfycat_file
    elif ('giphy.com' in img_url):  # Giphy
        # Working demo of regex: https://regex101.com/r/o8m1kA/2
        regex = r"https?://((?:.*)giphy\.com/media/|giphy.com/gifs/|i.giphy.com/)(.*-)?(\w+)(/|\n)"
        m = re.search(regex, img_url, flags=0)
        if m:
            # Get the Giphy ID
            id = m.group(3)
            # Download the 2MB version because Tweepy has a 3MB upload limit for GIFs
            giphy_url = 'https://media.giphy.com/media/' + id + '/giphy-downsized.gif'
            file_path = IMAGE_DIR + '/' + id + '-downsized.gif'
            print('[ OK ] Downloading Giphy at URL ' + giphy_url + ' to ' +
                  file_path)
            giphy_file = save_file(giphy_url, file_path)
            return giphy_file
        else:
            print('[EROR] Could not identify Giphy ID in this URL:', img_url)
            return
    else:
        print('[WARN] Post', post_id, 'doesn\'t point to an image/GIF:',
              img_url)
        return
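The single-frame-thumbnail check above can be lifted out on its own; a minimal sketch with Pillow, mirroring the Image.MIME lookup used in the example:

from PIL import Image


def is_real_gif(file_path):
    # True only if the downloaded file is actually a GIF rather than the
    # single-frame thumbnail Imgur sometimes serves in its place.
    with Image.open(file_path) as img:
        return Image.MIME[img.format] == 'image/gif'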
Example #12
class ImgurWrapper:
    def __init__(self, client_id, client_secret):
        self.__client = ImgurClient(client_id, client_secret)

    @staticmethod
    def is_imgur(url):
        """
        Simple check to see if a url is an imgur url
        :param url: parsed url of the link to check
        :return: boolean, True if it is an imgur url
        """
        return "imgur.com" in url.hostname

    @staticmethod
    def is_album(url):
        return "/a/" in url.path

    def get_image_list(self, url):
        """
        Take the url and return a list of all images associated with it: a single image for a
        direct link, or every image in an album.
        :param url: parsed url object
        :return: list of images
        """
        image_list = []
        if self.is_album(url):
            image_list = self.get_album_images(url)
        else:
            image = self.get_image(url)
            if image is not None:
                image_list.append(image)

        return image_list

    def get_image(self, url):
        """
        Get a single image from a url
        :param url: parsed url
        :return: an image or None if exception raised
        """
        image_id = url.path[url.path.rfind("/") + 1:]
        try:
            image = self.__client.get_image(image_id)
        except ImgurClientError as e:
            logging.error("Status Code: " + str(e.status_code) + " Error: " +
                          e.error_message)
            image = None

        return image

    def get_album_images(self, url):
        """
        Gets all the images in an album as a list of image objects
        :param url: parsed url
        :return: Either a list of images or an empty list
        """
        album_id = url.path[url.path.rfind("/") + 1:]
        image_list = []

        try:
            images = self.__client.get_album_images(album_id)
        except ImgurClientError as e:
            logging.error("Status Code: " + str(e.status_code) + " Error: " +
                          e.error_message)
        else:
            image_list = images

        return image_list
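A usage sketch, assuming valid credentials (placeholders below); the wrapper expects already-parsed urls:

from urllib.parse import urlparse

wrapper = ImgurWrapper('client-id', 'client-secret')  # placeholder credentials
url = urlparse('https://imgur.com/a/AbC12')           # illustrative album url
if ImgurWrapper.is_imgur(url):
    for image in wrapper.get_image_list(url):
        print(image.link)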
Example #13
def get_media(submission, IMGUR_CLIENT, IMGUR_CLIENT_SECRET):
    img_url = submission.url
    # Make sure config file exists
    try:
        config = configparser.ConfigParser()
        config.read('config.ini')
    except BaseException as e:
        print('[EROR] Error while reading config file:', str(e))
        sys.exit()
    # Make sure media folder exists
    IMAGE_DIR = config['MediaSettings']['MediaFolder']
    if not os.path.exists(IMAGE_DIR):
        os.makedirs(IMAGE_DIR)
        print('[ OK ] Media folder not found, created a new one')
    # Download and save the linked image
    if any(s in img_url for s in ('i.redd.it', 'i.reddituploads.com')):  # Reddit-hosted images
        file_name = os.path.basename(urllib.parse.urlsplit(img_url).path)
        file_extension = os.path.splitext(img_url)[-1].lower()
        # Fix for issue with i.reddituploads.com links not having a file extension in the URL
        if not file_extension:
            file_extension += '.jpg'
            file_name += '.jpg'
            img_url += '.jpg'
        # Download the file
        file_path = IMAGE_DIR + '/' + file_name
        print('[ OK ] Downloading file at URL ' + img_url + ' to ' +
              file_path + ', file type identified as ' + file_extension)
        img = save_file(img_url, file_path)
        return [img]
    elif ('v.redd.it' in img_url):  # Reddit video
        fileouts = []
        class ytdlLogger(object):
            def debug(self, msg):
                if msg.startswith(IMAGE_DIR):
                    fileouts.append(msg)
            def warning(self, msg):
                print("[WARN] " + msg)
            def error(self, msg):
                print("[EROR] " + msg)
        ytdl_opts = {
            'outtmpl': IMAGE_DIR + '/%(id)s.%(ext)s',
            'noplaylist': True,
            'forcefilename': True,
            'logger': ytdlLogger()
        }
        print("[ OK ] Downloading video at url " + img_url + " via youtube-dl...")
        with youtube_dl.YoutubeDL(ytdl_opts) as ytdl:
            ytdl.download([img_url])
            print("[ OK ] File downloaded to " + fileouts[0])
            return [fileouts[0]]
    elif ('reddit.com/gallery/' in img_url):  # Reddit galleries (multiple images)
        try:
            galleryitems = submission.gallery_data['items']
            mediadata = submission.media_metadata
        except BaseException as e:
            print('[EROR] Post seems to be a gallery but there was an error trying to get the gallery data:', str(e))
            return
        if len(galleryitems) > 4:
            print('[WARN] Post is a gallery with more than 4 images. Skipping as it is too many for Twitter.')
            return
        img_url_list = []
        for item in galleryitems:
            if mediadata[item['media_id']]['m'] == 'image/jpg':
                img_url_list.append(f"https://i.redd.it/{item['media_id']}.jpg")
            elif mediadata[item['media_id']]['m'] == 'image/png':
                img_url_list.append(f"https://i.redd.it/{item['media_id']}.png")
            elif mediadata[item['media_id']]['m'] == 'image/webp':
                img_url_list.append(f"https://i.redd.it/{item['media_id']}.webp")
            else:
                print('[WARN] An item in the gallery is not a JPG, PNG, or WEBP. Skipping this post as it is likely unable to be posted to Twitter.')
                return
        downloaded_imgs = []
        for url in img_url_list:
            file_name = os.path.basename(urllib.parse.urlsplit(url).path)
            saved = save_file(url, file_name)
            downloaded_imgs.append(saved)
        return downloaded_imgs
    elif ('imgur.com' in img_url):  # Imgur
        try:
            client = ImgurClient(IMGUR_CLIENT, IMGUR_CLIENT_SECRET)
        except BaseException as e:
            print('[EROR] Error while authenticating with Imgur:', str(e))
            return
        # Working demo of regex: https://regex101.com/r/G29uGl/2
        regex = r"(?:.*)imgur\.com(?:\/gallery\/|\/a\/|\/)(.*?)(?:\/.*|\.|$)"
        m = re.search(regex, img_url, flags=0)
        if m:
            # Get the Imgur image/gallery ID
            id = m.group(1)
            if any(s in img_url for s in ('/a/', '/gallery/')):  # Gallery links
                images = client.get_album_images(id)
                # Only the first image in a gallery is used
                imgur_url = images[0].link
            else:  # Single image
                imgur_url = client.get_image(id).link
            # If the URL is a GIFV or MP4 link, change it to the GIF version
            file_extension = os.path.splitext(imgur_url)[-1].lower()
            if (file_extension == '.gifv'):
                file_extension = file_extension.replace('.gifv', '.gif')
                imgur_url = imgur_url.replace('.gifv', '.gif')
            elif (file_extension == '.mp4'):
                file_extension = file_extension.replace('.mp4', '.gif')
                imgur_url = imgur_url.replace('.mp4', '.gif')
            # Download the image
            file_path = IMAGE_DIR + '/' + id + file_extension
            print('[ OK ] Downloading Imgur image at URL ' +
                  imgur_url + ' to ' + file_path)
            imgur_file = save_file(imgur_url, file_path)
            # Imgur will sometimes return a single-frame thumbnail instead of a GIF, so we need to check for this
            if (file_extension == '.gif'):
                # Open the file using the Pillow library
                img = Image.open(imgur_file)
                # Get the MIME type
                mime = Image.MIME[img.format]
                if (mime == 'image/gif'):
                    # Image is indeed a GIF, so it can be posted
                    img.close()
                    return [imgur_file]
                else:
                    # Image is not actually a GIF, so don't post it
                    print(
                        '[WARN] Imgur has not processed a GIF version of this link, so it can not be posted to Twitter')
                    img.close()
                    # Delete the image
                    try:
                        os.remove(imgur_file)
                    except BaseException as e:
                        print('[EROR] Error while deleting media file:', str(e))
                    return
            else:
                return [imgur_file]
        else:
            print(
                '[EROR] Could not identify Imgur image/gallery ID in this URL:', img_url)
            return
    elif ('gfycat.com' in img_url):  # Gfycat
        try:
            gfycat_name = os.path.basename(urllib.parse.urlsplit(img_url).path)
            client = GfycatClient()
            gfycat_info = client.query_gfy(gfycat_name)
        except BaseException as e:
            print('[EROR] Error downloading Gfycat link:', str(e))
            return
        # Download the 2MB version because Tweepy has a 3MB upload limit for GIFs
        gfycat_url = gfycat_info['gfyItem']['max2mbGif']
        file_path = IMAGE_DIR + '/' + gfycat_name + '.gif'
        print('[ OK ] Downloading Gfycat at URL ' +
              gfycat_url + ' to ' + file_path)
        gfycat_file = save_file(gfycat_url, file_path)
        return [gfycat_file]
    elif ('giphy.com' in img_url):  # Giphy
        # Working demo of regex: https://regex101.com/r/o8m1kA/2
        regex = r"https?://((?:.*)giphy\.com/media/|giphy.com/gifs/|i.giphy.com/)(.*-)?(\w+)(/|\n)"
        m = re.search(regex, img_url, flags=0)
        if m:
            # Get the Giphy ID
            id = m.group(3)
            # Download the 2MB version because Tweepy has a 3MB upload limit for GIFs
            giphy_url = 'https://media.giphy.com/media/' + id + '/giphy-downsized.gif'
            file_path = IMAGE_DIR + '/' + id + '-downsized.gif'
            print('[ OK ] Downloading Giphy at URL ' +
                  giphy_url + ' to ' + file_path)
            giphy_file = save_file(giphy_url, file_path)
            # Check the hash to make sure it's not a GIF saying "This content is not available"
            # More info: https://github.com/corbindavenport/tootbot/issues/8
            hash = hashlib.md5(file_as_bytes(
                open(giphy_file, 'rb'))).hexdigest()
            if (hash == '59a41d58693283c72d9da8ae0561e4e5'):
                print(
                    '[WARN] Giphy has not processed a 2MB GIF version of this link, so it can not be posted to Twitter')
                return
            else:
                return [giphy_file]
        else:
            print('[EROR] Could not identify Giphy ID in this URL:', img_url)
            return
    else:
        # Check if URL is an image, based on the MIME type
        image_formats = ('image/png', 'image/jpeg', 'image/gif', 'image/webp')
        img_site = urlopen(img_url)
        meta = img_site.info()
        if meta["content-type"] in image_formats:
            # URL appears to be an image, so download it
            file_name = os.path.basename(urllib.parse.urlsplit(img_url).path)
            file_path = IMAGE_DIR + '/' + file_name
            print('[ OK ] Downloading file at URL ' +
                  img_url + ' to ' + file_path)
            try:
                img = save_file(img_url, file_path)
                return [img]
            except BaseException as e:
                print('[EROR] Error while downloading image:', str(e))
                return
        else:
            print('[EROR] URL does not point to a valid image file')
            return
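The branches in this example and in Example #11 rely on a save_file helper that isn't shown. A minimal sketch of what such a helper might look like, assuming it streams the response to disk and returns the saved path (this implementation is an assumption, not the author's):

import requests


def save_file(img_url, file_path):
    # Hypothetical stand-in: stream the remote file to disk and return
    # the local path, as the callers above expect.
    response = requests.get(img_url, stream=True)
    response.raise_for_status()
    with open(file_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)
    return file_path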
Example #14
class imgur(commands.Cog):
    def __init__(self, bot):
        self.bot = bot
        self.clientID = bot.config.get('imgur_client_id')
        self.secretID = bot.config.get('imgur_client_secret')
        self.imgur_client = ImgurClient(self.clientID, self.secretID)

    @is_admin()
    @commands.command(aliases=['addalbum', 'aa'])
    async def album(self, ctx, link: str = None, *, album_name: str = None):
        """addalbum [album link] [album name] - Adds an album, link, and name.
        ex; .addalbum https://imgur.com/gallery/MnIjj3n a phone
        and 'pickone a phone' would call this album.
        """
        if not link or not album_name:
            await ctx.send(
                'Please include a link to the album and a name for the album.')
            return

        possible_links = [
            'https://imgur.com/gallery/', 'https://imgur.com/a/'
        ]  #leaving this for additions later
        if not any(x in link for x in possible_links):
            await ctx.send("That doesn't look like a valid link.")

        else:
            album_name = album_name.lower()
            fetch_albums = await self.bot.fetch.all(
                f"SELECT * FROM Albums WHERE GuildID=?", (ctx.guild.id, ))
            fetch_album_names = list([album[2] for album in fetch_albums
                                      ]) if fetch_albums else []
            if album_name not in fetch_album_names:
                await self.bot.db.execute(
                    f"INSERT INTO Albums(GuildID, AlbumName, AlbumLink) VALUES (?, ?, ?)",
                    (
                        ctx.guild.id,
                        album_name,
                        link,
                    ))
                await self.bot.db.commit()
                await ctx.send(f'"{album_name}" has been added!')
            else:
                await ctx.send(f'"{album_name}" already exists')

    @is_admin()
    @commands.command(aliases=['delalbum', 'remalbum', 'da', 'ra'])
    async def deletealbum(self, ctx, *, album_name: str = None):
        """
        deletealbum [album name] - Deletes an album, name.
        ex; .deletealbum a phone
        """
        if not album_name:
            await ctx.send('Please provide an album name.')

        if album_name:
            album_name = album_name.lower()
            fetch_album = await self.bot.fetch.one(
                f"SELECT * FROM Albums WHERE GuildID=? AND AlbumName=?",
                (ctx.guild.id, album_name))
            if fetch_album:
                await self.bot.db.execute(
                    f"DELETE FROM Albums WHERE GuildID=? And AlbumName=?", (
                        ctx.guild.id,
                        album_name,
                    ))
                await self.bot.db.commit()
                await ctx.send(f'Removed album "{album_name}"')

            else:
                await ctx.send(
                    f'Couldn\'t find an album by the name of "{album_name}"')

    @commands.command(aliases=['p1', 'po', 'pick'])
    async def pickone(self, ctx, *, album_name: str = None):
        """
        pickone (Optional album name) - picks a random image from the album.
        ex; .pickone a phone
        If only one album exists you do not provide an album name.
        """
        grab_content_title_config = await self.bot.fetch.one(
            f"SELECT Content, Title FROM GuildConfig WHERE ID=?",
            (ctx.guild.id, ))
        content = grab_content_title_config[0]
        title = grab_content_title_config[1]
        if content is None and title is None:
            content = 'You asked me to pick a picture...'
            title = 'I Chose...'

        if album_name:
            album_name = album_name.lower()
            fetch_album = await self.bot.fetch.one(
                f"SELECT * FROM Albums WHERE GuildID=? AND AlbumName=?", (
                    ctx.guild.id,
                    album_name,
                ))
            if not fetch_album:
                return await ctx.send("Couldn't find an album by that name")

            if len(fetch_album) == 0:
                return await ctx.send(
                    'You should probably add an album first..')
            imgur_link = fetch_album[3]

        if not album_name:
            fetch_albums = await self.bot.fetch.all(
                f"SELECT AlbumName, AlbumLink FROM Albums WHERE GuildID=?",
                (ctx.guild.id, ))
            if not fetch_albums:
                return await ctx.send("Might want to add an album first!")

            if len(fetch_albums) >= 2:
                return await ctx.send(
                    'Seems you forgot to provide an album name!')
            imgur_link = fetch_albums[0][1]

        try:
            await ctx.message.add_reaction(
                discord.utils.get(self.bot.emojis, name='check'))
        except:
            pass

        try:
            tail = imgur_link.split('/')[4]
            the_list = list(
                item.link for item in self.imgur_client.get_album_images(tail))
            item = random.choice(the_list)
            item_id = item.split('/')[3][0:-4]
            if title in ['album title', 'Album Title']:
                title = self.imgur_client.get_album(tail).title
            if content in ['description', 'Description']:
                content = self.imgur_client.get_image(item_id).description
            if (self.imgur_client.get_image(item_id).size * 1e-6) > 8.0:
                return await ctx.send(
                    f"{self.imgur_client.get_image(item_id).link} was too big to send."
                )
            get_stream_status = await self.bot.fetch.one(
                f"SELECT Stream FROM GuildConfig WHERE ID=?", (ctx.guild.id, ))
            stream = get_stream_status[0]
            async with self.bot.aiohttp.get(item) as resp:
                link = await resp.read()
                if item.endswith('.gif'):
                    f = discord.File(io.BytesIO(link), filename="image.gif")
                    e = discord.Embed(
                        title=title,
                        colour=discord.Colour(0x278d89),
                    )
                    if stream:
                        e.set_image(url=f'''attachment://image.gif''')
                    else:
                        e.set_image(
                            url=f'{self.imgur_client.get_image(item_id).link}')
                else:
                    f = discord.File(io.BytesIO(link), filename="image.png")
                    e = discord.Embed(
                        title=title,
                        colour=discord.Colour(0x278d89),
                    )
                    if stream:
                        e.set_image(url=f'''attachment://image.png''')
                    else:
                        e.set_image(
                            url=f'{self.imgur_client.get_image(item_id).link}')

                e.set_footer(
                    text=
                    f'storage is currently: {"link" if not stream else "stream"} \n'
                    f'if images aren\'t showing up, try toggling this with .stream'
                )
                if stream:
                    await ctx.send(file=f, embed=e, content=content)

                if not stream:
                    await ctx.send(embed=e, content=content)

        except Exception as e:
            print(
                f'{e}, tail: {tail if tail else None} link: {imgur_link}, item: {item if item else None}'
            )
            if isinstance(e, ImgurClientError):
                print(f'{e.error_message}')
                return await ctx.send(f'{e.error_message}')
            elif not isinstance(e, ImgurClientError):
                return await ctx.send(
                    f'There was an issue processing this command.\nDebug: `{e}`'
                )

    @commands.command(aliases=['al', 'list'])
    async def albumlist(self, ctx):
        """albumlist - displays all currently added albums by name.

        """
        fetch_albums = await self.bot.fetch.all(
            f"SELECT * FROM Albums WHERE GuildID=?", (ctx.guild.id, ))
        if fetch_albums:
            list_album_names = ", ".join(
                list([album[2] for album in fetch_albums]))
            await ctx.send(f"{list_album_names}")
        else:
            await ctx.send("It doesn't seem that you have added an album.")

    @is_admin()
    @commands.command(aliases=['adda', 'admin'])
    async def addadmin(self, ctx, member: discord.Member = None):
        """addadmin [user name] - Adds an admin
        ex; .addadmin @ProbsJustin#0001
        You can attempt to use just a string name; eg ProbsJustin but recommend a mention.
        """
        if not member:
            await ctx.send('You should probably include a member.')
            return

        else:
            check_if_pwr_user = await self.bot.fetch.one(
                f"SELECT * FROM Permissions WHERE MemberID=? AND GuildID=?", (
                    member.id,  # the mentioned member, not ctx.author
                    ctx.guild.id,
                ))
            if not check_if_pwr_user:
                await self.bot.db.execute(
                    f"INSERT INTO Permissions(MemberID, GuildID) VALUES (?, ?)",
                    (
                        member.id,
                        ctx.guild.id,
                    ))
                await self.bot.db.commit()
                await ctx.send(f'{member.mention} has been added as an admin.')
            else:
                await ctx.send('That user is already an admin!')

    @is_admin()
    @commands.command(aliases=['remadmin', 'deladmin', 'deleteadmin'])
    async def removeadmin(self, ctx, member: discord.Member = None):
        """removeadmin [user name] - Remove an admin
        ex; .removeadmin @ProbsJustin#0001
        You can attempt to use just a string name; eg ProbsJustin but recommend a mention.
        """
        if not member:
            await ctx.send('You should probably include a member.')
            return
        else:
            chck_if_usr_is_admin = await self.bot.fetch.one(
                f"SELECT * FROM Permissions WHERE MemberID=? AND GuildID=?", (
                    member.id,  # the mentioned member, not ctx.author
                    ctx.guild.id,
                ))
            if chck_if_usr_is_admin:
                await self.bot.db.execute(
                    f"DELETE FROM Permissions WHERE MemberID=? AND GuildID=?",
                    (
                        member.id,
                        ctx.guild.id,
                    ))
                await self.bot.db.commit()
                await ctx.send(
                    f'{member.mention} has been removed as an admin.')
            else:
                await ctx.send("I couldn't find that user in the admin list.")

    @addadmin.error
    @removeadmin.error
    async def member_not_found_error(self, ctx,
                                     exception):  #so this is a thing.
        if not isinstance(exception, NotAuthorized):
            await ctx.send('Member not found! Try mentioning them instead.')

    @is_admin()
    @commands.command()
    async def set(self, ctx, content_title: str = None, *, message: str = ''):
        """set [content/title] [name] - Change the title/content from "I Chose..." "you asked.." """
        editable_args = ['content', 'title']
        if not content_title:
            await ctx.send(
                f"Please provide either {' or '.join(editable_args)}.")
            return
        content_title = content_title.lower()
        if content_title in editable_args:
            if content_title == "title":
                await self.bot.db.execute(
                    f"UPDATE GuildConfig SET Title=? WHERE ID=?", (
                        message,
                        ctx.guild.id,
                    ))
            if content_title == 'content':
                await self.bot.db.execute(
                    f"UPDATE GuildConfig SET Content=? WHERE ID=?", (
                        message,
                        ctx.guild.id,
                    ))

            await self.bot.db.commit()
            await ctx.send(f'{content_title.lower()} updated.')

        else:
            await ctx.send("Invalid parameters.")

    @is_admin()
    @commands.command()
    async def stream(self, ctx):
        """
        Toggles how the images are sent to discord, if images aren't showing up try toggling this.
        """
        get_stream_status = await self.bot.fetch.one(
            f"SELECT Stream FROM GuildConfig WHERE ID=?", (ctx.guild.id, ))
        update_stream_status = await self.bot.db.execute(
            f"UPDATE GuildConfig SET Stream=? WHERE ID=?",
            (not get_stream_status[0], ctx.guild.id))
        await self.bot.db.commit()
        await ctx.send(
            f"Streaming turned {'on' if not get_stream_status[0] else 'off'}")
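The core of pickone without the bot plumbing; a minimal sketch, assuming an album link of the https://imgur.com/a/... form and a configured client:

import random

from imgurpython import ImgurClient


def random_album_image(client, album_link):
    # 'https://imgur.com/a/MnIjj3n' -> 'MnIjj3n' (index 4 after split,
    # matching the tail extraction used in pickone).
    tail = album_link.split('/')[4]
    links = [img.link for img in client.get_album_images(tail)]
    return random.choice(links)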
Example #15
class imgur(commands.Cog):
    def __init__(self, bot):
        self.bot = bot
        self.clientID = bot.config.data.get('config').get('imgur_client_id')
        self.secretID = bot.config.data.get('config').get(
            'imgur_client_secret')
        self.imgur_client = ImgurClient(self.clientID, self.secretID)

    async def fetch_one(self, arg):
        get = await self.bot.db.execute(arg)
        results = await get.fetchone()
        return results

    async def fetch_all(self, arg):
        get = await self.bot.db.execute(arg)
        results = await get.fetchall()
        return results

    @is_admin()
    @commands.command(aliases=['addalbum', 'aa'])
    async def album(self, ctx, link: str = None, *, album_name: str = None):
        """addalbum [album link] [album name] - Adds an album, link, and name.
        ex; .addalbum https://imgur.com/gallery/MnIjj3n a phone
        and 'pickone a phone' would call this album.
        """
        if not link or not album_name:
            await ctx.send(
                'Please include a link to the album and a name for the album.')
            return

        possible_links = [
            'https://imgur.com/gallery/', 'https://imgur.com/a/'
        ]  #leaving this for additions later
        if not any(x in link for x in possible_links):
            await ctx.send("That doesn't look like a valid link.")

        else:
            album_name = album_name.lower()
            get_albums = await imgur.fetch_all(
                self,
                f'SELECT AlbumLink FROM GuildAlbums WHERE GuildID={ctx.guild.id}'
            )
            if link not in list(albumlink[0] for albumlink in get_albums):
                await self.bot.db.execute(
                    f"INSERT INTO GuildAlbums(GuildID, AlbumLink, AlbumName) "
                    f"VALUES (?, ?, ?)", (ctx.guild.id, link, album_name))
                await self.bot.db.commit()
                await ctx.send(f'"{album_name}" has been added!')
            else:
                albums_name = await imgur.fetch_one(
                    self,
                    f'SELECT AlbumName FROM GuildAlbums WHERE AlbumLink="{link}"'
                )
                await ctx.send(f'{link} already exists as {albums_name[0]}.')

    @is_admin()
    @commands.command(aliases=['delalbum', 'remalbum', 'da', 'ra'])
    async def deletealbum(self, ctx, *, album_name: str = None):
        """deletealbum [album name] - Deletes an album, name.
        ex; .deletealbum a phone
        """
        if not album_name:
            await ctx.send('Please provide an album name.')
            return
        get_albums = await imgur.fetch_all(
            self,
            f'SELECT AlbumName FROM GuildAlbums WHERE GuildID={ctx.guild.id}')
        if album_name.lower() in list(albumnames[0]
                                      for albumnames in get_albums):
            await self.bot.db.execute(
                f'DELETE FROM GuildAlbums WHERE GuildID=? and AlbumName=?',
                (ctx.guild.id, album_name.lower()))
            await self.bot.db.commit()
            await ctx.send(f'Removed album "{album_name}"')

        else:
            await ctx.send(f'Couldn\'t find an album by the name of "{album_name}"')

    @commands.command(aliases=['p1', 'po', 'pick'])
    async def pickone(self, ctx, *, album_name: str = None):
        """pickone (Optional album name) - picks a random image from the album.
        ex; .pickone a phone
        If only one album exists you do not provide an album name.
        """
        album_names = await imgur.fetch_all(
            self,
            f'SELECT AlbumName FROM GuildAlbums WHERE GuildID={ctx.guild.id}')
        if not album_names:
            await ctx.send('You should probably add an album first..')
            return

        content = await imgur.fetch_one(
            self, f'SELECT Content FROM GuildConfig WHERE ID={ctx.guild.id}')
        title = await imgur.fetch_one(
            self, f'SELECT Title FROM GuildConfig WHERE ID={ctx.guild.id}')
        await ctx.message.add_reaction(
            discord.utils.get(self.bot.emojis, name='check'))
        content = 'You asked me to pick a picture...' if not content[
            0] else content[0]
        title = 'I Chose...' if not title[0] else title[0]
        if album_name:
            if album_name.lower() in list(albumnames[0]
                                          for albumnames in album_names):
                album_link = await imgur.fetch_one(
                    self, f'SELECT AlbumLink FROM GuildAlbums WHERE '
                    f'AlbumName="{album_name.lower()}" and GuildID={ctx.guild.id}'
                )
                tail = album_link[0].split('/')[4]
                the_list = list(
                    item.link
                    for item in self.imgur_client.get_album_images(tail))
            else:
                await ctx.send(
                    f'I couldn\'t find an album by the name of "{album_name}"')
                return

        if not album_name:
            if len(album_names) >= 2:
                await ctx.send('Seems you forgot to provide an album name!')
                return
            if len(album_names) == 1:
                album_link = await imgur.fetch_one(
                    self, f'SELECT AlbumLink FROM GuildAlbums WHERE '
                    f'AlbumName="{album_names[0][0]}" and GuildID={ctx.guild.id}'
                )
                tail = album_link[0].split('/')[4]
                the_list = list(
                    item.link
                    for item in self.imgur_client.get_album_images(tail))
        try:
            item = random.choice(the_list)
            item_id = item.split('/')[3][0:-4]
            if title in ['album title', 'Album Title']:
                title = self.imgur_client.get_album(tail).title
            if content in ['description', 'Description']:
                content = self.imgur_client.get_image(item_id).description
            async with self.bot.aiohttp.get(item) as resp:
                link = await resp.read()
                if item.endswith('.gif'):
                    f = discord.File(io.BytesIO(link), filename="image.gif")
                    e = discord.Embed(title=title,
                                      colour=discord.Colour(0x278d89))
                    e.set_image(url=f'''attachment://image.gif''')
                else:
                    f = discord.File(io.BytesIO(link), filename="image.png")
                    e = discord.Embed(title=title,
                                      colour=discord.Colour(0x278d89))
                    e.set_image(url=f'''attachment://image.png''')
                await ctx.send(file=f, embed=e, content=content)

        except Exception as e:
            if isinstance(e, ImgurClientError):
                print(f'{e.error_message}')
                await ctx.send(f'{e.error_message}')
            elif not isinstance(e, ImgurClientError):
                await ctx.send(
                    f'There was an issue processing this command. {e}')

    @commands.command(aliases=['al', 'list'])
    async def albumlist(self, ctx):
        """albumlist - displays all currently added albums by name.
        """
        album_names = await imgur.fetch_all(
            self,
            f'SELECT AlbumName FROM GuildAlbums WHERE GuildID={ctx.guild.id}')
        if len(album_names) != 0:
            await ctx.send(
                f"The list of albums I see are: {', '.join(list(an[0] for an in album_names))}."
            )
        else:
            await ctx.send("It doesn't seem that you have added an album.")

    @is_admin()
    @commands.command(aliases=['adda', 'admin'])
    async def addadmin(self, ctx, member: discord.Member = None):
        """addadmin [user name] - Adds an admin
        ex; .addadmin @ProbsJustin#0001
        You can attempt to use just a string name; eg ProbsJustin but recommend a mention.
        """
        if not member:
            await ctx.send('You should probably include a member.')
            return
        else:
            get_admins = await imgur.fetch_all(
                self,
                f'SELECT AdminID FROM GuildAdmins WHERE GuildID={ctx.guild.id}'
            )
            if member.id not in list(admin[0] for admin in get_admins):
                await self.bot.db.execute(
                    f"INSERT INTO GuildAdmins(GuildID, AdminID) VALUES (?, ?)",
                    (ctx.guild.id, member.id))
                await self.bot.db.commit()
                await ctx.send(f'{member.mention} has been added as an admin.')
            else:
                await ctx.send('That user is already an admin!')

    @is_admin()
    @commands.command(aliases=['remadmin', 'deladmin', 'deleteadmin'])
    async def removeadmin(self, ctx, member: discord.Member = None):
        """removeadmin [user name] - Remove an admin
        ex; .removeadmin @ProbsJustin#0001
        You can attempt to use just a string name; eg ProbsJustin but recommend a mention.
        """
        if not member:
            await ctx.send('You should probably include a member.')
            return
        else:
            get_admins = await imgur.fetch_all(
                self,
                f'SELECT AdminID FROM GuildAdmins WHERE GuildID={ctx.guild.id}'
            )
            if member.id in list(admin[0] for admin in get_admins):
                await self.bot.db.execute(
                    f'DELETE FROM GuildAdmins WHERE GuildID=? and AdminID=?',
                    (ctx.guild.id, member.id))
                await self.bot.db.commit()
                await ctx.send(
                    f'{member.mention} has been removed as an admin.')
            else:
                await ctx.send("I couldn't find that user in the admin list.")

    @addadmin.error
    @removeadmin.error
    async def member_not_found_error(self, ctx,
                                     exception):  #so this is a thing.
        if not isinstance(exception, NotAuthorized):
            await ctx.send('Member not found! Try mentioning them instead.')

    @is_admin()
    @commands.command()
    async def set(self, ctx, content_title: str = None, *, message: str = ''):
        """set [content/title] [name] - Change the title/content from "I Chose..." "you asked.." """
        editable_args = ['content', 'title']
        if not content_title:
            await ctx.send(
                f"Please provide either {' or '.join(editable_args)}.")
            return

        if content_title.lower() in editable_args:
            # The column name is safe to interpolate because it was just
            # validated against editable_args; the message is bound as a
            # parameter to avoid SQL injection.
            await self.bot.db.execute(
                f'UPDATE GuildConfig SET {content_title.title()}=? WHERE ID=?',
                (message, ctx.guild.id))
            await self.bot.db.commit()
            await ctx.send(f'{content_title.lower()} updated.')
        else:
            await ctx.send("Invalid parameters.")
Exemplo n.º 16
0
    if not os.path.isfile(toDownload[i].id + '.mp4'):
        try:
            ydl_opts['outtmpl'] = toDownload[i].id + '.mp4'
            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                ydl.download([toDownload[i].url])
        except Exception as e:
            Unable.write(str(e) + ' ' + toDownload[i].url +
                         ' http://www.reddit.com/' + toDownload[i].id + '\n')
    os.chdir(base)
    print('imgur gifv: ' + toDownload[i].id)
else:
    imgurCode = urlparse(toDownload[i].url).path.split('/')[-1].split('.')[0]
    image = client.get_image(imgurCode)
    if not imgCheck:
        ext = ('.mp4' if hasattr(image, 'mp4')
               else '.' + urlparse(image.link).path.split('.')[-1])
        toDownload[i].url = 'https://i.imgur.com/' + imgurCode + ext
        imgur_path = (mypath + '/' + toDownload[i].id + '.' +
                      urlparse(toDownload[i].url).path.split('.')[-1])
        download(toDownload[i].url, imgur_path)
        print('Basic imgur file, missing extension: ' + toDownload[i].id)
    else:
        imgur_path = (mypath + '/' + ''.join(filename(toDownload[i].id)) + '.' +
                      urlparse(image.link).path.split('.')[-1])
        download(toDownload[i].url, imgur_path)
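
# Standalone sketch of the extension handling above: given an imgur API image
# object and its id, prefer the .mp4 rendition when the object has one, else
# fall back to the extension of the direct link (the function name here is an
# assumption for illustration):
from urllib.parse import urlparse

def direct_imgur_url(image, imgur_code):
    ext = ('.mp4' if hasattr(image, 'mp4')
           else '.' + urlparse(image.link).path.split('.')[-1])
    return 'https://i.imgur.com/' + imgur_code + ext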
Exemplo n.º 17
0
def get_url(submission, mp4_instead_gif=True):
    '''
    Return (TYPE, URL, EXTENSION),
    e.g. ('img', 'http://example.com/pic.png', 'png').
    '''
    def what_is_inside(url):
        return requests.head(url).headers.get('Content-Type', '')

    url = submission.url
    url_content = what_is_inside(url)

    if (CONTENT_JPEG == url_content or CONTENT_PNG == url_content):
        return TYPE_IMG, url, url_content.split('/')[1]

    if CONTENT_GIF in url_content:
        if url.endswith('.gif') and mp4_instead_gif:
            # Let's try to find .mp4 file.
            url_mp4 = url[:-4] + '.mp4'
            if CONTENT_MP4 == what_is_inside(url_mp4):
                return TYPE_GIF, url_mp4, 'mp4'
        return TYPE_GIF, url, 'gif'

    if url.endswith('.gifv'):
        if mp4_instead_gif:
            url_mp4 = url[:-5] + '.mp4'
            if CONTENT_MP4 == what_is_inside(url_mp4):
                return TYPE_GIF, url_mp4, 'mp4'
        if CONTENT_GIF in what_is_inside(url[0:-1]):
            return TYPE_GIF, url[0:-1], 'gif'

    if submission.is_self is True:
        # Self submission with text
        return 'text', None, None

    if urlparse(url).netloc == 'imgur.com':
        # Imgur
        with open('imgur.yml') as f:
            imgur_config = yaml.safe_load(f)
        imgur_client = ImgurClient(imgur_config['client_id'],
                                   imgur_config['client_secret'])
        path_parts = urlparse(url).path.split('/')
        if path_parts[1] == 'gallery':
            # TODO: gallery handling
            return 'other', url, None
        elif path_parts[1] == 'topic':
            # TODO: topic handling
            return 'other', url, None
        elif path_parts[1] == 'a':
            # An imgur album
            album = imgur_client.get_album(path_parts[2])
            story = {}
            for number, img in enumerate(album.images, start=1):
                what = TYPE_IMG
                link = img['link']
                ext = img['type'].split('/')[1]
                if img['animated']:
                    what = TYPE_GIF
                    link = img['mp4'] if mp4_instead_gif else img['gifv'][:-1]
                    ext = 'mp4' if mp4_instead_gif else 'gif'
                story[number] = {'url': link, 'what': what, 'ext': ext}
            return 'album', story, None
        else:
            # Just imgur img
            img = imgur_client.get_image(path_parts[1].split('.')[0])
            if not img.animated:
                return TYPE_IMG, img.link, img.type.split('/')[1]
            else:
                if mp4_instead_gif:
                    return TYPE_GIF, img.mp4, 'mp4'
                else:
                    # return 'gif', img.link, 'gif'
                    return TYPE_GIF, img.gifv[:-1], 'gif'
    else:
        return 'other', url, None
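
# Sketch of how get_url() might be driven from PRAW (the reddit instance,
# subreddit name, and download() helper are assumptions for illustration):
#
#     for submission in reddit.subreddit('pics').hot(limit=10):
#         what, url, ext = get_url(submission)
#         if what == TYPE_IMG:
#             download(url, submission.id + '.' + ext)
#         elif what == 'album':
#             for num, img in sorted(url.items()):
#                 download(img['url'], '%s_%d.%s' % (submission.id, num, img['ext']))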
Exemplo n.º 18
0
from imgurpython import ImgurClient

import io
import urllib.request
import webbrowser

import requests
from PIL import Image

client_id = 'c2058ecfc76d75f'
client_secret = '5fe636c3e7a032b56b2120fe82eb3071c790c5ff'

client = ImgurClient(client_id, client_secret)

# Example request from album galleries
#items = client.get_album_images("f0H0u") #This is doggos
items2 = client.get_album_images("XqBdP")  #This is for not doggos

item = client.get_image("nhTyj4d")
webbrowser.open_new(item.link)

with urllib.request.urlopen(item.link) as url:
    f = io.BytesIO(url.read())

img = Image.open(f)

img.show()


def image_grayscale(picture):
    new_list = []
    for p in picture.getdata():
        # Rec. 601 luma weights (0.299, 0.587, 0.114); the blue term and the
        # return below are an assumed completion of this truncated example
        new_red = int(p[0] * 0.299)
        new_green = int(p[1] * 0.587)
        new_blue = int(p[2] * 0.114)
        new_list.append(new_red + new_green + new_blue)
    return new_list
Exemplo n.º 19
0
class Imgur(Plugin):
    CONFIG_DEFAULTS = {
        'client_id': None,
        'client_secret': None,
    }

    CONFIG_ENVVARS = {
        'client_id': ['IMGUR_CLIENT_ID'],
        'client_secret': ['IMGUR_CLIENT_SECRET'],
    }

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.client = ImgurClient(self.config_get('client_id'),
                                  self.config_get('client_secret'))

    @Plugin.integrate_with('linkinfo')
    def integrate_with_linkinfo(self, linkinfo):
        linkinfo.register_handler(lambda url: url.netloc in ('imgur.com', 'i.imgur.com'),
                                  self._linkinfo_handler, exclusive=True)

    def _linkinfo_handler(self, url, match):
        # Split up endpoint and ID: /<image>, /a/<album> or /gallery/<id>
        kind, _, id = url.path.lstrip('/').rpartition('/')
        # Strip file extension from direct image links
        id = id.partition('.')[0]

        try:
            if kind == '':
                nsfw, title = self._format_image(self.client.get_image(id))
            elif kind == 'a':
                nsfw, title = self._format_album(self.client.get_album(id), url.fragment)
            elif kind == 'gallery':
                data = self.client.gallery_item(id)
                if data.is_album:
                    nsfw, title = self._format_album(data, None)
                else:
                    nsfw, title = self._format_image(data)
            else:
                nsfw, title = False, None
        except ImgurClientError as e:
            return LinkInfoResult(url, str(e), is_error=True)

        if title:
            return LinkInfoResult(url, title, nsfw=nsfw)
        else:
            return None

    @staticmethod
    def _format_image(data):
        title = data.title or ''
        return data.nsfw or 'nsfw' in title.lower(), title

    @staticmethod
    def _format_album(data, image_id):
        title = '{0} ({1})'.format(data.title or 'Untitled album',
                                   pluralize(data.images_count, 'image', 'images'))
        images = {i['id']: i for i in data.images}
        image = images.get(image_id)
        if image and image['title']:
            title += ': ' + image['title']
        return data.nsfw or 'nsfw' in title.lower(), title
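
# Quick check of how the kind/id split in _linkinfo_handler behaves for the
# three imgur URL shapes it distinguishes (the URLs here are illustrative):
from urllib.parse import urlparse

def split_imgur_path(url):
    kind, _, id = urlparse(url).path.lstrip('/').rpartition('/')
    return kind, id.partition('.')[0]

assert split_imgur_path('https://i.imgur.com/nhTyj4d.png') == ('', 'nhTyj4d')
assert split_imgur_path('https://imgur.com/a/XqBdP') == ('a', 'XqBdP')
assert split_imgur_path('https://imgur.com/gallery/abc123') == ('gallery', 'abc123')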
Exemplo n.º 20
0
class SpiffyTitles(callbacks.Plugin):
    """Displays link titles when posted in a channel"""
    threaded = True
    callBefore = ["Web"]
    link_cache = []
    handlers = {}
    wall_clock_timeout = 8
    max_request_retries = 3
    imgur_client = None
    
    def __init__(self, irc):
        self.__parent = super(SpiffyTitles, self)
        self.__parent.__init__(irc)
            
        self.wall_clock_timeout = self.registryValue("wallClockTimeoutInSeconds")
        self.default_handler_enabled = self.registryValue("defaultHandlerEnabled")
        
        self.add_handlers()
    
    def add_handlers(self):
        """
        Adds all handlers
        """
        self.add_youtube_handlers()
        self.add_imdb_handlers()
        self.add_imgur_handlers()
        self.add_coub_handlers()
        self.add_vimeo_handlers()
        self.add_dailymotion_handlers()
        self.add_wikipedia_handlers()
    
    def add_dailymotion_handlers(self):
        self.handlers["www.dailymotion.com"] = self.handler_dailymotion
    
    def add_vimeo_handlers(self):
        self.handlers["vimeo.com"] = self.handler_vimeo
    
    def add_coub_handlers(self):
        self.handlers["coub.com"] = self.handler_coub
    
    def add_wikipedia_handlers(self):
        self.handlers["en.wikipedia.org"] = self.handler_wikipedia

    def handler_dailymotion(self, url, info, channel):
        """
        Handles dailymotion links
        """
        dailymotion_handler_enabled = self.registryValue("dailymotionHandlerEnabled", channel=channel)
        log.debug("SpiffyTitles: calling dailymotion handler for %s" % url)
        title = None
        video_id = None
        
        """ Get video ID """
        if dailymotion_handler_enabled and "/video/" in info.path:
            video_id = info.path.lstrip("/video/").split("_")[0]
            
            if video_id is not None:
                api_url = "https://api.dailymotion.com/video/%s?fields=id,title,owner.screenname,duration,views_total" % video_id
                log.debug("SpiffyTitles: looking up dailymotion info: %s", api_url)
                agent = self.get_user_agent()
                headers = {
                    "User-Agent": agent
                }
                
                request = requests.get(api_url, headers=headers)
                
                ok = request.status_code == requests.codes.ok
                
                if ok:
                    response = json.loads(request.text)
                    
                    if response is not None and "title" in response:
                        video = response
                        dailymotion_template = Template(self.registryValue("dailymotionVideoTitleTemplate", channel=channel))
                        video["views_total"] = "{:,}".format(int(video["views_total"]))
                        video["duration"] = self.get_duration_from_seconds(video["duration"])
                        video["ownerscreenname"] = video["owner.screenname"]
                        
                        title = dailymotion_template.render(video)
                    else:
                        log.debug("SpiffyTitles: received unexpected payload from video: %s" % api_url)
                else:
                    log.error("SpiffyTitles: dailymotion handler returned %s: %s" % (request.status_code, request.text[:200]))
        
        if title is None:
            log.debug("SpiffyTitles: could not get dailymotion info for %s" % url)
            
            return self.handler_default(url, channel)
        else:
            return title
    
    def handler_vimeo(self, url, domain, channel):
        """
        Handles Vimeo links
        """
        vimeo_handler_enabled = self.registryValue("vimeoHandlerEnabled", channel=channel)
        log.debug("SpiffyTitles: calling vimeo handler for %s" % url)
        title = None
        video_id = None
        
        """ Get video ID """
        if vimeo_handler_enabled:
            result = re.search(r'^(https?://)?(www\.)?(vimeo\.com/)?(\d+)', url)

            if result is not None:
                video_id = result.group(4)
            
            if video_id is not None:
                api_url = "https://vimeo.com/api/v2/video/%s.json" % video_id
                log.debug("SpiffyTitles: looking up vimeo info: %s", api_url)
                agent = self.get_user_agent()
                headers = {
                    "User-Agent": agent
                }
                
                request = requests.get(api_url, headers=headers)
                
                ok = request.status_code == requests.codes.ok
                
                if ok:
                    response = json.loads(request.text)
                    
                    if response is not None and "title" in response[0]:
                        video = response[0]
                        vimeo_template = Template(self.registryValue("vimeoTitleTemplate", channel=channel))
                        
                        """ 
                        Some videos do not have this information available
                        """
                        if "stats_number_of_plays" in video:
                            video["stats_number_of_plays"] = "{:,}".format(int(video["stats_number_of_plays"]))
                        else:
                            video["stats_number_of_plays"] = 0
                        
                        if "stats_number_of_comments" in video:
                            video["stats_number_of_comments"] = "{:,}".format(int(video["stats_number_of_comments"]))
                        else:
                            video["stats_number_of_comments"] = 0
                        
                        video["duration"] = self.get_duration_from_seconds(video["duration"])
                        
                        title = vimeo_template.render(video)
                    else:
                        log.debug("SpiffyTitles: received unexpected payload from video: %s" % api_url)
                else:
                    log.error("SpiffyTitles: vimeo handler returned %s: %s" % (request.status_code, request.text[:200]))
        
        if title is None:
            log.debug("SpiffyTitles: could not get vimeo info for %s" % url)
            
            return self.handler_default(url, channel)
        else:
            return title
    
    def handler_coub(self, url, domain, channel):
        """
        Handles coub.com links
        """
        coub_handler_enabled = self.registryValue("coubHandlerEnabled", channel=channel)
        log.debug("SpiffyTitles: calling coub handler for %s" % url)
        title = None
        
        """ Get video ID """
        if coub_handler_enabled and "/view/" in url:
            video_id = url.split("/view/")[1]
            
            """ Remove any query strings """
            if "?" in video_id:
                video_id = video_id.split("?")[0]
                
            api_url = "http://coub.com/api/v2/coubs/%s" % video_id
            agent = self.get_user_agent()
            headers = {
                "User-Agent": agent
            }
            
            request = requests.get(api_url, headers=headers)
            
            ok = request.status_code == requests.codes.ok
            
            if ok:
                response = json.loads(request.text)
                
                if response:
                    video = response
                    coub_template = Template(self.registryValue("coubTemplate"))
                    
                    video["likes_count"] = "{:,}".format(int(video["likes_count"]))
                    video["recoubs_count"] = "{:,}".format(int(video["recoubs_count"]))
                    video["views_count"] = "{:,}".format(int(video["views_count"]))
                    
                    title = coub_template.render(video)
            else:
                log.error("SpiffyTitles: coub handler returned %s: %s" % (request.status_code, request.text[:200]))
        
        if title is None:
            if coub_handler_enabled:
                log.debug("SpiffyTitles: %s does not appear to be a video link!" % url)
            
            return self.handler_default(url, channel)
        else:
            return title
    
    def add_imgur_handlers(self):
        # Images mostly
        self.handlers["i.imgur.com"] = self.handler_imgur_image
        
        # Albums, galleries, etc
        self.handlers["imgur.com"] = self.handler_imgur
    
    def initialize_imgur_client(self, channel):
        """
        Check if imgur client id or secret are set, and if so initialize
        imgur API client
        """
        if self.imgur_client is None:
            imgur_client_id = self.registryValue("imgurClientID")
            imgur_client_secret = self.registryValue("imgurClientSecret")
            imgur_handler_enabled = self.registryValue("imgurHandlerEnabled", channel=channel)
            
            if imgur_handler_enabled and imgur_client_id and imgur_client_secret:
                log.debug("SpiffyTitles: enabling imgur handler")

                # Initialize API client
                try:
                    from imgurpython import ImgurClient
                    from imgurpython.helpers.error import ImgurClientError
                    
                    try:
                        self.imgur_client = ImgurClient(imgur_client_id, imgur_client_secret)                    
                    except ImgurClientError as e:
                        log.error("SpiffyTitles: imgur client error: %s" % (e.error_message))                    
                except ImportError as e:
                    log.error("SpiffyTitles ImportError: %s" % str(e))
            else:
                log.debug("SpiffyTitles: imgur handler disabled or empty client id/secret")
    
    def doPrivmsg(self, irc, msg):
        """
        Observe each channel message and look for links
        """
        channel = msg.args[0]
        ignore_actions = self.registryValue("ignoreActionLinks", channel=msg.args[0])
        is_channel = irc.isChannel(channel)
        is_ctcp = ircmsgs.isCtcp(msg)        
        message = msg.args[1]
        title = None
        bot_nick = irc.nick
        origin_nick = msg.nick
        is_message_from_self = origin_nick.lower() == bot_nick.lower()
        requires_capability = len(str(self.registryValue("requireCapability", channel=msg.args[0]))) > 0

        if is_message_from_self:
            return

        """
        Check if we require a capability to acknowledge this link
        """
        if requires_capability:
            user_has_capability = self.user_has_capability(msg)

            if not user_has_capability:
                return

        """
        Configuration option determines whether we should
        ignore links that appear within an action
        """
        if is_ctcp and ignore_actions:
            return

        if is_channel:
            channel_is_allowed = self.is_channel_allowed(channel)            
            url = self.get_url_from_message(message)
            ignore_match = self.message_matches_ignore_pattern(message)
            
            if ignore_match:
                log.debug("SpiffyTitles: ignoring message due to linkMessagePattern match")
                return
            
            if url:
                # Check if channel is allowed based on white/black list restrictions
                if not channel_is_allowed:
                    log.debug("SpiffyTitles: not responding to link in %s due to black/white list restrictions" % (channel))
                    return
                
                info = urlparse(url)
                domain = info.netloc
                is_ignored = self.is_ignored_domain(domain)
                
                if is_ignored:
                    log.debug("SpiffyTitles: URL ignored due to domain blacklist match: %s" % url)
                    return
                
                is_whitelisted_domain = self.is_whitelisted_domain(domain)
                
                if self.registryValue("whitelistDomainPattern") and not is_whitelisted_domain:
                    log.debug("SpiffyTitles: URL ignored due to domain whitelist mismatch: %s" % url)
                    return
                
                title = self.get_title_by_url(url, channel)
                
                if title is not None and title:
                    ignore_match = self.title_matches_ignore_pattern(title, channel)
                    
                    if ignore_match:
                        return
                    else:
                        irc.sendMsg(ircmsgs.privmsg(channel, title))
                else:
                    if self.default_handler_enabled:
                        log.debug("SpiffyTitles: could not get a title for %s" % (url))
                    else:   
                        log.debug("SpiffyTitles: could not get a title for %s but default handler is disabled" % (url))
    
    def get_title_by_url(self, url, channel):
        """
        Retrieves the title of a website based on the URL provided
        """
        info = urlparse(url)
        domain = info.netloc
        title = None
        
        """
        Check if we have this link cached according to the cache lifetime. If so, serve
        link from the cache instead of calling handlers.
        """
        cached_link = self.get_link_from_cache(url)
        
        if cached_link is not None:                    
            title = cached_link["title"]
        else:
            if domain in self.handlers:
                handler = self.handlers[domain]                        
                title = handler(url, info, channel)
            else:
                if self.default_handler_enabled:
                    title = self.handler_default(url, channel)
        
        if title is not None:
            title = self.get_formatted_title(title, channel)
            
            # Update link cache
            log.debug("SpiffyTitles: caching %s" % (url))
            now = datetime.datetime.now()
            self.link_cache.append({
                "url": url,
                "timestamp": now,
                "title": title
            })
        
        return title

    def t(self, irc, msg, args, query):
        """
        Retrieves title for a URL on demand
        """
        message = msg.args[1]
        channel = msg.args[0]
        url = self.get_url_from_message(message)
        title = None
        error_message = self.registryValue("onDemandTitleError", channel=channel)
        
        try:
            if url:
                title = self.get_title_by_url(query, channel)
        except Exception:
            pass
        
        if title is not None and title:
            irc.sendMsg(ircmsgs.privmsg(channel, title))
        else:
            irc.sendMsg(ircmsgs.privmsg(channel, error_message))
    
    t = wrap(t, ['text'])
    
    def get_link_from_cache(self, url):
        """
        Looks for a URL in the link cache and returns the cached entry if it is
        not stale according to the configured cache lifetime, or None otherwise.
        
        If linkCacheLifetimeInSeconds is 0, the cache is disabled and we can 
        immediately return
        """
        cache_lifetime_in_seconds = int(self.registryValue("linkCacheLifetimeInSeconds"))
        
        if cache_lifetime_in_seconds == 0:
            return
        
        # No cache yet
        if len(self.link_cache) == 0:
            return
        
        cached_link = None
        now = datetime.datetime.now()        
        stale = False
        seconds = 0
        
        for link in self.link_cache:
            if link["url"] == url:
                cached_link = link
                break
        
        # Found link, check timestamp
        if cached_link is not None:
            seconds = (now - cached_link["timestamp"]).total_seconds()
            stale = seconds >= cache_lifetime_in_seconds
        
        if stale:
            log.debug("SpiffyTitles: %s was sent %s seconds ago" % (url, seconds))
        else:
            log.debug("SpiffyTitles: serving link from cache: %s" % (url))
            return cached_link

    def add_imdb_handlers(self):
        """
        Enables meta info about IMDB links through the OMDB API
        """
        self.handlers["www.imdb.com"] = self.handler_imdb
        self.handlers["imdb.com"] = self.handler_imdb
    
    def add_youtube_handlers(self):
        """
        Adds handlers for Youtube videos. The handler is matched based on the
        domain used in the URL.
        """
        self.handlers["youtube.com"] = self.handler_youtube
        self.handlers["www.youtube.com"] = self.handler_youtube
        self.handlers["youtu.be"] = self.handler_youtube
        self.handlers["m.youtube.com"] = self.handler_youtube
    
    def is_channel_allowed(self, channel):
        """
        Checks channel whitelist and blacklist to determine if the current
        channel is allowed to display titles.
        """
        channel = channel.lower()
        is_allowed = False
        white_list = self.filter_empty(self.registryValue("channelWhitelist"))
        black_list = self.filter_empty(self.registryValue("channelBlacklist"))
        white_list_empty = len(white_list) == 0
        black_list_empty = len(black_list) == 0
        
        # Most basic case, which is that both white and blacklist are empty. Any channel is allowed.
        if white_list_empty and black_list_empty:
            is_allowed = True
        
        # If there is a white list, blacklist is ignored.
        if white_list:
            is_allowed = channel in white_list
        
        # Finally, check blacklist
        if not white_list and black_list:
            is_allowed = channel not in black_list
        
        return is_allowed
    
    def filter_empty(self, input):
        """
        Remove all empty strings from a list, returning the rest as a set
        """
        return set([channel for channel in input if len(channel.strip())])
    
    def is_ignored_domain(self, domain):
        """
        Checks domain against a regular expression
        """
        pattern = self.registryValue("ignoredDomainPattern")
        
        if pattern:
            log.debug("SpiffyTitles: matching %s against %s" % (domain, str(pattern)))
            
            try:
                pattern_search_result = re.search(pattern, domain)
                
                if pattern_search_result is not None:
                    match = pattern_search_result.group()
                    
                    return match
            except re.error:
                log.error("SpiffyTitles: invalid regular expression: %s" % (pattern))
    
    def is_whitelisted_domain(self, domain):
        """
        Checks domain against a regular expression
        """
        pattern = self.registryValue("whitelistDomainPattern")
        
        if pattern:
            log.debug("SpiffyTitles: matching %s against %s" % (domain, str(pattern)))
            
            try:
                pattern_search_result = re.search(pattern, domain)
                
                if pattern_search_result is not None:
                    match = pattern_search_result.group()
                    
                    return match
            except re.error:
                log.error("SpiffyTitles: invalid regular expression: %s" % (pattern))
    
    def get_video_id_from_url(self, url, info):
        """
        Get YouTube video ID from URL
        """
        try:
            path = info.path
            domain = info.netloc
            video_id = ""
            
            if domain == "youtu.be":
                video_id = path.split("/")[1]
            else:
                parsed = cgi.parse_qsl(info.query)
                params = dict(parsed)
                
                if "v" in params:
                    video_id = params["v"]
            
            if video_id:
                return video_id
            else:
                log.error("SpiffyTitles: error getting video id from %s" % (url))
        
        except IndexError as e:
            log.error("SpiffyTitles: error getting video id from %s (%s)" % (url, str(e)))

    def handler_youtube(self, url, domain, channel):
        """
        Uses the Youtube API to provide additional meta data about
        Youtube Video links posted.
        """
        youtube_handler_enabled = self.registryValue("youtubeHandlerEnabled", channel=channel)
        developer_key = self.registryValue("youtubeDeveloperKey")
        
        if not youtube_handler_enabled:
            return None
        
        if not developer_key:
            log.info("SpiffyTitles: no Youtube developer key set! Check the documentation for instructions.")
            return None
        
        log.debug("SpiffyTitles: calling Youtube handler for %s" % (url))
        video_id = self.get_video_id_from_url(url, domain)
        yt_template = Template(self.registryValue("youtubeTitleTemplate", channel=channel))
        title = ""
        
        if video_id:
            options = {
                "part": "snippet,statistics,contentDetails",
                "maxResults": 1,
                "key": developer_key,
                "id": video_id
            }
            encoded_options = urlencode(options)
            api_url = "https://www.googleapis.com/youtube/v3/videos?%s" % (encoded_options)
            agent = self.get_user_agent()
            headers = {
                "User-Agent": agent
            }
            
            log.debug("SpiffyTitles: requesting %s" % (api_url))
            
            request = requests.get(api_url, headers=headers)            
            ok = request.status_code == requests.codes.ok
            
            if ok:
                response = json.loads(request.text)
                
                if response:
                    try:
                        if response["pageInfo"]["totalResults"] > 0:
                            items = response["items"]
                            video = items[0]
                            snippet = video["snippet"]
                            title = snippet["title"]
                            statistics = video["statistics"]
                            view_count = 0
                            like_count = 0
                            dislike_count = 0
                            comment_count = 0
                            favorite_count = 0

                            if "viewCount" in statistics:
                                view_count = "{:,}".format(int(statistics["viewCount"]))
                            
                            if "likeCount" in statistics:
                                like_count = "{:,}".format(int(statistics["likeCount"]))

                            if "dislikeCount" in statistics:
                                dislike_count = "{:,}".format(int(statistics["dislikeCount"]))

                            if "favoriteCount" in statistics:
                                favorite_count = "{:,}".format(int(statistics["favoriteCount"]))

                            if "commentCount" in statistics:
                                comment_count = "{:,}".format(int(statistics["commentCount"]))
                            
                            channel_title = snippet["channelTitle"]
                            duration_seconds = self.get_total_seconds_from_duration(video["contentDetails"]["duration"])

                            """
                            #23 - If duration is zero, then it"s a LIVE video
                            """
                            if duration_seconds > 0:
                                duration = self.get_duration_from_seconds(duration_seconds)
                            else:
                                duration = "LIVE"
                            
                            timestamp = self.get_timestamp_from_youtube_url(url)
                            yt_logo = self.get_youtube_logo()
                            
                            compiled_template = yt_template.render({
                                "title": title,
                                "duration": duration,
                                "timestamp": timestamp,
                                "view_count": view_count,
                                "like_count": like_count,
                                "dislike_count": dislike_count,
                                "comment_count": comment_count,
                                "favorite_count": favorite_count,
                                "channel_title": channel_title,
                                "yt_logo": yt_logo
                            })
                            
                            title = compiled_template
                        else:
                            log.debug("SpiffyTitles: video appears to be private; no results!")
                        
                    except IndexError as e:
                        log.error("SpiffyTitles: IndexError parsing Youtube API JSON response: %s" % (str(e)))
                else:
                    log.error("SpiffyTitles: Error parsing Youtube API JSON response")
            else:
                log.error("SpiffyTitles: Youtube API HTTP %s: %s" % (request.status_code,
                                                                         request.text))
        
        # If we found a title, return that. Otherwise, use the default handler
        if title:
            return title
        else:
            log.debug("SpiffyTitles: falling back to default handler")
            
            return self.handler_default(url, channel)
    
    def get_duration_from_seconds(self, duration_seconds):
        m, s = divmod(duration_seconds, 60)
        h, m = divmod(m, 60)
        
        duration = "%02d:%02d" % (m, s)
        
        """ Only include hour if the video is at least 1 hour long """
        if h > 0:
            duration = "%02d:%s" % (h, duration)
        
        return duration
    
    def get_youtube_logo(self):
        colored_letters = [
            "%s" % ircutils.mircColor("You", fg="red", bg="white"),
            "%s" % ircutils.mircColor("Tube", fg="white", bg="red")
        ]
                        
        yt_logo = "".join(colored_letters)
        
        return yt_logo
      
    def get_total_seconds_from_duration(self, input):
        """
        Duration comes in a format like this: PT4M41S which translates to
        4 minutes and 41 seconds. This method returns the total seconds
        so that the duration can be parsed as usual.
        """
        regex = re.compile(r"""
                   (?P<sign>    -?) P
                (?:(?P<years>  \d+) Y)?
                (?:(?P<months> \d+) M)?
                (?:(?P<days>   \d+) D)?
            (?:                     T
                (?:(?P<hours>  \d+) H)?
                (?:(?P<minutes>\d+) M)?
                (?:(?P<seconds>\d+) S)?
            )?
            """, re.VERBOSE)
        duration = regex.match(input).groupdict(0)
        
        delta = timedelta(hours=int(duration['hours']),
                          minutes=int(duration['minutes']),
                          seconds=int(duration['seconds']))
        
        return delta.total_seconds()

    def get_timestamp_from_youtube_url(self, url):
        """
        Get a human-readable timestamp from a YouTube URL,
        e.g. "?t=1h2m3s" -> "01:02:03" and "?t=90" -> "01:30"
        """
        pattern = r"[?&]t=([^&]+)"
        match = re.search(pattern, url)

        if match:
            timestamp = match.group(1).upper()
            try:
                seconds = float(timestamp)
            except ValueError:
                seconds = self.get_total_seconds_from_duration("PT" + timestamp)

            if seconds > 0:
                return self.get_duration_from_seconds(seconds)

        return ""
        
    def handler_default(self, url, channel):
        """
        Default handler for websites
        """
        default_handler_enabled = self.registryValue("defaultHandlerEnabled", channel=channel)
        
        if default_handler_enabled:
            log.debug("SpiffyTitles: calling default handler for %s" % (url))
            default_template = Template(self.registryValue("defaultTitleTemplate", channel=channel))
            html = self.get_source_by_url(url)
            
            if html is not None and html:
                title = self.get_title_from_html(html)
                
                if title is not None:
                    title_template = default_template.render(title=title)
                    
                    return title_template
        else:
            log.debug("SpiffyTitles: default handler fired but doing nothing because disabled")
    
    def handler_imdb(self, url, info, channel):
        """
        Handles imdb.com links, querying the OMDB API for additional info
        
        Typical IMDB URL: http://www.imdb.com/title/tt2467372/
        """
        headers = self.get_headers()
        result = None
        
        if not self.registryValue("imdbHandlerEnabled", channel=channel):
            log.debug("SpiffyTitles: IMDB handler disabled. Falling back to default handler.")
            
            return self.handler_default(url, channel)
        
        # Don't care about query strings
        if "?" in url:
            url = url.split("?")[0]
        
        # We can only accommodate a specific format of URL here
        if "/title/" in url:
            imdb_id = url.split("/title/")[1].rstrip("/")
            omdb_url = "http://www.omdbapi.com/?i=%s&plot=short&r=json&tomatoes=true" % (imdb_id)
            
            try:
                request = requests.get(omdb_url, timeout=10, headers=headers)
                
                if request.status_code == requests.codes.ok:
                    response = json.loads(request.text)
                    result = None
                    imdb_template = Template(self.registryValue("imdbTemplate"))
                    not_found = "Error" in response
                    unknown_error = response["Response"] != "True"
                    
                    if not_found or unknown_error:
                        log.debug("SpiffyTitles: OMDB error for %s" % (omdb_url))
                    else:
                        result = imdb_template.render(response)
                else:
                    log.error("SpiffyTitles OMDB API %s - %s" % (request.status_code, request.text)) 
            
            except requests.exceptions.Timeout as e:
                log.error("SpiffyTitles imdb Timeout: %s" % (str(e)))
            except requests.exceptions.ConnectionError as e:
                log.error("SpiffyTitles imdb ConnectionError: %s" % (str(e)))
            except requests.exceptions.HTTPError as e:
                log.error("SpiffyTitles imdb HTTPError: %s" % (str(e)))
        
        if result is not None:
            return result
        else:
            log.debug("SpiffyTitles: IMDB handler failed. calling default handler")
            
            return self.handler_default(url, channel)
    
    def handler_wikipedia(self, url, domain, channel):
        """
        Queries wikipedia API for article extracts.
        """
        wikipedia_handler_enabled = self.registryValue("wikipedia.enabled", channel=channel)
        if not wikipedia_handler_enabled:
            return self.handler_default(url, channel)

        self.log.debug("SpiffyTitles: calling Wikipedia handler for %s" % (url))

        pattern = r"/(?:w(?:iki))/(?P<page>[^/]+)$"
        info = urlparse(url)
        match = re.search(pattern, info.path)
        if not match:
            self.log.debug("SpiffyTitles: no title found.")
            return self.handler_default(url, channel)
        elif info.fragment and self.registryValue("wikipedia.ignoreSectionLinks", channel=channel):
            self.log.debug("SpiffyTitles: ignoring section link.")
            return self.handler_default(url, channel)
        else:
            page_title = match.groupdict()['page']

        default_api_params = {
            "format":      "json",
            "action":      "query",
            "prop":        "extracts",
            "exsentences": "2",
            "exlimit":     "1",
            "exintro":     "",
            "explaintext": ""
        }
        extra_params = dict(parse_qsl('&'.join(self.registryValue("wikipedia.apiParams", channel=channel))))
        title_param  = { self.registryValue("wikipedia.titleParam", channel=channel): page_title }

        # merge dicts
        api_params = default_api_params.copy()
        api_params.update(extra_params)
        api_params.update(title_param)
        api_url = "https://en.wikipedia.org/w/api.php?%s" % ('&'.join("%s=%s" % (key, val) for (key,val) in api_params.iteritems()))

        agent = self.get_user_agent()
        headers = {
            "User-Agent": agent
        }
        extract = ""

        self.log.debug("SpiffyTitles: requesting %s" % (api_url))

        request = requests.get(api_url, headers=headers)            
        ok = request.status_code == requests.codes.ok
        
        if ok:
            response = json.loads(request.text)
            
            if response:
                try:
                    extract = response['query']['pages'].values()[0]['extract']
                except KeyError as e:
                    self.log.error("SpiffyTitles: KeyError parsing Wikipedia API JSON response: %s" % (str(e)))
            else:
                self.log.error("SpiffyTitles: Error parsing Wikipedia API JSON response")
        else:
            self.log.error("SpiffyTitles: Wikipedia API HTTP %s: %s" % (request.status_code, request.text))

        if extract:
            if (self.registryValue("wikipedia.removeParentheses")):
                extract = re.sub(r' ?\([^)]*\)', '', extract)
            max_chars = self.registryValue("wikipedia.maxChars", channel=channel)
            if len(extract) > max_chars:
                extract = extract[:max_chars - 3].rsplit(' ', 1)[0].rstrip(',.') + '...'

            wikipedia_template = Template(self.registryValue("wikipedia.extractTemplate", channel=channel))
            return wikipedia_template.render({"extract": extract})
        else:
            self.log.debug("SpiffyTitles: falling back to default handler")
            
            return self.handler_default(url, channel)


    def is_valid_imgur_id(self, input):
        """
        Tests if input matches the typical imgur id, which seems to be alphanumeric.
        Images, galleries, and albums all share the same identifier format.
        """
        match = re.match(r"[a-z0-9]+$", input, re.IGNORECASE)
        
        return match is not None
    
    def handler_imgur(self, url, info, channel):
        """
        Queries imgur API for additional information about imgur links.

        This handler is for any imgur.com domain.
        """
        self.initialize_imgur_client(channel)
        
        is_album = info.path.startswith("/a/")
        is_gallery = info.path.startswith("/gallery/")
        is_image_page = not is_album and not is_gallery and re.match(r"^\/[a-zA-Z0-9]+", info.path)
        result = None
        
        if is_album:
            result = self.handler_imgur_album(url, info, channel)
        #elif is_image_page:
        #    result = self.handler_imgur_image(url, info)
        else:
            result = self.handler_default(url, channel)
        
        return result
    
    def handler_imgur_album(self, url, info, channel):
        """
        Handles retrieving information about albums from the imgur API.
        
        imgur provides the following information about albums: https://api.imgur.com/models/album
        """
        from imgurpython.helpers.error import ImgurClientRateLimitError
        from imgurpython.helpers.error import ImgurClientError
        self.initialize_imgur_client(channel)
        
        if self.imgur_client:
            album_id = info.path.split("/a/")[1]
            
            """ If there is a query string appended, remove it """
            if "?" in album_id:
                album_id = album_id.split("?")[0]
            
            if self.is_valid_imgur_id(album_id):
                log.debug("SpiffyTitles: found imgur album id %s" % (album_id))
                
                try:
                    album = self.imgur_client.get_album(album_id)
                    
                    if album:
                        imgur_album_template = Template(self.registryValue("imgurAlbumTemplate", channel=channel))
                        compiled_template = imgur_album_template.render({
                            "title": album.title,
                            "section": album.section,
                            "view_count": "{:,}".format(album.views),
                            "image_count": "{:,}".format(album.images_count),
                            "nsfw": album.nsfw
                        })
                        
                        return compiled_template
                    else:
                        log.error("SpiffyTitles: imgur album API returned unexpected results!")

                except ImgurClientRateLimitError as e:
                    log.error("SpiffyTitles: imgur rate limit error: %s" % (e.error_message))
                except ImgurClientError as e:
                    log.error("SpiffyTitles: imgur client error: %s" % (e.error_message))
            else:
                log.debug("SpiffyTitles: unable to determine album id for %s" % (url))
        else:
            return self.handler_default(url, channel)
    
    def handler_imgur_image(self, url, info, channel):
        """
        Handles retrieving information about images from the imgur API.
        
        Used for both direct images and imgur.com/some_image_id_here type links, as
        they're both single images.
        """
        self.initialize_imgur_client(channel)
        
        from imgurpython.helpers.error import ImgurClientRateLimitError
        from imgurpython.helpers.error import ImgurClientError
        title = None
        
        if self.imgur_client:
            """ 
            If there is a period in the path, it's a direct link to an image. If not, then
            it's a imgur.com/some_image_id_here type link
            """
            if "." in info.path:
                path = info.path.lstrip("/")
                image_id = path.split(".")[0]
            else:
                image_id = info.path.lstrip("/")
            
            if self.is_valid_imgur_id(image_id):
                log.debug("SpiffyTitles: found image id %s" % (image_id))
                
                try:
                    image = self.imgur_client.get_image(image_id)
                    
                    if image:
                        imgur_template = Template(self.registryValue("imgurTemplate", channel=channel))
                        readable_file_size = self.get_readable_file_size(image.size)
                        compiled_template = imgur_template.render({
                            "title": image.title,
                            "type": image.type,
                            "nsfw": image.nsfw,
                            "width": image.width,
                            "height": image.height,
                            "view_count": "{:,}".format(image.views),
                            "file_size": readable_file_size,
                            "section": image.section
                        })
                        
                        title = compiled_template
                    else:
                        log.error("SpiffyTitles: imgur API returned unexpected results!")
                except ImgurClientRateLimitError as e:
                    log.error("SpiffyTitles: imgur rate limit error: %s" % (e.error_message))
                except ImgurClientError as e:
                    log.error("SpiffyTitles: imgur client error: %s" % (e.error_message))
            else:
                log.error("SpiffyTitles: error retrieving image id for %s" % (url))
        
        if title is not None:
            return title
        else:
            return self.handler_default(url, channel)
    
    def get_readable_file_size(self, num, suffix="B"):
        """
        Returns human readable file size
        """
        for unit in ["","Ki","Mi","Gi","Ti","Pi","Ei","Zi"]:
            if abs(num) < 1024.0:
                return "%3.1f%s%s" % (num, unit, suffix)
            num /= 1024.0
        return "%.1f%s%s" % (num, "Yi", suffix)
    
    def get_formatted_title(self, title, channel):
        """
        Remove cruft from title and apply bold if applicable
        """
        useBold = self.registryValue("useBold", channel=channel)
        
        # Replace anywhere in string
        title = title.replace("\n", " ")
        title = title.replace("\t", " ")
        title = re.sub(" +", " ", title)
        
        if useBold:
            title = ircutils.bold(title)
        
        title = title.strip()
        
        return title
    
    def get_title_from_html(self, html):
        """
        Retrieves value of <title> tag from HTML
        """
        soup = BeautifulSoup(html, "lxml")
        
        if soup is not None:
            """
            Some websites have more than one title tag, so get all of them 
            and take the last value.
            """
            head = soup.find("head")
            titles = head.find_all("title")
            
            if titles is not None and len(titles):                
                title_text = titles[-1].get_text()
                
                if len(title_text):
                    stripped_title = title_text.strip()
                    
                    return stripped_title
    
    @timeout_decorator.timeout(wall_clock_timeout)
    def get_source_by_url(self, url, retries=1):
        """
        Get the HTML of a website based on a URL
        """
        max_retries = self.registryValue("maxRetries")
        
        if retries is None:
            retries = 1
        
        if retries >= max_retries:
            log.debug("SpiffyTitles: hit maximum retries for %s" % url)
            
            return None
        
        log.debug("SpiffyTitles: attempt #%s for %s" % (retries, url))
        
        try:
            headers = self.get_headers()
            
            log.debug("SpiffyTitles: requesting %s" % (url))
            
            request = requests.get(url, headers=headers, timeout=10, allow_redirects=True)
            
            if request.status_code == requests.codes.ok:
                # Check the content type which comes in the format: "text/html; charset=UTF-8"
                content_type = request.headers.get("content-type", "").split(";")[0].strip()
                acceptable_types = self.registryValue("mimeTypes")
                
                log.debug("SpiffyTitles: content type %s" % (content_type))
                
                if content_type in acceptable_types:
                    text = request.content
                    
                    if text:
                        return text
                    else:
                        log.debug("SpiffyTitles: empty content from %s" % (url))                        
                
                else:
                    log.debug("SpiffyTitles: unacceptable mime type %s for url %s" % (content_type, url))
            else:
                log.error("SpiffyTitles HTTP response code %s - %s" % (request.status_code, 
                                                                            request.content))
        
        except timeout_decorator.TimeoutError:
            log.error("SpiffyTitles: wall timeout!")
            
            return self.get_source_by_url(url, retries + 1)
        except requests.exceptions.MissingSchema as e:
            urlWithSchema = "http://%s" % (url)
            log.error("SpiffyTitles missing schema. Retrying with %s" % (urlWithSchema))
            return self.get_source_by_url(urlWithSchema)
        except requests.exceptions.Timeout as e:
            log.error("SpiffyTitles Timeout: %s" % (str(e)))
            
            return self.get_source_by_url(url, retries + 1)
        except requests.exceptions.ConnectionError as e:
            log.error("SpiffyTitles ConnectionError: %s" % (str(e)))
            
            return self.get_source_by_url(url, retries + 1)
        except requests.exceptions.HTTPError as e:
            log.error("SpiffyTitles HTTPError: %s" % (str(e)))
        except requests.exceptions.InvalidURL as e:
            log.error("SpiffyTitles InvalidURL: %s" % (str(e)))
    
    def get_headers(self):
        agent = self.get_user_agent()
        self.accept_language = self.registryValue("language")
        
        headers = {
            "User-Agent": agent,
            "Accept-Language": ";".join((self.accept_language, "q=1.0"))
        }
        
        return headers
    
    def get_user_agent(self):
        """
        Returns a random user agent from the ones available
        """
        agents = self.registryValue("userAgents")
        
        return random.choice(agents)
    
    def message_matches_ignore_pattern(self, input):
        """
        Checks message against linkMessageIgnorePattern to determine
        whether the message should be ignored.
        """
        match = False
        pattern = self.registryValue("linkMessageIgnorePattern")
        
        if pattern:
            match = re.search(pattern, input)
        
        return match
    
    def title_matches_ignore_pattern(self, input, channel):
        """
        Checks message against ignoredTitlePattern to determine
        whether the title should be ignored.
        """
        match = False
        pattern = self.registryValue("ignoredTitlePattern", channel=channel)
        
        if pattern:
            match = re.search(pattern, input)

            if match:
                log.debug("SpiffyTitles: title %s matches ignoredTitlePattern for %s" % (input, channel))
        
        return match
    
    def get_url_from_message(self, input):
        """
        Find the first string that looks like a URL from the message
        """
        url_re = self.registryValue("urlRegularExpression")
        match = re.search(url_re, input)
        
        if match:
            raw_url = match.group(0).strip()
            url = self.remove_control_characters(unicode(raw_url))

            return url
    
    def remove_control_characters(self, s):
        return "".join(ch for ch in s if unicodedata.category(ch)[0]!="C")

    def user_has_capability(self, msg):
        channel = msg.args[0]
        mask = msg.prefix
        required_capability = self.registryValue("requireCapability")
        cap = ircdb.makeChannelCapability(channel, required_capability)
        has_cap = ircdb.checkCapability(mask, cap, ignoreDefaultAllow=True)

        if has_cap:
            log.debug("SpiffyTitles: %s has required capability '%s'" % (mask, required_capability))
        else:
            log.debug("SpiffyTitles: %s does NOT have required capability '%s'" % (mask, required_capability))

        return has_cap
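
# Standalone sketch of the ISO-8601 duration parsing done by
# get_total_seconds_from_duration() above, for the PT#H#M#S shapes the YouTube
# API returns (a simplified regex; the plugin's own pattern also accepts date
# components):
import re
from datetime import timedelta

DURATION_RE = re.compile(r"PT(?:(?P<hours>\d+)H)?(?:(?P<minutes>\d+)M)?(?:(?P<seconds>\d+)S)?")

def duration_to_seconds(value):
    parts = DURATION_RE.match(value).groupdict(0)
    return timedelta(hours=int(parts["hours"]),
                     minutes=int(parts["minutes"]),
                     seconds=int(parts["seconds"])).total_seconds()

assert duration_to_seconds("PT4M41S") == 281.0
assert duration_to_seconds("PT1H2M3S") == 3723.0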
Exemplo n.º 21
0
    def downloader(self):
        """
        Main download method. Gets the index of saved posts from Reddit using PRAW, then checks them
        against the posts already saved in the database. Posts are downloaded and saved according to
        the type of post (self-post, image, image album, webm, article).

        :return: None
        """
        self.set_output_thread_condition(1)
        self.stop_request.clear()
        warnings.warn(
            "Suppressed Resource warning",
            ResourceWarning)  # suppresses all unclosed socket warnings.
        logger = self.logger

        logger.info("\n###########\nStarting SR\n###########")

        logger.debug("Getting settings from db")
        get_comments = self.settings_dict['save_comments'].value
        number_of_comments = self.settings_dict['number_of_comments'].value

        path = "static/SRDownloads"
        if not os.path.exists(path):
            os.makedirs(path)

        # Authenticate with Reddit
        logger.info('Authenticating with Reddit')
        client_id = '_Nxh9h0Tys5KCQ'
        redirect_uri = 'http://127.0.0.1:5000/authorize_callback'
        refresh_token = self.settings_dict['reddit_refresh_token'].value
        user_agent = "SavedRetriever 0.9 by /u/fuzzycut"

        try:
            r = praw.Reddit(user_agent)
            r.set_oauth_app_info(client_id, '', redirect_uri)
            access_information = r.refresh_access_information(refresh_token)
            r.set_access_credentials(**access_information)
            logger.info("Authenticated")
        except Exception as e:
            logger.error(e)
            self.set_output_thread_condition(2)
            raise SystemExit
        time_since_accesstoken = time.time()

        index = set()
        try:  # Create index of unique post codes
            for post in models.Post.query.all():
                index.add(post.code)
        except OSError:
            logger.error("Unable to create index")
            raise SystemExit

        logger.info("Beginning to save files to db...")
        items = r.get_me().get_saved(limit=None)
        self.post_downloaded_count = 0
        # Convert saved post generator to a list in order to iterate backwards, so that the most recent saved post
        # is the most recently downloaded
        for i in list(items)[::-1]:
            if self.stop_request.is_set():
                logger.info('Cancelling download...')
                break

            if (time.time() - time_since_accesstoken
                ) / 60 > 55:  # Refresh the access token before it runs out.
                logger.debug('Refreshing Reddit token')
                r.refresh_access_information(
                    access_information['refresh_token'])
                time_since_accesstoken = time.time()

            name = i.name

            if name not in index:  # file has not been downloaded
                permalink = i.permalink
                title = i.link_title if hasattr(i, 'link_title') else i.title
                date = datetime.datetime.fromtimestamp(i.created)
                post = None
                author = str(i.author)
                user = models.Author.query.filter_by(username=author)
                logger.info('Getting post ' + name + ' - ' + title[:255])
                if user.count() == 0:  # user is not in db
                    user = models.Author(username=author)
                    self.db.session.add(user)
                    self.db.session.commit()
                else:
                    user = user.first()
                comments = self._get_comments(
                    i, number_of_comments,
                    r) if get_comments == 'True' else "{}"
                # ========== #
                # IS COMMENT #
                # ========== #
                if hasattr(i, 'body_html'):
                    logger.debug("{} is comment".format(name))
                    body = i.body_html

                    # html output
                    body = self.subreddit_linker(body)
                    summary = body[:600]
                    summary = bleach.clean(summary,
                                           tags=self.allowed_tags,
                                           attributes=self.allowed_attrs,
                                           strip=True)
                    post = models.Post(permalink=permalink,
                                       title=title,
                                       body_content=body,
                                       date_posted=date,
                                       author_id=user.id,
                                       code=name,
                                       type='text',
                                       summary=summary,
                                       comments=comments)

                # ============ #
                # IS SELF-POST #
                # ============ #
                elif hasattr(i, 'is_self') and i.is_self is True:
                    logger.debug('{} is self-post'.format(name))
                    text = i.selftext_html if i.selftext_html is not None else ""

                    # html output
                    text = self.subreddit_linker(text)
                    summary = text[:600]
                    summary = bleach.clean(summary,
                                           tags=self.allowed_tags,
                                           attributes=self.allowed_attrs,
                                           strip=True)
                    post = models.Post(permalink=permalink,
                                       title=title,
                                       body_content=text,
                                       date_posted=date,
                                       author_id=user.id,
                                       code=name,
                                       type='text',
                                       summary=summary,
                                       comments=comments)

                # ====================== #
                # IS DIRECT LINKED IMAGE #
                # ====================== #
                elif hasattr(i, 'url') and (self._get_image_url_type(
                        i.url) in ['jpg', 'png', 'gif', 'gifv', 'pdf']
                        or "reddituploads" in i.url):
                    logger.debug('{} is direct linked image'.format(name))
                    url = i.url
                    base_filename = "{}_image.{}".format(
                        name, self._get_image_url_type(url))
                    filename = path + "/" + base_filename
                    filetype = 'image'

                    if url[-4:] == "gifv":
                        url = url.replace('gifv', 'mp4')
                        filename = filename.replace('gifv', 'mp4')
                        base_filename = base_filename.replace('gifv', 'mp4')
                        base_filename = base_filename.replace(
                            '_image', '_video')
                        filetype = 'video'

                    # image downloader section
                    if os.path.exists(filename) and (
                            os.path.getsize(filename) >
                            0):  # If image exists and is valid
                        image_downloaded = True
                        logger.info(
                            "Image already exists - {}".format(base_filename))
                    else:
                        image_downloaded = self.image_saver(url, filename)

                    if image_downloaded:
                        logger.info(
                            'Downloaded image - {}'.format(base_filename))
                        self._add_image_to_db(base_filename, filename)

                        if filename.split('.')[-1] == 'pdf':
                            img = '<a href="static/SRDownloads/{}">Click here for link to downloaded pdf</a>'.format(
                                base_filename)
                        elif filename.split('.')[-1] == 'mp4':
                            img = '<video class="sr-image img-responsive" id="share-video" autoplay="" muted=""' \
                                  ' loop=""><source id="mp4Source" src="/img/{}" type=' \
                                  '"video/mp4">Sorry,' \
                                  ' your browser doesn\'t support HTML5 video.  </video>'.format(base_filename)
                        else:
                            img = '<a href="/img/{0}"><img class="sr-image img-responsive" src="/img/{0}">' \
                                  '</a>'.format(base_filename)
                    else:
                        img = "Image failed to download - It may be temporarily or permanently unavailable"

                    img_json = [{
                        "name": "",
                        "filename": base_filename,
                        "description": ""
                    }]
                    img_json = json.dumps(img_json)
                    post = models.Post(permalink=permalink,
                                       title=title,
                                       body_content=img_json,
                                       date_posted=date,
                                       author_id=user.id,
                                       code=name,
                                       type=filetype,
                                       summary=img,
                                       comments=comments)

                # =============== #
                # IS GFYCAT IMAGE #
                # =============== #
                elif hasattr(i, 'url') and 'gfycat.com' in i.url:
                    json_url = 'https://gfycat.com/cajax/get/'
                    gfy_id = i.url.split('/')[-1]
                    url = json_url + gfy_id
                    data = None
                    try:
                        with urllib.request.urlopen(url) as response:
                            data = response.read().decode('utf-8')
                    except urllib.error.HTTPError:
                        logger.warning("Unable to open gfycat url " + url)
                        continue  # no JSON to parse, skip this post

                    json_data = json.loads(data)
                    base_filename = "{}_video.{}".format(
                        name,
                        'mp4')  # filename for image. regex same as above.
                    filename = path + "/" + base_filename
                    if os.path.exists(filename) and (
                            os.path.getsize(filename) >
                            0):  # If image exists and is valid
                        image_downloaded = True
                        logger.info(
                            "Image already exists - {}".format(base_filename))
                    else:
                        image_downloaded = self.image_saver(
                            json_data['gfyItem']['mp4Url'], filename)

                    if image_downloaded:
                        logger.info(
                            'Downloaded video - {}'.format(base_filename))
                        self._add_image_to_db(base_filename, filename)

                        img = '<video class="sr-image img-responsive" id="share-video" autoplay="" muted="" loop="">' \
                              '<source id="mp4Source" src="/img/{}" type="video/mp4">Sorry, your browser doesn\'t support ' \
                              'HTML5 video.  </video>'.format(base_filename)
                    else:
                        img = "Image failed to download - It may be temporarily or permanently unavailable"

                    img_json = [{
                        "name": "",
                        "filename": base_filename,
                        "description": ""
                    }]
                    img_json = json.dumps(img_json)
                    post = models.Post(permalink=permalink,
                                       title=title,
                                       body_content=img_json,
                                       date_posted=date,
                                       author_id=user.id,
                                       code=name,
                                       type='video',
                                       summary=img,
                                       comments=comments)

                # ============== #
                # IS IMGUR ALBUM #
                # ============== #
                elif hasattr(
                        i, 'url'
                ) and 'imgur' in i.url:  # Add option to download images to folder.
                    logger.debug('{} is Imgur album'.format(name))
                    url = i.url
                    # body = "<h2>{}</h2>".format(title)
                    body = []
                    summary = ''

                    # imgur api section
                    client = ImgurClient('755357eb4cd70bd', None)
                    pattern = r'/([A-Za-z0-9]{5,7})'  # matches a 5-7 character alphanumeric ID that comes after a forward slash (/).
                    match = re.findall(pattern, url)
                    gallery_id = match[-1].replace(
                        '/', '')  # removes any forward slashes for processing
                    gallery = []
                    filename = None
                    try:
                        gallery = client.get_album_images(gallery_id)
                    except imgurpython.helpers.error.ImgurClientError:  # if 'gallery' is actually just a lone image
                        try:
                            gallery = [client.get_image(gallery_id)]
                        except imgurpython.helpers.error.ImgurClientError as error:  # if gallery does not exist.
                            if error.status_code != 404:
                                logger.error("**{} - {}**".format(
                                    error.status_code, error.error_message))
                            else:
                                logger.error(error)

                    img_path = path

                    first_image = True
                    for image in gallery:  # add if gallery > 10, then just add a link (would be too large for the note)
                        image_name = image.title if image.title is not None else ""
                        # image_description = image.description if image.description is not None else ""
                        if image.description != title and image.description is not None:
                            image_description = image.description
                        else:
                            image_description = ""
                        image_filetype = image.type.split('/')[1]
                        image_id = image.id
                        image_link = image.link
                        # sets up downloaded filename and html for embedding image
                        base_filename = "{}_image.{}".format(
                            image_id, image_filetype)
                        img_json = [{
                            "name": image_name,
                            "filename": base_filename,
                            "description": image_description
                        }]
                        filename = img_path + "/" + base_filename
                        # only download if file doesn't already exist
                        if os.path.exists(filename) and (
                                os.path.getsize(filename) > 0):
                            image_downloaded = True
                            logger.info('Image already exists - {}'.format(
                                base_filename))
                        else:
                            image_downloaded = self.image_saver(
                                image_link, filename)

                        if image_downloaded:
                            logger.info(
                                'Image downloaded - {}'.format(base_filename))
                            self._add_image_to_db(base_filename, filename)

                        if first_image:
                            summary = '<a href="/img/{0}"><img src="/img/{0}"' \
                                      ' class="sr-image img-responsive"></a>'.format(base_filename)
                            first_image = False

                        body += img_json

                    post = models.Post(permalink=permalink,
                                       title=title + " - Album",
                                       body_content=json.dumps(body),
                                       date_posted=date,
                                       author_id=user.id,
                                       code=name,
                                       type='album',
                                       summary=summary,
                                       comments=comments)

                # ========== #
                # IS ARTICLE #
                # ========== #
                elif hasattr(i, 'title') and i.is_self is False:
                    logger.debug('{} is article/webpage'.format(name))
                    url = i.url
                    html = None
                    try:
                        # Set header to trick some sites into letting the script pull the article
                        header = {
                            'User-Agent':
                            'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) '
                            'Gecko/2009021910 Firefox/3.0.7'
                        }
                        request = urllib.request.Request(url, headers=header)
                        with urllib.request.urlopen(request) as response:
                            html = response.read()
                    except urllib.error.HTTPError as e:
                        self.logger.error(
                            "Unable to access article url\n %s\n %s\n %s", e,
                            url, i.name)
                        continue
                    except urllib.error.URLError as e:
                        self.logger.error(
                            "Unable to access article url\n %s\n %s\n %s", e,
                            url, i.name)
                        continue

                    article = Document(html)
                    article_text = article.summary()
                    article_text = bleach.clean(article_text,
                                                tags=self.allowed_tags,
                                                attributes=self.allowed_attrs,
                                                strip=True)
                    summary = article_text[:600]
                    summary = bleach.clean(summary,
                                           tags=self.allowed_tags,
                                           attributes=self.allowed_attrs,
                                           strip=True)
                    article_text = self._make_article_img_responsive(
                        article_text)

                    if article_text is None:  # if unable to parse document, manually set an error message
                        article_text = 'Unable to parse page - See <a href="{}">here</a> for the original link'.format(
                            url)
                    article_text = '<a href="{}">Original article</a>'.format(
                        url) + article_text
                    # article = "<a href='{}'>{}</a><br/>{}<br/>".format(url, title, article)  # source of article

                    post = models.Post(permalink=permalink,
                                       title=title,
                                       body_content=article_text,
                                       date_posted=date,
                                       author_id=user.id,
                                       code=name,
                                       type='article',
                                       summary=summary,
                                       comments=comments)

                # end of checking for saved items #
                try:
                    self.db.session.add(post)
                    self.db.session.commit()
                except InterfaceError:
                    self.db.session.rollback()
                    self.logger.error("Error adding post to db - {}".format(
                        post.title))
                    continue
                self.post_downloaded_count += 1
                logger.info('Saved ' + name + ' - ' + title[:255])

        # end of for loop
        logger.info("All items downloaded")
        self.set_output_thread_condition(2)
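The 55-minute check in the loop above refreshes the Reddit OAuth token shortly before its one-hour expiry. The same cadence, condensed into a reusable helper (a sketch; the names are illustrative and not part of the code above):

import time

TOKEN_LIFETIME_MIN = 60  # Reddit access tokens expire after one hour
REFRESH_MARGIN_MIN = 5   # refresh a little early, as the loop above does

def maybe_refresh(last_refresh, do_refresh):
    """Invoke do_refresh() once the token nears expiry; return the new timestamp."""
    if (time.time() - last_refresh) / 60 > TOKEN_LIFETIME_MIN - REFRESH_MARGIN_MIN:
        do_refresh()
        return time.time()
    return last_refresh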
Exemplo n.º 22
0
from imgurpython import ImgurClient
from credentials import creds
from random import randint
import urllib.request
import os

client_id = creds['client_id']
client_secret = creds['client_secret']

client = ImgurClient(client_id, client_secret)

items = client.subreddit_gallery('earthporn',
                                 sort='time',
                                 window='week',
                                 page=0)

rand = randint(0, len(items) - 1)
image_id = items[rand].id

url = client.get_image(image_id).link

local_name = 'image.' + url[-3:]

urllib.request.urlretrieve(url, local_name)

os.system(
    'gsettings set org.cinnamon.desktop.background picture-uri "file:///home/john/PythonStuff/backchange/'
    + local_name + '"')
Exemplo n.º 23
0
class LinkedMediaHelper:
    """
    LinkedMediaHelper provides methods to collect data / content from Imgur and Gfycat.
    """
    def __init__(
        self,
        config: Configuration,
        imgur_secrets: str = 'imgur.secret',
        gfycat_secrets: str = 'gfycat.secret',
    ):
        self.logger = config.bot.logger
        self.save_dir = config.media.folder

        try:
            imgur_config = self._get_imgur_secrets(imgur_secrets)
            self.imgur_client = ImgurClient(
                imgur_config['Imgur']['ClientID'],
                imgur_config['Imgur']['ClientSecret'],
            )

            gfycat_config = self._get_gfycat_secrets(gfycat_secrets)
            self.gfycat_client = GfycatClient(
                gfycat_config['Gfycat']['ClientID'],
                gfycat_config['Gfycat']['ClientSecret'],
            )

        except ImgurClientError as imgur_error:
            self.logger.error('Error on creating ImgurClient: %s', imgur_error)
            self.logger.error(FATAL_TOOTBOT_ERROR)
            sys.exit(1)
        except GfycatClientError as gfycat_error:
            self.logger.error('Error on creating GfycatClient: %s',
                              gfycat_error)
            self.logger.error(FATAL_TOOTBOT_ERROR)
            sys.exit(1)

    def _get_gfycat_secrets(self,
                            gfycat_secrets: str) -> configparser.ConfigParser:
        """
        _get_gfycat_secrets checks if the Gfycat API secrets file exists.
        - If the file exists, this method reads it and returns the secrets.
        - If the file doesn't exist, it asks the user over stdin to supply these values and then
          saves them into the gfycat_secrets file.

        Arguments:
            gfycat_secrets (string): file name of secrets file for API credentials

        Returns:
            gfycat_config (configparser.ConfigParser): parsed configuration containing the client
            id and client secret needed to log in to Gfycat
        """

        if not os.path.exists(gfycat_secrets):
            self.logger.warning(
                'Gfycat API keys not found. (See wiki if you need help).')

            # Whitespaces are stripped from input: https://stackoverflow.com/a/3739939
            gfycat_client_id = ''.join(
                input("[ .. ] Enter Gfycat client ID: ").split())
            gfycat_client_secret = ''.join(
                input("[ .. ] Enter Gfycat client secret: ").split())
            # Make sure authentication is working
            try:
                gfycat_client = GfycatClient(gfycat_client_id,
                                             gfycat_client_secret)

                # If this call doesn't work, it'll throw a GfycatClientError
                gfycat_client.query_gfy('oddyearlyhorsefly')
                # It worked, so save the keys to a file
                gfycat_config = configparser.ConfigParser()
                gfycat_config['Gfycat'] = {
                    'ClientID': gfycat_client_id,
                    'ClientSecret': gfycat_client_secret,
                }
                with open(gfycat_secrets, 'w') as file:
                    gfycat_config.write(file)
            except GfycatClientError as gfycat_error:
                self.logger.error('Error while logging into Gfycat: %s',
                                  gfycat_error)
                self.logger.error(FATAL_TOOTBOT_ERROR)
                sys.exit(1)
        else:
            # Read API keys from secret file
            gfycat_config = configparser.ConfigParser()
            gfycat_config.read(gfycat_secrets)

        return gfycat_config

    def _get_imgur_secrets(self,
                           imgur_secrets: str) -> configparser.ConfigParser:
        """
        _get_imgur_secrets checks if the Imgur API secrets file exists.
        - If the file exists, this method reads it and returns the secrets.
        - If the file doesn't exist, it asks the user over stdin to supply these values and then
          saves them into the imgur_secrets file.

        Arguments:
            imgur_secrets (string): file name of secrets file for API credentials

        Returns:
            imgur_config (configparser.ConfigParser): parsed configuration containing the client
            id and client secret needed to log in to Imgur
        """

        if not os.path.exists(imgur_secrets):
            self.logger.warning(
                'Imgur API keys not found. (See wiki if you need help).')

            # Whitespaces are stripped from input: https://stackoverflow.com/a/3739939
            imgur_client_id = ''.join(
                input("[ .. ] Enter Imgur client ID: ").split())
            imgur_client_secret = ''.join(
                input("[ .. ] Enter Imgur client secret: ").split())
            # Make sure authentication is working
            try:
                imgur_client = ImgurClient(imgur_client_id,
                                           imgur_client_secret)

                # If this call doesn't work, it'll throw an ImgurClientError
                imgur_client.get_album('dqOyj')
                # It worked, so save the keys to a file
                imgur_config = configparser.ConfigParser()
                imgur_config['Imgur'] = {
                    'ClientID': imgur_client_id,
                    'ClientSecret': imgur_client_secret,
                }
                with open(imgur_secrets, 'w') as file:
                    imgur_config.write(file)
            except ImgurClientError as imgur_error:
                self.logger.error('Error while logging into Imgur: %s',
                                  imgur_error)
                self.logger.error(FATAL_TOOTBOT_ERROR)
                sys.exit(1)
        else:
            # Read API keys from secret file
            imgur_config = configparser.ConfigParser()
            imgur_config.read(imgur_secrets)

        return imgur_config

    def get_imgur_image(self, img_url: str, max_images: int = 4) -> List[str]:
        """
        get_imgur_image downloads images from imgur.

        Arguments:
            img_url: url of imgur image to download
            max_images: maximum number of images to download and process; defaults to 4

        Returns:
            imgur_paths (List[str]): paths to the downloaded images; an empty list if nothing was downloaded
        """

        # Working demo of regex: https://regex101.com/r/G29uGl/2
        regex = r"(?:.*)imgur\.com(?:\/gallery\/|\/a\/|\/)(.*?)(?:\/.*|\.|$)"
        regex_match = re.search(regex, img_url, flags=0)

        if not regex_match:
            self.logger.error(
                'Could not identify Imgur image/gallery ID at: %s', img_url)
            return []

        # Get the Imgur image/gallery ID
        imgur_id = regex_match.group(1)

        image_urls = self._get_image_urls(img_url, imgur_id)

        # Download and process individual images (up to max_images)
        imgur_paths = []
        for image_url in image_urls:
            # If the URL is a GIFV or MP4 link, change it to the GIF version
            file_extension = os.path.splitext(image_url)[-1].lower()
            if file_extension == '.gifv':
                file_extension = '.gif'
                image_url = image_url.replace('.gifv', '.gif')
            elif file_extension == '.mp4':
                file_extension = '.gif'
                image_url = image_url.replace('.mp4', '.gif')

            # Download the image
            file_path = self.save_dir + '/' + imgur_id + '_' + str(
                len(imgur_paths)) + file_extension
            self.logger.info('Downloading Imgur image at URL %s to %s',
                             image_url, file_path)
            current_image = save_file(image_url, file_path, self.logger)

            # Imgur will sometimes return a single-frame thumbnail
            # instead of a GIF, so we need to check for this
            if file_extension != '.gif' or self._check_imgur_gif(file_path):
                imgur_paths.append(current_image)

            if len(imgur_paths) == max_images:
                break

        return imgur_paths

    def _get_image_urls(self, img_url: str, imgur_id: str) -> List[str]:
        """
        _get_image_urls builds a list of urls of all Imgur images identified by imgur_id

        Arguments:
            img_url: URL to IMGUR post
            imgur_id: ID for IMGUR post

        Returns:
            image_urls: list of urls to the images of the Imgur post identified by imgur_id
        """
        image_urls = []
        try:
            if any(s in img_url
                   for s in ('/a/', '/gallery/')):  # Gallery links
                self.logger.info('Imgur link points to gallery: %s', img_url)
                images = self.imgur_client.get_album_images(imgur_id)
                for image in images:
                    image_urls.append(image.link)
            else:  # Single image
                image_urls = [self.imgur_client.get_image(imgur_id).link]
        except ImgurClientError as imgur_error:
            self.logger.error('Could not get information from imgur: %s',
                              imgur_error)
        return image_urls

    def _check_imgur_gif(self, file_path: str) -> bool:
        """
        _check_imgur_gif checks if a file downloaded from imgur is indeed a gif. If the file is
        not a gif, the file is removed.

        Arguments:
            file_path: file name and path to downloaded image

        Returns:
             True if downloaded image is indeed a GIF, otherwise returns False
        """
        img = PILImage.open(file_path)
        mime = PILImage.MIME[img.format]
        img.close()

        if mime != 'image/gif':
            self.logger.warning('Imgur: not a GIF, not posting')
            try:
                os.remove(file_path)
            except OSError as remove_error:
                self.logger.error('Error while deleting media file: %s',
                                  remove_error)
            return False

        return True

    def get_gfycat_image(self, img_url: str) -> Optional[str]:
        """
        get_gfycat_image downloads full resolution images from gfycat.

        Arguments:
            img_url (string): url of gfycat image to download

        Returns:
            file_path (string): path to downloaded image or None if no image was downloaded
        """
        gfycat_url = ""
        file_path = self.save_dir + '/'
        try:
            gfycat_name = os.path.basename(urlsplit(img_url).path)
            response = requests.get(img_url)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'lxml')
            for tag in soup.find_all("source", src=True):
                src = tag['src']
                if "giant" in src and "mp4" in src:
                    gfycat_url = src
            file_path += gfycat_name + '.mp4'
        except (requests.ConnectionError, requests.Timeout, requests.HTTPError,
                GfycatClientError) as gfycat_error:
            self.logger.error('Error downloading Gfycat link: %s',
                              gfycat_error)
            return None

        if gfycat_url == '':
            self.logger.debug('Empty Gfycat URL; no attachment to download')
            return None

        self.logger.info('Downloading Gfycat at URL %s to %s', gfycat_url,
                         file_path)
        return save_file(gfycat_url, file_path, self.logger)

    def get_reddit_image(self, img_url: str) -> str:
        """
        get_reddit_image downloads full resolution images from i.reddit or reddituploads.

        Arguments:
            img_url (string): url of the reddit-hosted image to download

        Returns:
            file_path (string): path to downloaded image or None if no image was downloaded
        """
        file_name = os.path.basename(urlsplit(img_url).path)
        file_extension = os.path.splitext(img_url)[1].lower()
        # Fix for issue with i.reddituploads.com links not having a
        # file extension in the URL
        if not file_extension:
            file_extension += '.jpg'
            file_name += '.jpg'
            img_url += '.jpg'
        # Download the file
        file_path = self.save_dir + '/' + file_name
        self.logger.info(
            'Downloading file at URL %s to %s, file type identified as %s',
            img_url,
            file_path,
            file_extension,
        )
        return save_file(img_url, file_path, self.logger)

    def get_reddit_gallery(self,
                           reddit_post: Submission,
                           max_images: int = 4) -> List[str]:
        """
        get_reddit_gallery downloads up to max_images images from a reddit gallery post and returns
        a list of file paths of the downloaded images.

        Arguments:
            reddit_post (reddit_post):  reddit post / submission object
            max_images (int): [optional] maximum number of images to download. Default is 4

        Returns:
            file_paths (List[str]): a list of the paths to the downloaded files. If no images have
            been downloaded, an empty list will be returned.
        """
        file_paths = []
        for item in sorted(reddit_post.gallery_data['items'],
                           key=lambda x: x['id']):
            media_id = item['media_id']
            meta = reddit_post.media_metadata[media_id]
            self.logger.debug('Media Metadata: %s', meta)
            if 'e' in meta and meta['e'] == 'Image':
                source = meta['s']
                save_path = self.save_dir + '/' + media_id + '.' + meta[
                    'm'].split('/')[1]
                self.logger.info('Gallery file_path, source: %s - %s',
                                 save_path, source['u'])
                self.logger.debug('A[%4dx%04d] %s' %
                                  (source['x'], source['y'], source['u']))
                file_paths.append(
                    save_file(source['u'], save_path, self.logger))

                if len(file_paths) == max_images:
                    break

        return file_paths

    def get_reddit_video(self, reddit_post: Submission) -> str:
        """
        get_reddit_video downloads the full-resolution video from a v.redd.it post.

        Arguments:
            reddit_post (reddit_post): reddit post / submission object

        Returns:
            file_path (string): path to downloaded video or None if no image was downloaded
        """
        # Get URL for MP4 version of reddit video
        video_url = reddit_post.media['reddit_video']['fallback_url']
        file_path = self.save_dir + '/' + reddit_post.id + '.mp4'
        self.logger.info('Downloading Reddit video at URL %s to %s', video_url,
                         file_path)
        return save_file(video_url, file_path, self.logger)

    def get_giphy_image(self, img_url: str) -> Optional[str]:
        """
        get_giphy_image downloads full or low resolution image from giphy

        Arguments:
            img_url (string): url of giphy image to download

        Returns:
            file_path (string): path to downloaded image or None if no image was downloaded
        """
        # Working demo of regex: https://regex101.com/r/o8m1kA/2
        regex = r"https?://((?:.*)giphy\.com/media/|giphy.com/gifs/|i.giphy.com/)(.*-)?(\w+)(/|\n)"
        match = re.search(regex, img_url, flags=0)
        if not match:
            self.logger.error('Could not identify Giphy ID in this URL: %s',
                              img_url)
            return None

        # Get the Giphy ID
        giphy_id = match.group(3)
        # Download the MP4 version of the GIF
        giphy_url = 'https://media.giphy.com/media/' + giphy_id + '/giphy.mp4'
        file_path = self.save_dir + '/' + giphy_id + 'giphy.mp4'
        giphy_file = save_file(giphy_url, file_path, self.logger)
        self.logger.info('Downloading Giphy at URL %s to %s', giphy_url,
                         file_path)

        return giphy_file

    def get_generic_image(self, img_url: str) -> Optional[str]:
        """
        get_generic_image downloads image or video from a generic url to a media file.

        Arguments:
            img_url (string): url to image or video file

        Returns:
            file_path (string): path to downloaded video or None if no image was downloaded
        """
        # First check if URL starts with http:// or https://
        regex = r"^https?://"
        match = re.search(regex, img_url, flags=0)
        if not match:
            self.logger.info('Post link is not a full link: %s', img_url)
            return None

        # Check if URL is an image or MP4 file, based on the MIME type
        image_formats = ('image/png', 'image/jpeg', 'image/gif', 'image/webp',
                         'video/mp4')
        try:
            img_site = urlopen(img_url)
        except (URLError, UnicodeEncodeError) as url_error:
            self.logger.error('Error while opening URL %s', url_error)
            return None

        meta = img_site.info()
        if meta["content-type"] not in image_formats:
            self.logger.error('URL does not point to a valid image file: %s',
                              img_url)
            return None

        # URL appears to be an image, so download it
        file_name = os.path.basename(urlsplit(img_url).path)
        file_path = self.save_dir + '/' + file_name
        self.logger.info('Downloading file at URL %s to %s', img_url,
                         file_path)
        return save_file(img_url, file_path, self.logger)
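The Imgur ID regex used in get_imgur_image above can be sanity-checked in isolation; the sample URLs below are illustrative:

import re

IMGUR_ID_RE = r"(?:.*)imgur\.com(?:\/gallery\/|\/a\/|\/)(.*?)(?:\/.*|\.|$)"

for sample in ('https://imgur.com/a/abc123',
               'https://imgur.com/gallery/xyz789',
               'https://i.imgur.com/def456.jpg'):
    match = re.search(IMGUR_ID_RE, sample)
    print(sample, '->', match.group(1) if match else None)
# -> abc123, xyz789 and def456 respectively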
Exemplo n.º 24
0
class ImgurStorage(Storage):
    """
    A Django storage class providing access to resources hosted on Imgur.
    """

    def __init__(self, location='/'):
        self.client = ImgurClient(
            CONSUMER_ID,
            CONSUMER_SECRET,
            ACCESS_TOKEN,
            ACCESS_TOKEN_REFRESH)
        logger.info("Logged in Imgur storage")
        self.account_info = self.client.get_account(USERNAME)
        self.albums = self.client.get_account_albums(USERNAME)
        self.location = location
        self.base_url = 'https://api.imgur.com/3/account/{url}/'.format(url=self.account_info.url)

    def _get_abs_path(self, name):
        return os.path.join(self.location, name)

    def _open(self, name, mode='rb'):
        # ImgurClient.get_image takes only the image id; mode is unused here
        remote_file = self.client.get_image(name)
        return remote_file

    def _save(self, name, content):
        name = self._get_abs_path(name)
        directory = os.path.dirname(name)
        logger.info([a.title for a in self.albums])
        logger.info(name)
        logger.info(directory)
        if not self.exists(directory) and directory:
            album = self.client.create_album({"title": directory})
            self.albums = self.client.get_account_albums(USERNAME)
        album = [a for a in self.albums if a.title == directory][0]
        #if not response['is_dir']:
        #     raise IOError("%s exists and is not a directory." % directory)
        response = self._client_upload_from_fd(content, {"album": album.id, "name": name, "title": name}, False)
        return response["name"]

    def _client_upload_from_fd(self, fd, config=None, anon=True):
        """ use a file descriptor to perform a make_request """
        if not config:
            config = dict()

        contents = fd.read()
        b64 = base64.b64encode(contents)

        data = {
            'image': b64,
            'type': 'base64',
        }

        data.update({meta: config[meta] for meta in set(self.client.allowed_image_fields).intersection(config.keys())})
        return self.client.make_request('POST', 'upload', data, anon)

    def delete(self, name):
        name = self._get_abs_path(name)
        self.client.delete_image(name)

    def exists(self, name):
        name = self._get_abs_path(name)
        if len([a for a in self.albums if a.title == name]) > 0:
            return True
        try:
            album = [a for a in self.albums if a.title == os.path.dirname(name)][0]
            images = self.client.get_album_images(album.id)
            metadata = self.client.get_image(name)
            if len([im for im in images if im.name == name]) > 0:
                logger.info(dir(metadata))
                return True
        except ImgurClientError as e:
            if e.status_code == 404: # not found
                return False
            raise e
        except IndexError as e:
            return False
        else:
            return True
        return False

    def listdir(self, path):
        path = self._get_abs_path(path)
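        # NOTE: the 'contents' handling below appears to be left over from a
        # Dropbox backend; ImgurClient.get_image returns an Image object with
        # no .get('contents'), so this method will not work as written.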
        response = self.client.get_image(path)
        directories = []
        files = []
        for entry in response.get('contents', []):
            if entry['is_dir']:
                directories.append(os.path.basename(entry['path']))
            else:
                files.append(os.path.basename(entry['path']))
        return directories, files

    def size(self, path):
        cache_key = 'django-imgur-size:%s' % filepath_to_uri(path)
        size = cache.get(cache_key)

        if not size:
            directory = os.path.dirname(path)
            name = os.path.basename(path)
            album = [a for a in self.albums if a.title == directory][0]
            images = self.client.get_album_images(album.id)
            image = [im for im in images if im.name == path][0]
            size = self.client.get_image(image.id).size
            cache.set(cache_key, size)

        return size

    def url(self, path):
        cache_key = 'django-imgur-url:%s' % filepath_to_uri(path)
        url = cache.get(cache_key)

        if not url:
            directory = os.path.dirname(path)
            name = os.path.basename(path)
            album = [a for a in self.albums if a.title == directory][0]
            images = self.client.get_album_images(album.id)
            image = [im for im in images if im.name == path][0]
            url = self.client.get_image(image.id).link
            cache.set(cache_key, url)

        return url

    def get_available_name(self, name, max_length=None):
        """
        Returns a filename that's free on the target storage system, and
        available for new content to be written to.
        """
        #name = self._get_abs_path(name)
        #dir_name, file_name = os.path.split(name)
        #file_root, file_ext = os.path.splitext(file_name)
        ## If the filename already exists, add an underscore and a number (before
        ## the file extension, if one exists) to the filename until the generated
        ## filename doesn't exist.
        #count = itertools.count(1)
        #while self.exists(name):
        #    # file_ext includes the dot.
        #    name = os.path.join(dir_name, "%s_%s%s" % (file_root, count.next(), file_ext))

        return name
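A hedged usage sketch for this backend, assuming Django is configured and the module-level credential constants (CONSUMER_ID, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_REFRESH, USERNAME) are set; Storage.save() delegates to the _save() above:

from django.core.files.base import ContentFile

storage = ImgurStorage()
with open('cat.jpg', 'rb') as fh:
    name = storage.save('pets/cat.jpg', ContentFile(fh.read()))
print(storage.url(name))  # cached Imgur link for the uploaded image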
Exemplo n.º 25
0
class ImgurStorage(Storage):
    """
    A Django storage class providing access to resources hosted on Imgur.
    """
    def __init__(self, location='/'):
        self.client = ImgurClient(CONSUMER_ID, CONSUMER_SECRET, ACCESS_TOKEN,
                                  ACCESS_TOKEN_REFRESH)
        logger.info("Logged in Imgur storage")
        self.account_info = self.client.get_account(USERNAME)
        self.albums = self.client.get_account_albums(USERNAME)
        self.location = location
        self.base_url = 'https://api.imgur.com/3/account/{url}/'.format(
            url=self.account_info.url)

    def _get_abs_path(self, name):
        return os.path.join(self.location, name)

    def _open(self, name, mode='rb'):
        # ImgurClient.get_image takes only the image id; mode is unused here
        remote_file = self.client.get_image(name)
        return remote_file

    def _save(self, name, content):
        name = self._get_abs_path(name)
        directory = os.path.dirname(name)
        logger.info([a.title for a in self.albums])
        logger.info(name)
        logger.info(directory)
        if not self.exists(directory) and directory:
            album = self.client.create_album({"title": directory})
            self.albums = self.client.get_account_albums(USERNAME)
        album = [a for a in self.albums if a.title == directory][0]
        #if not response['is_dir']:
        #     raise IOError("%s exists and is not a directory." % directory)
        response = self._client_upload_from_fd(content, {
            "album": album.id,
            "name": name,
            "title": name
        }, False)
        return response["name"]

    def _client_upload_from_fd(self, fd, config=None, anon=True):
        """ use a file descriptor to perform a make_request """
        if not config:
            config = dict()

        contents = fd.read()
        b64 = base64.b64encode(contents)

        data = {
            'image': b64,
            'type': 'base64',
        }

        data.update({
            meta: config[meta]
            for meta in set(self.client.allowed_image_fields).intersection(
                list(config.keys()))
        })
        return self.client.make_request('POST', 'upload', data, anon)

    def delete(self, name):
        name = self._get_abs_path(name)
        self.client.delete_image(name)

    def exists(self, name):
        name = self._get_abs_path(name)
        if len([a for a in self.albums if a.title == name]) > 0:
            return True
        try:
            album = [
                a for a in self.albums if a.title == os.path.dirname(name)
            ][0]
            images = self.client.get_album_images(album.id)
            metadata = self.client.get_image(name)
            if len([im for im in images if im.name == name]) > 0:
                logger.info(dir(metadata))
                return True
        except ImgurClientError as e:
            if e.status_code == 404:  # not found
                return False
            raise e
        except IndexError as e:
            return False
        else:
            return True
        return False

    def listdir(self, path):
        path = self._get_abs_path(path)
        response = self.client.get_image(path)
        directories = []
        files = []
        for entry in response.get('contents', []):
            if entry['is_dir']:
                directories.append(os.path.basename(entry['path']))
            else:
                files.append(os.path.basename(entry['path']))
        return directories, files

    def size(self, path):
        cache_key = 'django-imgur-size:%s' % filepath_to_uri(path)
        size = cache.get(cache_key)

        if not size:
            directory = os.path.dirname(path)
            name = os.path.basename(path)
            album = [a for a in self.albums if a.title == directory][0]
            images = self.client.get_album_images(album.id)
            image = [im for im in images if im.name == path][0]
            size = self.client.get_image(image.id).size
            cache.set(cache_key, size)

        return size

    def url(self, path):
        cache_key = 'django-imgur-url:%s' % filepath_to_uri(path)
        url = cache.get(cache_key)

        if not url:
            directory = os.path.dirname(path)
            name = os.path.basename(path)
            album = [a for a in self.albums if a.title == directory][0]
            images = self.client.get_album_images(album.id)
            image = [im for im in images if im.name == path][0]
            url = self.client.get_image(image.id).link
            cache.set(cache_key, url)

        return url

    def get_available_name(self, name, max_length=None):
        """
        Returns a filename that's free on the target storage system, and
        available for new content to be written to.
        """
        #name = self._get_abs_path(name)
        #dir_name, file_name = os.path.split(name)
        #file_root, file_ext = os.path.splitext(file_name)
        ## If the filename already exists, add an underscore and a number (before
        ## the file extension, if one exists) to the filename until the generated
        ## filename doesn't exist.
        #count = itertools.count(1)
        #while self.exists(name):
        #    # file_ext includes the dot.
        #    name = os.path.join(dir_name, "%s_%s%s" % (file_root, count.next(), file_ext))

        return name
Exemplo n.º 26
0
def get_media(img_url, IMGUR_CLIENT, IMGUR_CLIENT_SECRET):
    # Make sure config file exists
    try:
        config = configparser.ConfigParser()
        config.read('config.ini')
    except BaseException as e:
        print('[EROR] Error while reading config file:', str(e))
        sys.exit()
    # Make sure media folder exists
    IMAGE_DIR = config['MediaSettings']['MediaFolder']
    if not os.path.exists(IMAGE_DIR):
        os.makedirs(IMAGE_DIR)
        print('[ OK ] Media folder not found, created a new one')
    # Download and save the linked image
    if any(s in img_url
           for s in ('i.redd.it',
                     'i.reddituploads.com')):  # Reddit-hosted images
        file_name = os.path.basename(urllib.parse.urlsplit(img_url).path)
        file_extension = os.path.splitext(img_url)[-1].lower()
        # Fix for issue with i.reddituploads.com links not having a file extension in the URL
        if not file_extension:
            file_extension += '.jpg'
            file_name += '.jpg'
            img_url += '.jpg'
        # Download the file
        file_path = IMAGE_DIR + '/' + file_name
        print('[ OK ] Downloading file at URL ' + img_url + ' to ' +
              file_path + ', file type identified as ' + file_extension)
        img = save_file(img_url, file_path)
        return img
    elif ('v.redd.it' in img_url):  # Reddit video
        print(
            '[WARN] Reddit videos can not be uploaded to Twitter, due to API limitations'
        )
        return
    elif ('imgur.com' in img_url):  # Imgur
        try:
            client = ImgurClient(IMGUR_CLIENT, IMGUR_CLIENT_SECRET)
        except BaseException as e:
            print('[EROR] Error while authenticating with Imgur:', str(e))
            return
        # Working demo of regex: https://regex101.com/r/G29uGl/2
        regex = r"(?:.*)imgur\.com(?:\/gallery\/|\/a\/|\/)(.*?)(?:\/.*|\.|$)"
        m = re.search(regex, img_url, flags=0)
        if m:
            # Get the Imgur image/gallery ID
            id = m.group(1)
            if any(s in img_url
                   for s in ('/a/', '/gallery/')):  # Gallery links
                images = client.get_album_images(id)
                # Only the first image in a gallery is used
                imgur_url = images[0].link
            else:  # Single image
                imgur_url = client.get_image(id).link
            # If the URL is a GIFV or MP4 link, change it to the GIF version
            file_extension = os.path.splitext(imgur_url)[-1].lower()
            if (file_extension == '.gifv'):
                file_extension = file_extension.replace('.gifv', '.gif')
                imgur_url = imgur_url.replace('.gifv', '.gif')
            elif (file_extension == '.mp4'):
                file_extension = file_extension.replace('.mp4', '.gif')
                imgur_url = imgur_url.replace('.mp4', '.gif')
            # Download the image
            file_path = IMAGE_DIR + '/' + id + file_extension
            print('[ OK ] Downloading Imgur image at URL ' + imgur_url +
                  ' to ' + file_path)
            imgur_file = save_file(imgur_url, file_path)
            # Imgur will sometimes return a single-frame thumbnail instead of a GIF, so we need to check for this
            if (file_extension == '.gif'):
                # Open the file using the Pillow library
                img = Image.open(imgur_file)
                # Get the MIME type
                mime = Image.MIME[img.format]
                if (mime == 'image/gif'):
                    # Image is indeed a GIF, so it can be posted
                    img.close()
                    return imgur_file
                else:
                    # Image is not actually a GIF, so don't post it
                    print(
                        '[WARN] Imgur has not processed a GIF version of this link, so it can not be posted to Twitter'
                    )
                    img.close()
                    # Delete the image
                    try:
                        os.remove(imgur_file)
                    except BaseException as e:
                        print('[EROR] Error while deleting media file:',
                              str(e))
                    return
            else:
                return imgur_file
        else:
            print(
                '[EROR] Could not identify Imgur image/gallery ID in this URL:',
                img_url)
            return
    elif ('gfycat.com' in img_url):  # Gfycat
        try:
            gfycat_name = os.path.basename(urllib.parse.urlsplit(img_url).path)
            client = GfycatClient()
            gfycat_info = client.query_gfy(gfycat_name)
        except BaseException as e:
            print('[EROR] Error downloading Gfycat link:', str(e))
            return
        # Download the 2MB version because Tweepy has a 3MB upload limit for GIFs
        gfycat_url = gfycat_info['gfyItem']['max2mbGif']
        file_path = IMAGE_DIR + '/' + gfycat_name + '.gif'
        print('[ OK ] Downloading Gfycat at URL ' + gfycat_url + ' to ' +
              file_path)
        gfycat_file = save_file(gfycat_url, file_path)
        return gfycat_file
    elif ('giphy.com' in img_url):  # Giphy
        # Working demo of regex: https://regex101.com/r/o8m1kA/2
        regex = r"https?://((?:.*)giphy\.com/media/|giphy.com/gifs/|i.giphy.com/)(.*-)?(\w+)(/|\n)"
        m = re.search(regex, img_url, flags=0)
        if m:
            # Get the Giphy ID
            id = m.group(3)
            # Download the 2MB version because Tweepy has a 3MB upload limit for GIFs
            giphy_url = 'https://media.giphy.com/media/' + id + '/giphy-downsized.gif'
            file_path = IMAGE_DIR + '/' + id + '-downsized.gif'
            print('[ OK ] Downloading Giphy at URL ' + giphy_url + ' to ' +
                  file_path)
            giphy_file = save_file(giphy_url, file_path)
            # Check the hash to make sure it's not a GIF saying "This content is not available"
            # More info: https://github.com/corbindavenport/tootbot/issues/8
            hash = hashlib.md5(file_as_bytes(open(giphy_file,
                                                  'rb'))).hexdigest()
            if (hash == '59a41d58693283c72d9da8ae0561e4e5'):
                print(
                    '[WARN] Giphy has not processed a 2MB GIF version of this link, so it can not be posted to Twitter'
                )
                return
            else:
                return giphy_file
        else:
            print('[EROR] Could not identify Giphy ID in this URL:', img_url)
            return
    else:
        # Check if URL is an image, based on the MIME type
        image_formats = ('image/png', 'image/jpeg', 'image/gif', 'image/webp')
        img_site = urlopen(img_url)
        meta = img_site.info()
        if meta["content-type"] in image_formats:
            # URL appears to be an image, so download it
            file_name = os.path.basename(urllib.parse.urlsplit(img_url).path)
            file_path = IMAGE_DIR + '/' + file_name
            print('[ OK ] Downloading file at URL ' + img_url + ' to ' +
                  file_path)
            try:
                img = save_file(img_url, file_path)
                return img
            except BaseException as e:
                print('[EROR] Error while downloading image:', str(e))
                return
        else:
            print('[EROR] URL does not point to a valid image file')
            return
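The Giphy placeholder check above reads the whole file into memory via file_as_bytes(open(...)) and never closes the handle; a streaming equivalent (a sketch) avoids both issues:

import hashlib

def md5_of_file(path, chunk_size=8192):
    digest = hashlib.md5()
    with open(path, 'rb') as fh:
        for block in iter(lambda: fh.read(chunk_size), b''):
            digest.update(block)
    return digest.hexdigest()

# md5_of_file(giphy_file) == '59a41d58693283c72d9da8ae0561e4e5' flags the
# "content not available" placeholder GIF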
Exemplo n.º 27
0
class ImgurExtractor(BaseExtractor):

    url_key = 'imgur'

    def __init__(self, post, reddit_object, content_display_only=False):
        """
        A subclass of the BaseExtractor class.  This class interacts exclusively with the imgur website through the
        imgur api via ImgurPython
        """
        super().__init__(post, reddit_object, content_display_only)
        self.connected = False
        self.imgur_client_id = self.settings_manager.imgur_client_id
        self.imgur_client_secret = self.settings_manager.imgur_client_secret
        if self.imgur_client_id is None or self.imgur_client_secret is None:
            if LogUtils.imgur_client_error_log_count < 1:
                LogUtils.imgur_client_error_log_count += 1
                message = 'No valid Imgur client detected.  In order to download content from imgur.com, you must ' \
                          'have a valid imgur client id and client secret.  Please see the imgur client information ' \
                          'dialog in the settings menu.'
                self.handle_failed_extract(message=message, imgur_client_id_valid=self.imgur_client_id is not None,
                                           imgur_client_secret_valid=self.imgur_client_secret is not None)
        else:
            try:
                self.client = ImgurClient(self.imgur_client_id, self.imgur_client_secret)
                self.connected = True
            except ImgurClientError as e:
                if e.status_code == 500:
                    self.over_capacity_error()
                else:
                    self.unknown_connection_error(e.status_code)
            except Exception:
                message = 'Failed to connect to imgur.com'
                self.handle_failed_extract(message=message, save=True, extractor_error_message=message)

    def extract_content(self):
        """Dictates what type of page container a link is and then dictates which extraction method should be used"""
        if self.connected:
            try:
                if 'i.imgur' in self.url:
                    self.extract_direct_link()

                elif "/a/" in self.url:
                    self.extract_album()
                elif '/gallery/' in self.url:
                    try:
                        self.extract_album()
                    except Exception:
                        # some gallery links point at a single image rather than an album
                        pass
                elif self.url.lower().endswith(Const.ALL_EXT):
                    self.extract_direct_mislinked()
                else:
                    self.extract_single()
            except ImgurClientError as e:
                self.handle_client_error(e.status_code)
            except ImgurClientRateLimitError:
                self.rate_limit_exceeded_error()
            except Exception:
                self.failed_to_locate_error()

    def handle_client_error(self, status_code):
        if status_code == 403:
            if self.client.credits['ClientRemaining'] is None:
                self.failed_to_locate_error()
            elif self.client.credits['ClientRemaining'] <= 0:
                self.no_credit_error()
            else:
                self.failed_to_locate_error()
        elif status_code == 429:
            self.rate_limit_exceeded_error()
        elif status_code == 500:
            self.over_capacity_error()
        elif status_code == 404:
            self.does_not_exist_error()

    def rate_limit_exceeded_error(self):
        message = 'Imgur rate limit exceeded'
        self.handle_failed_extract(message=message, save=True, imgur_error_message='rate limit exceeded')

    def no_credit_error(self):
        message = 'Not enough imgur credits to extract post'
        self.handle_failed_extract(message=message, save=True, imgur_error_message='not enough credits')

    def over_capacity_error(self):
        message = 'Imgur is currently over capacity'
        self.handle_failed_extract(message=message, save=True, imgur_error_message='over capacity')

    def does_not_exist_error(self):
        message = 'Content does not exist.  This most likely means that the content has been deleted from Imgur, but ' \
                  'the post still remains on reddit'
        self.handle_failed_extract(message=message, imgur_error_message='Content does not exist')

    def failed_to_locate_error(self):
        message = 'Failed to locate content'
        self.handle_failed_extract(message=message, extractor_error_message=message)

    def unknown_connection_error(self, status_code):
        message = 'Unknown imgur connection error'
        self.handle_failed_extract(message=message, save=True, status_code=status_code)

    def extract_album(self):
        count = 1
        domain, album_id = self.url.rsplit('/', 1)
        for pic in self.client.get_album_images(album_id):
            url = pic.link
            address, extension = url.rsplit('.', 1)
            file_name = self.get_filename(album_id)
            if pic.type == 'image/gif' and pic.animated:
                extension = 'mp4'
                url = pic.mp4
            self.make_content(url, file_name, extension, count)
            count += 1

    def extract_single(self):
        domain, image_id = self.url.rsplit('/', 1)
        pic = self.client.get_image(image_id)
        url = pic.link
        address, extension = url.rsplit('.', 1)
        file_name = self.get_filename(image_id)
        if pic.type == 'image/gif' and pic.animated:
            extension = 'mp4'
            url = pic.mp4
        self.make_content(url, file_name, extension)

    def extract_direct_link(self):
        for ext in Const.ALL_EXT:
            if ext in self.url:
                index = self.url.find(ext)
                url = '%s%s' % (self.url[:index], ext)

        try:
            # 'url' is only bound if one of the extensions matched above; an
            # unmatched extension raises the NameError handled below
            domain, id_with_ext = url.rsplit('/', 1)
            image_id, extension = id_with_ext.rsplit('.', 1)
            file_name = self.get_filename(image_id)
            if url.endswith('gifv') or url.endswith('gif'):
                picture = self.client.get_image(image_id)
                if picture.type == 'image/gif' and picture.animated:
                    url = picture.mp4
                    extension = 'mp4'
            self.make_content(url, file_name, extension)
        except NameError:
            message = 'Unrecognized extension'
            self.handle_failed_extract(message=message, extractor_error_message=message)

    def extract_direct_mislinked(self):
        """
        All direct links to imgur.com must start with 'https://i.imgur.  Sometimes links get mis labeled somehow when
        they are posted.  This method is to add the correct address beginning to mislinked imgur urls and get a proper
        extraction
        """
        for ext in Const.ALL_EXT:
            if ext in self.url:
                index = self.url.find(ext)
                url = '%s%s' % (self.url[:index], ext)

        try:
            domain, id_with_ext = url.rsplit('/', 1)
            domain = 'https://i.imgur.com/'
            url = '%s%s' % (domain, id_with_ext)
            image_id, extension = id_with_ext.rsplit('.', 1)
            file_name = self.get_filename(image_id)
            if url.endswith('gifv') or url.endswith('gif'):
                picture = self.client.get_image(image_id)
                if picture.type == 'image/gif' and picture.animated:
                    url = picture.mp4
                    extension = 'mp4'
            self.make_content(url, file_name, extension)
        except NameError:
            message = 'Unrecognized extension'
            self.handle_failed_extract(message=message, extractor_error_message=message)
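
# Hypothetical sketch (not from the original source): the extractor above
# assumes a Const.ALL_EXT tuple of recognized file extensions, roughly:
class Const:
    ALL_EXT = ('.jpg', '.jpeg', '.png', '.gif', '.gifv', '.mp4', '.webm')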
Example No. 28
def main():
    if not os.path.isfile('credentials.config'):  # if credentials file does not exist, start the first run function
        first_run()  # Authenticate and generate the credentials file.

    # command line switches function
    args = read_command_args()
    use_evernote = args.e
    debug_mode = args.debug
    delete_files = args.t if use_evernote is True else False
    path = args.p
    info_mode = args.i

    if debug_mode:
        # print("Warning - Debug mode active. Files will be downloaded, but not added to index")
        logger = create_logger(log_to_console=True)
        logger.setLevel(logging.DEBUG)
        logger.info('Warning - Debug mode active. Files will be downloaded, but not added to index')
    elif info_mode:
        warnings.warn("Suppressed Resource warning", ResourceWarning)  # suppresses all unclosed socket warnings.
        logger = create_logger(log_to_console=True)
    else:
        warnings.warn("Suppressed Resource warning", ResourceWarning)  # suppresses all unclosed socket warnings.
        logger = create_logger()

    logger.info("\n###########\nStarting SR\n###########")

    try:
        with open('credentials.config', 'r') as json_file:
            credentials = json.load(json_file)  # get various OAuth tokens
    except OSError:
        logger.error('Unable to open credentials file')
        raise SystemExit

    # Create the downloads folder on the specified path, or in the dir where file is stored.
    if path != "":
        path = path[0]
    else:
        path = os.getcwd()
    path += "/SRDownloads"

    if not os.path.exists(path):
        os.makedirs(path)

    # Authenticate with Reddit
    logger.info('Authenticating with Reddit')
    client_id = credentials['reddit']['client_id']
    client_secret = credentials['reddit']['client_secret']
    redirect_uri = credentials['reddit']['redirect_uri']
    refresh_token = credentials['reddit']['refresh_token']
    user_agent = "SavedRetriever 0.9 by /u/fuzzycut"

    try:
        r = praw.Reddit(user_agent=user_agent,
                        oauth_client_id=client_id,
                        oauth_client_secret=client_secret,
                        oauth_redirect_uri=redirect_uri)

        access_information = r.refresh_access_information(refresh_token)
        r.set_access_credentials(**access_information)
    except Exception as e:
        logger.error(e)
        raise SystemExit
    time_since_accesstoken = time.time()

    index = set()
    if os.path.isfile('index.txt'):  # checking for index file, which contains the index of downloaded files.
        try:
            with open('index.txt', 'r') as ind:
                for line in ind:
                    index.add(line[:-1])  # -1 truncates the newline in the index file.
        except OSError:
            logger.error("Unable to open index file for reading")
            raise SystemExit

    if use_evernote is True:
        enclient = evernoteWrapper.Client(credentials['evernote']['dev_token'], 'Saved from Reddit')

    html_index_file = None
    if delete_files is False:  # only create index if we're going to use it.
        html_index_file = html_index.index(r.get_me().name, path)

    try:
        ind = open('index.txt', 'a')  # open index file for appending
    except OSError:
        logger.error("Unable to open index file for writing")
        raise SystemExit

    logger.info("Beginning to save files...")
    for i in r.get_me().get_saved(limit=None):
        if (time.time() - time_since_accesstoken) / 60 > 55:  # Refresh the access token before it runs out.
            logger.debug('Refreshing Reddit token')
            r.refresh_access_information(access_information['refresh_token'])
            time_since_accesstoken = time.time()

        name = i.name
        file_name = name  # to stop ide complaining.
        note = None
        evernote_tags = ('Reddit', 'SavedRetriever', '/r/' + i.subreddit.display_name)  # add config for this later

        # logger.info('Saving post - {}'.format(name))

        if name not in index:  # file has not been downloaded
            permalink = i.permalink
            author = i.author
            title = i.link_title if hasattr(i, 'link_title') else i.title
            # ========== #
            # IS COMMENT #
            # ========== #
            if hasattr(i, 'body_html'):
                logger.debug("{} is comment".format(name))
                body = i.body_html

                # html output
                body = subreddit_linker(body)
                output = html_output_string(permalink, author, body, title)
                if delete_files is False:
                    file_name = html_writer(path, name, output)

                # en api section
                if use_evernote is True:
                    enclient.new_note(title)
                    enclient.add_html(output)
                    enclient.add_tag(*evernote_tags)  # the * is very important. It unpacks the tags tuple properly
                    note = enclient.create_note()
            # ============ #
            # IS SELF-POST #
            # ============ #
            elif hasattr(i, 'is_self') and i.is_self is True:
                logger.debug('{} is self-post'.format(name))
                text = i.selftext_html if i.selftext_html is not None else ""

                # html output
                text = subreddit_linker(text)
                output = html_output_string(permalink, author, text, title)
                if delete_files is False:
                    file_name = html_writer(path, name, output)

                # en api section
                if use_evernote is True:
                    enclient.new_note(title)
                    enclient.add_tag(*evernote_tags)
                    enclient.add_html(output)
                    note = enclient.create_note()
            # ====================== #
            # IS DIRECT LINKED IMAGE #
            # ====================== #
            elif hasattr(i, 'url') and re.sub(r"([^A-Za-z0-9])\w+", "", i.url.split('.')[-1]) in ['jpg', 'png', 'gif', 'gifv', 'pdf']:
                """
                Need to check file types and test pdf. How does this handle gfycat and webm? Can EN display that inline?
                The regex in the if is to strip out non-valid filetype chars.
                """
                logger.debug('{} is direct linked image'.format(name))
                url = i.url
                base_filename = "{}_image.{}".format(name, re.sub("([^A-z0-9])\w+", "", url.split('.')[
                    -1]))  # filename for image. regex same as above.
                filename = path + "/" + base_filename

                # image downloader section
                if os.path.exists(filename) and (os.path.getsize(filename) > 0):  # If image exists and is valid
                    image_downloaded = True
                    logger.info("Image already exists - {}".format(base_filename))
                else:
                    image_downloaded = image_saver(url, filename)
                    logger.info('Downloaded image - {}'.format(base_filename))

                if image_downloaded:
                    # write image as <img> or link to local pdf downloaded in html file
                    if filename.split('.')[-1] == 'pdf':
                        img = '<a href="{}">Click here for link to downloaded pdf</a>'.format(base_filename)
                    else:
                        img = '<br><a href="{0}"><img src="{0}"></a>'.format(
                            base_filename)  # html for embedding in html file
                else:
                    img = "Image failed to download - It may be temporarily or permanently unavailable"

                # Evernote api section
                if use_evernote is True:
                    enclient.new_note(title)
                    enclient.add_tag(*evernote_tags)
                    enclient.add_html(html_output_string_image(permalink, author, "", title))  # should add body="" in the function
                    if image_downloaded:
                        enclient.add_resource(filename)
                    note = enclient.create_note()

                if delete_files is False:
                    file_name = html_writer(path, name, html_output_string_image(permalink, author, img, title))
                else:
                    os.remove(filename)
            # ============== #
            # IS IMGUR ALBUM #
            # ============== #
            elif hasattr(i, 'url') and 'imgur' in i.url:  # Add option to download images to folder.
                logger.debug('{} is Imgur album'.format(name))
                url = i.url
                body = "<h2>{}</h2>".format(title)

                # imgur api section
                client = ImgurClient(credentials['imgur']['client_id'], credentials['imgur']['client_secret'])
                pattern = r'/([A-Za-z0-9]{5,7})'  # matches any 5-7 character word that comes after a forward slash (/).
                match = re.findall(pattern, url)
                gallery_id = match[-1].replace('/', '')  # removes any forward slashes for processing
                gallery = []
                filename = None
                try:
                    gallery = client.get_album_images(gallery_id)
                except imgurpython.helpers.error.ImgurClientError:  # if 'gallery' is actually just a lone image
                    try:
                        gallery = [client.get_image(gallery_id)]
                    except imgurpython.helpers.error.ImgurClientError as error:  # if gallery does not exist. Is this the best way to do this?
                        if debug_mode is True or error.status_code != 404:
                            print("**{} - {}**".format(error.status_code, error.error_message))

                # img_path = 'Downloads/{}'.format(gallery_id)
                img_path = path + "/" + gallery_id
                if not os.path.exists(img_path):
                    os.makedirs(img_path)
                for image in gallery:  # add if gallery > 10, then just add a link (would be too large for the note)
                    image_name = image.title if image.title is not None else ""
                    image_description = image.description if image.description is not None else ""
                    image_filetype = image.type.split('/')[1]
                    image_id = image.id
                    image_link = image.link
                    # sets up downloaded filename and html for embedding image
                    base_filename = "{}_image.{}".format(image_id, image_filetype)
                    img = '<p><h3>{0}</h3><a href="{1}/{2}"><img src="{1}/{2}"></a><br/>{3}</p>'.format(image_name,
                                                                                                        gallery_id,
                                                                                                        base_filename,
                                                                                                        image_description)
                    filename = img_path + "/" + base_filename
                    if os.path.exists(filename) and (os.path.getsize(filename) > 0):  # only download if file doesn't already exist
                        logger.info('Image already exists - {}'.format(base_filename))
                    else:
                        image_saver(image_link, filename)
                        logger.info('Image downloaded - {}'.format(base_filename))
                    body += img

                # Evernote api section
                if use_evernote is True:
                    enclient.new_note(title)
                    enclient.add_tag(*evernote_tags)
                    if len(gallery) == 1 and filename is not None:
                        enclient.add_html(html_output_string_image(permalink, author, "", title))
                        enclient.add_resource(filename)
                    else:
                        enclient.add_html(html_output_string_image(permalink, author,
                        'This album is too large to embed; please see <a href="{}">here</a> for the original link.'.format(url),
                                                             title))
                    note = enclient.create_note()

                if delete_files is False:
                    file_name = html_writer(path, name, html_output_string_image(permalink, author, body, title))
                else:
                    shutil.rmtree(img_path)
            # ========== #
            # IS ARTICLE #
            # ========== #
            elif hasattr(i, 'title') and i.is_self is False:
                # This section needs work. It is semi-complete. Ultimately, adding in the full article is the goal.
                logger.debug('{} is article/webpage'.format(name))
                url = i.url

                # readability api section
                os.environ["READABILITY_PARSER_TOKEN"] = credentials['readability'][
                    'parser_key']  # set the environment variable as the parser key
                logger.info('Initializing Readability Client')
                parse = ParserClient()  # readability api doesn't take the token directly
                parse_response = parse.get_article(url)
                article = parse_response.json()
                if 'content' not in article:  # if unable to parse document, manually set an error message
                    article['content'] = 'Unable to parse page - See <a href="{}">here</a> for the original link'.format(url)
                article = article['content']
                article = "<a href='{}'>{}</a><br/>{}<br/>".format(url, title, article)  # source of article

                # html output section.
                output = html_output_string(permalink, author, article, title)
                if delete_files is False:
                    file_name = html_writer(path, name, output)

                # Evernote section
                if use_evernote is True:
                    enclient.new_note(title)
                    enclient.add_tag(*evernote_tags)
                    output = html_output_string(permalink, author, article, title)
                    enclient.add_html(output)

                    # Add html file to note
                    # enclient.add_resource("Downloads/{}.html".format(name))
                    note = enclient.create_note()

            # end of checking for saved items #
            failed_upload = False
            if use_evernote is True:
                if note is not None:
                    # print("Saved {:9} - GUID: {}".format(name, note.guid))
                    logger.info('Saved {:9} - GUID: {}'.format(name, note.guid))
                else:  # Upload failed
                    # print("Saved {:9} - Note failed to upload".format(name))
                    logger.info('Saved {:9} - Note failed to upload'.format(name))
                    failed_upload = True
            elif use_evernote is False:
                # print("Saved " + name)
                logger.info('Saved ' + name)
            if not debug_mode and not failed_upload:
                ind.write(name + "\n")
                ind.flush()  # this fixes python not writing the file if it terminates before .close() can be called
                if delete_files is False:
                    html_index_file.add_link(title, file_name, permalink)

    # end of for loop
    ind.close()
    logger.info("All items downloaded")
    if delete_files is False:
        html_index_file.save_and_close()
    else:  # try remove downloads if -t is set, but don't force it if directory has things in it already.
        try:
            os.rmdir(path)  # remove the SRDownloads folder created earlier
        except OSError:
            logger.error("Unable to remove files")
Example No. 29
 def get_image(self, image_id):
     client = ImgurClient(**self.__class__.imgur_creds)
     image = client.get_image(image_id)
     return image
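
 # Hypothetical sketch: the method above assumes a class-level credentials
 # dict that can be unpacked into ImgurClient, e.g.:
 # imgur_creds = {'client_id': 'YOUR_ID', 'client_secret': 'YOUR_SECRET'}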
Example No. 30
def get_url(submission, mp4_instead_gif=True):
    '''
    return TYPE, URL
    e.g.: return 'img', 'http://example.com/pic.png'
    '''
    def what_is_inside(url):
        header = requests.head(url).headers
        if 'Content-Type' in header:
            return header['Content-Type']
        else:
            return ''

    # If reddit native gallery
    if hasattr(submission, 'gallery_data'):
        dict_of_dicts_of_pics = dict()
        list_of_media = dict()
        for item in submission.gallery_data['items']:
            list_of_media[item['id']] = item['media_id']
        counter = 0
        for item in sorted(list_of_media.items(), key=lambda item: item[0]):
            if counter % 10 == 0:
                dict_of_dicts_of_pics[counter // 10] = dict()
            item_with_media = submission.media_metadata[item[1]]['s']
            if 'u' in item_with_media:
                # It's a pic
                dict_of_dicts_of_pics[counter // 10][counter] = {
                    'url': item_with_media['u'],
                    'type': 'pic'
                }
            else:
                # It's a gif
                dict_of_dicts_of_pics[counter // 10][counter] = {
                    'url': item_with_media['mp4'],
                    'type': 'video'
                }
            counter += 1
        return TYPE_GALLERY, dict_of_dicts_of_pics

    url = submission.url
    url_content = what_is_inside(url)

    if submission.is_video:
        if 'reddit_video' in submission.media:
            if submission.media['reddit_video'].get('is_gif', False):
                return TYPE_GIF, submission.media['reddit_video'][
                    'fallback_url']
            return TYPE_VIDEO, submission.media['reddit_video']['fallback_url']
            # return TYPE_OTHER, url

    try:
        if len(submission.crosspost_parent_list) > 0:
            parent_submission_json = submission.crosspost_parent_list[0]
            if parent_submission_json['is_video']:
                if 'reddit_video' in parent_submission_json['media']:
                    if parent_submission_json['media']['reddit_video'].get(
                            'is_gif', False):
                        return TYPE_GIF, parent_submission_json['media'][
                            'reddit_video']['fallback_url']
                    return TYPE_VIDEO, parent_submission_json['media'][
                        'reddit_video']['fallback_url']
    except Exception:
        # Not a crosspost
        pass

    if (CONTENT_JPEG == url_content or CONTENT_PNG == url_content):
        return TYPE_IMG, url

    if CONTENT_GIF in url_content:
        if url.endswith('.gif') and mp4_instead_gif:
            # Let's try to find .mp4 file.
            url_mp4 = url[:-4] + '.mp4'
            if CONTENT_MP4 == what_is_inside(url_mp4):
                return TYPE_GIF, url_mp4
        return TYPE_GIF, url

    if url.endswith('.gifv'):
        if mp4_instead_gif:
            url_mp4 = url[:-5] + '.mp4'
            if CONTENT_MP4 == what_is_inside(url_mp4):
                return TYPE_GIF, url_mp4
        if CONTENT_GIF in what_is_inside(url[0:-1]):
            return TYPE_GIF, url[0:-1]

    if submission.is_self is True:
        # Self submission with text
        return TYPE_TEXT, None

    if urlparse(url).netloc == 'imgur.com':
        # Imgur
        imgur_config = yaml.safe_load(
            open(os.path.join('configs', 'imgur.yml')).read())
        imgur_client = ImgurClient(imgur_config['client_id'],
                                   imgur_config['client_secret'])
        path_parts = urlparse(url).path.split('/')
        if path_parts[1] == 'gallery':
            # TODO: gallery handling
            return TYPE_OTHER, url
        elif path_parts[1] == 'topic':
            # TODO: topic handling
            return TYPE_OTHER, url
        elif path_parts[1] == 'a':
            # An imgur album
            album = imgur_client.get_album(path_parts[2])
            story = dict()
            for num, img in enumerate(album.images):
                number = num + 1
                what = TYPE_IMG
                link = img['link']
                ext = img['type'].split('/')[1]
                if img['animated']:
                    what = TYPE_GIF
                    link = img['mp4'] if mp4_instead_gif else img['gifv'][:-1]
                    ext = 'mp4' if mp4_instead_gif else 'gif'
                story[number] = {'url': link, 'what': what, 'ext': ext}
            if len(story) == 1:
                return story[1]['what'], story[1]['url']
            return TYPE_ALBUM, story
        else:
            # Just imgur img
            img = imgur_client.get_image(path_parts[1].split('.')[0])
            if not img.animated:
                return TYPE_IMG, img.link
            else:
                if mp4_instead_gif:
                    return TYPE_GIF, img.mp4
                else:
                    # return 'gif', img.link
                    return TYPE_GIF, img.gifv[:-1]
    elif 'gfycat.com' in urlparse(url).netloc:
        rname = re.findall(r'gfycat.com\/(?:detail\/)?(\w*)', url)[0]
        try:
            r = requests.get(GFYCAT_GET + rname)
            if r.status_code != 200:
                logging.info('Gfy fail prevented!')
                return TYPE_OTHER, url
            urls = r.json()['gfyItem']
            if mp4_instead_gif:
                return TYPE_GIF, urls['mp4Url']
            else:
                return TYPE_GIF, urls['max5mbGif']
        except KeyError:
            logging.info('Gfy fail prevented!')
            return TYPE_OTHER, url
    else:
        return TYPE_OTHER, url
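
# Hypothetical sketch (not part of the snippet): the TYPE_*/CONTENT_* names
# referenced above are assumed to be simple module-level string constants,
# along with the Gfycat API base URL:
TYPE_IMG, TYPE_GIF, TYPE_VIDEO = 'img', 'gif', 'video'
TYPE_ALBUM, TYPE_GALLERY = 'album', 'gallery'
TYPE_TEXT, TYPE_OTHER = 'text', 'other'
CONTENT_JPEG, CONTENT_PNG = 'image/jpeg', 'image/png'
CONTENT_GIF, CONTENT_MP4 = 'image/gif', 'video/mp4'
GFYCAT_GET = 'https://api.gfycat.com/v1/gfycats/'  # assumed endpoint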
Example No. 31
class Bot:

    def __init__(self, videobot, slave_bot):
        """
        Initializes the Imgur Bot with credentials stored in environment variables.
        """
        IMGUR_CLIENT_ID = os.environ.get("IMGUR_CLIENT_ID")
        IMGUR_CLIENT_SECRET = os.environ.get("IMGUR_CLIENT_SECRET")
        self.client = ImgurClient(IMGUR_CLIENT_ID, IMGUR_CLIENT_SECRET)
        self.supported_video_formats = ['gif','gifv', 'webm', 'mp4']
        self.slave_bot = slave_bot
        self.video_bot = videobot

    def handle_album(self, album_link):

        """
        handles imgur links of the format: imgur.com/a/<id>, imgur.com/<id>#<img id>
        :type album_link: 'str'
        :rtype message: 'str' - Analysis message from Bot.
        :rtype status: <Dict> - {'nsfw':<float>, 'sfw':<float>}
        """
        temp = album_link.split('/')[-1]
        album_id = temp.split('#')[0]

        message = None
        status = {}

        try:
            album = self.client.get_album(album_id=album_id)

            imgur_flag = album.nsfw


            if imgur_flag:
                message = 'Album marked NSFW on Imgur.'
                message = '**[Hover to reveal](#s "' + message + '")**'  # reddit spoiler tag added.

            elif not imgur_flag:

                images_list = self.client.get_album_images(album_id)

                links = [item.link for item in images_list[0:10]
                         if item.type.split('/')[-1] not in self.supported_video_formats]
                links_videos = [item.link for item in images_list[0:10]
                                if item.type.split('/')[-1] in self.supported_video_formats]
                # Ensures only 10 images/gifs are processed in case album is very large.

                temp1, _ = self.handle_videos(links_videos)
                temp2, _ = self.handle_images(links)

                status.update(temp1)
                status.update(temp2)

                # for all images, if SFW - mark SFW.
                # if any image is not SFW, find out which one.

                max_nsfw = (None, 0)
                min_sfw = (None, 100)
                for k, v in status.items():
                    labels = sorted(v.items(), key=operator.itemgetter(1), reverse=True)

                    tag, confidence = labels[0]

                    # '==' rather than 'is': string identity is not guaranteed
                    if tag == 'SFW' and confidence <= min_sfw[1]:
                        min_sfw = labels[0]

                    elif tag != 'SFW' and confidence > max_nsfw[1]:
                        max_nsfw = labels[0]

                if max_nsfw != (None, 0):
                    message = "Album has " + str(max_nsfw[0]) + " image(s). I'm {0:.2f}% confident.".format(max_nsfw[1])

                else:
                    message = "Album has " + str(min_sfw[0]) + " image(s). I'm {0:.2f}% confident.".format(min_sfw[1])

                message = '**[Hover to reveal](#s "' + message + ' ")**'  # reddit spoiler tag added.

        except error.ImgurClientError as e:
            status = None
            message = None
            print('Imgur Error:', e.error_message)

        return status, message

    def handle_images(self, links):
        status = {}
        message = None

        valid_links = [self.ensure_extension(aLink) for aLink in links
                       if aLink.split('.')[-1].lower() not in ['gif', 'gifv', 'mp4', 'webm']]

        status = self.slave_bot.analyze(valid_links)

        if len(valid_links) == 1:
            link = valid_links[0]
            labels = sorted(status[link].items(), key=operator.itemgetter(1), reverse=True)
            tag, confidence = labels[0]
            message = tag + ". I'm  {0:.2f}% confident.".format(confidence)
            if tag is 'SFW':
                manning_distance = self.slave_bot.clarifai_bot.match_template(link, 'manning')
                if manning_distance is not None and manning_distance <= 0.01:
                    message += ' Might be Manning Face.'

            message = '**[Hover to reveal](#s "' + message + ' ")**'  # reddit spoiler tag added.

        return status, message

    def handle_gallery(self, gallery_link):
        item_id = gallery_link.split('/')[-1]
        # user linked to either an album or an image from the imgur gallery.
        # assume it is album. if it's a 404, assume it is an image.

        message = ''
        status = {}

        try:
            album = self.client.get_album(album_id=item_id)

            imgur_flag = album.nsfw

            if imgur_flag:
                status = {}
                message = 'Album marked NSFW on Imgur.'
                message = '**[Hover to reveal](#s "' + message + '")**'  # reddit spoiler tag added.

            elif not imgur_flag:
                status, message = self.handle_album(album.link)

        except error.ImgurClientError as e:
            try:
                image = self.client.get_image(item_id)
                imgur_flag = image.nsfw

                if imgur_flag:
                    message = 'Item marked NSFW on Imgur.'
                    message = '**[Hover to reveal](#s "' + message + '")**'  # reddit spoiler tag added.

                elif not imgur_flag:

                    if image.type.split('/')[-1] in self.supported_video_formats:
                        status, message = self.handle_videos([image.link])
                    else:
                        status, message = self.handle_images([image.link])

            except error.ImgurClientError as e:
                status = None
                message = None
                print('Imgur Error:', e.error_message)

        return status, message

    def handle_videos(self, links):
        status = {}
        message = None
        for each_url in links:
            link = self.ensure_extension(each_url)

            # link is now 'imgur.com/id.extension'
            video_id = link.split('/')[-1].split('.')[0]
            filename = video_id+'.mp4'
            mp4_link = 'http://i.imgur.com/' + filename
            urllib.request.urlretrieve(mp4_link, filename)  # Python 3: urlretrieve lives in urllib.request
            status.update({each_url:self.video_bot.make_prediction(filename)})

            if os.path.exists(filename):
                os.remove(filename)

        if len(links) == 1:
            link = links[0]
            labels = sorted(status[link].items(), key=operator.itemgetter(1), reverse=True)
            tag, confidence = labels[0]
            message = tag + ". I'm  {0:.2f}% confident.".format(confidence)
            message = '**[Hover to reveal](#s "' + message + ' ")**'  # reddit spoiler tag added.

        return status, message

    def ensure_extension(self, url):
        temp = url.split('/')[-1]  # will be <image_id>.<extension> or <image_id>
        if '.' not in temp:
            image_id = temp

            url = self.client.get_image(image_id).link
            return url
        else:
            return url
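
# Hypothetical usage sketch (assumes slave_bot exposes analyze() and
# videobot exposes make_prediction(), as the class above relies on):
# bot = Bot(videobot=my_video_model, slave_bot=my_image_model)
# status, message = bot.handle_album('https://imgur.com/a/abc12')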
Example No. 32
 img_client_id = ''
 img_client_secret = ''
 mashape_key = ''
 if mashape_key:
     img_client = ImgurClient(img_client_id, img_client_secret, mashape_key=mashape_key)
 else:
     img_client = ImgurClient(img_client_id, img_client_secret)
 for i in range(1,25):
     data = {}
     for gallery_img in img_client.subreddit_gallery(subr, page=i):
         url = gallery_img.link
         # check if image is in jpg format and has not already been processed
         if len(re.findall(r'imgur.com/.+\.jpg', url)) > 0 and found_url.get(url, 0) == 0:
             print("writing " + url)
             img_id = re.findall(r'.com.+\.jpg', url)[0][5:-4]
             try:
                 img = img_client.get_image(img_id)
             except Exception:
                 continue
             # get images in m format to make it easier to process for the network
             response = requests.get(url[:-4]+'m.jpg')
             label = img.nsfw
             if label:
                 path = 'nsfw/'
             else:
                 path = 'normal/'
             try:
                 if not os.path.exists(path + img_id + '.jpg'):
                     with open(path + img_id + '.jpg', "wb") as f:
                         f.write(response.content)
                     found_url[url] = 1
             except OSError:
                 continue  # assumed handler: skip files that fail to write
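
 # Hypothetical sketch of names this truncated fragment assumes were defined
 # earlier (not in the original): the target subreddit and the record of
 # already-processed URLs.
 # subr = 'pics'
 # found_url = {}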
Example No. 33
def get_hd_media(submission, IMGUR_CLIENT, IMGUR_CLIENT_SECRET):
    media_url = submission.url
    # Make sure config file exists
    try:
        config = configparser.ConfigParser()
        config.read('config.ini')
    except BaseException as e:
        print('[EROR] Error while reading config file:', str(e))
        sys.exit()
    # Make sure media folder exists
    IMAGE_DIR = config['MediaSettings']['MediaFolder']
    if not os.path.exists(IMAGE_DIR):
        os.makedirs(IMAGE_DIR)
        print('[ OK ] Media folder not found, created a new one')
    # Download and save the linked image
    if any(s in media_url
           for s in ('i.redd.it',
                     'i.reddituploads.com')):  # Reddit-hosted images
        file_name = os.path.basename(urllib.parse.urlsplit(media_url).path)
        file_extension = os.path.splitext(media_url)[-1].lower()
        # Fix for issue with i.reddituploads.com links not having a file extension in the URL
        if not file_extension:
            file_extension += '.jpg'
            file_name += '.jpg'
            media_url += '.jpg'
        # Download the file
        file_path = IMAGE_DIR + '/' + file_name
        print('[ OK ] Downloading file at URL ' + media_url + ' to ' +
              file_path + ', file type identified as ' + file_extension)
        img = save_file(media_url, file_path)
        return img
    elif ('v.redd.it' in media_url):  # Reddit video
        if submission.media:
            # Get URL for MP4 version of reddit video
            video_url = submission.media['reddit_video']['fallback_url']
            # Download the file
            file_path = IMAGE_DIR + '/' + submission.id + '.mp4'
            print('[ OK ] Downloading Reddit video at URL ' + video_url +
                  ' to ' + file_path)
            video = save_file(video_url, file_path)
            return video
        else:
            print('[EROR] Reddit API returned no media for this URL:',
                  media_url)
            return
    elif ('imgur.com' in media_url):  # Imgur
        try:
            client = ImgurClient(IMGUR_CLIENT, IMGUR_CLIENT_SECRET)
        except BaseException as e:
            print('[EROR] Error while authenticating with Imgur:', str(e))
            return
        # Working demo of regex: https://regex101.com/r/G29uGl/2
        regex = r"(?:.*)imgur\.com(?:\/gallery\/|\/a\/|\/)(.*?)(?:\/.*|\.|$)"
        m = re.search(regex, media_url, flags=0)
        if m:
            # Get the Imgur image/gallery ID
            id = m.group(1)
            if any(s in media_url
                   for s in ('/a/', '/gallery/')):  # Gallery links
                images = client.get_album_images(id)
                # Only the first image in a gallery is used
                imgur_url = images[0].link
                print(images[0])
            else:  # Single image/GIF
                if client.get_image(id).type == 'image/gif':
                    # If the image is a GIF, use the MP4 version
                    imgur_url = client.get_image(id).mp4
                else:
                    imgur_url = client.get_image(id).link
            file_extension = os.path.splitext(imgur_url)[-1].lower()
            # Download the image
            file_path = IMAGE_DIR + '/' + id + file_extension
            print('[ OK ] Downloading Imgur image at URL ' + imgur_url +
                  ' to ' + file_path)
            imgur_file = save_file(imgur_url, file_path)
            return imgur_file
        else:
            print(
                '[EROR] Could not identify Imgur image/gallery ID in this URL:',
                media_url)
            return
    elif ('gfycat.com' in media_url):  # Gfycat
        try:
            gfycat_name = os.path.basename(
                urllib.parse.urlsplit(media_url).path)
            client = GfycatClient()
            gfycat_info = client.query_gfy(gfycat_name)
        except BaseException as e:
            print('[EROR] Error downloading Gfycat link:', str(e))
            return
        # Download the Mp4 version
        gfycat_url = gfycat_info['gfyItem']['mp4Url']
        file_path = IMAGE_DIR + '/' + gfycat_name + '.mp4'
        print('[ OK ] Downloading Gfycat at URL ' + gfycat_url + ' to ' +
              file_path)
        gfycat_file = save_file(gfycat_url, file_path)
        return gfycat_file
    elif ('giphy.com' in media_url):  # Giphy
        # Working demo of regex: https://regex101.com/r/o8m1kA/2
        regex = r"https?://((?:.*)giphy\.com/media/|giphy.com/gifs/|i.giphy.com/)(.*-)?(\w+)(/|\n)"
        m = re.search(regex, media_url, flags=0)
        if m:
            # Get the Giphy ID
            id = m.group(3)
            # Download the MP4 version of the GIF
            giphy_url = 'https://media.giphy.com/media/' + id + '/giphy.mp4'
            file_path = IMAGE_DIR + '/' + id + '-giphy.mp4'
            print('[ OK ] Downloading Giphy at URL ' + giphy_url + ' to ' +
                  file_path)
            giphy_file = save_file(giphy_url, file_path)
            return giphy_file
        else:
            print('[EROR] Could not identify Giphy ID in this URL:', media_url)
            return
    else:
        # Check if URL is an image or MP4 file, based on the MIME type
        image_formats = ('image/png', 'image/jpeg', 'image/gif', 'image/webp',
                         'video/mp4')
        img_site = urlopen(media_url)
        meta = img_site.info()
        if meta["content-type"] in image_formats:
            # URL appears to be an image, so download it
            file_name = os.path.basename(urllib.parse.urlsplit(media_url).path)
            file_path = IMAGE_DIR + '/' + file_name
            print('[ OK ] Downloading file at URL ' + media_url + ' to ' +
                  file_path)
            try:
                img = save_file(media_url, file_path)
                return img
            except BaseException as e:
                print('[EROR] Error while downloading image:', str(e))
                return
        else:
            print('[EROR] URL does not point to a valid image file.')
            return
Example No. 34
class Getter(threading.Thread):
    def __init__(self,dwQ,errQ,stopped,filePath,initFile,cfreader):
        threading.Thread.__init__(self,daemon=False)
        self.errorQueue=errQ
        self.downloadQueue=dwQ
        self.stopped=stopped
        self.imgurClient=ImgurClient("4ff2bb9d9c640f2", "8b036ffa680a1304814f48eff9e93206c096727f")
        self.paramReader=cfreader
        self.downloadPath=PathHolder()
        self.downloadPath = self.paramReader.readParam(filePath+ "\\" + initFile,self.downloadPath)
        logger = logging.getLogger('Image_Downloader')
        logger.setLevel(logging.DEBUG)
        fh = logging.FileHandler('logs/downloads.log')
        fh.setLevel(logging.DEBUG)
        # create console handler with a higher log level
        ch = logging.StreamHandler()
        ch.setLevel(logging.ERROR)
        # create formatter and add it to the handlers
        formatter = logging.Formatter('[%(filename)s:%(lineno)s - %(funcName)20s() ] %(asctime)s %(levelname)s:%(message)s')
        fh.setFormatter(formatter)
        ch.setFormatter(formatter)
        # add the handlers to the logger
        logger.addHandler(fh)
        logger.addHandler(ch)
        logger.debug("-----------------------------")
        logger.debug("Init complete")

    def run(self):
        logger= logging.getLogger('Image_Downloader')
        logger.debug("Thread started")
        while not self.stopped.is_set():
            self.getURL()

    def getURL(self):
        logger= logging.getLogger('Image_Downloader')            
        while not self.downloadQueue.empty():
            downloadList=None
            try:
                downloadList=self.downloadQueue.get()
            except:
                continue
            self.downloadQueue.task_done()
            logger.debug("Dequeued and marked done")    
            for downloadObject in downloadList:
                if "imgur" in downloadObject.domain:
                    logger.debug("Imgur URL found")
                    fileName, fileExtension = os.path.splitext(downloadObject.url)
                    if "/a/" in fileName:
                        albumPart=downloadObject.url.split("/a/")[1]
                        albumName=albumPart.split('/')[0].split('#')[0]
                        self.handleAlbum(albumName,downloadObject.subreddit,downloadObject.id,downloadObject.title)
                    elif fileExtension is not None and len(fileExtension) > 0:
                        self.handleImage(downloadObject.url,downloadObject.subreddit,downloadObject.id,downloadObject.title,fileExtension)
                    else:
                        fileId=downloadObject.url.split("/")[-1]
                        for f in fileId.split(","):
                            self.handleURL(f,downloadObject.subreddit,downloadObject.id,downloadObject.title)
                else:
                    logger.debug("Non imgur URL")
                    self.errorQueue.put(ErrorLog(subredditName=downloadObject.subreddit,failedItemName=downloadObject.id,failReason="Domain not supported"))
        logger.debug("Returning from fuction")
            
                        
                
    def handleImage(self,url,redditName,id,title,fileExtension):
        logger= logging.getLogger('Image_Downloader')
        directory=self.downloadPath.pathToHold + redditName
        valid_chars = "-_.%s%s" % (string.ascii_letters, string.digits)
        name=''.join(c for c in title if c in valid_chars)
        fname = (name[:30]) if len(name) > 30 else name
        fileExtension=fileExtension.split("?")[0]
        file=directory + "\\" + fname +"_" + id +  fileExtension
        logger.debug("From "+ url + "to " + file)
        try:
            if not os.path.exists(directory):
                os.mkdir(directory)
        except OSError as e:
            logger.debug(e.strerror)
            self.errorQueue.put(ErrorLog(subredditName=redditName,failedItemName=id,failReason=e.strerror,retries=0))
        if not os.path.exists(file) and not os.path.exists(directory + "\\" + name +"_" + id +  fileExtension):
            try:
                r = requests.get(url, stream=True)
                if r.status_code == 200:
                    with open(file, 'wb') as f:
                        for chunk in r.iter_content(1024):
                            f.write(chunk)
            except (requests.exceptions.RequestException,requests.exceptions.ConnectionError,requests.exceptions.HTTPError,requests.exceptions.URLRequired,requests.exceptions.TooManyRedirects,requests.exceptions.ConnectTimeout,requests.exceptions.ReadTimeout,requests.exceptions.Timeout) as e:
                logger.debug(e.__class__.__name__)
                self.errorQueue.put(ErrorLog(subredditName=redditName,failedItemName=id,failReason=e.__class__.__name__,retries=0))
            except OSError as e:
                logger.debug(e.strerror +" " + file )
                self.errorQueue.put(ErrorLog(subredditName=redditName,failedItemName=id,failReason=e.strerror,retries=0))
        
    def handleAlbum(self,albumName,redditName,id,title):
        logger= logging.getLogger('Image_Downloader')
        logger.debug("Found an Album to download" + albumName)
        for imageObject in self.imgurClient.get_album_images(albumName):
            logger.debug("Next item in the album")
            self.handleImageObject(imageObject,redditName,title)
     
    def handleURL(self,fileId,subreddit,id,title):
        logger= logging.getLogger('Image_Downloader')
        logger.debug("Found a wrapped image: "+ fileId)
        try:
            image=self.imgurClient.get_image(fileId)
            logger.debug("Got the image back " )
            self.handleImageObject(image,subreddit,title)
            logger.debug("Done with this wrap")
        except ImgurClientError as e:
            logger.debug(e.error_message)
            logger.debug(e.status_code)
        except Exception as e:
            logger.debug(type(e))

        
    def handleImageObject(self,imageObject,reddit,title):
        logger= logging.getLogger('Image_Downloader')
        logger.debug("Got the an image to download " )            
        fileName, fileExtension = os.path.splitext(imageObject.link)
        if fileExtension is not None and len(fileExtension) > 0:
            self.handleImage(imageObject.link,reddit,imageObject.id,title,fileExtension)
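
# Hypothetical sketch (not in the original): PathHolder is assumed to be a
# small config object exposing the base download directory read from the
# init file.
class PathHolder:
    def __init__(self, pathToHold='downloads\\'):
        self.pathToHold = pathToHold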
Example No. 35
def get_url(submission, mp4_instead_gif=True):
    '''
    return TYPE, URL, EXTENSION
    e.g.: return 'img', 'http://example.com/pic.png', 'png'
    '''
    
    def what_is_inside(url):
        header = requests.head(url).headers
        if 'Content-Type' in header:
            return header['Content-Type']
        else:
            return ''

    url = submission.url
    url_content = what_is_inside(url)

    if (CONTENT_JPEG == url_content or CONTENT_PNG == url_content):
        return TYPE_IMG, url, url_content.split('/')[1]

    if CONTENT_GIF in url_content:
        if url.endswith('.gif') and mp4_instead_gif:
            # Let's try to find .mp4 file.
            url_mp4 = url[:-4] + '.mp4'
            if CONTENT_MP4 == what_is_inside(url_mp4):
                return TYPE_GIF, url_mp4, 'mp4'
        return TYPE_GIF, url, 'gif'
    
    if url.endswith('.gifv'):
        if mp4_instead_gif:
            url_mp4 = url[:-5] + '.mp4'
            if CONTENT_MP4 == what_is_inside(url_mp4):
                return TYPE_GIF, url_mp4, 'mp4'
        if CONTENT_GIF in what_is_inside(url[0:-1]):
            return TYPE_GIF, url[0:-1], 'gif'

    if submission.is_self is True:
        # Self submission with text
        return TYPE_TEXT, None, None

    if urlparse(url).netloc == 'imgur.com':
        # Imgur
        imgur_config = yaml.safe_load(open(os.path.join('configs', 'imgur.yml')).read())
        imgur_client = ImgurClient(imgur_config['client_id'], imgur_config['client_secret'])
        path_parts = urlparse(url).path.split('/')
        if path_parts[1] == 'gallery':
            # TODO: gallery handling
            return TYPE_OTHER, url, None
        elif path_parts[1] == 'topic':
            # TODO: topic handling
            return TYPE_OTHER, url, None
        elif path_parts[1] == 'a':
            # An imgur album
            album = imgur_client.get_album(path_parts[2])
            story = dict()
            for num, img in enumerate(album.images):
                number = num + 1
                what = TYPE_IMG
                link = img['link']
                ext = img['type'].split('/')[1]
                if img['animated']:
                    what = TYPE_GIF
                    link = img['mp4'] if mp4_instead_gif else img['gifv'][:-1]
                    ext = 'mp4' if mp4_instead_gif else 'gif'
                story[number] = {
                    'url': link,
                    'what': what,
                    'ext': ext
                }
            if len(story) == 1:
                return story[1]['what'], story[1]['url'], story[1]['ext']
            return TYPE_ALBUM, story, None
        else:
            # Just imgur img
            img = imgur_client.get_image(path_parts[1].split('.')[0])
            if not img.animated:
                return TYPE_IMG, img.link, img.type.split('/')[1]
            else:
                if mp4_instead_gif:
                    return TYPE_GIF, img.mp4, 'mp4'
                else:
                    # return 'gif', img.link, 'gif'
                    return TYPE_GIF, img.gifv[:-1], 'gif'
    elif 'gfycat.com' in urlparse(url).netloc:
        client = GfycatClient()
        rname = re.findall(r'gfycat.com\/(?:detail\/)?(\w*)', url)[0]
        try:
            urls = client.query_gfy(rname)['gfyItem']
            if mp4_instead_gif:
                return TYPE_GIF, urls['mp4Url'], 'mp4'
            else:
                return TYPE_GIF, urls['max5mbGif'], 'gif'
        except KeyError:
            logging.info('Gfy fail prevented!')
            return TYPE_OTHER, url, None
    else:
        return TYPE_OTHER, url, None
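
# Hypothetical usage sketch (assumes a praw Reddit instance and the same
# TYPE_*/CONTENT_* constants sketched under Example No. 30):
# submission = reddit.submission(url='https://redd.it/abc123')
# what, url, ext = get_url(submission, mp4_instead_gif=True)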
Example No. 36
import base64
import tkinter as tk
from urllib.request import urlopen

from imgurpython import ImgurClient

root = tk.Tk()
root.title("display a website image")
# a little more than width and height of image
w = 520
h = 320
x = 80
y = 100
# use width x height + x_offset + y_offset (no spaces!)
root.geometry("%dx%d+%d+%d" % (w, h, x, y))
# this GIF picture previously downloaded to tinypic.com
#image_url = "http://i46.tinypic.com/r9oh0j.gif"

client_id = 'c2058ecfc76d75f'
client_secret = '5fe636c3e7a032b56b2120fe82eb3071c790c5ff'

client = ImgurClient(client_id, client_secret)

item = client.get_image("nhTyj4d.jpg")
print(item.link)
#image_url = client.get_image("nhTyj4d.jpg")
#image_byt = urlopen(image_url).read()
#image_b64 = base64.encodebytes(image_byt)  # encodestring was removed in Python 3.9
#photo = tk.PhotoImage(data=image_b64)
# create a white canvas
#cv = tk.Canvas(bg='white')
#cv.pack(side='top', fill='both', expand='yes')
# put the image on the canvas with
# create_image(xpos, ypos, image, anchor)
#cv.create_image(10, 10, image=photo, anchor='nw')
#root.mainloop()
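
# Hypothetical completion of the commented-out demo above: fetch the image
# bytes and show them on a canvas. tk.PhotoImage only decodes GIF/PNG
# natively, and the image fetched here is a JPEG, so this sketch assumes
# Pillow is installed.
import io
from PIL import Image, ImageTk

image_bytes = urlopen(item.link).read()
photo = ImageTk.PhotoImage(Image.open(io.BytesIO(image_bytes)))
cv = tk.Canvas(root, bg='white')
cv.pack(side='top', fill='both', expand='yes')
cv.create_image(10, 10, image=photo, anchor='nw')
root.mainloop()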