Пример #1
0
def is_valid_sequence_url(url, verbose=False):
    """
    Tell whether the URL contains a sequence of the form ``[start-end]``.

    url: the URL (a string) to inspect.
    verbose: if True, log a warning when no sequence is found.

    Return value: True if a bracketed ``[...-...]`` part with a non-empty
    text on both sides of the dash is present, False otherwise.
    """
    # raw string: in a normal string literal "\[" is an invalid escape sequence
    if re.search(r"\[(.+?)-(.+?)\]", url):
        return True
    if verbose:
        log.warning(f"no sequence was found in {url}")
    return False
Пример #2
0
    def delete_physically(self, death_list):
        """
        Delete the images in the list from the file system.

        While the files are being removed, a message and a progress bar
        are shown; both are reset before returning.

        death_list: the images to remove; each one must provide a
            get_absolute_path_or_url() method.

        Return value: number of images that were removed successfully.
        """
        total = len(death_list)
        cnt = 0
        msg = "deleting"

        for idx, img in enumerate(death_list, start=1):
            percent = round(idx * 100 / total)
            log.info(f"{msg} {percent}%")
            self.message_label.setText(msg)
            self.progressbar.show()
            self.progressbar.setValue(percent)
            QApplication.processEvents()

            p = Path(img.get_absolute_path_or_url())
            try:
                p.unlink()
            except OSError as e:
                # unlink() raises when the file is missing or cannot be removed;
                # one bad file must not abort the rest of the batch nor leave
                # the progress bar on the screen
                log.warning(f"couldn't remove {str(p)}")
                log.warning(e)
                continue
            if not p.exists():
                cnt += 1
            else:
                log.warning(f"couldn't remove {str(p)}")
        #
        sleep(0.2)  # make the progressbar visible
        self.message_label.setText("")
        self.progressbar.hide()
        #
        return cnt
Пример #3
0
 def remove_elem(self) -> None:
     """
     Take the first (leftmost) element out of the queue and delete its file.

     If the file is gone afterwards, the element's size is subtracted from
     the tracked cache size; otherwise a warning is logged and the tracked
     size is left unchanged.

     Raises IndexError if the queue is empty.
     """
     first = self.q.popleft()
     try:
         first.p.unlink()
     except OSError:
         # unlink() raises if the file is already missing or cannot be
         # removed; the exists() check below decides what really happened
         pass
     if not first.p.exists():
         self.size -= first.size
     else:
         log.warning(f"cache news: couldn't remove {first.name}")
0
def connection():
    """
    (Re)initialize the module-level imgur client.

    An ImgurClient is built from cfg.IMGUR_CLIENT_ID and
    cfg.IMGUR_CLIENT_SECRET. If that fails for any reason, two warnings
    are logged and the global `client` is set to None.
    """
    global client

    try:
        new_client = ImgurClient(cfg.IMGUR_CLIENT_ID, cfg.IMGUR_CLIENT_SECRET)
    except Exception as err:
        log.warning("missing or wrong imgur API keys")
        log.warning(f"imgur exception: {err}")
        new_client = None

    client = new_client
Пример #5
0
 def read(self):
     """
     Load the categories from the YAML file given by cfg.categories_file().

     Return value: the parsed content of the file. An empty dict is
     returned if the file is empty or if it cannot be opened / parsed
     (in the latter case warnings are logged).
     """
     categories = cfg.categories_file()
     try:
         with open(categories) as f:
             data = yaml.safe_load(f)
     except Exception as e:
         log.warning("couldn't read {0}".format(categories))
         log.warning(e)
         return {}
     # report success only after the file was really opened and parsed
     log.info(f"{categories} was read")
     # safe_load() gives None for an empty document; keep the return
     # value consistent with the error path
     if data is None:
         return {}
     return data
Пример #6
0
 def shrink(self) -> None:
     """
     Remove elements from the front of the queue until the tracked cache
     size is not over self.max_size_bytes.

     The last remaining element is never removed: if it alone is over the
     limit, a warning is logged and the method stops. The method also stops
     (with a warning) if the queue is empty while the tracked size is still
     over the limit.
     """
     # if we are below the threshold => OK, the loop body never runs
     while self.size > self.max_size_bytes:
         if not self.q:
             # the tracked size is not reduced when a deletion fails, so the
             # queue can run out first; calling remove_elem() here would
             # raise IndexError
             log.warning("the cache folder is over the limit but there is nothing left to remove")
             break
         if len(self.q) == 1:
             log.warning("the cache folder grew too big but it has just one element")
             log.warning("Tip: increase the cache size, the current value is too small.")
             break
         self.remove_elem()
Пример #7
0
def extract_images_from_a_specific_post(url):
    """
    Collect the image URLs of a tumblr post using the tumblr v2 API.

    url: the URL of the tumblr post; extract_parts_from() must be able to
        split it into a blog name and a post ID.

    Return value: list of the original-size photo URLs whose extension is
    in cfg.SUPPORTED_FORMATS. An empty list is returned if no API key is
    configured, if the URL cannot be split, if the request fails, or if
    the API answers with errors.

    Raises KeyError if a successful answer lacks the expected
    "response" / "posts" / "photos" / "original_size" / "url" keys.
    """
    if cfg.TUMBLR_API_KEY is None:
        log.warning(f"no tumblr API key found, cannot process {url}")
        return []

    res = extract_parts_from(url)
    if not res:
        return []

    blog_name, post_id = res
    api_call = f"https://api.tumblr.com/v2/blog/{blog_name}.tumblr.com/posts/photo?id={post_id}&api_key={cfg.TUMBLR_API_KEY}"
    try:
        d = requests.get(api_call, timeout=cfg.REQUESTS_TIMEOUT).json()
    except Exception:
        # network problem or a body that is not JSON; `Exception` (instead of
        # a bare except) lets KeyboardInterrupt / SystemExit pass through
        log.error(f"problem with the tumblr post {url}")
        return []

    if "errors" in d:
        log.warning("Unauthorized tumblr access. Is your API key valid?")
        return []

    urls = []
    for post in d["response"]["posts"]:
        for photo in post["photos"]:
            img_url = photo["original_size"]["url"]
            if Path(img_url).suffix.lower() in cfg.SUPPORTED_FORMATS:
                urls.append(img_url)
    return urls
Пример #8
0
def extract_images_from_an_album(url):
    """
    Collect the links of the images in an imgur album.

    The module-level imgur client is created on demand via connection().

    url: the URL of the album; get_album_id() extracts the album ID from it.

    Return value: list of the `link` attribute of every image in the
    album. An empty list is returned if there is no usable client, if no
    album ID is found, or if the imgur API call fails.
    """
    if client is None:
        connection()

    # if it's still None
    if client is None:
        log.warning(f"problem with your imgur API keys, cannot process {url}")
        return []

    album_id = get_album_id(url)
    if not album_id:
        return []

    images = []
    try:
        images = client.get_album_images(album_id)
    except (imgurpython.helpers.error.ImgurClientError, TypeError):
        log.warning(f"problem with album {url}")
    except imgurpython.helpers.error.ImgurClientRateLimitError:
        # no `file=` keyword here: it belongs to print(), the logging
        # methods reject it with a TypeError
        log.warning("Imgur API: rate-limit exceeded")

    return [img.link for img in images]
Пример #9
0
def _has_supported_suffix(url):
    """Tell whether the file extension of the URL is in cfg.SUPPORTED_FORMATS."""
    return Path(url).suffix.lower() in cfg.SUPPORTED_FORMATS


def read_subreddit(subreddit, after_id=None, statusbar=None, mainWindow=None):
    """
    Download one page of a subreddit's JSON listing and collect its images.

    Every entry of the listing is handled like this:
      * a direct link with a supported extension is taken as it is;
      * a link on a *.tumblr.com domain is expanded with
        tumblr.extract_images_from_a_specific_post();
      * an imgur album is expanded with imgur.extract_images_from_an_album();
      * any other imgur link is tried with ".jpg" appended, and it is kept
        if a HEAD request on it succeeds.

    subreddit: name of the subreddit.
    after_id: if given, the page that comes after this entry is requested.
    statusbar: optional object with a `progressbar`; it shows the progress.
    mainWindow: optional object with a `loading_line` that is shown during
        the download.

    Return value: list of ImageWithExtraInfo objects. An empty list is
    returned if the listing cannot be downloaded or processed. The progress
    bar and the loading line are always hidden at the end.
    """
    # defined before the try block, so the except handlers can always use it
    page_url = None
    try:
        if mainWindow:
            mainWindow.loading_line.show()
        if not after_id:
            page_url = url_template.format(subreddit=subreddit)
        else:
            page_url = url_template_with_after_id.format(subreddit=subreddit,
                                                         after_id=after_id)
        r = requests.get(page_url,
                         headers=cfg.headers,
                         timeout=cfg.REQUESTS_TIMEOUT)
        d = r.json()
        res = []
        children = d["data"]["children"]
        total = len(children)
        for idx, child in enumerate(children, start=1):
            percent = round(idx * 100 / total)
            log.info(f"{percent}%")
            if statusbar:
                statusbar.progressbar.show()
                statusbar.progressbar.setValue(percent)
                # without this nothing appeared until 100%
                # reason: https://stackoverflow.com/a/29917237/232485
                QApplication.processEvents()
            entry = child["data"]
            domain = entry["domain"]
            link = entry["url"]
            # use this for a new page that comes after this entry
            entry_id = entry["name"]
            extra = {
                'subreddit': subreddit,
                'after_id': entry_id,
                'next_page_url':
                f'https://www.reddit.com/r/{subreddit}/.json?after={entry_id}',
            }

            # direct link to an image
            if _has_supported_suffix(link):
                res.append(ImageWithExtraInfo(link, extra))
                continue

            # tumblr section
            if domain.endswith(".tumblr.com"):
                try:
                    images = tumblr.extract_images_from_a_specific_post(link)
                except Exception:
                    log.warning(f"cannot extract images from {link}")
                    images = []
                for img_url in images:
                    if _has_supported_suffix(img_url):
                        res.append(ImageWithExtraInfo(img_url, extra))
                continue

            # imgur section
            if domain.endswith("imgur.com"):
                if imgur.is_album(link):
                    try:
                        images = imgur.extract_images_from_an_album(link)
                    except Exception:
                        log.warning(f"cannot extract images from {link}")
                        images = []
                    for img_url in images:
                        if _has_supported_suffix(img_url):
                            res.append(ImageWithExtraInfo(img_url, extra))
                else:
                    # it's on imgur.com but it's not an album
                    # it may be a single image embedded in an HTML page
                    candidate = link + ".jpg"  # it works sometimes
                    try:
                        head = requests.head(candidate,
                                             headers=cfg.headers,
                                             timeout=cfg.REQUESTS_TIMEOUT)
                        if head.ok:
                            res.append(ImageWithExtraInfo(candidate, extra))
                    except Exception:
                        # the message used to reference an undefined name here;
                        # the resulting NameError threw away the whole page
                        log.warning(f"problem with {link} -> {candidate}")
        return res
    except KeyError:
        log.warning(f"cannot extract data from {page_url}")
        return []
    except Exception as e:
        log.warning(f"exception: {str(e)}")
        log.warning(f"problem with {page_url}")
        return []
    finally:
        if statusbar:
            statusbar.progressbar.hide()
        if mainWindow:
            mainWindow.loading_line.hide()