def __init__(self, params):
    """Run the reddit downloader for one user and check the outcome.

    ``params[0]`` is stored as ``self.user`` and used as the test name for
    ``GalleryTest.__init__``; every remaining entry of ``params`` is handed
    unchanged to ``post_run``.
    """
    self.user = params[0]
    GalleryTest.__init__(self, self.user)

    # The third positional argument presumably disables the internal flush
    # so it can be done explicitly below -- confirm against
    # reddit_get.run_wrapped.
    output_root = reddit_get.run_wrapped(self.user, "", False)
    jobs_ok = gallery_get.flush_jobs()

    expectations = params[1:]
    self.post_run(output_root, jobs_ok, expectations)
def __init__(self, params):
    """Run the gallery downloader for one URL and check the outcome.

    ``params[0]`` is stored as ``self.url`` and used as the test name for
    ``GalleryTest.__init__``; every remaining entry of ``params`` is handed
    unchanged to ``post_run``.
    """
    self.url = params[0]
    GalleryTest.__init__(self, self.url)

    # Jobs are queued with flushJobs=False and flushed explicitly so the
    # flush result can be forwarded to post_run.
    output_root = gallery_get.run_wrapped(self.url, "", flushJobs=False)
    jobs_ok = gallery_get.flush_jobs()

    expectations = params[1:]
    self.post_run(output_root, jobs_ok, expectations)
def run(self):
    """Process every distinct gallery link found in the user's listing.

    Nothing happens when the JSON returned by ``get_user_json`` has no
    "data" key. Otherwise each post URL is skipped when its domain matches
    ``NON_GALLERY_DOMAINS`` (unless the URL matches one of
    ``GALLERY_PATH_EXCEPTIONS``) or when the same URL, compared
    case-insensitively, was already seen; all other posts are passed to
    ``process_reddit_post``. Queued jobs are flushed afterwards when
    ``self.flush_jobs`` is truthy.
    """
    listing = self.get_user_json()
    if "data" not in listing:
        return

    seen = set()
    for child in listing['data']['children']:
        post_data = child['data']
        link = post_data['url']
        host = urlparse(link).netloc.lower()

        # A blocked domain is still allowed through when the URL itself
        # matches one of the path exceptions.
        blocked_domain = any(x in host for x in NON_GALLERY_DOMAINS)
        if blocked_domain and not any(x in link for x in GALLERY_PATH_EXCEPTIONS):
            print("Skipping non-gallery link: " + link)
            continue

        link_key = link.lower()
        if link_key in seen:
            print("Skipping already visited link: " + link)
            continue
        seen.add(link_key)

        self.process_reddit_post(link, self.folder_from_post(post_data))

    if self.flush_jobs:
        gallery_get.flush_jobs()
def run(self):
    """Process every distinct gallery link found in the user's listing.

    Nothing happens when the JSON returned by ``get_user_json`` has no
    "data" key. Otherwise each post URL is skipped when its domain matches
    ``NON_GALLERY_DOMAINS`` or when the same URL, compared
    case-insensitively, was already seen; all other posts are passed to
    ``process_reddit_post`` together with the folder computed by
    ``folder_from_post``. Queued jobs are flushed afterwards when
    ``self.flush_jobs`` is truthy.
    """
    reddit_json = self.get_user_json()
    if "data" in reddit_json:
        visited_links = set()
        for post in reddit_json['data']['children']:
            data = post['data']
            url = data['url']
            domain = urlparse(url).netloc.lower()
            # Lower-case once; used for both the lookup and the insert.
            url_key = url.lower()
            if any(x in domain for x in NON_GALLERY_DOMAINS):
                print("Skipping non-gallery link: " + url)
                continue
            if url_key in visited_links:
                print("Skipping already visited link: " + url)
                continue
            visited_links.add(url_key)
            self.process_reddit_post(url, self.folder_from_post(data))
        if self.flush_jobs:
            gallery_get.flush_jobs()
def run_internal(user, dest):
    """Download every supported image or album link submitted by a user.

    The listing JSON is read from ``<user>.json`` (relative path) when that
    file exists; otherwise it is requested from ``reddit_url(user)``, up to
    five times with a two-second pause, until the response has a "data" key.

    Each distinct post URL (compared case-insensitively) is routed by
    pattern: direct images go to ``download_image``; imgur and vidble
    albums go to ``gallery_get.run_wrapped``. Files are placed under
    ``dest/user/<YYYY-MM-DD - title>``.

    Returns False as soon as an album download reports failure (queued jobs
    are not flushed in that case); otherwise returns None.
    """
    reddit_json = {}
    localpath = user + ".json"
    if os.path.exists(localpath):
        print("Getting JSON data from local file (%s)" % localpath)
        # Context manager closes the handle even when parsing fails.
        with open(localpath, "r") as local_file:
            reddit_json = json.loads(local_file.read())
    else:
        print("Requesting JSON data from reddit...")
        for _ in range(5):
            response = urllib.urlopen(reddit_url(user))
            try:
                reddit_json = json.loads(response.read())
            finally:
                response.close()
            if "data" in reddit_json:
                break
            time.sleep(2)

    if "data" not in reddit_json:
        print("ERROR getting json data after several retries! Does the user exist?")
        print("If so, try saving the contents of the following to [USERNAME].json and try again.")
        print(reddit_url(user))
        return

    visited_links = set()
    num_valid_posts = 0
    for post in reddit_json['data']['children']:
        data = post['data']
        url = data['url']
        url_key = url.lower()
        if url_key in visited_links:
            print("Skipping already visited link: " + url)
            continue
        visited_links.add(url_key)

        # Destination folder: "<date>" or "<date> - <title>", with path
        # separators in the title neutralised.
        sdate = datetime.datetime.fromtimestamp(data['created']).strftime("%Y-%m-%d")
        title = data['title'].replace('/', '_').replace('\\', '_').strip()
        if title:
            title = " - " + title
        folder = os.path.join(dest, user, gallery_get.safestr(sdate + title))

        if "/i.imgur.com/" in url:
            download_image(url, folder)
        elif "/imgur.com/a/" in url:
            if not gallery_get.run_wrapped(url, folder, titleAsFolder=True,
                                           cacheDest=False, flushJobs=False):
                return False
        elif "/imgur.com/" in url:
            # Create direct image URL with dummy extension (otherwise it
            # will redirect), then get the correct extension from the
            # header. (This is way faster than opening the redirect.)
            img_base = url.replace("/imgur.com/", "/i.imgur.com/")
            ext = "jpg"
            response = urllib.urlopen("%s.%s" % (img_base, ext))
            try:
                content_type = response.headers.get("content-type") or ""
            finally:
                response.close()
            # Only trust the header when it really names an image type; a
            # missing or non-image header keeps the "jpg" default.
            content_type = content_type.split(";")[0].strip()
            if content_type.startswith("image/"):
                real_ext = content_type[len("image/"):]
                if real_ext != "jpeg":  # jpeg -> jpg
                    ext = real_ext
            download_image("%s.%s" % (img_base, ext), folder)
        elif "vidble.com/album" in url:
            if not gallery_get.run_wrapped(url, folder, titleAsFolder=True,
                                           cacheDest=False, flushJobs=False):
                return False
        elif url.endswith((".jpg", ".jpeg", ".gif")):
            download_image(url, folder)
        else:
            # Unsupported link type: not counted as a valid post.
            continue
        num_valid_posts += 1

    gallery_get.flush_jobs()
    if num_valid_posts == 0:
        print("\nApparently this user hasn't submitted any imgur links. Nothing to do.")
def run_internal(user, dest):
    """Download every supported image or album link submitted by a user.

    The listing JSON is read from ``<user>.json`` (relative path) when that
    file exists; otherwise it is requested from ``reddit_url(user)``, up to
    five times with a two-second pause, until the response has a "data" key.

    Each distinct post URL (compared case-insensitively) is routed by
    pattern: direct images go to ``download_image``; imgur and vidble
    albums go to ``gallery_get.run_wrapped``. Files are placed under
    ``dest/user/<YYYY-MM-DD - title>``.

    Returns False as soon as an album download reports failure (queued jobs
    are not flushed in that case); otherwise returns None.
    """
    reddit_json = {}
    localpath = user + ".json"
    if os.path.exists(localpath):
        print("Getting JSON data from local file (%s)" % localpath)
        # Context manager closes the handle even when parsing fails.
        with open(localpath, "r") as local_file:
            reddit_json = json.loads(local_file.read())
    else:
        print("Requesting JSON data from reddit...")
        for _ in range(5):
            response = urllib.urlopen(reddit_url(user))
            try:
                reddit_json = json.loads(response.read())
            finally:
                response.close()
            if "data" in reddit_json:
                break
            time.sleep(2)

    if "data" not in reddit_json:
        print("ERROR getting json data after several retries! Does the user exist?")
        print("If so, try saving the contents of the following to [USERNAME].json and try again.")
        print(reddit_url(user))
        return

    visited_links = set()
    num_valid_posts = 0
    for post in reddit_json['data']['children']:
        data = post['data']
        url = data['url']
        url_key = url.lower()
        if url_key in visited_links:
            print("Skipping already visited link: " + url)
            continue
        visited_links.add(url_key)

        # Destination folder: "<date>" or "<date> - <title>", with path
        # separators in the title neutralised.
        sdate = datetime.datetime.fromtimestamp(data['created']).strftime("%Y-%m-%d")
        title = data['title'].replace('/', '_').replace('\\', '_').strip()
        if title:
            title = " - " + title
        folder = os.path.join(dest, user, gallery_get.safestr(sdate + title))

        if "/i.imgur.com/" in url:
            download_image(url, folder)
        elif "/imgur.com/a/" in url:
            if not gallery_get.run_wrapped(url, folder, titleAsFolder=True,
                                           cacheDest=False, flushJobs=False):
                return False
        elif "/imgur.com/" in url:
            # Create direct image URL with dummy extension (otherwise it
            # will redirect), then get the correct extension from the
            # header. (This is way faster than opening the redirect.)
            img_base = url.replace("/imgur.com/", "/i.imgur.com/")
            ext = "jpg"
            response = urllib.urlopen("%s.%s" % (img_base, ext))
            try:
                content_type = response.headers.get("content-type") or ""
            finally:
                response.close()
            # Only trust the header when it really names an image type; a
            # missing or non-image header keeps the "jpg" default.
            content_type = content_type.split(";")[0].strip()
            if content_type.startswith("image/"):
                real_ext = content_type[len("image/"):]
                if real_ext != "jpeg":  # jpeg -> jpg
                    ext = real_ext
            download_image("%s.%s" % (img_base, ext), folder)
        elif "vidble.com/album" in url:
            if not gallery_get.run_wrapped(url, folder, titleAsFolder=True,
                                           cacheDest=False, flushJobs=False):
                return False
        elif url.endswith((".jpg", ".jpeg", ".gif")):
            download_image(url, folder)
        else:
            # Unsupported link type: not counted as a valid post.
            continue
        num_valid_posts += 1

    gallery_get.flush_jobs()
    if num_valid_posts == 0:
        print("\nApparently this user hasn't submitted any imgur links. Nothing to do.")