def add_existing_image(self, user, oldimage, oldpath, subdir='', album_id=-1):
    """Import an already-downloaded image into <content>/<user>[/subdir].

    Copies the file, normalizes the '.jpeg.jpg' double extension, and
    generates a thumbnail (falling back to the placeholder on failure).
    Skips files that cannot be opened as images.
    """
    if 'tumblr' in oldpath:
        # Can't properly handle tumblr links
        self.debug('cannot properly handle tumblr links; trying anyway')
        #return
    if subdir == '' and album_id == -1:
        self.debug('adding image: %s' % oldpath)
    # Ensure image is an actual image
    try:
        dims = ImageUtils.get_dimensions(oldpath)
    except Exception:  # was a bare 'except:' which also swallowed SystemExit
        self.debug('failed to load image: %s, skipping' % oldpath)
        return
    newimage = path.join(ImageUtils.get_root(), 'content', user, subdir, oldimage)
    newimage = newimage.replace('.jpeg.jpg', '.jpg')
    thumbnail = path.join(ImageUtils.get_root(), 'content', user, subdir, 'thumbs', oldimage)
    thumbnail = thumbnail.replace('.jpeg.jpg', '.jpg')
    if path.exists(newimage):
        self.debug('new image already exists: %s' % newimage)
        return
    ImageUtils.create_subdirectories(path.join(ImageUtils.get_root(), 'content', user, subdir, 'thumbs'))
    copy2(oldpath, newimage)
    try:
        ImageUtils.create_thumbnail(newimage, thumbnail)
    except Exception as e:  # was 'except Exception, e' (Python-2-only syntax)
        self.debug('failed to create thumbnail: %s' % str(e))
        # Fall back to the generic placeholder thumbnail.
        thumbnail = path.join(ImageUtils.get_root(), 'images', 'nothumb.png')
def __init__(self):
    """Initialize the daemon: guard against a second instance, open the
    global history log, and construct the helper objects."""
    # Refuse to run if another instance is already active.
    self.exit_if_already_started()
    # All output is appended to a single history file to track usage.
    log_path = path.join(ImageUtils.get_root(), 'history.log')
    self.root_log = open(log_path, 'a')
    # Helper classes write through this shared logger.
    self.logger = self.root_log
    self.db = DB()  # Database instance
    self.reddit = Reddit()
    self.excluded_subs = self.db.get_excluded_subreddits()
def setup_loggers_for_user(self, user):
    """Point every helper's logger at <content>/<user>/history.log,
    creating the user's content directory if it does not exist yet."""
    user_dir = path.join(ImageUtils.get_root(), 'content', user)
    ImageUtils.create_subdirectories(user_dir)
    # Open the per-user log in append mode and share the handle with
    # every component that logs.
    log_file = open(path.join(user_dir, 'history.log'), 'a')
    self.logger = log_file
    self.db.logger = log_file
    ImageUtils.logger = log_file
    self.reddit.logger = log_file
def delete_album(self, cursor, rowid, path):
    """Remove an album: its media rows, its pending URLs, its album
    record, and finally its directory tree on disk.

    NOTE: the `path` parameter (relative to the content root) shadows
    os.path — that is why the module imports os.path as `ospath`.
    """
    # Rows keyed by the album's numeric rowid.
    for table in ('medias', 'urls'):
        cursor.execute('delete from %s where album_id = ?' % table, [rowid])
    # The album record itself is keyed by its path.
    cursor.execute('delete from albums where path = ?', [path])
    # Delete directory + files
    rmtree(ospath.join(ImageUtils.get_root(), path))
def remove_user(self, user):
    """Erase every trace of `user`: rows keyed by user id, the
    users/newusers records, and the on-disk content directory."""
    userid = self.get_user_id(user)
    # Switch to the canonical username stored in the DB for this id.
    user = self.select_one('username', 'users', where='id = ?', values=[userid])
    # Tables referencing the numeric user id.
    for table in ('posts', 'comments', 'albums'):
        self.delete(table, 'userid = ?', [userid])
    # Name-keyed records; SQLite LIKE matches ASCII case-insensitively.
    for table in ('users', 'newusers'):
        self.delete(table, 'username like ?', [user])
    dirpath = path.join(ImageUtils.get_root(), 'content', user)
    if path.exists(dirpath):
        rmtree(dirpath)
def setup_loggers_for_user(self, user):
    """Configure logging for `user` according to the 'log_level' config.

    log_level values:
      * 'none'   -- discard all output (devnull)
      * 'global' -- reuse the daemon-wide root log
      * 'user'   -- append to <content>/<user>/history.log (default)
    The chosen logger is shared with the DB, ImageUtils and Reddit helpers.
    """
    # Create directories if needed
    user_dir = path.join(ImageUtils.get_root(), 'content', user)
    ImageUtils.create_subdirectories(user_dir)
    log_level = self.db.get_config('log_level', default='user')
    if log_level == 'none':
        self.logger = open(devnull, 'w')
    elif log_level == 'global':
        self.logger = self.root_log
    else:
        # 'user' and any unrecognized value fall back to the per-user log.
        # (Previously an unknown value silently kept the previous logger,
        # which could be another user's log file.)
        self.logger = open(path.join(user_dir, 'history.log'), 'a')
    self.db.logger = self.logger
    ImageUtils.logger = self.logger
    self.reddit.logger = self.logger
def add_existing_album(self, user, oldalbum, oldpath):
    """Import a pre-existing album directory for `user`, registering it
    in the database unless it was already imported.

    (Truncated variant: ends after the album row is created.)
    """
    newalbum = path.join(ImageUtils.get_root(), 'content', user, oldalbum)
    if path.exists(newalbum):
        self.debug('album already exists: %s' % newalbum)
        return
    # Derive the reddit post/comment ids and imgur album id from the name.
    (post, comment, imgid) = self.get_post_comment_id(oldalbum)
    url = 'http://imgur.com/a/%s' % imgid
    try:
        album_id = self.add_album(newalbum, user, url, post, comment)
    except Exception as e:  # was 'except Exception, e' (Python-2-only syntax)
        self.debug('add_existing_album: failed: %s' % str(e))
        return
def process_url(self, url, url_index, child):
    """Resolve `url` (found in post/comment `child`) into media URLs.

    Skips albums already in the database; URLs from unsupported domains
    are appended to the user's unsupported.txt.
    (Truncated variant: ends after URL extraction fails.)
    """
    self.debug('%s: process_url: %s' % (child.author, url))
    # Ignore duplicate albums
    if self.db.album_exists(url):
        self.debug('''%s: process_url: album %s already exists in database.
Permalink: %s
Object: %s''' % (child.author, url, child.permalink(), str(child)))
        return
    userid = self.db.get_user_id(child.author)
    # isinstance() instead of 'type(child) ==' — same behavior for the
    # concrete Post/Comment classes, and it tolerates subclasses.
    if isinstance(child, Post):
        base_fname = '%s-%d' % (child.id, url_index)
        postid = child.id
        commid = None
    elif isinstance(child, Comment):
        base_fname = '%s-%s-%d' % (child.post_id, child.id, url_index)
        postid = child.post_id
        commid = child.id
    working_dir = path.join(ImageUtils.get_root(), 'content', child.author)
    # A single URL can contain multiple medias (i.e. albums)
    try:
        (media_type, albumname, medias) = ImageUtils.get_urls(url)
    except Exception as e:  # was 'except Exception, e' (Python-2-only syntax)
        self.debug('%s: process_url: unable to get URLs for %s: %s' % (child.author, url, str(e)))
        if 'domain not supported' in str(e):
            # Save domain-not-supported URLs to new file.
            # 'with' guarantees the handle is closed (was open/close pairs).
            user_dir = path.join(ImageUtils.get_root(), 'content', child.author)
            with open(path.join(user_dir, 'unsupported.txt'), 'a') as f:
                f.write(url)
                f.write('\n')
        return
def __init__(self):
    """Set up the daemon: single-instance guard, root logger honoring
    the 'log_level' config, database, and Reddit helper."""
    self.exit_if_already_started()
    self.db = DB()  # Database instance
    level = self.db.get_config('log_level', default='user')
    if level == 'none':
        # Discard all global output.
        sink = open(devnull, 'w')
    else:
        # Single file that all output is written to, to track usage.
        sink = open(path.join(ImageUtils.get_root(), 'history.log'), 'a')
    self.root_log = sink
    self.logger = sink  # Logger used by helper classes
    self.reddit = Reddit()
    self.excluded_subs = self.db.get_excluded_subreddits()
def start(self):
    """Main daemon loop: reset stale URLs, then forever drain finished
    results, purge completed rips, and dispatch downloader threads
    (bounded by MAX_THREADS)."""
    # URLs left 'pending' by a previous crash/interrupt must be restarted.
    stale_count = self.db.count('urls', 'pending != 0')
    if stale_count > 0:
        print('MAIN: found %d stale (interrupted) URLs, marking as non-pending...' % stale_count)
        self.db.update('urls', 'pending = 0')
        self.db.commit()
    print('MAIN: starting infinite loop...')
    already_printed_sleep_msg = False
    while True:
        sleep(0.1)
        # self.results is the list of downloaded medias to be added to the DB
        while self.results:
            self.handle_result(self.results.pop())
        # Remove recently-completed rips
        while self.to_remove:
            (albumid, iindex) = self.to_remove.pop()
            self.db.delete('urls', 'album_id = ? and i_index = ?', [albumid, iindex])
            self.db.commit()
        try:
            # Get next URL to retrieve
            url = self.get_next_url()
        except Exception as e:  # was 'except Exception, e' (Python-2-only syntax)
            if str(e) == 'no URLs found':
                if not already_printed_sleep_msg:
                    already_printed_sleep_msg = True
                    print('MAIN: no urls to get, sleeping 500ms')
                sleep(0.5)
            else:
                print('MAIN: get_next_url(): Exception: %s:\n%s' % (str(e), format_exc()))
            continue
        # We have a URL to download & add to DB (url)
        already_printed_sleep_msg = False
        # Wait for thread count to drop below the cap.
        while len(self.current_threads) >= MAX_THREADS:
            sleep(0.1)
        self.current_threads.append(None)
        # Create new thread to download the media, add to self.results
        print('MAIN: %s #%d: launching handler for: %s' % (url['path'], url['i_index'], url['url']))
        # Create subdirs from main thread to avoid race condition
        dirname = path.join(ImageUtils.get_root(), 'rips', url['path'], 'thumbs')
        ImageUtils.create_subdirectories(dirname)
        t = Thread(target=self.retrieve_result_from_url, args=(url,))
        t.start()
def add_existing_album(self, user, oldalbum, oldpath):
    """Import an existing album directory for `user`: create the DB album
    row, import each image, and record the originating post/comment."""
    newalbum = path.join(ImageUtils.get_root(), 'content', user, oldalbum)
    if path.exists(newalbum):
        self.debug('album already exists: %s' % newalbum)
        return
    (post, comment, imgid) = self.get_post_comment_id(oldalbum)
    url = 'http://imgur.com/a/%s' % imgid
    try:
        album_id = self.add_album(newalbum, user, url, post, comment)
    except Exception as e:
        self.debug('add_existing_album: failed: %s' % str(e))
        return
    for image in listdir(oldpath):
        self.debug('add_existing_album: image=%s' % path.join(oldpath, image))
        # Fake filename encodes post (and comment) ids plus the image suffix.
        fakeimage = post
        if comment != None:
            fakeimage = '%s-%s' % (fakeimage, comment)
        fakeimage = '%s_%s' % (fakeimage, image.split('_')[-1])
        self.add_existing_image(user, fakeimage, path.join(oldpath, image), subdir=oldalbum, album_id=album_id)
    # Add post
    p = Post()
    p.id = post
    p.author = user
    if comment == None:
        p.url = url
    p.created = path.getctime(oldpath)
    p.subreddit = ''
    p.title = ''
    try:
        self.add_post(p, legacy=1)
    except Exception:
        # Best-effort: the post may already exist in the DB.
        pass
    # Add comment
    if comment != None:
        c = Comment()
        c.id = comment
        c.post_id = post
        c.author = user
        c.body = url  # (inner 'if comment != None' was redundant here)
        # BUGFIX: was 'p.created = ...', which re-stamped the post
        # instead of setting the comment's creation time.
        c.created = path.getctime(oldpath)
        try:
            self.add_comment(c, legacy=1)
        except Exception:
            # Best-effort: the comment may already exist in the DB.
            pass
def __init__(self):
    """Construct the daemon, honoring the configured log level."""
    # Refuse to start if another instance is already running.
    self.exit_if_already_started()
    self.db = DB()  # Database instance
    if self.db.get_config('log_level', default='user') == 'none':
        # Swallow all global output.
        self.root_log = open(devnull, 'w')
    else:
        # One history file tracks all global output, to track usage.
        self.root_log = open(path.join(ImageUtils.get_root(), 'history.log'), 'a')
    # Helper classes log through the same handle.
    self.logger = self.root_log
    self.reddit = Reddit()
    self.excluded_subs = self.db.get_excluded_subreddits()
def __init__(self):
    """Initialize the daemon: single-instance guard, global log, DB, and
    a Reddit session (logged in when credentials are stored)."""
    self.exit_if_already_started()
    # Single file that all output is written to, to track usage
    self.root_log = open(path.join(ImageUtils.get_root(), 'history.log'), 'a')
    self.logger = self.root_log  # Logger used by helper classes
    self.db = DB()  # Database instance
    self.reddit = Reddit()
    # Logging in is best-effort: the daemon still runs anonymously on failure.
    try:
        (username, password) = self.db.get_credentials('reddit')
        try:
            self.reddit.login(username, password)
        except Exception as e:  # was 'except Exception, e' (Python-2-only syntax)
            self.debug('__init__: failed to login to reddit: %s' % str(e))
    except Exception as e:
        self.debug('__init__: failed to get reddit credentials: %s' % str(e))
def retrieve_result_from_url(self, url):
    """Worker-thread entry: download the media described by `url` (a dict
    with album_id, i_index, url, type, path, saveas, metadata) and append
    a result dict to self.results.

    (Truncated variant: only the metadata-fetch failure path is present.)
    """
    # TODO logging into dirname/log.txt
    # Construct base result; remaining fields are filled after download.
    result = {
        'album_id'  : url['album_id'],
        'i_index'   : url['i_index'],
        'url'       : url['url'],
        'valid'     : 0,
        'error'     : None,
        'type'      : url['type'],
        'image_name': url['saveas'],
        'filesize'  : 0,
        'width'     : 0,
        'height'    : 0,
        'thumb_name': None,
        't_width'   : 0,
        't_height'  : 0,
        'metadata'  : url['metadata'],
        'path'      : url['path']
    }
    # Get save directory
    dirname = path.join(ImageUtils.get_root(), 'rips', url['path'])
    # Generate save path; clobber any leftover partial download.
    saveas = path.join(dirname, url['saveas'])
    if path.exists(saveas):
        print('THREAD: %s: removing existing file %s' % (url['path'], saveas))
        remove(saveas)
    try:
        meta = self.httpy.get_meta(url['url'])
    except Exception as e:  # was 'except Exception, e' (Python-2-only syntax)
        # Can't get meta? Can't get image!
        print('THREAD: %s: failed to get_meta from %s: %s\n%s' % (url['path'], url['url'], str(e), format_exc()))
        result['error'] = 'failed to get metadata from %s: %s\n%s' % (url['url'], str(e), format_exc())
        self.to_remove.append((url['album_id'], url['i_index']))
        self.results.append(result)
        self.current_threads.pop()
        return
def process_url(self, url, url_index, child):
    """Resolve `url` (found in post/comment `child`) into media URLs.

    (Truncated variant: ends after URL extraction fails.)
    """
    self.debug('process_url: %s' % url)
    userid = self.db.get_user_id(child.author)
    # isinstance() instead of 'type(child) ==' — same behavior for the
    # concrete Post/Comment classes, and it tolerates subclasses.
    if isinstance(child, Post):
        base_fname = '%s-%d' % (child.id, url_index)
        postid = child.id
        commid = None
    elif isinstance(child, Comment):
        base_fname = '%s-%s-%d' % (child.post_id, child.id, url_index)
        postid = child.post_id
        commid = child.id
    working_dir = path.join(ImageUtils.get_root(), 'content', child.author)
    # A single URL can contain multiple medias (i.e. albums)
    try:
        (media_type, albumname, medias) = ImageUtils.get_urls(url)
    except Exception as e:  # was 'except Exception, e' (Python-2-only syntax)
        self.debug('%s: process_url: unable to get URLs for %s: %s' % (child.author, url, str(e)))
        return
def start(self):
    """Main loop: handle at most one finished download per tick and
    dispatch new downloader threads up to MAX_THREADS."""
    print("MAIN: starting infinite loop...")
    already_printed_sleep_msg = False
    while True:
        sleep(0.1)
        # self.results is the list of downloaded medias to be added to the DB
        if self.results:
            self.handle_result(self.results.pop())
        try:
            # Get next URL to retrieve
            url = self.get_next_url()
        except Exception as e:  # was 'except Exception, e' (Python-2-only syntax)
            if str(e) == "no URLs found":
                if not already_printed_sleep_msg:
                    already_printed_sleep_msg = True
                    print("MAIN: no urls to get, sleeping 500ms")
                sleep(0.5)
            else:
                print("MAIN: get_next_url(): Exception: %s:\n%s" % (str(e), format_exc()))
            continue
        # We have a URL to download & add to DB (url)
        already_printed_sleep_msg = False
        # Wait for thread count to drop
        while len(self.current_threads) >= MAX_THREADS:
            sleep(0.1)
        self.current_threads.append(None)
        # Create new thread to download the media, add to self.results
        print("MAIN: %s #%d: launching handler for: %s" % (url["path"], url["i_index"], url["url"]))
        # Create subdirs from main thread to avoid race condition
        dirname = path.join(ImageUtils.get_root(), "rips", url["path"], "thumbs")
        ImageUtils.create_subdirectories(dirname)
        t = Thread(target=self.retrieve_result_from_url, args=(url,))
        t.start()
def poll_user(self, user):
    """Fetch new posts/comments for `user` since the last recorded id.

    Sets up per-user logging first; marks the user deleted on a 404.
    (Truncated variant: ends after fetching children.)
    """
    # Create directories if needed
    user_dir = path.join(ImageUtils.get_root(), 'content', user)
    ImageUtils.create_subdirectories(user_dir)
    # Setup logger shared by all helpers.
    self.logger = open(path.join(user_dir, 'history.log'), 'a')
    self.db.logger = self.logger
    ImageUtils.logger = self.logger
    self.reddit.logger = self.logger
    since_id = self.db.get_last_since_id(user)
    # Get posts/comments for user
    self.debug('%s: poll_user: since "%s"' % (user, since_id))
    try:
        children = self.reddit.get_user(user, since=since_id)
    except Exception as e:  # was 'except Exception, e' (Python-2-only syntax)
        if '404: Not Found' in str(e):
            # User is deleted, mark it as such
            self.debug('%s: poll_user: user is 404, marking as deleted' % user)
            self.db.mark_as_deleted(user)
            return
        self.debug('%s: poll_user: error %s' % (user, str(e)))
        return
'\n\t' + 'site text primary key, \n\t' + 'username text, \n\t' + 'password text \n\t', 'config' : '\n\t' + 'key text primary key, \n\t' + 'value text \n\t', 'friends' : '\n\t' + 'username text primary key\n\t', } DB_FILE = path.join(ImageUtils.get_root(), 'database.db') class DB: def __init__(self): self.logger = stderr if path.exists(DB_FILE): self.debug('__init__: using database file: %s' % DB_FILE) else: self.debug('__init__: database file (%s) not found, creating...' % DB_FILE) self.conn = None self.conn = sqlite3.connect(DB_FILE) #TODO CHANGE BACK, encoding='utf-8') self.conn.text_factory = lambda x: unicode(x, "utf-8", "ignore") # Don't create tables if not supplied. if SCHEMA != None and SCHEMA != {} and len(SCHEMA) > 0: # Create table for every schema given. for key in SCHEMA:
#!/usr/bin/python from os import listdir, path, walk from DB import DB from ImageUtils import ImageUtils db = DB() root = ImageUtils.get_root() ''' Iterates over existing sets, adds sets to database, attempts to populate DB with information based on filenames: * URL (http://i.imgur.com/<image> * Post ID * Comment ID * Creation time Copies existing set to new directory (/content/), Generates new thumbnails for the sets ''' def populate_db(): for user in listdir(path.join(root, 'users')): userdir = path.join(root, 'users', user) if not path.isdir(userdir): continue for item in listdir(userdir): itempath = path.join(userdir, item) if path.isfile(itempath): # Image #print "image: %s" % itempath db.add_existing_image(user, item, itempath)
# Get media information (width, height, size) try: (width, height) = ImageUtils.get_dimensions(saveas) except Exception, e: # If we cannot process the media file, skip it! self.debug('%s: process_url: #%d %s' % (child.author, media_index + 1, str(e))) continue size = path.getsize(saveas) # Create thumbnail savethumbas = path.join(working_dir, 'thumbs', fname) try: savethumbas = ImageUtils.create_thumbnail(saveas, savethumbas) except Exception, e: savethumbas = path.join(ImageUtils.get_root(), 'images', 'nothumb.png') self.debug( '%s: process_url: failed to create thumb #%d: %s, using default' % (child.author, media_index + 1, str(e))) # Add to DB self.db.add_image(saveas, child.author, media, width, height, size, savethumbas, media_type, album_id, postid, commid) self.db.update_user(child.author) def infinite_loop(self): users = self.db.get_users(new=False) last_user = self.db.get_config('last_user')
def retrieve_result_from_url(self, url):
    """Worker-thread entry: download the media described by `url` (a dict
    with album_id, i_index, url, type, path, saveas, metadata) and append
    a result dict to self.results.

    Errors are reported via result['error'] rather than raised; every
    failure path pops one slot from self.current_threads before returning.
    """
    # TODO logging into dirname/log.txt
    # Construct base result; remaining fields are filled after download.
    result = {
        "album_id": url["album_id"],
        "i_index": url["i_index"],
        "url": url["url"],
        "valid": 0,
        "error": None,
        "type": url["type"],
        "image_name": url["saveas"],
        "filesize": 0,
        "width": 0,
        "height": 0,
        "thumb_name": None,
        "t_width": 0,
        "t_height": 0,
        "metadata": url["metadata"],
        "path": url["path"],
    }
    # Get save directory
    dirname = path.join(ImageUtils.get_root(), "rips", url["path"])
    # Generate save path; clobber any leftover partial download.
    saveas = path.join(dirname, url["saveas"])
    if path.exists(saveas):
        print("THREAD: %s: removing existing file %s" % (url["path"], saveas))
        remove(saveas)
    meta = self.httpy.get_meta(url["url"])
    # Imgur serves a 503-byte placeholder for removed images.
    if "imgur.com" in url["url"] and "Content-length" in meta and meta["Content-length"] == "503":
        print("THREAD: %s: imgur image was not found (503b) at %s" % (url["path"], url["url"]))
        result["error"] = "imgur image was not found (503b) at %s" % url["url"]
        self.results.append(result)
        self.current_threads.pop()
        return
    # BUGFIX: the lookup used meta["Content-Type"] while the membership
    # test checked "Content-type" — a KeyError whenever only the tested
    # key was present. Use the same key for both.
    if "Content-type" in meta and "html" in meta["Content-type"].lower():
        print("THREAD: %s: url returned HTML content-type at %s" % (url["path"], url["url"]))
        result["error"] = "url returned HTML content-type at %s" % url["url"]
        self.results.append(result)
        self.current_threads.pop()
        return
    # Normalize media type and file extension to match the served
    # content-type (replaces the long elif chain).
    content_type = meta["content-type"].lower()
    for ext, media_kind in (
        ("png", "image"), ("jpeg", "image"), ("jpg", "image"), ("gif", "image"),
        ("mp4", "video"), ("flv", "video"), ("wmv", "video"),
    ):
        if content_type.endswith(ext):
            result["type"] = media_kind
            target = "jpg" if ext == "jpeg" else ext
            if not saveas.lower().endswith(target):
                saveas = saveas[: saveas.rfind(".") + 1] + target
            break
    result["image_name"] = path.basename(saveas)
    # Attempt to dowload image at URL
    try:
        self.httpy.download(url["url"], saveas)
    except Exception as e:  # was 'except Exception, e' (Python-2-only syntax)
        print("THREAD: %s: failed to download %s to %s: %s\n%s" % (
            url["path"], url["url"], saveas, str(e), str(format_exc())))
        result["error"] = "failed to download %s to %s: %s\n%s" % (url["url"], saveas, str(e), str(format_exc()))
        self.results.append(result)
        self.current_threads.pop()
        return
'\n\t' + 'site text primary key, \n\t' + 'username text, \n\t' + 'password text \n\t', 'config' : '\n\t' + 'key text primary key, \n\t' + 'value text \n\t', 'friends' : '\n\t' + 'username text primary key\n\t', } DB_FILE = path.join(ImageUtils.get_root(), 'database.db') class DB: def __init__(self): self.logger = stderr if path.exists(DB_FILE): self.debug('__init__: using database file: %s' % DB_FILE) else: self.debug('__init__: database file (%s) not found, creating...' % DB_FILE) self.conn = None self.conn = sqlite3.connect(DB_FILE) #TODO CHANGE BACK, encoding='utf-8') self.conn.text_factory = lambda x: str(x, "utf-8", "ignore") # Don't create tables if not supplied. if SCHEMA != None and SCHEMA != {} and len(SCHEMA) > 0: # Create table for every schema given. for key in SCHEMA:
def add_existing_image(self, user, oldimage, oldpath, subdir='', album_id=-1):
    """Import an already-downloaded image into <content>/<user>[/subdir],
    create its thumbnail, and register the image — plus its originating
    post/comment when it is not part of an album — in the database."""
    if 'tumblr' in oldpath:
        # Can't properly handle tumblr links
        self.debug('cannot properly handle tumblr links; trying anyway')
        #return
    if subdir == '' and album_id == -1:
        self.debug('adding image: %s' % oldpath)
    # Ensure image is an actual image
    try:
        ImageUtils.get_dimensions(oldpath)
    except Exception:
        self.debug('failed to load image: %s, skipping' % oldpath)
        return
    newimage = path.join(ImageUtils.get_root(), 'content', user, subdir, oldimage)
    newimage = newimage.replace('.jpeg.jpg', '.jpg')
    thumbnail = path.join(ImageUtils.get_root(), 'content', user, subdir, 'thumbs', oldimage)
    thumbnail = thumbnail.replace('.jpeg.jpg', '.jpg')
    if path.exists(newimage):
        self.debug('new image already exists: %s' % newimage)
        return
    ImageUtils.create_subdirectories(path.join(ImageUtils.get_root(), 'content', user, subdir, 'thumbs'))
    copy2(oldpath, newimage)
    # Create the thumbnail exactly once. BUGFIX: the original tried a
    # second time after falling back to 'nothumb.png', which could write
    # a generated thumbnail over the shared placeholder image.
    try:
        ImageUtils.create_thumbnail(newimage, thumbnail)
    except Exception as e:
        self.debug('failed to create thumbnail: %s' % str(e))
        thumbnail = path.join(ImageUtils.get_root(), 'images', 'nothumb.png')
    (post, comment, imgid) = self.get_post_comment_id(oldimage)
    url = 'http://i.imgur.com/%s' % imgid
    dims = ImageUtils.get_dimensions(newimage)
    size = path.getsize(newimage)
    try:
        self.add_image(newimage, user, url, dims[0], dims[1], size, thumbnail, 'image', album_id, post, comment)
    except Exception as e:
        self.debug('add_existing_image: failed: %s' % str(e))
        return
    if subdir == '' and album_id == -1:
        # Not an album
        # Add post
        p = Post()
        p.id = post
        p.author = user
        if comment == None:
            p.url = url
        p.created = path.getctime(oldpath)
        p.subreddit = ''
        p.title = ''
        try:
            self.add_post(p, legacy=1)
        except Exception as e:
            self.debug('add_existing_image: create post failed: %s' % str(e))
        # Add comment
        if comment != None:
            c = Comment()
            c.id = comment
            c.post_id = post
            c.author = user
            c.body = url  # (inner 'if comment != None' was redundant here)
            # BUGFIX: was 'p.created = ...', which re-stamped the post
            # instead of setting the comment's creation time.
            c.created = path.getctime(oldpath)
            try:
                self.add_comment(c, legacy=1)
            except Exception as e:
                self.debug('add_existing_image: create comment failed: %s' % str(e))
#!/usr/bin/python from os import listdir, path, walk from DB import DB from ImageUtils import ImageUtils db = DB() root = ImageUtils.get_root() ''' Iterates over existing sets, adds sets to database, attempts to populate DB with information based on filenames: * URL (http://i.imgur.com/<image> * Post ID * Comment ID * Creation time Copies existing set to new directory (/content/), Generates new thumbnails for the sets ''' def populate_db(): for user in listdir(path.join(root, 'users')): userdir = path.join(root, 'users', user) if not path.isdir(userdir): continue for item in listdir(userdir): itempath = path.join(userdir, item) if path.isfile(itempath): # Image #print "image: %s" % itempath db.add_existing_image(user, item, itempath) elif path.isdir(itempath):
self.debug('%s: process_url: downloading #%d %s' % (child.author, media_index + 1, media)) headers = { 'Referer' : url } ImageUtils.httpy.download(media, saveas, headers=headers) if path.getsize(saveas) == 503: raise Exception('503b = removed') except Exception, e: self.debug('%s: process_url: failed to download #%d: %s, moving on' % (child.author, media_index + 1, str(e))) continue # Get media information (width, height, thumbsaveas) if media_type == 'audio': # Audio files don't have width/height/thumbnail width = height = 0 savethumbas = path.join(ImageUtils.get_root(), 'images', 'audio.png') else: try: (width, height) = ImageUtils.get_dimensions(saveas) except Exception, e: # If we cannot process the media file, skip it! self.debug('%s: process_url: #%d %s' % (child.author, media_index + 1, str(e))) continue # Create thumbnail if needed if self.db.get_config('save_thumbnails', 'true') == 'false': savethumbas = path.join(ImageUtils.get_root(), 'images', 'nothumb.png') else: savethumbas = path.join(working_dir, 'thumbs', fname) try: savethumbas = ImageUtils.create_thumbnail(saveas, savethumbas)