def update_albums(self):
    '''
        Runs the self-test of every ripper and builds one HTML status row
        per ripper.

        ripper.test() is expected to behave as follows:
          1. Throws exception if something really bad happens
             (can't access site)            -> available = -1, message = str(exception)
          2. Returns error message (str) if album output isn't as expected
                                            -> available = 0,  message = that string
          3. Returns None if it works as expected
                                            -> available = 1,  message = None

        NOTE(review): html_output is accumulated but neither returned nor
        written anywhere in the visible code -- presumably the remainder of
        this method consumes it; confirm against the full file.
    '''
    html_output = ''

    # Materialise the rippers up front, exactly as the original did
    rippers = list(SiteBase.iter_rippers())
    for ripper in rippers:
        host = ripper.get_host()
        url = ripper.get_sample_url()
        try:
            print('testing %s ripper...' % host)
            result = ripper.test()
            # Identity check: only a real None counts as "works as expected"
            available = int(result is None)
            message = result
        except Exception as e:
            # The ripper blew up; record the failure and keep going with the rest
            available = -1
            message = str(e)
            print(e)
            print(format_exc())

        print('%s %s %s %s' % (host, url, available, message))
        html_output += self.host_html(host, url, available, message)
def update_albums(self):
    '''
        Builds one HTML status row per ripper, re-testing a ripper only when
        its stored status is missing or older than SECONDS_BETWEEN_CHECKS.

        Stored statuses are read from the 'sites' table
        (host, checked, available, message). For a ripper that has to be
        tested, ripper.test() is expected to behave as follows:
          1. Throws exception if something really bad happens
             (can't access site)            -> available = -1, message = str(exception)
          2. Returns error message (str) if album output isn't as expected
                                            -> available = 0,  message = that string
          3. Returns None if it works as expected
                                            -> available = 1,  message = None
        Freshly tested results are collected in 'insertmany' as
        (host, available, message, checked) tuples.

        NOTE(review): insertmany and html_output are built but not consumed
        in the visible code -- presumably the remainder of this method writes
        them out; confirm against the full file.
    '''
    now = timegm(gmtime())

    # Get existing statuses; always release the cursor, even if the query fails
    cursor = self.db.conn.cursor()
    try:
        curexec = cursor.execute('select host, checked, available, message from sites')
        stored = curexec.fetchall()
    finally:
        cursor.close()

    html_output = ''
    # Iterate over rippers, store new values for DB in 'insertmany'
    insertmany = []
    rippers = list(SiteBase.iter_rippers())
    for ripper in rippers:
        host = ripper.get_host()
        url = ripper.get_sample_url()

        # Find previous status / info (first stored row for this host wins)
        previous = None
        for row in stored:
            if row[0] == host:
                previous = row
                break

        # A stored result is reusable only while it is still fresh. The
        # original comparison was inverted: it skipped the test for STALE
        # entries and re-tested fresh ones.
        is_fresh = previous is not None and \
                   now - previous[1] <= SECONDS_BETWEEN_CHECKS

        if is_fresh:
            (_, checked, available, message) = previous
        else:
            checked = now
            try:
                print('testing %s ripper...' % host)
                result = ripper.test()
                # Identity check: only a real None counts as "works as expected"
                available = int(result is None)
                message = result
            except Exception as e:
                available = -1
                message = str(e)
            insertmany.append( (host, available, message, checked) )

        print('%s %s %s %s %s' % (host, url, available, message, checked))
        html_output += self.host_html(host, url, available, message, checked)
def get_album_name(self):
    '''
        Builds the album name from a reddit URL.

        Everything after 'reddit.com/' is used. '.json' is removed, the
        query string and fragment are cut off, and at most the first four
        '/'-separated fields are kept. 'user' is shortened to 'u' and
        'comments' to 'c'. The value of the first '?t=', '?sort=' or '?q='
        parameter (up to the next '&') is appended as a last field.

        For a '?q=' search without 'restrict_sr=on' in the URL, every 'r'
        field and the field right after it are left out of the name.

        Returns:
            The fs_safe()'d fields joined with '_'.
    '''
    marker = 'reddit.com/'
    url = self.url
    url = url[url.find(marker) + len(marker):]
    url = url.replace('.json', '').replace('/?', '?')

    restrict_sr = True
    extra = ''
    if '?t=' in url:
        # Get top sort
        extra = url[url.find('?t=') + 3:]
    elif '?sort=' in url:
        # Get sort
        extra = url[url.find('?sort=') + 6:]
    elif '?q=' in url:
        # Get query
        extra = url[url.find('?q=') + 3:]
        restrict_sr = 'restrict_sr=on' in url
    # Only the first parameter's value is of interest
    extra = extra.split('&')[0]

    # Drop query string and fragment, keep at most 4 path fields
    url = url.split('?')[0].split('#')[0]
    fields = url.split('/')[:4]

    albumname = []
    index = 0
    while index < len(fields):
        field = fields[index]
        if field == 'user':
            field = 'u'
        elif field == 'comments':
            field = 'c'
        if not restrict_sr and field == 'r':
            # The original popped 'r' from the list while looping by index,
            # which also stepped over the field that followed it. Skip both
            # explicitly so the resulting name stays the same.
            index += 2
            continue
        albumname.append(SiteBase.fs_safe(field))
        index += 1

    if extra != '':
        albumname.append(SiteBase.fs_safe(extra))
    return '_'.join(albumname)
def get_album_name(self):
    '''
        Builds the album name from a reddit URL.

        Everything after 'reddit.com/' is used. '.json' is removed, the
        query string and fragment are cut off, and at most the first four
        '/'-separated fields are kept. 'user' is shortened to 'u' and
        'comments' to 'c'. The value of the first '?t=', '?sort=' or '?q='
        parameter (up to the next '&') is appended as a last field.

        For a '?q=' search without 'restrict_sr=on' in the URL, every 'r'
        field and the field right after it are left out of the name.

        Returns:
            The fs_safe()'d fields joined with '_'.
    '''
    marker = 'reddit.com/'
    url = self.url
    url = url[url.find(marker) + len(marker):]
    url = url.replace('.json', '').replace('/?', '?')

    restrict_sr = True
    extra = ''
    if '?t=' in url:
        # Get top sort
        extra = url[url.find('?t=') + 3:]
    elif '?sort=' in url:
        # Get sort
        extra = url[url.find('?sort=') + 6:]
    elif '?q=' in url:
        # Get query
        extra = url[url.find('?q=') + 3:]
        restrict_sr = 'restrict_sr=on' in url
    # Only the first parameter's value is of interest
    extra = extra.split('&')[0]

    # Drop query string and fragment, keep at most 4 path fields
    url = url.split('?')[0].split('#')[0]
    fields = url.split('/')[:4]

    albumname = []
    index = 0
    while index < len(fields):
        field = fields[index]
        if field == 'user':
            field = 'u'
        elif field == 'comments':
            field = 'c'
        if not restrict_sr and field == 'r':
            # The original popped 'r' from the list while looping by index,
            # which also stepped over the field that followed it. Skip both
            # explicitly so the resulting name stays the same.
            index += 2
            continue
        albumname.append(SiteBase.fs_safe(field))
        index += 1

    if extra != '':
        albumname.append(SiteBase.fs_safe(extra))
    return '_'.join(albumname)
def start(self):
    '''
        Overriding SiteBase's start() method for unique ripping logic

        Instead of downloading, this copies a user's already-archived files
        from the directory configured as 'gw_root' into the album's save
        directory and collects one DB row per copied image.

        Returns:
            A dict with 'warning' and the existing album's info if the
            album already exists, or a dict with 'error' if the user is
            unknown to the 'gw_api' search or has no directory under
            'gw_root'.

        Raises:
            Exception if the 'gw_api' or 'gw_root' config value is missing.

        NOTE(review): the visible code ends after the copy loop; insertmany
        and filesize are built but not consumed here, and some of the
        imports below are unused in this portion -- presumably the
        remainder of the method uses them. Confirm against the full file.
    '''
    # We need a lot of libraries
    from ImageUtils import ImageUtils
    from calendar import timegm
    from shutil import copy2, rmtree
    from time import gmtime
    from os import path, walk, environ, getcwd
    from json import loads

    savedir = path.join('rips', self.path)
    if getcwd().endswith('py'):
        savedir = path.join('..', savedir)

    if self.album_exists:
        # Don't re-rip an album. Return info about existing album.
        return {
            'warning'  : 'album already exists',
            'album_id' : self.album_id,
            'album'    : self.album_name,
            'url'      : self.url,
            'host'     : self.get_host(),
            'path'     : self.path,
            'count'    : self.db.count('medias', 'album_id = ?', [self.album_id]),
            'pending'  : self.db.count('urls', 'album_id = ?', [self.album_id])
        }

    user = self.url.split(':')[-1]

    # Search for username (with proper case) on site
    gwapi = self.db.get_config('gw_api')
    if gwapi is None:
        raise Exception('unable to rip gonewild albums: gw_api is null')
    r = self.httpy.get('%s?method=search_user&user=%s' % (gwapi, user))
    response = loads(r)
    found = False
    for jsonuser in response['users']:
        if jsonuser.lower() == user.lower():
            found = True
            user = jsonuser
            break

    gwroot = self.db.get_config('gw_root')
    if gwroot is None:
        raise Exception('unable to rip gonewild albums: gw_root is null')
    userroot = path.join(gwroot, user)

    # Check if we can actually rip this user
    if not found or not path.exists(userroot):
        return {
            'error' : 'unable to rip user (not archived)'
        }

    # Create subdirs
    ImageUtils.create_subdirectories(path.join(savedir, 'thumbs'))

    # Copy images to /rips/, get values that need to be inserted into db (insertmany)
    insertmany = []
    already_got = set()  # set: constant-time duplicate lookups
    filesize = 0
    for root, subdirs, files in walk(userroot):
        if root.endswith('thumbs'):
            continue
        for filename in sorted(files):
            f = path.join(root, filename)
            n = filename
            if not root.endswith(userroot):
                # It's a subdir, save the file accordingly
                n = '%s_%s' % (root[root.rfind('/')+1:], filename)

            # Avoid duplicates: compare only the part after the last '_'
            no_post = n[n.rfind('_')+1:]
            if no_post in already_got:
                continue
            already_got.add(no_post)

            n = '%03d_%s' % (len(insertmany) + 1, n)
            saveas = path.join(savedir, n)

            # Copy & get size
            try:
                copy2(f, saveas)
                (width, height) = ImageUtils.get_dimensions(saveas)
            except Exception:
                # image can't be parsed, probably corrupt. move on.
                continue

            # Create thumbnail
            tsaveas = path.join(savedir, 'thumbs', n)
            try:
                (tsaveas, twidth, theight) = ImageUtils.create_thumbnail(saveas, tsaveas)
            except Exception:
                # Failed to create thumb
                tsaveas = '/'.join(['ui', 'images', 'nothumb.png'])
                twidth = theight = 160

            size = path.getsize(saveas)
            filesize += size

            # Add to list of values to insert into DB
            insertmany.append( [
                    self.album_id,             # album_id, currently None
                    len(insertmany) + 1,       # i_index
                    '',                        # url TODO
                    1,                         # valid
                    None,                      # error
                    SiteBase.get_type(saveas), # type
                    n,                         # image_name
                    width,                     # img width
                    height,                    # img height
                    size,                      # filesize
                    path.basename(tsaveas),    # thumb_name
                    twidth,                    # thumb width
                    theight,                   # thumb height
                    None                       # metadata
                ] )
def get_album_name(self):
    '''
        Album name is the second-to-last '/'-separated piece of the URL,
        passed through SiteBase.fs_safe().
    '''
    # Only the last two separators matter for picking the [-2] piece
    tail = self.url.rsplit('/', 2)
    return SiteBase.fs_safe(tail[-2])
def get_album_name(self):
    '''
        Album name is everything after the first '/'-separated piece of the
        scheme-less URL, joined with '-' and passed through
        SiteBase.fs_safe().
    '''
    stripped = self.url
    for scheme in ('http://', 'https://'):
        stripped = stripped.replace(scheme, '')
    pieces = stripped.split('/')
    # pieces[0] is whatever precedes the first '/', and is not part of the name
    return SiteBase.fs_safe('-'.join(pieces[1:]))
def get_album_name(self):
    '''
        Album name is the text after the last '/' of the URL, passed
        through SiteBase.fs_safe().
    '''
    last_piece = self.url.rsplit('/', 1)[-1]
    return SiteBase.fs_safe(last_piece)
def get_album_name(self):
    '''
        Album name for a twitter rip.

        Returns:
            self.twitter_user as-is when it is set; otherwise
            'search_' + fs_safe'd self.twitter_search when that is set.

        Raises:
            Exception if neither twitter_user nor twitter_search is set.
    '''
    # Identity checks: an empty string still counts as "set", only None does not
    if self.twitter_user is not None:
        return self.twitter_user
    if self.twitter_search is not None:
        return 'search_%s' % SiteBase.fs_safe(self.twitter_search)
    raise Exception('url was neither a twitter user nor a twitter search')