def json_parse_url(url):
    """Trivial helper to avoid copy-pasting same code all over"""
    if debug:
        print "Fetching (JSON) %s" % url
    try:
        fp = urllib_cached.urlopen(url)
        parsed = json.load(fp)
        fp.close()
    except Exception, e:
        print "Got exception %s" % e
        return None
    return parsed
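For context, a minimal sketch of a call site for this helper; the API URL below is an illustrative assumption and is not taken from the surrounding code:

# Hypothetical call site: fetch one status as JSON, bail out on failure.
data = json_parse_url("http://www.qaiku.com/api/statuses/show/12345.json")
if data is None:
    print "Could not fetch or parse the message"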
def fill_image_urls(message_id):
    """Loads the object, then tries to figure out the web URL for it
    and scrape said url for the images"""
    if not can_scrape:
        return False
    if (    not storage.in_cache_byid(message_id)
        and not fetcherparser.recursive_fetch_message(message_id)):
        return False
    obj = fetcherparser.fetch_message(message_id)
    # There is no image, don't bother...
    if (   not obj.has_key('image_url')
        or not obj['image_url']):
        return False
    # Already processed this one
    for prop in ['QaikuBackup_image_url_view', 'QaikuBackup_image_url_orig']:
        if obj.has_key(prop):
            return True
    # Try to figure the shortest way to the canonical message HTML view
    url = None
    if obj.has_key('in_reply_to_status_url'):
        # This is a redirect but urllib has no problem following it
        url = obj['in_reply_to_status_url']
    if (    not url
        and obj.has_key('channel')
        and obj['channel']):
        # Channel message
        url = "http://www.qaiku.com/channels/show/%s/view/%s/" % (obj['channel'], obj['id'])
    if (    not url
        and obj.has_key('user')
        and obj['user'].has_key('url')
        and obj['user']['url']):
        # non-Channel message
        url = "%s/show/%s/" % (obj['user']['url'], obj['id'])
    # Are there other possible combinations ?
    if not url:
        return False
    if debug:
        print "Soupifying %s" % url
    try:
        soup = BeautifulSoup(urllib_cached.urlopen(url))
    except Exception, e:
        print "Got exception %s" % e
        return False
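A possible driver loop for this function might look like the sketch below; the message_ids list is an assumed input, not something defined in the code above:

# Hypothetical driver: try to resolve image URLs for every backed-up message.
for message_id in message_ids:
    if not fill_image_urls(message_id):
        print "No image URLs resolved for %s" % message_id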
if os.path.isfile(local_path):
    # Make sure the file has sane amount of data...
    if os.stat(local_path).st_size < 16:
        print "ERR: Local file %s is empty, removing" % local_path
        os.unlink(local_path)
    else:
        return local_path
# Create the container dir if it's not there
if not os.path.isdir(os.path.dirname(local_path)):
    os.makedirs(os.path.dirname(local_path))
if debug:
    print "Fetching (BIN) %s to %s" % (url, local_path)
fp_from = None
fp_to = None
try:
    fp_from = urllib_cached.urlopen(url)
    fp_to = open(local_path, 'wb')
    # TODO: use a sensibly sized buffer ?
    fp_to.write(fp_from.read())
    fp_from.close()
    fp_to.close()
except Exception, e:
    print "Got exception %s" % e
    if fp_from:
        fp_from.close()
    if fp_to:
        fp_to.close()
    if os.path.isfile(local_path):
        os.unlink(local_path)
    return None
return local_path