def __init__(self, cache="cache", debug=False):
    """Initialise the URL cache.

    cache -- directory used for on-disk cache files; created on demand
             when memcache is not in use
    debug -- when true, print verbose progress messages
    """
    self.cache = cache
    # In-memory map of url-hash -> cached URLObject.
    self.store = {}
    self.debug = debug
    # NOTE(review): URLTimeout is defined elsewhere in this project.
    self.grabber = URLTimeout(debug)
    self.default_timeout = self.grabber.getTimeout()
    # The on-disk cache directory is only needed when memcache is not
    # available.  ('is None' is the idiomatic identity test; the
    # original used '== None'.)
    if memcache is None and not os.path.exists(cache):
        os.mkdir(cache)
class Cache:
    """URL cache: keeps URLObjects in memory, backed either by memcache
    (stored compressed) or by per-URL files in a cache directory.

    NOTE(review): this chunk was recovered from whitespace-mangled text;
    the indentation/block structure below is reconstructed and should be
    checked against the original.  Names such as URLTimeout, URLObject,
    memcache, load/dump, compress/decompress, URLTimeoutError and
    UnpicklingError come from imports outside this chunk.
    """

    def __init__(self,cache="cache", debug=False):
        # cache: directory for on-disk cache files; debug: verbose logging.
        self.cache = cache
        self.store = {}  # url-hash -> URLObject, the in-memory cache
        self.debug = debug
        self.grabber = URLTimeout(debug)
        self.default_timeout = self.grabber.getTimeout()
        # The cache directory is only needed when memcache is not in use.
        if memcache == None and not os.path.exists(cache):
            os.mkdir(cache)

    def __load__(self,url,ref, data = None):
        """Ensure the entry for (url, ref, data) is in self.store,
        pulling it from memcache or from the cache directory if needed.
        Corrupt on-disk entries are deleted and ignored."""
        hash = URLObject.md5(url,ref,data)  # NOTE: shadows builtin 'hash'
        if self.store.has_key(hash):
            return  # already in memory
        if memcache:
            get = memcache.get(hash)
            if get != None:
                self.store[hash] = get
                # memcache entries are stored compressed (see _dump)
                self.store[hash].data = decompress(self.store[hash].data)
            # memcache mode never falls back to the disk cache.
            # NOTE(review): reconstructed indentation -- confirm this
            # 'return' sits here rather than inside the 'if' just above.
            return
        f = hash+".cache"
        if f in os.listdir(self.cache):
            try:
                if self.debug: print "loading",os.path.join(self.cache,f)
                # 'load' is presumably pickle.load -- confirm against imports
                old = load(file(os.path.join(self.cache,f)))
                old.seek(0)
                if len(old.readall())==0:
                    raise EOFError()  # empty body: treat as corrupt
                self.store[old.hash()] = old
                if self.debug: print "loaded",old.url,old.ref,old.hash()
                # the filename (minus ".cache") must match the object's hash
                if(old.hash()!=f[:-len(".cache")]):
                    raise Exception,"md5 problem!"
            except (EOFError,ValueError,UnpicklingError,ImportError): # ignore and discard
                if self.debug: print "discarded",f,sys.exc_info()
                os.unlink(os.path.join(self.cache,f))  # drop the bad file

    def auth(self,user,password):
        """Forward auth credentials to the underlying URL grabber."""
        self.grabber.auth(user,password)

    def _dump(self,url,ref,data):
        """Persist the in-memory entry for (url, ref, data), either to
        memcache (compressed) or to its file in the cache directory.
        Raises Exception if the URL was never fetched."""
        self.__load__(url,ref,data)
        hash = URLObject.md5(url,ref,data)
        if self.store.has_key(hash):
            if self.debug: print "dumping",url,ref,hash
            if memcache!=None:
                # compress before storing; __load__ decompresses on read
                self.store[hash].data = compress(self.store[hash].data)
                memcache.set(hash, self.store[hash])
            else:
                f = file(os.path.join(self.cache,hash+".cache"),'wb')
                dump(self.store[hash],f)  # presumably pickle.dump -- confirm
                f.close()
        else:
            raise Exception, "We never got that URL! ("+url+")"

    # Optional User-Agent header value sent with every request (see get()).
    user_agent = None

    def get(self, url, ref=None, max_age=3600, data = None, headers={}, timeout=None, ignore_move = False): # 3600 seconds = 60 minutes
        """Fetch url, serving from the cache when fresh enough.

        max_age -- seconds a cached copy stays valid (-1 = valid forever)

        NOTE(review): 'headers={}' is a shared mutable default and this
        method writes into it (User-Agent / conditional-GET headers), so
        values can leak between calls -- confirm this is intended.
        """
        if timeout == None: timeout = self.default_timeout
        if self.debug: print "Grabbing",url
        self.__load__(url,ref)
        hash = URLObject.md5(url,ref,data)
        if self.user_agent!=None: headers["User-Agent"] = self.user_agent
        now = time.time()
        if self.store.has_key(hash):
            old = self.store[hash]
            if self.debug: print "time diff",time.time()-old.checked
            if len(old.headers.headers)>0: # non-local file
                # Fresh enough: serve straight from the cache.
                if max_age==-1 or now-old.checked < max_age:
                    old.seek(0)
                    old.used = now
                    self._dump(old.url,old.ref,old.postData)
                    return old
                # Stale: prepare a conditional GET against the old copy.
                if old.info().get("Last-Modified")!=None: headers["If-Modified-Since"] = old.info().get("Last-Modified")
                if old.info().get("ETag")!=None: headers["If-None-Match"] = old.info().get("ETag")
            else:
                # Local file:// URL -- compare file mtime with last check.
                try:
                    if os.stat(url[len("file://"):])[ST_MTIME] <= old.checked:
                        old.checked = old.used = now
                        self._dump(old.url,old.ref,old.postData)
                        return old
                except OSError,e:
                    raise URLTimeoutError, (str(e),url)
        else:
            # (method body continues beyond this chunk -- truncated here)