import wsgiref import urllib2 import os,sys import timeout_urllib2; timeout_urllib2.sethttptimeout(4.0) from sane_re import * import tchelpers os.system("mkdir -p httpcache.tc") data_by_url = tchelpers.open("httpcache.tc/data_by_url.tch") last_update = tchelpers.open("httpcache.tc/last_update.tch") def application(environ, start_response): response_headers = [('Content-type', 'text/plain')] #start_response(status, response_headers) url = wsgiref.util.request_uri(environ) print url url = _S(url)['/(http.*)', 1] url = url.replace('http%3A//','http://') print url if url in data_by_url: print "CACHE HIT" data = data_by_url[url] else: try: print "URL FETCH" f = urllib2.urlopen(url) data = f.read() data_by_url[url] = data except (urllib2.URLError, timeout_urllib2.Error, socket.error), e:
def __init__(self, filename="background_model.tc", readonly=True):
    """Open (or create) the Tokyo Cabinet databases backing this model.

    filename -- directory that holds the .hdb files (created if absent)
    readonly -- when True open the databases with the 'r' flag,
                otherwise with 'c' (create if missing)
    """
    # Create the backing directory ourselves instead of shelling out with
    # os.system("mkdir -p %s" % filename): the shell string breaks on
    # whitespace and allows shell injection through *filename*.
    try:
        os.makedirs(filename)
    except OSError:
        # Directory already exists (or mkdir failed) -- matches the
        # original's mkdir -p behaviour of ignoring the exit status.
        pass
    flag = 'r' if readonly else 'c'
    self.counts = TokyoNgramProxy(tchelpers.open("%s/ngram_counts.hdb" % filename, flag))
    # NOTE(review): uses tchelpers.open_tc here but tchelpers.open above --
    # confirm both helpers exist; the asymmetry looks accidental.
    self.info = KVIntProxy(tchelpers.open_tc("%s/info.hdb" % filename, flag))