def load(cls, file):
    """Return a deserialized instance of `cls` read from `file`.

    `file` is either a path (opened here for reading, unbuffered) or an
    already-open file object.  The first line of the file is a header of
    the form ``version:metalen``.  The meta DiscoDB is loaded from the
    offset right after the header, and the data DiscoDB from ``metalen``
    bytes after that.

    Raises ValueError if the header does not split into exactly two
    ``:``-separated fields or `metalen` is not an integer.
    """
    if isinstance(file, basestring):
        # NOTE(review): a handle opened here is never closed explicitly.
        file = open(file, 'r', 0)
    header = file.readline()
    offset = len(header)
    # `version` is parsed (which also validates the header shape) but is
    # not otherwise used.
    version, metalen = header.strip().split(':')
    metadb = DiscoDB.load(file, offset)
    offset += int(metalen)
    datadb = DiscoDB.load(file, offset)
    return cls(datadb, metadb)
def test_dump_load(self):
    """Dump `self.discodb` to a temporary file and check it loads back equal.

    Equality is checked by comparing the `dumps()` output of the reloaded
    DiscoDB with that of the original.
    """
    from tempfile import NamedTemporaryFile
    # The context manager closes the temporary file even when the dump,
    # the load or the assertion fails.
    with NamedTemporaryFile() as handle:
        self.discodb.dump(handle)
        # Rewind so the load starts at the beginning of what was written.
        handle.seek(0)
        discodb = DiscoDB.load(handle)
        # Compare while the file is still open.
        self.assertEqual(discodb.dumps(), self.discodb.dumps())
def maybe_method(datadir, rest, method, xargs=None):
    """Call `method` on the DiscoDB named by the part of `rest` before ``/<method>/``.

    `rest` is expected to look like ``<file>/<method>/<arg>``.  `<file>` is
    resolved relative to `datadir`; when it is an existing file, the
    DiscoDB is loaded and its `method` attribute is called -- with
    ``xargs(arg)`` when `xargs` is truthy, with no arguments otherwise.

    Raises NotMethod when ``/<method>/`` does not occur in `rest` after its
    first character, or when the resolved path is not a file.
    """
    marker = '/%s/' % method
    # `find` must be strictly positive: a marker at index 0 would leave an
    # empty file name.
    if rest.find(marker) <= 0:
        raise NotMethod(method)

    relpath, arg = rest.split(marker, 1)
    path = os.path.join(datadir, relpath)
    if not os.path.isfile(path):
        raise NotMethod(method)

    bound = getattr(DiscoDB.load(open(path)), method)
    if xargs:
        return bound(xargs(arg))
    return bound()
def __init__(self, **tables):
    """Build the name -> DiscoTable mapping from keyword arguments.

    Each keyword value is either a DiscoDB instance, used as is, or
    anything else, which is passed to `open()` and loaded as a DiscoDB.
    """
    # {'name': DiscoDB}
    self._tables = {}
    for name, source in tables.items():
        if not isinstance(source, DiscoDB):
            # Not a DiscoDB yet: treat the value as something `open` accepts.
            source = DiscoDB.load(open(source))
        self._tables[name] = DiscoTable(self, name, source)
def load(name):
    """Load all partitions of `name` and store the list in `dbs[name]`.

    The partition count is read as an integer from the ``partitions`` file
    in ``dirname(name)``.  For each partition index, the file returned by
    ``filename(name, index)`` is loaded if it exists; missing partitions
    all share one empty DiscoDB instance.
    """
    base = dirname(name)
    with open(os.path.join(base, 'partitions')) as count_file:
        partitions = int(count_file.read())

    # A single empty DiscoDB stands in for every partition without a file.
    placeholder = DiscoDB()
    discodbs = []
    for partition in xrange(partitions):
        path = filename(name, partition)
        if os.path.exists(path):
            discodbs.append(DiscoDB.load(open(path)))
        else:
            discodbs.append(placeholder)
    dbs[name] = discodbs
def test_leak():
    """Exercise the DiscoDB API in an endless loop; never returns.

    Each iteration builds a DiscoDB, queries it, serializes it to a string
    and to '/tmp/discodb', loads both back and compares every key with
    every value.  Presumably meant to be run by hand while watching the
    process for resource growth -- TODO confirm.
    """
    while True:
        d = DiscoDB(zip(letters, ['abc'] * 1000))
        t = len(d.query('a'))
        t = len(d['b'])
        t = 'd' in d
        t = d.dumps()
        t = DiscoDB.loads(t)
        # Close both handles explicitly: the written file is closed before
        # it is reopened, and no handle is left to the garbage collector
        # on any iteration.
        with open('/tmp/discodb', 'w') as out:
            t = d.dump(out)
        with open('/tmp/discodb') as src:
            t = DiscoDB.load(src)
        for k in d.keys():
            for v in d.values():
                t = k == v
def test_leak():
    """Exercise the DiscoDB API in an endless loop; never returns.

    Each iteration builds a DiscoDB, queries it, serializes it to a string
    and to "/tmp/discodb", loads both back and compares every key with
    every value.  Presumably meant to be run by hand while watching the
    process for resource growth -- TODO confirm.
    """
    while True:
        d = DiscoDB(zip(letters, ["abc"] * 1000))
        t = len(d.query("a"))
        t = len(d["b"])
        t = "d" in d
        t = d.dumps()
        t = DiscoDB.loads(t)
        # Close both handles explicitly: the written file is closed before
        # it is reopened, and no handle is left to the garbage collector
        # on any iteration.
        with open("/tmp/discodb", "w") as out:
            t = d.dump(out)
        with open("/tmp/discodb") as src:
            t = DiscoDB.load(src)
        for k in d.keys():
            for v in d.values():
                t = k == v
def test_leak():
    """Exercise the DiscoDB API in an endless loop; never returns.

    Each iteration builds a DiscoDB, queries it, serializes it to a string
    and to '/tmp/discodb', loads both back and compares every key with
    every value.  Presumably meant to be run by hand while watching the
    process for resource growth -- TODO confirm.
    """
    while True:
        d = DiscoDB(zip(letters, ['abc'] * 1000))
        t = len(d.query('a'))
        t = len(d['b'])
        t = 'd' in d
        t = d.dumps()
        t = DiscoDB.loads(t)
        # Close both handles explicitly: the written file is closed before
        # it is reopened, and no handle is left to the garbage collector
        # on any iteration.
        with open('/tmp/discodb', 'w') as out:
            t = d.dump(out)
        with open('/tmp/discodb') as src:
            t = DiscoDB.load(src)
        for k in d.keys():
            for v in d.values():
                t = k == v
def scan_database_dir(state):
    """Load every not-yet-known DiscoDB file found in $DATA_DB_PATH.

    `state["dbs"]` maps full file paths to loaded DiscoDBs.  Each regular
    file in the directory that is not already a key is loaded and added.
    A file that raises DiscoDBError on load is logged and deleted.  When
    `state["dbs"]` is non-empty afterwards, `state["cache_time"]` is set to
    the current time.

    Returns the same `state` object, mutated in place.
    Raises KeyError if DATA_DB_PATH is not set in the environment.
    """
    db_path = os.environ["DATA_DB_PATH"]
    for entry in os.listdir(db_path):
        fname = os.path.join(db_path, entry)
        if fname in state["dbs"] or not os.path.isfile(fname):
            continue
        try:
            # NOTE(review): assumes DiscoDB.load does not need `handle`
            # once it has returned -- confirm.
            with open(fname) as handle:
                state["dbs"][fname] = DiscoDB.load(handle)
        except DiscoDBError:
            # Log before removing so the traceback is recorded even if the
            # removal itself fails.
            logger.exception("Unable to open %s", fname)
            # maybe a corrupt discodb, nuke it; the sync process should
            # fetch a new one later on
            os.remove(fname)
    if state["dbs"]:
        state["cache_time"] = time.time()
    return state
def scan_database_dir(state):
    """Load every not-yet-known DiscoDB file found in $DATA_DB_PATH.

    `state['dbs']` maps full file paths to loaded DiscoDBs.  Each regular
    file in the directory that is not already a key is loaded and added.
    A file that raises DiscoDBError on load is logged and deleted.  When
    `state['dbs']` is non-empty afterwards, `state['cache_time']` is set to
    the current time.

    Returns the same `state` object, mutated in place.
    Raises KeyError if DATA_DB_PATH is not set in the environment.
    """
    db_path = os.environ['DATA_DB_PATH']
    for entry in os.listdir(db_path):
        fname = os.path.join(db_path, entry)
        if fname in state['dbs'] or not os.path.isfile(fname):
            continue
        try:
            # NOTE(review): assumes DiscoDB.load does not need `handle`
            # once it has returned -- confirm.
            with open(fname) as handle:
                state['dbs'][fname] = DiscoDB.load(handle)
        except DiscoDBError:
            # Log before removing so the traceback is recorded even if the
            # removal itself fails.
            logger.exception('Unable to open %s', fname)
            # maybe a corrupt discodb, nuke it; the sync process should
            # fetch a new one later on
            os.remove(fname)
    if state['dbs']:
        state['cache_time'] = time.time()
    return state
def input_stream(fd, size, url, params):
    """Open the local DiscoDB addressed by `url`; return `(result, size, url)`.

    The URL path may carry a ``!<method>[/<arg>]`` suffix.  Without it the
    DiscoDB itself is the result.  With method ``query`` the result is
    ``discodb.query(...)`` using `params.discodb_query` when that attribute
    exists, else ``Q.urlscan(arg)``.  Any other method is called with no
    arguments.  The loaded DiscoDB is also stored on `Task.discodb`.

    Raises core.DiscoError when the URL's netloc host differs from the
    task's.
    """
    scheme, netloc, rest = util.urlsplit(url)
    if netloc[0] != Task.netloc[0]:
        raise core.DiscoError("Scheme 'discodb' can only be used with force_local=True")

    path, rest = rest.split('!', 1) if '!' in rest else (rest, '')
    # NOTE(review): assumes DiscoDB.load does not need `handle` once it has
    # returned -- confirm.
    with open(os.path.join(Task.root, path)) as handle:
        Task.discodb = DiscoDB.load(handle)
    if not rest:
        return Task.discodb, size, url

    # `partition` tolerates a bare method name ("!keys"); the previous
    # two-way unpack of `split('/', 1)` raised ValueError for it.
    method, _, arg = rest.partition('/')
    if method == 'query':
        if hasattr(params, 'discodb_query'):
            return Task.discodb.query(params.discodb_query), size, url
        return Task.discodb.query(Q.urlscan(arg)), size, url
    return getattr(Task.discodb, method)(), size, url
def input_stream(fd, size, url, params):
    """Open the DiscoDB addressed by `url`; return `(result, size, url)`.

    When the URL's netloc host matches the task's, the file is opened
    under `Task.root`; otherwise its contents are downloaded via a
    ``disco://`` URL and deserialized.  The URL path may carry a
    ``!<method>[/<arg>]`` suffix: without it the DiscoDB itself is the
    result; ``query`` and ``metaquery`` are called with ``Q.urlscan(arg)``;
    any other method is called with `arg` as its single argument, or with
    no arguments when `arg` is missing or empty.
    """
    import os
    from disco import util
    from disco.comm import download
    from discodb import DiscoDB, Q

    scheme, netloc, rest = util.urlsplit(url)
    path, rest = rest.split('!', 1) if '!' in rest else (rest, '')

    if netloc[0] == Task.netloc[0]:
        # NOTE(review): assumes DiscoDB.load does not need `handle` once it
        # has returned -- confirm.
        with open(os.path.join(Task.root, path)) as handle:
            discodb = DiscoDB.load(handle)
    else:
        discodb = DiscoDB.loads(download('disco://%s/%s' % (netloc, path)))

    if not rest:
        return discodb, size, url

    method_name, arg = rest.split('/', 1) if '/' in rest else (rest, None)
    method = getattr(discodb, method_name)
    if method_name in ('metaquery', 'query'):
        return method(Q.urlscan(arg)), size, url
    # `method(*filter(None, arg))` raised TypeError when `arg` was None and
    # splatted a string argument into single characters.  Pass the argument
    # whole, or nothing at all when it is missing or empty.
    args = (arg,) if arg else ()
    return method(*args), size, url
def input_stream(fd, size, url, params):
    """Open the DiscoDB addressed by `url`; return `(result, size, url)`.

    When the URL's netloc host matches the task's, the file is opened
    under `Task.root`; otherwise its contents are downloaded via a
    ``disco://`` URL and deserialized.  The URL path may carry a
    ``!<method>[/<arg>]`` suffix: without it the DiscoDB itself is the
    result; ``query`` and ``metaquery`` are called with ``Q.urlscan(arg)``;
    any other method is called with `arg` as its single argument, or with
    no arguments when `arg` is missing or empty.
    """
    import os
    from disco import util
    from disco.comm import download
    from discodb import DiscoDB, Q

    scheme, netloc, rest = util.urlsplit(url)
    path, rest = rest.split('!', 1) if '!' in rest else (rest, '')

    if netloc[0] == Task.netloc[0]:
        # NOTE(review): assumes DiscoDB.load does not need `handle` once it
        # has returned -- confirm.
        with open(os.path.join(Task.root, path)) as handle:
            discodb = DiscoDB.load(handle)
    else:
        discodb = DiscoDB.loads(download('disco://%s/%s' % (netloc, path)))

    if not rest:
        return discodb, size, url

    method_name, arg = rest.split('/', 1) if '/' in rest else (rest, None)
    method = getattr(discodb, method_name)
    if method_name in ('metaquery', 'query'):
        return method(Q.urlscan(arg)), size, url
    # `method(*filter(None, arg))` raised TypeError when `arg` was None and
    # splatted a string argument into single characters.  Pass the argument
    # whole, or nothing at all when it is missing or empty.
    args = (arg,) if arg else ()
    return method(*args), size, url
def Open(url, task=None):
    """Open the DiscoDB addressed by `url` and return it or a method result.

    The data directories come from `task` (`disco_data`, `ddfs_data`) when
    one is given, otherwise from `DiscoSettings` ('DISCO_DATA',
    'DDFS_DATA'); they are passed to `util.localize` to turn the URL path
    into a local file name.  The URL path may carry a
    ``!<method>[/<arg>]`` suffix: without it the DiscoDB itself is
    returned; ``query`` and ``metaquery`` are called with
    ``Q.urlscan(arg)``; any other method is called with `arg` as its single
    argument, or with no arguments when `arg` is missing or empty.
    """
    if task:
        disco_data = task.disco_data
        ddfs_data = task.ddfs_data
    else:
        from disco.settings import DiscoSettings
        settings = DiscoSettings()
        disco_data = settings['DISCO_DATA']
        ddfs_data = settings['DDFS_DATA']

    scheme, netloc, rest = util.urlsplit(url)
    path, rest = rest.split('!', 1) if '!' in rest else (rest, '')
    local_path = util.localize(path, disco_data=disco_data, ddfs_data=ddfs_data)
    # NOTE(review): assumes DiscoDB.load does not need `handle` once it has
    # returned -- confirm.
    with open(local_path) as handle:
        discodb = DiscoDB.load(handle)

    if not rest:
        return discodb

    method_name, arg = rest.split('/', 1) if '/' in rest else (rest, None)
    method = getattr(discodb, method_name)
    if method_name in ('metaquery', 'query'):
        return method(Q.urlscan(arg))
    # `method(*filter(None, arg))` raised TypeError when `arg` was None and
    # splatted a string argument into single characters.  Pass the argument
    # whole, or nothing at all when it is missing or empty.
    args = (arg,) if arg else ()
    return method(*args)
def Open(url, task=None):
    """Open the DiscoDB addressed by `url` and return it or a method result.

    The data directories come from `task` (`disco_data`, `ddfs_data`) when
    one is given, otherwise from `DiscoSettings` ('DISCO_DATA',
    'DDFS_DATA'); they are passed to `util.localize` to turn the URL path
    into a local file name.  The URL path may carry a
    ``!<method>[/<arg>]`` suffix: without it the DiscoDB itself is
    returned; ``query`` and ``metaquery`` are called with
    ``Q.urlscan(arg)``; any other method is called with `arg` as its single
    argument, or with no arguments when `arg` is missing or empty.
    """
    if task:
        disco_data = task.disco_data
        ddfs_data = task.ddfs_data
    else:
        from disco.settings import DiscoSettings
        settings = DiscoSettings()
        disco_data = settings['DISCO_DATA']
        ddfs_data = settings['DDFS_DATA']

    scheme, netloc, rest = util.urlsplit(url)
    path, rest = rest.split('!', 1) if '!' in rest else (rest, '')
    local_path = util.localize(path, disco_data=disco_data, ddfs_data=ddfs_data)
    # NOTE(review): assumes DiscoDB.load does not need `handle` once it has
    # returned -- confirm.
    with open(local_path) as handle:
        discodb = DiscoDB.load(handle)

    if not rest:
        return discodb

    method_name, arg = rest.split('/', 1) if '/' in rest else (rest, None)
    method = getattr(discodb, method_name)
    if method_name in ('metaquery', 'query'):
        return method(Q.urlscan(arg))
    # `method(*filter(None, arg))` raised TypeError when `arg` was None and
    # splatted a string argument into single characters.  Pass the argument
    # whole, or nothing at all when it is missing or empty.
    args = (arg,) if arg else ()
    return method(*args)
def open_db():
    """Load a DiscoDB from a local file and store it on `app.db`.

    The file is the first command-line argument when one is given.
    Otherwise the first key listed under 'data/' in the S3 bucket is used:
    it is downloaded into $DATA_DB_PATH unless a file with that name is
    already there.
    """
    # Was `len(sys.argv > 1)` and `sys.arv[1]`, both of which raise before
    # the argument can be used.
    if len(sys.argv) > 1:
        fname = sys.argv[1]
    else:
        # fetch some data from s3
        # NOTE(review): `boto.s3_connectection`, `conn.bucket`,
        # `.list(...).next()` and `get_contents_to_path` do not look like
        # boto API names (compare `boto.connect_s3`, `get_bucket`,
        # `get_contents_to_filename`), and `bucket` is assigned twice with
        # different bucket names.  Left unchanged because the intended
        # bucket cannot be determined from here -- confirm and fix.
        conn = boto.s3_connectection(
            os.environ['AWS_KEY'],
            os.environ['AWS_SECRET']
        )
        bucket = conn.get_bucket('com.mozillalabs.blink')
        bucket = conn.bucket('blink')
        item = bucket.list('data/').next()
        fname = os.path.join(
            os.environ['DATA_DB_PATH'],
            str(item.key)
        )
        if not os.path.exists(fname):
            item.get_contents_to_path(fname)
    # NOTE(review): assumes DiscoDB.load does not need `handle` once it has
    # returned -- confirm.
    with open(fname) as handle:
        app.db = DiscoDB.load(handle)
file, arg = rest.split('/%s/' % method, 1) path = os.path.join(datadir, file) if os.path.isfile(path): bound_method = getattr(DiscoDB.load(open(path)), method) return bound_method(xargs(arg)) if xargs else bound_method() raise NotMethod(method) def input_stream(fd, size, url, params): scheme, rest = url.split('://', 1) host, rest = rest.split('/', 1) if hasattr(params, "discodb_query"): query = lambda x: params.discodb_query else: query = Q.urlscan if host == Task.host or Task.has_flag("resultfs"): datadir = os.path.join(Task.root, "data") try: return maybe_method(datadir, rest, 'query', xargs=query), size, params except NotMethod, e: pass for method in ('keys', 'values'): try: return maybe_method(datadir, rest, method), size, params except NotMethod, e: pass return DiscoDB.load(open(os.path.join(datadir, rest))), size, params raise core.DiscoError("Scheme 'discodb' can only be used with force_local=True")
#!/usr/bin/python
"""Look up keys read from stdin in a DiscoDB file and print one value each.

Usage: pass the DiscoDB file as the first argument and feed one key per
line on standard input.  For every key with a non-empty result, the first
value yielded by iterating the result is printed; other keys print nothing.
"""
import sys

from discodb import DiscoDB

# Keep the file open for as long as the database is in use, and close it
# when the script is done.
with open(sys.argv[1], 'r') as handle:
    db = DiscoDB.load(handle)
    # Stream stdin line by line instead of materializing every key first.
    for line in sys.stdin:
        inq = db.get(line.rstrip())
        if inq and len(inq) > 0:
            print(next(iter(inq)))
lens_ttls.append(g_lens) print(g1) t2 = time.time() # 500: 48 sec: 5.2k/sec # 1k: 182: 5.5k/sec # 2k: 722: 5.5k/sec # ** writing/loading fo = open('/home/johannes/Dropbox/gsss/thesis/anls/try1/add_data/db.disco', 'a') db.dump(fo) fo.close() with open('/home/johannes/Dropbox/gsss/thesis/anls/try1/add_data/db.disco', 'r') as fi: dbsx = DiscoDB.load(fi) # ** multiprocessing theory from multiprocessing import Process def f(name): print('hello', name) for i in range(5): print(i) time.sleep(1) if __name__ == '__main__': p = Process(target=f, args=('bob',)) p.start() p.join()