示例#1
0
 def reduce(iter, params):
     """Build a DiscoDB from the reduce input and dump it to a partition file.

     Reads ``params['partitions']`` and ``params['name']``, builds a DiscoDB
     from ``kvgroup(iter)``, derives the partition from the first key via
     ``util.default_partition`` and dumps the database to the path returned
     by ``filename(name, partition)``.

     Yields:
         A single ``(partition, None)`` pair, or nothing at all when the
         database has no keys (in which case no file is written).
     """
     partitions = params['partitions']
     name = params['name']
     discodb = DiscoDB(kvgroup(iter))
     # Figure out what partition we are in from the first key.  The parameter
     # named `iter` shadows the builtin here, so use a for/else instead of
     # next(iter(...)); this also works on both Python 2 and Python 3.
     for key in discodb.keys():
         break
     else:
         # no keys, nothing to write
         return
     partition = util.default_partition(key, partitions, params)
     # Close the output file deterministically instead of leaving the handle
     # to the garbage collector.
     with open(filename(name, partition), 'w') as out:
         discodb.dump(out)
     yield partition, None
示例#2
0
文件: test.py 项目: Dieterbe/disco
class TestSerializationProtocol(unittest.TestCase):
    """Round-trip checks for DiscoDB serialization (dumps/loads and dump/load)."""

    # Argument passed to k_vs_iter() when building the fixture in setUp().
    numkeys = 10000

    def setUp(self):
        self.discodb = DiscoDB(k_vs_iter(self.numkeys))

    def test_dumps_loads(self):
        """A buffer from dumps() re-serializes identically after loads()."""
        dbuffer = self.discodb.dumps()
        # assertEqual: the assertEquals alias is deprecated and was removed
        # in Python 3.12.
        self.assertEqual(dbuffer, DiscoDB.loads(dbuffer).dumps())

    def test_dump_load(self):
        """A database dumped to a file and loaded back serializes identically."""
        from tempfile import NamedTemporaryFile
        # The context manager closes the temporary file even when an
        # assertion fails; the comparison stays inside the block so the file
        # is still open while the loaded database is in use.
        with NamedTemporaryFile() as handle:
            self.discodb.dump(handle)
            handle.seek(0)
            discodb = DiscoDB.load(handle)
            self.assertEqual(discodb.dumps(), self.discodb.dumps())
示例#3
0
def test_leak():
    """Exercise the DiscoDB API in an endless loop; this function never returns.

    Each pass builds a database from ``letters``, queries it, round-trips it
    through dumps()/loads() and through the file '/tmp/discodb', and compares
    every key with every value.  Presumably meant to be run while watching
    process memory for leaks -- TODO confirm.
    """
    while True:
        d = DiscoDB(zip(letters, ['abc'] * 1000))
        # Results are assigned to a throwaway name; only the calls matter.
        t = len(d.query('a'))
        t = len(d['b'])
        t = 'd' in d
        t = d.dumps()
        t = DiscoDB.loads(t)
        # Close the files explicitly: the loop never ends, so handles left to
        # the garbage collector could exhaust file descriptors on
        # interpreters without reference counting.
        with open('/tmp/discodb', 'w') as out:
            t = d.dump(out)
        with open('/tmp/discodb') as src:
            t = DiscoDB.load(src)
        for k in d.keys():
            for v in d.values():
                t = k == v
示例#4
0
文件: perf.py 项目: rch/discodb
def test_leak():
    """Exercise the DiscoDB API in an endless loop; this function never returns.

    Each pass builds a database from ``letters``, queries it, round-trips it
    through dumps()/loads() and through the file "/tmp/discodb", and compares
    every key with every value.  Presumably meant to be run while watching
    process memory for leaks -- TODO confirm.
    """
    while True:
        d = DiscoDB(zip(letters, ["abc"] * 1000))
        # Results are assigned to a throwaway name; only the calls matter.
        t = len(d.query("a"))
        t = len(d["b"])
        t = "d" in d
        t = d.dumps()
        t = DiscoDB.loads(t)
        # Close the files explicitly: the loop never ends, so handles left to
        # the garbage collector could exhaust file descriptors on
        # interpreters without reference counting.
        with open("/tmp/discodb", "w") as out:
            t = d.dump(out)
        with open("/tmp/discodb") as src:
            t = DiscoDB.load(src)
        for k in d.keys():
            for v in d.values():
                t = k == v
示例#5
0
文件: perf.py 项目: Dieterbe/disco
def test_leak():
    """Exercise the DiscoDB API in an endless loop; this function never returns.

    Each pass builds a database from ``letters``, queries it, round-trips it
    through dumps()/loads() and through the file '/tmp/discodb', and compares
    every key with every value.  Presumably meant to be run while watching
    process memory for leaks -- TODO confirm.
    """
    while True:
        d = DiscoDB(zip(letters, ['abc'] * 1000))
        # Results are assigned to a throwaway name; only the calls matter.
        t = len(d.query('a'))
        t = len(d['b'])
        t = 'd' in d
        t = d.dumps()
        t = DiscoDB.loads(t)
        # Close the files explicitly: the loop never ends, so handles left to
        # the garbage collector could exhaust file descriptors on
        # interpreters without reference counting.
        with open('/tmp/discodb', 'w') as out:
            t = d.dump(out)
        with open('/tmp/discodb') as src:
            t = DiscoDB.load(src)
        for k in d.keys():
            for v in d.values():
                t = k == v
示例#6
0
 def create_db(self, name, data):
     """Dump `data` as a DiscoDB to ``<name>.db`` and return the file's path.

     The file is placed in the directory named by the ``DATA_DB_PATH``
     environment variable; a ``KeyError`` is raised if it is not set.
     """
     db_path = os.path.join(os.environ["DATA_DB_PATH"], name + ".db")
     # Build the database before opening the file, so a failure here does
     # not leave a truncated file behind.
     db = DiscoDB(data)
     # Close the handle as soon as the dump finishes instead of leaving it
     # to the garbage collector.
     with open(db_path, "w") as db_file:
         db.dump(db_file)
     return db_path
示例#7
0
#!/usr/bin/python

import sys
from discodb import DiscoDB


def read_data(instream):
    """Yield ``(key, value)`` pairs parsed from tab-separated lines.

    Trailing whitespace is stripped from each line before it is split on
    tabs.  Lines that do not split into exactly two fields are skipped.

    Args:
        instream: An iterable of text lines (e.g. an open file or stdin).
    """
    for line in instream:
        try:
            key, value = line.rstrip().split("\t")
        except ValueError:
            # Wrong number of fields: skip the malformed line (best effort).
            continue
        # Yield outside the try so that exceptions thrown into the generator
        # (e.g. GeneratorExit from close()) are not swallowed.
        yield (key, value)


# Build the database from the file named by argv[1], or from stdin when the
# argument is missing or is '-'.  The input file is closed once it is read.
if len(sys.argv) > 1 and sys.argv[1] != '-':
    with open(sys.argv[1], 'r') as instream:
        db = DiscoDB(read_data(instream))
else:
    db = DiscoDB(read_data(sys.stdin))

# Write the database to argv[2] (default 'out.discodb').  open() replaces the
# Python-2-only file() builtin and matches the open() call used for input.
with open(sys.argv[2] if len(sys.argv) > 2 else 'out.discodb', 'w') as outstream:
    db.dump(outstream)
示例#8
0
 def create_db(self, name, data):
     """Dump `data` as a DiscoDB to ``<name>.db`` and return the file's path.

     The file is placed in the directory named by the ``DATA_DB_PATH``
     environment variable; a ``KeyError`` is raised if it is not set.
     """
     db_path = os.path.join(os.environ['DATA_DB_PATH'], name + '.db')
     # Build the database before opening the file, so a failure here does
     # not leave a truncated file behind.
     db = DiscoDB(data)
     # Close the handle as soon as the dump finishes instead of leaving it
     # to the garbage collector.
     with open(db_path, 'w') as db_file:
         db.dump(db_file)
     return db_path
示例#9
0
 def create_db(self, name, data):
   """Dump `data` as a DiscoDB to ``<name>.db`` and return the file's path.

   The file is placed in the directory named by the ``DATA_DB_PATH``
   environment variable; a ``KeyError`` is raised if it is not set.
   """
   db_path = os.path.join(os.environ['DATA_DB_PATH'], name + '.db')
   # Build the database before opening the file, so a failure here does
   # not leave a truncated file behind.
   db = DiscoDB(data)
   # Close the handle as soon as the dump finishes instead of leaving it
   # to the garbage collector.
   with open(db_path, 'w') as db_file:
     db.dump(db_file)
   return db_path
示例#10
0
    for g2 in tags_srt_sub:

        x = db2.query(Q.parse(g1 + " & " + g2))
        g_lens.append(len(x))
    lens_ttls.append(g_lens)
    print(g1)
t2 = time.time()
# 500: 48 sec: 5.2k/sec
# 1k: 182: 5.5k/sec
# 2k: 722: 5.5k/sec


# ** writing/loading

# Dump the database to disk.  The context manager replaces the stray
# indentation under a plain assignment (an IndentationError) and closes the
# file even if dump() raises.
# NOTE(review): append mode 'a' is kept from the original; if the file already
# exists the dump lands after the old contents -- confirm 'w' is not intended.
with open('/home/johannes/Dropbox/gsss/thesis/anls/try1/add_data/db.disco', 'a') as fo:
    db.dump(fo)

# Load the database back from the same file.
with open('/home/johannes/Dropbox/gsss/thesis/anls/try1/add_data/db.disco', 'r') as fi:
    dbsx = DiscoDB.load(fi)


# ** multiprocessing theory
from multiprocessing import Process

def f(name):
    print('hello', name)
    for i in range(5):
        print(i)
        time.sleep(1)