示例#1
0
def run_disco(limit):
        """Run the "test_sort" job and check how many results it produced.

        limit is passed to the job as mem_sort_limit. An Exception is
        raised unless the job yields exactly 100000 results.
        """
        expected = int(1e5)
        results = disco.job(sys.argv[1], "test_sort",
                            tserver.makeurl([""] * int(1e3)),
                            fun_map, reduce = fun_reduce, nr_reduces = 50,
                            sort = True, mem_sort_limit = limit)
        # Only the number of results matters here, not their contents.
        k = sum(1 for _ in disco.result_iterator(results))
        if k != expected:
                # A bare string cannot be raised; wrap the message in a real
                # exception so it actually reaches the caller.
                raise Exception("not enough results: Got %d, expected %d"
                                % (k, expected))
示例#2
0
def test():
        """Run the "test_missingnode" job and verify the sum of its keys.

        The job gets twice as many inputs (0, 1, 2, ...) as the cluster
        reports worker slots. The integer values of all result keys must
        add up to the sum of those inputs, otherwise an Exception is
        raised. The job is purged on success.
        """
        slots = sum(node['max_workers']
                    for node in disco.nodeinfo()['available'])
        inputs = range(slots * 2)
        job = disco.new_job(name = "test_missingnode",
                            map = fun_map,
                            input = tserver.makeurl(inputs))
        results = job.wait()

        total = 0
        for key, _value in result_iterator(results):
                total += int(key)

        correct = sum(range(slots * 2))
        if total != correct:
                raise Exception("Invalid result. Got %d, expected %d" % (total, correct))
        job.purge()
示例#3
0
def run_disco(limit, name):
        """Run a sorted "test_sort_<name>" job and verify its results.

        limit is handed to the job as mem_sort_limit. Every result value
        must be the string "1000", and the base64-decoded result keys must
        cover each lowercase ASCII letter and each digit 0-9; a wrong value
        or a key that never shows up raises Exception, while an unexpected
        or repeated key fails the dictionary deletion. The job is purged on
        success.
        """
        job = disco.new_job(name = "test_sort_%s" % name,
                            input = tserver.makeurl([""] * int(100)),
                            map = fun_map,
                            reduce = fun_reduce,
                            nr_reduces = 1,
                            sort = True,
                            mem_sort_limit = limit)

        # Keys that have not been seen yet; each is removed when it turns up.
        expected_keys = list(string.ascii_lowercase) + range(10)
        pending = dict((str(key), True) for key in expected_keys)

        for encoded, count in result_iterator(job.wait()):
                if count != "1000":
                        raise Exception("Incorrect result: Expected 1000, got %s" % count)
                del pending[base64.decodestring(encoded)]

        if pending:
                raise Exception("Missing keys: %s" % " ".join(pending.keys()))
        job.purge()
示例#4
0
# Expected job output as a decimal string (the script below compares it with
# the single "result" entry). Presumably the product of all mapped values --
# verify before changing the inputs.
ANS = "".join([
        "1028380578493512611198383005758052057919386757620401",
        "58350002406688858214958513887550465113168573010369619140625",
])

def data_gen(path):
        """Return path minus its first character, repeated on ten lines.

        The ten copies are separated by newlines; there is no trailing
        newline.
        """
        entry = path[1:]
        return "\n".join(entry for _ in range(10))

def fun_map(e, params):
        """Emit a single pair whose key is the entry prefixed with '='
        and whose value is the entry itself. params is unused."""
        key = '=' + e
        return [(key, e)]

def fun_reduce(iter, out, params):
        """Multiply all values together and emit the product as "result".

        Each key must equal "=" followed by its value, otherwise an
        Exception("Corrupted key") is raised. Values are converted with
        int(). The product (1 for empty input) is passed to out.add().
        params is unused.
        """
        product = 1
        for key, value in iter:
                if key == "=" + value:
                        product *= int(value)
                else:
                        raise Exception("Corrupted key")
        out.add("result", product)

# Serve the generated input data, run the "test_simple" job against the
# Disco master named on the command line, and compare its single result
# with the expected answer.
tserver.run_server(data_gen)

inputs = [3, 5, 7, 11, 13, 17, 19, 23, 29, 31]
results = disco.job(
        sys.argv[1],
        "test_simple",
        tserver.makeurl(inputs),
        fun_map,
        reduce = fun_reduce,
        nr_reduces = 1,
        sort = False)

if not list(disco.result_iterator(results)) == [("result", ANS)]:
        raise Exception("Invalid answer")

print("ok")
示例#5
0
def fun_map(e, params):
        """Split one input entry into lowercase words and emit (word, 1)
        for each of them.

        Every non-word character (regex class \\W) is replaced by a space
        before the entry is lowercased and split on whitespace. params is
        unused.
        """
        # Raw string: "\W" in a plain literal is an invalid escape sequence
        # that newer interpreters warn about.
        words = re.sub(r"\W", " ", e).lower().split()
        return [(word, 1) for word in words]

def fun_reduce(iter, out, params):
        """Sum the values of each key and emit one (key, total) per key.

        iter yields (key, value) pairs; values are converted with int().
        The totals are handed to out.add() once all input has been read.
        params is unused.
        """
        totals = {}
        for key, value in iter:
                totals[key] = totals.get(key, 0) + int(value)
        # items() rather than iteritems(): the latter exists only on
        # Python 2 dictionaries.
        for key, total in totals.items():
                out.add(key, total)

# Run the "test_50k" word-count job and check the three expected totals.
tserver.run_server(data_gen)
results = disco.job(sys.argv[1], "test_50k",
                    tserver.makeurl([""] * int(5e4)),
                    fun_map, reduce = fun_reduce, nr_reduces = 300,
                    sort = False)

ANS = {"gutta": int(5e6), "cavat": int(1e7), "capidem": int(5e6)}
i = 0
for key, value in disco.result_iterator(results):
        i += 1
        if ANS[key] == int(value):
                print("Correct: %s %s" % (key, value))
        else:
                # A bare string cannot be raised; use a real exception.
                raise Exception("Results don't match")
if i != 3:
        raise Exception("Too few results")

# Clean up the job's data on the master once everything checked out.
disco.Disco(sys.argv[1]).purge(disco.util.jobname(results[0]))
示例#6
0
def fun_map3(e, params):
        # NOTE(review): `fail` is a bare name with no visible definition in
        # this script, so evaluating it presumably raises NameError and makes
        # the map task fail on purpose -- the status check further down
        # expects the "test_waitmany_3" job to end up "dead". Confirm before
        # changing this line.
        fail

def fun_map4(e, params):
        """Sleep for four seconds, then emit no results.

        Both arguments are ignored.
        """
        delay_seconds = 4
        time.sleep(delay_seconds)
        return list()

# Start four jobs at once, wait for all of them through disco.results(),
# then verify each job's final status.
tserver.run_server(data_gen)
disco = Disco(sys.argv[1])

jobs = []
for i, m in enumerate([fun_map1, fun_map2, fun_map3, fun_map4]):
        # Job names are numbered from 1: test_waitmany_1 .. test_waitmany_4.
        jobs.append(disco.new_job(
                name = "test_waitmany_%d" % (i + 1),
                input = tserver.makeurl([""] * 5),
                map = m))

# disco.results() returns the jobs that finished plus the ones still
# pending; keep polling until nothing is pending.
res = []
while jobs:
        ready, jobs = disco.results(jobs, timeout = 2000)
        res.extend(ready)

for n, r in res:
        # Only the third job (fun_map3) is expected to die; every other job
        # must finish normally.
        expected = "dead" if n.startswith("test_waitmany_3") else "ready"
        if r[0] != expected:
                raise Exception("Invalid job status: %s" % n)
        disco.purge(n)
示例#7
0
        return "\n".join(ani)

def fun_map(e, params):
        """Append params['suffix'] to the entry's key and emit one pair.

        A tuple entry is read as (key, counter): the suffix is appended
        to the key and the counter, converted with int(), is incremented
        by one. Any other entry is used as the key itself and paired with
        a counter of 0.
        """
        suffix = params['suffix']
        # isinstance() also accepts tuple subclasses, which an exact type
        # comparison would wrongly send down the plain-entry branch.
        if isinstance(e, tuple):
                return [(e[0] + suffix, int(e[1]) + 1)]
        return [(e + suffix, 0)]

def fun_reduce(iter, out, params):
        """Re-emit every (key, value) pair with "-" appended to the key.

        params is unused.
        """
        emit = out.add
        for key, value in iter:
                emit(key + "-", value)

# Serve the test data and connect to the Disco master named on the
# command line.
tserver.run_server(data_gen)
disco = Disco(sys.argv[1])

# First link of the chain: its input comes from the test server.
results = disco.new_job(
        name = "test_chain_0",
        input = tserver.makeurl([""] * 100),
        map = fun_map,
        reduce = fun_reduce,
        nr_reduces = 4,
        sort = False,
        params = {'suffix': '0'}).wait()

# Links 1..9: each job reads the previous job's results through
# chain_reader and uses its own index as the suffix.
i = 1
while i < 10:
        nresults = disco.new_job(
                name = "test_chain_%d" % i,
                input = results,
                map = fun_map,
                reduce = fun_reduce,
                nr_reduces = 4,
                map_reader = chain_reader,
                sort = False,
                params = {'suffix': str(i)}).wait()

        # The previous job is purged once its successor has finished.
        disco.purge(jobname(results[0]))
        results = nresults
        i += 1

for key, value in result_iterator(results):
示例#8
0
        return "test_%s\n" % path[1:]

def fun_reduce(iter, out, params):
        """Re-emit every pair with "red_" prefixed to both key and value.

        params is unused.
        """
        prefix = "red_"
        for key, value in iter:
                out.add(prefix + key, prefix + value)
        
# Run a job whose map phase is the external program "ext_test" and check
# the nine (key, value) pairs that come back through fun_reduce.
tserver.run_server(data_gen)

inputs = ["ape", "cat", "dog"]
params = {"test1": "1,2,3",
          "one two three": "dim\ndam\n",
          "dummy": "value"}

job = Disco(sys.argv[1]).new_job(
            name = "test_external",
            input = tserver.makeurl(inputs),
            map = external(["ext_test"]),
            reduce = fun_reduce,
            ext_params = params,
            nr_reduces = 1,
            sort = False)

# Sort by value so that the three results of each input are adjacent and
# in the same order as `inputs`.
results = sorted((v, k) for k, v in result_iterator(job.wait()))

# Check the count first: with extra results the per-item check below would
# otherwise die with an IndexError instead of a readable message.
if len(results) != 9:
        raise Exception("Wrong number of results: %u vs. 9" % len(results))

for i, (v, k) in enumerate(results):
        # Floor division keeps the index an integer on every Python version.
        if k != "red_dkey" or v != "red_test_%s" % inputs[i // 3]:
                raise Exception("Invalid answer: %s, %s" % (k, v))
示例#9
0
def fun_map(e, params):
        """Emit (word, 1) for every lowercase word found in the entry.

        Non-word characters (regex class \\W) act as separators: they are
        replaced by spaces before lowercasing and splitting on whitespace.
        params is unused.
        """
        # Raw string, because "\W" is not a valid escape sequence in an
        # ordinary string literal.
        cleaned = re.sub(r"\W", " ", e)
        return [(w, 1) for w in cleaned.lower().split()]

def fun_reduce(iter, out, params):
        """Add up the integer values seen for each key and emit the totals.

        iter yields (key, value) pairs, values being converted with
        int(). One (key, total) pair per distinct key is passed to
        out.add() after the input is exhausted. params is unused.
        """
        sums = {}
        for k, v in iter:
                sums[k] = sums.get(k, 0) + int(v)
        # dict.iteritems() does not exist on Python 3; items() works on
        # both major versions.
        for k, total in sums.items():
                out.add(k, total)

# Run the "test_50k" word-count job and check the three expected totals.
tserver.run_server(data_gen)
job = Disco(sys.argv[1]).new_job(name="test_50k",
                                 input=tserver.makeurl([""] * int(5e4)),
                                 map=fun_map,
                                 reduce=fun_reduce,
                                 nr_reduces=300,
                                 sort=False)

ANS = {"gutta": int(5e6), "cavat": int(1e7), "capidem": int(5e6)}
i = 0
for key, value in result_iterator(job.wait()):
        i += 1
        if ANS[key] == int(value):
                print("Correct: %s %s" % (key, value))
        else:
                # A bare string cannot be raised; use a real exception.
                raise Exception("Results don't match")
if i != 3:
        raise Exception("Wrong number of results: Got %d expected 3" % i)
示例#10
0
        return [(e[0] + params["suffix"], int(e[1]) + 1)]
    else:
        return [(e + params["suffix"], 0)]


def fun_reduce(iter, out, params):
    """Pass every pair on to out.add() with "-" appended to its key.

    params is unused.
    """
    for pair in iter:
        key, value = pair
        out.add(key + "-", value)


# Serve the test data, connect to the master given on the command line and
# run the first job of the chain to completion.
tserver.run_server(data_gen)
disco = Disco(sys.argv[1])

results = disco.new_job(
    name="test_chain_0", input=tserver.makeurl([""] * 100),
    map=fun_map, reduce=fun_reduce, nr_reduces=4,
    sort=False, clean=True, params={"suffix": "0"},
).wait()

i = 1
while i < 10:
    nresults = disco.new_job(
        name="test_chain_%d" % i,
        input=results,
        map=fun_map,
        reduce=fun_reduce,
示例#11
0
from disco import Disco

def data_gen(path):
        """Return the same single line of input for every requested path."""
        line = "1 2 3\n"
        return line

def fun_map(e, params):
        """Sleep for 100 seconds and then emit nothing.

        The time module is imported inside the function body. Both
        arguments are ignored.
        """
        import time
        seconds = 100
        time.sleep(seconds)
        return list()

# Start a long-running job, kill it after ten seconds and verify that the
# master reports it as dead.
disco = Disco(sys.argv[1])
num = sum(node['max_workers'] for node in disco.nodeinfo()['available'])
sys.stderr.write("%s slots available\n" % (num,))
tserver.run_server(data_gen)
job = disco.new_job(
        name = "test_kill",
        input = tserver.makeurl([""] * num * 2),
        map = fun_map)

time.sleep(10)
sys.stderr.write("Killing %s\n" % (job.name,))
job.kill()
# Wait five seconds before asking the master for the job's status.
time.sleep(5)
if job.jobinfo()['active'] != "dead":
        raise Exception("Killing failed")
print("ok")
job.purge()




示例#12
0
# Entries for which data_gen raises a failed reply the first time they are
# requested; data_gen removes each entry as it fails.
fail = list("123")

def data_gen(path):
        """Serve the requested entry times ten, failing once for listed ones.

        The entry is path without its first character. If it is still in
        the module-level `fail` list it is removed from that list and
        tserver.FailedReply is raised. Otherwise the entry, converted
        with int() and multiplied by ten, is returned as a
        newline-terminated string.
        """
        lock.acquire()
        try:
                # The membership test and the removal happen while the lock
                # is held.
                e = path[1:]
                must_fail = e in fail
                if must_fail:
                        fail.remove(e)
        finally:
                # Single release point: the lock is freed even if something
                # above raises.
                lock.release()
        if must_fail:
                raise tserver.FailedReply()
        return str(int(e) * 10) + "\n"

def fun_map(e, params):
        """Emit one pair: the entry as an integer times ten, with an empty
        string as its value. params is unused."""
        scaled = int(e) * 10
        return [(scaled, "")]

# Run the "test_tempfail" job over the inputs "0".."9" and check that the
# integer result keys add up to 4500.
tserver.run_server(data_gen)

job = Disco(sys.argv[1]).new_job(name = "test_tempfail",
                                 input = tserver.makeurl(map(str, range(10))),
                                 map = fun_map)

res = sum(int(key) for key, _value in result_iterator(job.wait()))
if not res == 4500:
        raise Exception("Invalid result: Got %d, expected 4500" % res)

job.purge()
print("ok")

示例#13
0
文件: test_simple.py 项目: rca/disco

def fun_map(e, params):
    """Return a one-element list pairing "=" + entry with the entry.

    params is unused.
    """
    pair = ("=" + e, e)
    return [pair]


def fun_reduce(iter, out, params):
    """Emit the product of all values under the key "result".

    A key that is not "=" followed by its value raises
    Exception("Corrupted key"). Values are converted with int(); an empty
    input yields a product of 1. params is unused.
    """
    total = 1
    for key, value in iter:
        if not key == "=" + value:
            raise Exception("Corrupted key")
        total = total * int(value)
    out.add("result", total)


# Run the "test_simple" job, compare its single result with ANS, print the
# raw result list and purge the job.
tserver.run_server(data_gen)

inputs = [3, 5, 7, 11, 13, 17, 19, 23, 29, 31]
results = disco.job(
    sys.argv[1],
    "test_simple",
    tserver.makeurl(inputs),
    fun_map,
    reduce=fun_reduce,
    nr_reduces=1,
    sort=False,
)

if not list(disco.result_iterator(results)) == [("result", ANS)]:
    raise Exception("Invalid answer")

print(results)

disco.Disco(sys.argv[1]).purge(disco.util.jobname(results[0]))

print("ok")
示例#14
0
        if x > 10:
                return 1
        else:
                return 0

def data_gen(path):
        """Build ten newline-separated copies of path without its first
        character (no trailing newline)."""
        copies = [path[1:]] * 10
        return "\n".join(copies)

def fun_map(e, params):
        """Emit the entry paired with params.f1 applied to it.

        params.f1 is called with the entry converted by int() and with
        params.x; its return value becomes the pair's value.
        """
        value = params.f1(int(e), params.x)
        return [(e, value)]

def fun_reduce(iter, out, params):
        """Re-emit every key with params.f2 applied to its integer value."""
        for key, raw in iter:
                number = int(raw)
                out.add(key, params.f2(number))

# Run the "test_params" job, which receives a value and two functions
# through disco.Params, and re-check every result locally.
tserver.run_server(data_gen)

inputs = range(10)
results = disco.job(sys.argv[1], "test_params", tserver.makeurl(inputs),
                    fun_map,
                    params = disco.Params(x = 5, f1 = fun1, f2 = fun2),
                    reduce = fun_reduce,
                    nr_reduces = 1,
                    sort = False)

for x, y in disco.result_iterator(results):
        if fun2(int(x) + 5) != int(y):
                # A bare string cannot be raised; use a real exception.
                raise Exception("Invalid result: %s and %s" % (x, y))

print("ok")
示例#15
0
                job.purge()
        else:
                raise Exception("Rate limit failed")

def data_gen(path):
        """Return one million "badger" lines, whatever path is requested."""
        line = "badger\n"
        return line * 1000000

def fun_map(e, params):
        """Pass the entry to msg() and emit no results. params is unused."""
        msg(e)
        return list()

def fun_map2(e, params):
        """Emit no results; both arguments are ignored."""
        return list()

# Three jobs over the same single input. The first two are handed to
# check_dead() five seconds after they were started; the third is simply
# waited for.
tserver.run_server(data_gen)
inputs = tserver.makeurl([1])

job = Disco(sys.argv[1]).new_job(
        name = "test_ratelimit",
        input = inputs,
        map = fun_map)
time.sleep(5)
check_dead(job)

job = Disco(sys.argv[1]).new_job(
        name = "test_ratelimit2",
        input = inputs,
        map = fun_map2,
        status_interval = 1)
time.sleep(5)
check_dead(job)

job = Disco(sys.argv[1]).new_job(
        name = "test_ratelimit3",
        input = inputs,
        map = fun_map2,
        status_interval = 0)
job.wait()
示例#16
0
def fun_reduce(iter, out, params):
        """Accumulate an integer total per key and emit each total once.

        iter yields (key, value) pairs, with values converted by int().
        After all input has been consumed, out.add(key, total) is called
        for every distinct key. params is unused.
        """
        counts = {}
        for word, amount in iter:
                counts[word] = counts.get(word, 0) + int(amount)
        # items() is available on both Python 2 and 3 dictionaries, unlike
        # iteritems().
        for word, total in counts.items():
                out.add(word, total)

# Run the word-count job with profiling switched on and compare each
# result with the expected totals.
tserver.run_server(data_gen)

job = Disco(sys.argv[1]).new_job(
        name = "test_profile",
        input = tserver.makeurl([""] * int(100)),
        map = really_unique_function_name,
        reduce = fun_reduce,
        nr_reduces = 30,
        sort = False,
        profile = True)

ANS = {"gutta": int(1e4), "cavat": int(2e4), "capidem": int(1e4)}
i = 0
for key, value in result_iterator(job.wait()):
        i += 1
        if ANS[key] == int(value):
                print("Correct: %s %s" % (key, value))
        else:
                # A bare string cannot be raised; use a real exception.
                raise Exception("Results don't match (%s): Got %d expected %d" %
                                (key, int(value), ANS[key]))
示例#17
0
def fun_map(e, params):
        """Sleep for a random 1-3 seconds, then emit the entry paired with 0.

        time and random are imported inside the function body. params is
        unused.
        """
        import time, random
        delay = random.randint(1, 3)
        time.sleep(delay)
        return [(e, 0)]

def fun_reduce(iter, out, params):
        """Re-emit every pair with its key wrapped in square brackets.

        params is unused.
        """
        for key, value in iter:
                bracketed = "[%s]" % key
                out.add(bracketed, value)

# Start five jobs one second apart, each over its own slice of the
# shuffled inputs, and prepare the table of expected result keys.
tserver.run_server(data_gen)

disco = Disco(sys.argv[1])
num = sum(node['max_workers'] for node in disco.nodeinfo()['available'])
sys.stderr.write("%s slots available\n" % (num,))
inputs = tserver.makeurl(range(num * 10))
random.shuffle(inputs)

jobs = []
for i in range(5):
        # Job i takes the i-th run of num * 2 consecutive inputs.
        jobs.append(disco.new_job(
                name = "test_async_%d" % i,
                input = inputs[i * (num * 2):(i + 1) * (num * 2)],
                map = fun_map,
                reduce = fun_reduce,
                nr_reduces = 11,
                sort = False))
        time.sleep(1)

# One entry per input, keyed the way fun_reduce formats its keys.
all = dict.fromkeys(("[%s]" % i for i in range(num * 10)), 0)
for job in jobs:
        results = job.wait()
        print "Job", job, "done"
        for k, v in result_iterator(results):
示例#18
0
def data_gen(path):
    """Return ten lines that each hold path without its first character.

    Lines are joined by newlines, with no trailing newline.
    """
    value = path[1:]
    lines = [value for _ in range(10)]
    return "\n".join(lines)


def fun_map(e, params):
    """Map an entry to the single pair ("=" + entry, entry).

    params is unused.
    """
    return [("".join(("=", e)), e)]


def fun_reduce(iter, out, params):
    """Check every key against its value and emit the product of the values.

    A key must be "=" followed by its value, otherwise
    Exception("Corrupted key") is raised. The product of the int()-converted
    values (1 when there is no input) is added to out under "result".
    params is unused.
    """
    acc = 1
    for key, value in iter:
        expected_key = "=" + value
        if key != expected_key:
            raise Exception("Corrupted key")
        acc *= int(value)
    out.add("result", acc)


# Run the "test_simple" job, compare its single result with ANS and purge
# the job afterwards.
tserver.run_server(data_gen)

inputs = [3, 5, 7, 11, 13, 17, 19, 23, 29, 31]
job = Disco(sys.argv[1]).new_job(
    name="test_simple",
    input=tserver.makeurl(inputs),
    map=fun_map,
    reduce=fun_reduce,
    nr_reduces=1,
    sort=False,
)

if not list(result_iterator(job.wait())) == [("result", ANS)]:
    raise Exception("Invalid answer")

job.purge()
print("ok")
示例#19
0
# Build N groups of ten "i:j" inputs (j = i .. i + 9) and record the
# expected sum of each group's j values in `results`, keyed by str(i).
inputs = []
for i in range(N):
        b = range(i, i + 10)
        inputs.extend("%d:%d" % (i, j) for j in b)
        results[str(i)] = str(sum(b))

random.shuffle(inputs)

disco = Disco(sys.argv[1])

print("Running two map jobs..")

# Floor division keeps the split point an integer on every Python version;
# a float is not a valid slice index.
half = len(inputs) // 2

map1 = disco.new_job(
                name = "test_onlyreduce1",
                input = tserver.makeurl(inputs[:half]),
                map = fun_map,
                partition = fun_partition,
                nr_reduces = N)

map2 = disco.new_job(
                name = "test_onlyreduce2",
                input = tserver.makeurl(inputs[half:]),
                map = fun_map,
                partition = fun_partition,
                nr_reduces = N)

results1 = map1.wait()
print("map1 done")
results2 = map2.wait()
print("map2 done")