Exemplo n.º 1
0
            def read_work(reader_key):
                """Read one client's share of shuffle partitions from Pocket.

                For every map partition assigned to this client, the matching
                shuffle object for the current reduce id is fetched with
                ``pocket.get_buffer``, base64-decoded, viewed as an array of
                ``recordType`` records, sorted in place by the ``'key'`` field
                and appended to the enclosing ``inputs`` list.

                Relies on names from the enclosing scope: ``rounds``,
                ``taskId``, ``numPartitions``, ``pocket``, ``pocket_namenode``,
                ``jobid``, ``recordType`` and ``inputs``.

                Args:
                    reader_key: Mapping with the entries ``'client_id'``,
                        ``'roundIdx'`` and ``'key-per-client'``. The first and
                        last are converted with ``int()``.

                Raises:
                    RuntimeError: if ``pocket.get_buffer`` reports a non-zero
                        status for a key.
                    Exception: any other error raised while reading or decoding
                        a key is logged with ``print`` and re-raised unchanged.
                """
                client_id = int(reader_key['client_id'])
                reduceId = rounds * taskId + reader_key['roundIdx']
                key_per_client = int(reader_key['key-per-client'])

                # Size of the receive buffer handed to pocket.get_buffer for
                # every key. NOTE(review): presumably an upper bound on the
                # encoded object size -- confirm against the writer side.
                datasize = 17000000

                # This client owns a contiguous run of map ids, clipped to the
                # total number of partitions.
                first_map = key_per_client * client_id
                last_map = min(key_per_client * (client_id + 1), numPartitions)

                objs = []
                for mapId in range(first_map, last_map):
                    keyname = "shuffle-part-" + str(mapId) + "-" + str(reduceId)
                    # Prefix the key with the first 8 hex digits of its MD5.
                    m = hashlib.md5()
                    m.update(keyname.encode('utf-8'))
                    randomized_keyname = "shuffle-" + m.hexdigest()[:8] + "-part-" + str(mapId) + "-" + str(reduceId)
                    print("The name of the key to read is: " + randomized_keyname)
                    try:
                        # A fresh space-filled buffer per key; the decoded data
                        # is taken from this same object after the call, so
                        # get_buffer is expected to fill it in place.
                        textback = " " * datasize
                        status = pocket.get_buffer(pocket_namenode, randomized_keyname, textback, datasize, jobid)
                        # A non-zero status is treated as failure, matching how
                        # other pocket.get_buffer callers check the result.
                        if status != 0:
                            raise RuntimeError("get buffer failed: " + randomized_keyname)
                        print("Successfully read")
                        # b64decode's default (non-validating) mode discards
                        # characters outside the base64 alphabet, so leftover
                        # space padding in the buffer is ignored.
                        original_text = b64decode(textback.encode('utf-8'))
                        print("last ten bytes after padding: " + textback[-10:])

                        objs.append(original_text)
                    except Exception:
                        print("reading error key " + randomized_keyname)
                        raise

                # np.frombuffer replaces the deprecated binary mode of
                # np.fromstring. It returns a read-only view over the bytes
                # object, so copy to obtain a writable array for in-place sort.
                data = [np.frombuffer(obj, dtype=recordType).copy() for obj in objs]
                for records in data:
                    records.sort(order='key')
                inputs.extend(data)
Exemplo n.º 2
0
def pocket_read_buffer(p, jobid, iter, text_back_tmp, size):
    """Issue ``iter`` sequential ``pocket.get_buffer`` reads into one buffer.

    The keys ``tmp1-0`` .. ``tmp1-<iter - 1>`` are requested in order. Every
    request is given the same space-filled string of length ``size``, which
    is allocated once up front.

    Args:
        p: Passed through as the first argument of ``pocket.get_buffer``.
        jobid: Passed through as the job id argument of ``pocket.get_buffer``.
        iter: Number of keys to read.
        text_back_tmp: Unused; the function allocates its own buffer.
        size: Length of the receive buffer, also passed as the read length.

    Raises:
        Exception: if ``pocket.get_buffer`` returns a non-zero status.
    """
    receive_buffer = " " * size
    for index in range(iter):
        key = "tmp1-{}".format(index)
        status = pocket.get_buffer(p, key, receive_buffer, size, jobid)
        if status == 0:
            continue
        raise Exception("get buffer failed: " + key)
Exemplo n.º 3
0
def pocket_read_buffer(p, jobid, iter, text_back, size, id):
    """Issue ``iter`` sequential ``pocket.get_buffer`` reads into ``text_back``.

    The keys ``/tmp<id>-0`` .. ``/tmp<id>-<iter - 1>`` are requested in order,
    each one into the caller-supplied ``text_back`` buffer.

    Args:
        p: Passed through as the first argument of ``pocket.get_buffer``.
        jobid: Passed through as the job id argument of ``pocket.get_buffer``.
        iter: Number of keys to read.
        text_back: Receive buffer handed to every ``pocket.get_buffer`` call.
        size: Read length passed to ``pocket.get_buffer``.
        id: Value embedded in each key name via ``str(id)``.

    Raises:
        Exception: if ``pocket.get_buffer`` returns a non-zero status.
    """
    # range() instead of xrange(): xrange does not exist on Python 3, while
    # range works on both Python 2 and Python 3.
    for i in range(iter):
        dst_filename = '/tmp' + str(id) + '-' + str(i)
        r = pocket.get_buffer(p, dst_filename, text_back, size, jobid)
        if r != 0:
            raise Exception("get buffer failed: " + dst_filename)