Example #1
def clear_bucket(endpoint, bucket_name):
    # list and delete keys repeatedly until the bucket (hash) is empty
    objects = hlist_keys(endpoint, bucket_name)
    while objects is not None:
        if len(objects) > 0:
            print("delete files {} in bucket {}".format(objects, bucket_name))
            hdelete_keys(endpoint, bucket_name, objects)
        objects = hlist_keys(endpoint, bucket_name)
    return True
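All of these examples lean on helpers from archived.elasticache that are not shown here. Below is a minimal sketch of plausible implementations over a Redis hash, assuming hlist_keys returns None when the hash is empty; the behavior is inferred from the call sites, not confirmed by the source.

def hlist_keys(endpoint, bucket_name):
    # hypothetical: HKEYS returns [] for a missing hash; callers expect None
    keys = endpoint.hkeys(bucket_name)
    return keys if keys else None

def hdelete_keys(endpoint, bucket_name, keys):
    # hypothetical: HDEL removes the given fields from the hash
    return endpoint.hdel(bucket_name, *keys)

def hget_object(endpoint, bucket_name, key):
    # hypothetical: HGET returns the raw bytes stored under the field
    return endpoint.hget(bucket_name, key)

def hset_object(endpoint, bucket_name, key, value):
    # hypothetical: HSET stores raw bytes under the field
    return endpoint.hset(bucket_name, key, value)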
Example #2
def handler(event, context):
    import redis
    from archived.elasticache import hlist_keys, hdelete_keys
    location = "test.fifamc.ng.0001.euc1.cache.amazonaws.com"
    endpoint = redis.Redis(host=location, port=6379, db=0)
    # list the keys, delete them, then re-list to confirm the bucket is empty
    keys = hlist_keys(endpoint, "tmp-updates")
    print(keys)
    hdelete_keys(endpoint, "tmp-updates", keys)
    print(hlist_keys(endpoint, "tmp-updates"))
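For context, a producer could populate "tmp-updates" with the matching hset_object helper. A hedged sketch follows; the key name and payload are invented:

import pickle

# hypothetical producer side: store a pickled update under a made-up key
update = {"step": 0, "delta": [0.1, 0.2]}
hset_object(endpoint, "tmp-updates", "update_0", pickle.dumps(update))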
Example #3
import pickle

import numpy as np


def merge_w_b_layers(endpoint, bucket_name, num_workers, prefix):
    # sum the pickled weight/bias gradients (or model layers) uploaded by
    # all workers, deleting each file once it has been merged
    num_files = 0
    merged_value = []

    while num_files < num_workers:
        objects = hlist_keys(endpoint, bucket_name)
        if objects is not None:
            for obj in objects:
                file_key = bytes.decode(obj)
                data_bytes = hget_object(endpoint, bucket_name, file_key)
                data = pickle.loads(data_bytes)

                for i in range(len(data)):
                    if num_files == 0:
                        merged_value.append(
                            np.zeros(data[i].shape, dtype=data[i].dtype))

                    merged_value[i] = merged_value[i] + data[i]

                num_files = num_files + 1
                hdelete_keys(endpoint, bucket_name, [file_key])

    # average weights
    if prefix == 'w_':
        merged_value = [value / float(num_workers) for value in merged_value]

    return merged_value
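A hedged usage sketch for merge_w_b_layers: two workers upload identical pickled lists of arrays, and the merger averages them because the prefix is 'w_'. The bucket and key names below are invented:

import pickle
import numpy as np

grads = [np.ones((2, 2), dtype=np.float32)]
hset_object(endpoint, "grad-bucket", "w_0", pickle.dumps(grads))
hset_object(endpoint, "grad-bucket", "w_1", pickle.dumps(grads))
merged = merge_w_b_layers(endpoint, "grad-bucket", num_workers=2, prefix="w_")
# the 'w_' prefix triggers division by num_workers, so merged[0] is all ones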
Example #4
import numpy as np


def merge_w_b_grads(endpoint,
                    bucket_name,
                    num_workers,
                    dtype,
                    w_shape,
                    b_shape,
                    w_grad_prefix="w_grad_",
                    b_grad_prefix="b_grad_"):
    num_w_files = 0
    num_b_files = 0
    w_grad_sum = np.zeros(w_shape, dtype=dtype)
    b_grad_sum = np.zeros(b_shape, dtype=dtype)

    while num_w_files < num_workers or num_b_files < num_workers:

        objects = hlist_keys(endpoint, bucket_name)
        while objects is not None:
            for obj in objects:
                file_key = bytes.decode(obj)
                print("the name of the file being processed = {}".format(
                    file_key))
                bytes_data = np.frombuffer(
                    hget_object(endpoint, bucket_name, file_key), dtype)
                if file_key.startswith(w_grad_prefix):
                    w_grad = bytes_data.reshape(w_shape)
                    #print("merge the {}-th weight grad {} in bucket {} = {}".format(num_w_files, file_key, bucket_name, w_grad[0][:5]))
                    w_grad_sum = w_grad_sum + w_grad
                    num_w_files = num_w_files + 1
                elif file_key.startswith(b_grad_prefix):
                    b_grad = bytes_data.reshape(b_shape)
                    #print("merge the {}-th bias grad {} in bucket {} = {}".format(num_b_files, file_key, bucket_name, b_grad))
                    b_grad_sum = b_grad_sum + b_grad
                    num_b_files = num_b_files + 1

                hdelete_keys(endpoint, bucket_name, [file_key])
            objects = hlist_keys(endpoint, bucket_name)
            #print("the keys being deleted = {}".format(objects))

    return w_grad_sum / num_workers, b_grad_sum / num_workers
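For illustration, here is a hypothetical worker side that produces the flat byte payloads this merger expects; the bucket name and shapes are invented:

import numpy as np

w_grad = np.random.rand(4, 3).astype(np.float32)
b_grad = np.random.rand(3).astype(np.float32)
hset_object(endpoint, "grad-bucket", "w_grad_0", w_grad.tobytes())
hset_object(endpoint, "grad-bucket", "b_grad_0", b_grad.tobytes())
w_avg, b_avg = merge_w_b_grads(endpoint, "grad-bucket", num_workers=1,
                               dtype=np.float32, w_shape=(4, 3), b_shape=(3,))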
Example #5
def delete_expired_w_b_grads(endpoint,
                             bucket_name,
                             cur_epoch,
                             cur_batch,
                             w_prefix="w_grad_",
                             b_prefix="b_grad_"):
    objects = hlist_keys(endpoint, bucket_name)
    if objects is not None:
        for obj in objects:
            file_key = bytes.decode(obj)
            if file_key.startswith(w_prefix) or file_key.startswith(b_prefix):
                key_splits = file_key.split("_")
                key_batch = int(key_splits[-1])
                key_epoch = int(key_splits[-2])
                if key_epoch < cur_epoch or (key_epoch == cur_epoch
                                             and key_batch < cur_batch):
                    print("delete object {} in bucket {}".format(
                        file_key, bucket_name))
                    hdelete_keys(endpoint, bucket_name, [file_key])
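The split logic implies key names of the form "<prefix><epoch>_<batch>". A hypothetical walkthrough at cur_epoch=2, cur_batch=3, with an invented bucket and keys:

# "w_grad_1_9" -> epoch 1 < 2          -> deleted
# "w_grad_2_2" -> epoch 2, batch 2 < 3 -> deleted
# "b_grad_2_3" -> current position     -> kept
delete_expired_w_b_grads(endpoint, "grad-bucket", cur_epoch=2, cur_batch=3)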
Example #6
import numpy as np


def compute_average_centroids(endpoint, avg_cent_bucket, worker_cent_bucket,
                              num_workers, shape, epoch, dt):
    num_files = 0
    centroids_vec_list = []
    error_list = []
    # poll the worker bucket until every worker has uploaded its centroids,
    # restarting the accumulation from scratch on each pass
    while num_files < num_workers:
        num_files = 0
        centroids_vec_list = []
        error_list = []
        objects = hlist_keys(endpoint, worker_cent_bucket)
        if objects is not None:
            for obj in objects:
                file_key = bytes.decode(obj)
                cent_with_error = np.frombuffer(hget_object(
                    endpoint, worker_cent_bucket, file_key),
                                                dtype=dt)
                cent = cent_with_error[0:-1].reshape(shape)
                error = cent_with_error[-1]
                centroids_vec_list.append(cent)
                error_list.append(error)
                num_files = num_files + 1
        else:
            print(f"no object in the {worker_cent_bucket}")

    print("All workers are ready.")
    avg = avg_centroids(centroids_vec_list)
    avg_error = np.mean(np.array(error_list))
    clear_bucket(endpoint, worker_cent_bucket)

    print(
        f"Write averaged centroids {avg} for {epoch}-th epoch to bucket {avg_cent_bucket}"
    )
    print(f"Average error: {avg_error}")
    res = avg.reshape(-1)
    res = np.append(res, avg_error)
    hset_object(endpoint, avg_cent_bucket, f"avg-{epoch}", res.tobytes())
    return 1
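avg_centroids is not shown in these excerpts; a minimal stand-in consistent with how it is called above (an element-wise mean over the per-worker centroid matrices) might be:

import numpy as np

def avg_centroids(centroids_vec_list):
    # hypothetical: average the stacked per-worker centroid matrices
    return np.mean(np.stack(centroids_vec_list), axis=0)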