Пример #1
0
def init_ray_context_fixture():
    """Start a local Spark context and a RayContext, yield, then stop both.

    Generator-style setup/teardown: everything before ``yield`` is setup and
    everything after it is teardown. (The decorator that registers this as a
    fixture, if any, is not visible in this snippet.)

    Teardown is wrapped in ``try``/``finally`` so that the Spark context is
    stopped even when creating or initialising the RayContext fails, and even
    when stopping Ray itself raises.
    """
    from zoo import init_spark_on_local
    from zoo.ray import RayContext
    sc = init_spark_on_local(cores=4, spark_log_level="INFO")
    try:
        ray_ctx = RayContext(sc=sc, object_store_memory="1g")
        ray_ctx.init()
        # Only stop Ray once init() has succeeded.
        try:
            yield
        finally:
            ray_ctx.stop()
    finally:
        sc.stop()
Пример #2
0
def rayonspark_fixture():
    """Start Spark and Ray, publish them as module globals, yield, then stop both.

    The Spark context and the RayContext are bound to the module-level names
    ``sc`` and ``ray_ctx`` so that code outside this function can reach them
    while the generator is suspended at ``yield``.

    Teardown is wrapped in ``try``/``finally`` so that the Spark context is
    stopped even when creating or initialising the RayContext fails, and even
    when stopping Ray itself raises.
    """
    from zoo import init_spark_on_local
    from zoo.ray import RayContext
    global sc
    global ray_ctx
    sc = init_spark_on_local(cores=8, spark_log_level="INFO")
    try:
        ray_ctx = RayContext(sc=sc, object_store_memory="1g")
        ray_ctx.init()
        # Only stop Ray once init() has succeeded.
        try:
            yield
        finally:
            ray_ctx.stop()
    finally:
        sc.stop()
Пример #3
0
    def test_local(self):
        """Launch four Ray actors on a local Spark-backed Ray cluster and print their hostnames.

        Spark and Ray are always shut down, even if actor creation or
        ``ray.get`` raises, so a failure here does not leave live contexts
        behind for whatever runs next.
        """
        @ray.remote
        class TestRay:
            def hostname(self):
                import socket
                return socket.gethostname()

        sc = init_spark_on_local(cores=4)
        try:
            ray_ctx = RayContext(sc=sc, object_store_memory="1g")
            ray_ctx.init()
            # Only stop Ray once init() has succeeded.
            try:
                actors = [TestRay.remote() for _ in range(4)]
                print(ray.get([actor.hostname.remote() for actor in actors]))
            finally:
                ray_ctx.stop()
        finally:
            sc.stop()
Пример #4
0
def orca_data_fixture():
    """Start Spark and a RayContext (published as global ``ray_ctx``), yield, then stop both.

    When both ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` are set in
    the environment they are passed to the RayContext through its ``env``
    argument; otherwise the RayContext is created without ``env``.

    Previously the RayContext built with the credentials was immediately
    overwritten by a second one created without ``env``, so the credentials
    were never forwarded. Unset variables would also have been placed into
    ``env`` as ``None``.

    Teardown is wrapped in ``try``/``finally`` so that the Spark context is
    stopped even when creating or initialising the RayContext fails, and even
    when stopping Ray itself raises.
    """
    from zoo import init_spark_on_local
    from zoo.ray import RayContext
    global ray_ctx
    sc = init_spark_on_local(cores=4, spark_log_level="INFO")
    try:
        access_key_id = os.getenv("AWS_ACCESS_KEY_ID")
        secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")
        if access_key_id is not None and secret_access_key is not None:
            ray_ctx = RayContext(sc=sc,
                                 object_store_memory="1g",
                                 env={
                                     "AWS_ACCESS_KEY_ID": access_key_id,
                                     "AWS_SECRET_ACCESS_KEY": secret_access_key
                                 })
        else:
            ray_ctx = RayContext(sc=sc, object_store_memory="1g")
        ray_ctx.init()
        # Only stop Ray once init() has succeeded.
        try:
            yield
        finally:
            ray_ctx.stop()
    finally:
        sc.stop()
Пример #5
0
def orca_data_fixture():
    """Enable Orca eager mode, start Spark and a RayContext, yield, then stop both.

    ``ZooContext._orca_eager_mode`` is set to ``True`` before Spark starts and
    is not reset afterwards. When both ``AWS_ACCESS_KEY_ID`` and
    ``AWS_SECRET_ACCESS_KEY`` are set in the environment they are passed to
    the RayContext through its ``env`` argument; otherwise no ``env`` is
    passed.

    Teardown is wrapped in ``try``/``finally`` so that the Spark context is
    stopped even when creating or initialising the RayContext fails, and even
    when stopping Ray itself raises.
    """
    from zoo import init_spark_on_local
    from zoo.ray import RayContext
    ZooContext._orca_eager_mode = True
    sc = init_spark_on_local(cores=4, spark_log_level="INFO")
    try:
        access_key_id = os.getenv("AWS_ACCESS_KEY_ID")
        secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")
        ray_kwargs = {"sc": sc, "object_store_memory": "1g"}
        # Forward the credentials only when both are present.
        if access_key_id is not None and secret_access_key is not None:
            ray_kwargs["env"] = {
                "AWS_ACCESS_KEY_ID": access_key_id,
                "AWS_SECRET_ACCESS_KEY": secret_access_key,
            }
        ray_ctx = RayContext(**ray_kwargs)
        ray_ctx.init()
        # Only stop Ray once init() has succeeded.
        try:
            yield
        finally:
            ray_ctx.stop()
    finally:
        sc.stop()
Пример #6
0
        format('json').mode("overwrite").save("/tmp/ray-pandas-example")

    # init ray context
    ray_ctx = RayContext(sc=sc, object_store_memory="5g")
    ray_ctx.init(object_store_memory="5g")

    # read data
    data_shard = zoo.xshard.pandas.read_json("/tmp/ray-pandas-example",
                                             ray_ctx)

    # collect data
    data = data_shard.collect()
    print("collected data :")
    print(data[0].head())

    # repartition
    partitions = data_shard.get_partitions()
    print("get %d partitions" % len(partitions))
    data_shard.repartition(2)
    new_partitions = data_shard.get_partitions()
    print("get %d partitions after repartition" % len(new_partitions))

    # apply function on each element
    data_shards = data_shard.apply(process_feature, 6, 24)
    data2 = data_shards.collect()
    print("collected new data :")
    print(data2[0].head())

    ray_ctx.stop()
    sc.stop()