def test_put_pins_object(ray_start_object_store_memory):
    """Check that holding an ObjectRef pins the object in the store.

    An object reachable only through raw binary ID bytes (no live
    ObjectRef) must be evictable, and a ``weakref=True`` put must not
    pin at all.
    """
    obj = np.ones(200 * 1024, dtype=np.uint8)
    x_id = ray.put(obj)
    # Keep only the raw ID bytes for the second lookup path.
    x_binary = x_id.binary()
    assert (ray.get(ray.ObjectRef(x_binary)) == obj).all()

    # x cannot be evicted since x_id pins it, even under memory pressure
    # from repeated large puts.
    for _ in range(10):
        ray.put(np.zeros(10 * 1024 * 1024))

    assert (ray.get(x_id) == obj).all()
    assert (ray.get(ray.ObjectRef(x_binary)) == obj).all()

    # now it can be evicted since x_id pins it but x_binary does not
    del x_id
    for _ in range(10):
        ray.put(np.zeros(10 * 1024 * 1024))

    assert not ray.worker.global_worker.core_worker.object_exists(
        ray.ObjectRef(x_binary))

    # weakref put: the returned ref does not pin, so eviction makes the
    # object unreconstructable.
    y_id = ray.put(obj, weakref=True)
    for _ in range(10):
        ray.put(np.zeros(10 * 1024 * 1024))
    with pytest.raises(ray.exceptions.UnreconstructableError):
        ray.get(y_id)
def __init__(self, *, object_ref: dict, node_address: str, is_driver: bool,
             pid: int):
    """Build a memory-table entry from one decoded object-ref record.

    Args:
        object_ref: Dict decoded from the GCS/debug payload; values are
            read with .get() so missing keys fall back to defaults.
        node_address: Address of the node holding the reference.
        is_driver: Whether the owning worker is a driver.
        pid: PID of the owning worker process.
    """
    # worker info
    self.is_driver = is_driver
    self.pid = pid
    self.node_address = node_address

    # object info
    self.task_status = object_ref.get("taskStatus", "?")
    # "NIL" is the sentinel for "no task"; render it as "-".
    if self.task_status == "NIL":
        self.task_status = "-"
    self.attempt_number = int(object_ref.get("attemptNumber", 0))
    # attemptNumber is 0-based; display retries 1-based.
    if self.attempt_number > 0:
        self.task_status = \
            f"Attempt #{self.attempt_number + 1}: {self.task_status}"
    self.object_size = int(object_ref.get("objectSize", -1))  # -1 = unknown
    self.call_site = object_ref.get("callSite", "<Unknown>")
    # Empty call site means call-site recording is turned off.
    if len(self.call_site) == 0:
        self.call_site = "disabled"
    self.object_ref = ray.ObjectRef(
        decode_object_ref_if_needed(object_ref["objectId"]))

    # reference info
    self.local_ref_count = int(object_ref.get("localRefCount", 0))
    self.pinned_in_memory = bool(object_ref.get("pinnedInMemory", False))
    self.submitted_task_ref_count = int(
        object_ref.get("submittedTaskRefCount", 0))
    # NOTE: the comprehension variable shadows the `object_ref` dict
    # parameter; each element here is a contained object's ID.
    self.contained_in_owned = [
        ray.ObjectRef(decode_object_ref_if_needed(object_ref))
        for object_ref in object_ref.get("containedInOwned", [])
    ]
    self.reference_type = self._get_reference_type()
def object_table(self, object_ref=None):
    """Fetch and parse the object table info for one or more object refs.

    Args:
        object_ref: Hex string of an object ref to fetch information
            about. If this is None, then the entire object table is
            fetched.

    Returns:
        Information from the object table: a single info dict when
        ``object_ref`` is given ({} if not found), otherwise a dict
        mapping hex object ID -> info dict.
    """
    self._check_connected()

    if object_ref is not None:
        # Single-object lookup.
        object_ref = ray.ObjectRef(hex_to_binary(object_ref))
        object_info = self.global_state_accessor.get_object_info(
            object_ref)
        if object_info is None:
            return {}
        else:
            object_location_info = gcs_utils.ObjectLocationInfo.FromString(
                object_info)
            return self._gen_object_info(object_location_info)
    else:
        # Full-table scan: each entry is a serialized protobuf.
        object_table = self.global_state_accessor.get_object_table()
        results = {}
        # Iterate the entries directly instead of indexing by
        # range(len(...)).
        for serialized in object_table:
            object_location_info = gcs_utils.ObjectLocationInfo.FromString(
                serialized)
            results[binary_to_hex(object_location_info.object_id)] = \
                self._gen_object_info(object_location_info)
        return results
def put_unpinned_object(obj):
    """Serialize ``obj`` into the object store without pinning it.

    Returns:
        A ray.ObjectRef for the stored (evictable) object.
    """
    worker = ray.worker.global_worker
    serialized = worker.get_serialization_context().serialize(obj)
    object_id = worker.core_worker.put_serialized_object(
        serialized, pin_object=False)
    return ray.ObjectRef(object_id)
def _save_spark_df_to_object_store(df: sql.DataFrame,
                                   num_shards: int) -> List["RecordBatchShard"]:
    """Persist a Spark DataFrame into the Ray object store as shards.

    Args:
        df: The Spark DataFrame to save.
        num_shards: Number of RecordBatchShard groups to divide the
            saved blocks into.

    Returns:
        A list of ``num_shards`` RecordBatchShard objects, each holding
        the ObjectRefs assigned to that shard.
    """
    # call java function from python
    jvm = df.sql_ctx.sparkSession.sparkContext._jvm
    jdf = df._jdf
    object_store_writer = jvm.org.apache.spark.sql.raydp.ObjectStoreWriter(jdf)
    # Each record describes one block written to the object store
    # (object id, owner address, row count).
    records = object_store_writer.save()

    worker = ray.worker.global_worker

    blocks: List[ray.ObjectRef] = []
    block_sizes: List[int] = []
    for record in records:
        owner_address = record.ownerAddress()
        object_ref = ray.ObjectRef(record.objectId())
        num_records = record.numRecords()
        # Register the ownership of the ObjectRef
        worker.core_worker.deserialize_and_register_object_ref(
            object_ref.binary(), ray.ObjectRef.nil(), owner_address)
        blocks.append(object_ref)
        block_sizes.append(num_records)

    # Partition block indexes into shards weighted by block size.
    divided_blocks = divide_blocks(block_sizes, num_shards)
    record_batch_set: List[RecordBatchShard] = []
    for i in range(num_shards):
        indexes = divided_blocks[i]
        object_ids = [blocks[index] for index in indexes]
        record_batch_set.append(RecordBatchShard(i, object_ids))
    return record_batch_set
def test_object_ref_properties():
    """Exercise ObjectRef construction, nil/random refs, and pickling."""
    raw_id = b"0011223344556677889900001111"
    ref = ray.ObjectRef(raw_id)
    assert ref.binary() == raw_id

    nil_ref = ray.ObjectRef.nil()
    assert nil_ref.is_nil()

    # IDs that are too long or too short are both rejected.
    for bad_bytes in (raw_id + b"1234", b"0123456789"):
        with pytest.raises(ValueError, match=r".*needs to have length.*"):
            ray.ObjectRef(bad_bytes)

    random_ref = ray.ObjectRef.from_random()
    assert not random_ref.is_nil()
    assert random_ref.binary() != raw_id

    # Refs survive a pickle round trip and compare equal.
    restored = pickle.loads(pickle.dumps(random_ref))
    assert restored == random_ref
def dump_args_proto(arg):
    """Materialize a single Arg proto into a concrete Python value.

    Args:
        arg: An Arg proto; either carries inline pickled data
            (INTERNED) or references an object held by the server.

    Returns:
        The deserialized value, or the result of ray.get on the
        referenced object.
    """
    if arg.local == ray_client_pb2.Arg.Locality.INTERNED:
        # Value was shipped inline; just unpickle it.
        return cloudpickle.loads(arg.data)
    else:
        # TODO(barakmich): This is a dirty hack that assumes the
        # server maintains a reference to the ID we've been given
        ref = ray.ObjectRef(arg.reference_id)
        return ray.get(ref)
def __init__(self, channel_id_str: str):
    """
    Args:
        channel_id_str: string representation of channel id
    """
    self.channel_id_str = channel_id_str
    # The channel's queue is addressed by an ObjectRef derived from the
    # channel id string.
    self.object_qid = ray.ObjectRef(
        channel_id_str_to_bytes(channel_id_str))
def test_put_pins_object(ray_start_object_store_memory):
    """Check that a live ObjectRef pins its object in the store.

    Once the last ObjectRef is deleted, raw binary ID bytes alone must
    not keep the object alive under memory pressure.
    """
    obj = np.ones(200 * 1024, dtype=np.uint8)
    x_id = ray.put(obj)
    # Keep only the raw ID bytes for a second, non-pinning lookup path.
    x_binary = x_id.binary()
    assert (ray.get(ray.ObjectRef(x_binary)) == obj).all()

    # x cannot be evicted since x_id pins it, even under memory pressure
    # from repeated large puts.
    for _ in range(10):
        ray.put(np.zeros(10 * 1024 * 1024))

    assert (ray.get(x_id) == obj).all()
    assert (ray.get(ray.ObjectRef(x_binary)) == obj).all()

    # now it can be evicted since x_id pins it but x_binary does not
    del x_id
    for _ in range(10):
        ray.put(np.zeros(10 * 1024 * 1024))

    assert not ray.worker.global_worker.core_worker.object_exists(
        ray.ObjectRef(x_binary))
def _convert_args(arg_list):
    """Convert wire-format args to local values, swapping client refs
    for real ray.ObjectRefs."""
    converted = []
    for raw_arg in arg_list:
        value = convert_from_arg(raw_arg)
        converted.append(
            ray.ObjectRef(value.id)
            if isinstance(value, ClientObjectRef) else value)
    return converted
def restore_spilled_objects(self, keys):
    """Restore spilled objects back into the object store.

    Args:
        keys: Iterable of byte-string keys; each decodes to a filename
            of the form "<prefix><hex object id>".
    """
    for k in keys:
        filename = k.decode()
        # The object ID is the hex suffix after the spill prefix.
        ref = ray.ObjectRef(bytes.fromhex(filename[len(self.prefix):]))
        with open(os.path.join(self.directory_path, filename), "rb") as f:
            # On-disk layout: 8-byte little-endian metadata length,
            # 8-byte little-endian buffer length, metadata bytes, then
            # the object payload.
            metadata_len = int.from_bytes(f.read(8), byteorder="little")
            buf_len = int.from_bytes(f.read(8), byteorder="little")
            metadata = f.read(metadata_len)
            # read remaining data to our buffer
            self._put_object_to_store(metadata, buf_len, f, ref)
def _convert_args(arg_list, prepared_args=None):
    """Convert wire-format args to local values, swapping client refs
    for real ray.ObjectRefs. If ``prepared_args`` is given, it is
    returned unchanged."""
    if prepared_args is not None:
        return prepared_args
    converted = []
    for raw_arg in arg_list:
        value = convert_from_arg(raw_arg)
        converted.append(
            ray.ObjectRef(value.id)
            if isinstance(value, ClientObjectRef) else value)
    return converted
def __init__(self, *, object_ref: dict, node_address: str, is_driver: bool,
             pid: int):
    """Build a memory-table entry from one decoded object-ref record.

    Args:
        object_ref: Dict decoded from the GCS/debug payload; values are
            read with .get() so missing keys fall back to defaults.
        node_address: Address of the node holding the reference.
        is_driver: Whether the owning worker is a driver.
        pid: PID of the owning worker process.
    """
    # worker info
    self.is_driver = is_driver
    self.pid = pid
    self.node_address = node_address

    # object info
    self.object_size = int(object_ref.get("objectSize", -1))  # -1 = unknown
    self.call_site = object_ref.get("callSite", "<Unknown>")
    self.object_ref = ray.ObjectRef(
        decode_object_ref_if_needed(object_ref["objectId"]))

    # reference info
    self.local_ref_count = int(object_ref.get("localRefCount", 0))
    self.pinned_in_memory = bool(object_ref.get("pinnedInMemory", False))
    self.submitted_task_ref_count = int(
        object_ref.get("submittedTaskRefCount", 0))
    # NOTE: the comprehension variable shadows the `object_ref` dict
    # parameter; each element here is a contained object's ID.
    self.contained_in_owned = [
        ray.ObjectRef(decode_object_ref_if_needed(object_ref))
        for object_ref in object_ref.get("containedInOwned", [])
    ]
    self.reference_type = self._get_reference_type()
def _fill_object_store_and_get(obj, succeed=True, object_MiB=20,
                               num_objects=5):
    """Apply memory pressure, then wait for ``obj`` to (not) exist.

    Args:
        obj: An ObjectRef, or raw binary ID bytes to wrap in one.
        succeed: If True, wait until the object exists in the local
            store; if False, wait until it has been evicted.
        object_MiB: Size in MiB of each filler object.
        num_objects: Number of filler objects to put.
    """
    for _ in range(num_objects):
        ray.put(np.zeros(object_MiB * 1024 * 1024, dtype=np.uint8))

    # Accept raw ID bytes as well as a ready-made ObjectRef.
    # (isinstance is the idiomatic type check, vs. `type(obj) is bytes`.)
    if isinstance(obj, bytes):
        obj = ray.ObjectRef(obj)

    if succeed:
        wait_for_condition(
            lambda: ray.worker.global_worker.core_worker.object_exists(obj))
    else:
        wait_for_condition(
            lambda: not ray.worker.global_worker.core_worker.object_exists(obj)
        )
def _save_spark_df_to_object_store(df: sql.DataFrame):
    """Persist a Spark DataFrame into the Ray object store.

    Args:
        df: The Spark DataFrame to save.

    Returns:
        A tuple (blocks, block_sizes): the ObjectRefs of the saved
        blocks and the number of records in each block.
    """
    # call java function from python
    jvm = df.sql_ctx.sparkSession.sparkContext._jvm
    jdf = df._jdf
    object_store_writer = jvm.org.apache.spark.sql.raydp.ObjectStoreWriter(jdf)
    # Each record describes one block written to the object store
    # (object id, owner address, row count).
    records = object_store_writer.save()

    worker = ray.worker.global_worker

    blocks: List[ray.ObjectRef] = []
    block_sizes: List[int] = []
    for record in records:
        owner_address = record.ownerAddress()
        object_ref = ray.ObjectRef(record.objectId())
        num_records = record.numRecords()
        # Register the ownership of the ObjectRef
        worker.core_worker.deserialize_and_register_object_ref(
            object_ref.binary(), ray.ObjectRef.nil(), owner_address)
        blocks.append(object_ref)
        block_sizes.append(num_records)

    return blocks, block_sizes
def test_valid_reference_memory_entry():
    """A locally-referenced entry reports the right type, ref, and validity."""
    entry = build_local_reference_entry()
    expected_ref = ray.ObjectRef(decode_object_ref_if_needed(OBJECT_ID))

    assert entry.reference_type == ReferenceType.LOCAL_REFERENCE
    assert entry.object_ref == expected_ref
    # is_valid() must return the bool True, not just a truthy value.
    assert entry.is_valid() is True
def ref_not_exists():
    """Return True once the inner object no longer exists locally.

    NOTE: reads ``inner_oid_binary`` as a free variable from the
    enclosing scope.
    """
    worker = ray.worker.global_worker
    inner_oid = ray.ObjectRef(inner_oid_binary)
    return not worker.core_worker.object_exists(inner_oid)
def restore_spilled_objects(self, keys):
    """Restore each spilled object named by ``keys``.

    Args:
        keys: Iterable of byte-string keys; each decodes to
            "<prefix><hex object id>".
    """
    for raw_key in keys:
        key = raw_key.decode()
        # The object ID is the hex suffix after the spill prefix.
        object_id_hex = key[len(self.prefix):]
        ref = ray.ObjectRef(bytes.fromhex(object_id_hex))
        self._restore_spilled_object(key, ref)
def binary_to_object_ref(binary_object_ref):
    """Wrap raw binary object-ID bytes in a ray.ObjectRef."""
    ref = ray.ObjectRef(binary_object_ref)
    return ref