def parallel_cast(input, name=None, distribute=None, gradient_distribute=None):
    assert not oneflow.eager_execution_enabled()
    op_conf = op_conf_util.OperatorConf()
    setattr(
        op_conf,
        "name",
        name if name is not None else id_util.UniqueStr("ParallelCast_"),
    )
    op_conf.parallel_cast_conf.out = "out"
    setattr(op_conf.parallel_cast_conf, "in", input.unique_name)

    def to_split_axis(dist):
        split_axis = data_type_util.OptInt64()
        if type(dist) is oneflow_api.distribute.SplitDistribute:
            split_axis.value = dist.axis
        elif type(dist) is oneflow_api.distribute.BroadcastDistribute:
            split_axis.ClearField("value")
        else:
            raise NotImplementedError
        return split_axis

    if distribute is not None:
        op_conf.parallel_cast_conf.split_axis.CopyFrom(to_split_axis(distribute))
    if gradient_distribute is not None:
        op_conf.parallel_cast_conf.gradient_split_axis.CopyFrom(
            to_split_axis(gradient_distribute)
        )
    compile_context.CurJobAddOp(op_conf)
    lbi = logical_blob_id_util.LogicalBlobId()
    lbi.op_name = op_conf.name
    lbi.blob_name = "out"
    return remote_blob_util.RemoteBlob(lbi)
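A minimal usage sketch for the function above, assuming OneFlow's legacy lazy-mode API (flow.parallel_cast with flow.distribute.split / flow.distribute.broadcast); the job function, shapes, and input values are illustrative, not from the source:

import numpy as np
import oneflow as flow
import oneflow.typing as tp

@flow.global_function()
def cast_job(x: tp.Numpy.Placeholder((4, 8))) -> tp.Numpy:
    # re-interpret the blob as split along axis 0, while keeping its
    # gradient broadcast across devices
    return flow.parallel_cast(
        x,
        distribute=flow.distribute.split(0),
        gradient_distribute=flow.distribute.broadcast(),
    )

out = cast_job(np.ones((4, 8), dtype=np.float32))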
def unpack(input, unpack_num, name=None):
    assert not oneflow.eager_execution_enabled()
    return (
        oneflow.user_op_builder(
            name if name is not None else id_util.UniqueStr("Unpack_")
        )
        .Op("unpack")
        .Input("in", [input])
        .Output("out")
        .Attr("unpack_num", unpack_num)
        .Build()
        .InferAndTryRun()
        .RemoteBlobList()[0]
    )
def acc(one, max_acc_num, name=None):
    assert not oneflow.eager_execution_enabled()
    return (
        oneflow.user_op_builder(
            name if name is not None else id_util.UniqueStr("Acc_")
        )
        .Op("acc")
        .Input("in", [one])
        .Output("out")
        .Attr("max_acc_num", max_acc_num)
        .Build()
        .InferAndTryRun()
        .RemoteBlobList()[0]
    )
def global_function_or_identity(*args, **kwargs):
    if rt_mode.CurrentMode() == rt_mode.NORMAL_MODE:
        assert flow.eager_execution_enabled()
        return flow.global_function(*args, **kwargs)
    else:
        assert rt_mode.CurrentMode() == rt_mode.GLOBAL_MODE
        identity_decorator = lambda func: func
        return identity_decorator
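A hedged sketch of how this helper might be used (the decorated function and its arguments are assumptions, not from the source): in NORMAL_MODE it behaves exactly like flow.global_function, while inside GLOBAL_MODE it degenerates to a pass-through decorator, so the same decoration works whether or not a job is already being built.

@global_function_or_identity(type="predict")
def maybe_job():
    # compiled as a job function in NORMAL_MODE; called as a plain
    # Python function when already inside GLOBAL_MODE
    ...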
def test_eager_assign_121(test_case):
    if not flow.eager_execution_enabled():
        return
    arg_dict = OrderedDict()
    arg_dict["shape"] = [(10,), (30, 4), (8, 256, 20)]
    arg_dict["dtype"] = [flow.float, flow.double]
    arg_dict["device_type"] = ["cpu"]
    arg_dict["assign"] = [flow.experimental.eager_assign_121]
    for arg in GenArgDict(arg_dict):
        _compare_with_np(test_case, **arg)
def test_sigmoid_cross_entropy_with_logits(test_case):
    if flow.eager_execution_enabled():
        print("\nSkip under eager mode!")
        return
    arg_dict = OrderedDict()
    arg_dict["device_type"] = ["gpu", "cpu"]
    arg_dict["data_type"] = ["double", "float32"]
    arg_dict["shape"] = [(64, 1000), (5, 5, 1000)]
    for arg in GenArgList(arg_dict):
        compare_with_tensorflow(*arg)
def AddInfo4InterfaceOpName(self, interface_op_name, op_attribute):
    if oneflow.eager_execution_enabled():
        self.interface_op_name2op_attr_[interface_op_name] = op_attribute
        self.interface_op_name2job_name_[
            interface_op_name
        ] = c_api_util.JobBuildAndInferCtx_GetCurrentJobName()
    else:
        # In lazy mode, we update fields with
        # the latest info in another function after compiler.Compile
        pass
def test_add_ssp_variable_proxy(test_case):
    if flow.eager_execution_enabled():
        return
    device_name = "0:0"
    flow.config.enable_debug_mode(True)
    flow.config.cpu_device_num(2)
    buffer_size = 4
    function_config = flow.FunctionConfig()
    function_config.enable_ssp(True)

    @flow.global_function(type="train", function_config=function_config)
    def Foo() -> tp.Numpy:
        with flow.scope.placement(
            "cpu", device_name
        ), flow.experimental.scope.config(
            ssp_num_stages=buffer_size, ssp_stage_id=0
        ):
            w = flow.get_variable(
                "w",
                shape=(10,),
                dtype=flow.float,
                initializer=flow.constant_initializer(0),
            )
            loss = w + flow.constant_like(w, value=0.0, dtype=flow.float)
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [-10.0]), momentum=0
            ).minimize(loss)
            return loss

    checkpoint = flow.train.CheckPoint()
    checkpoint.init()
    zeros = np.zeros((10,)).astype(np.float32)
    ones = np.ones((10,)).astype(np.float32)
    # the first four results are always initialized with zeros
    for i in range(buffer_size):
        x = Foo()
        test_case.assertTrue(np.allclose(x, zeros))
    # ones, because the formula is W_mutable = W_mutable + 1
    x = Foo()
    test_case.assertTrue(np.allclose(x, ones))
    # twos, because the formula is W_mutable = W_mutable + 1
    x = Foo()
    test_case.assertTrue(np.allclose(x, ones + ones))
    # threes, because the formula is W_mutable = W_mutable + 1
    x = Foo()
    test_case.assertTrue(np.allclose(x, ones + ones + ones))
    # fours, because the formula is W_mutable = W_mutable + 1
    x = Foo()
    test_case.assertTrue(np.allclose(x, ones + ones + ones + ones))
def test_unpack_pack(test_case):
    if flow.eager_execution_enabled():
        return

    @flow.global_function(function_config=func_config)
    def UnpackPackJob(a: oft.Numpy.Placeholder((3, 4))):
        return flow.pack(flow.unpack(a, 3), 3)

    x = np.random.rand(3, 4).astype(np.float32)
    y = UnpackPackJob(x).get().numpy()
    test_case.assertTrue(np.array_equal(y, x))
def test_repeat_acc(test_case):
    if flow.eager_execution_enabled():
        return

    @flow.global_function(function_config=func_config)
    def RepeatAccJob(a: oft.Numpy.Placeholder((3, 4))):
        return flow.acc(flow.repeat(a, 3), 3)

    x = np.random.rand(3, 4).astype(np.float32)
    y = RepeatAccJob(x).get().numpy()
    test_case.assertTrue(np.array_equal(y, x * 3))
def test_sparse_softmax_cross_entropy_with_logits(test_case):
    if flow.eager_execution_enabled():
        print("\nSkip under eager mode!")
        return
    arg_dict = OrderedDict()
    arg_dict["device_type"] = ["gpu", "cpu"]
    arg_dict["data_type"] = ["float32", "double"]
    arg_dict["label_type"] = ["int32", "int64"]
    arg_dict["num_classes"] = [1000]
    arg_dict["batch_size"] = [64]
    for arg in GenArgList(arg_dict):
        compare_with_tensorflow(*arg)
def test_softmax_axis(test_case):
    if flow.eager_execution_enabled():
        print("\nSkip under eager mode!")
        return
    arg_dict = OrderedDict()
    arg_dict["device_type"] = ["gpu", "cpu"]
    arg_dict["x_shape"] = [(10, 20, 30, 40)]
    arg_dict["data_type"] = ["float32", "double", "float16"]
    arg_dict["axis"] = [-4, -3, -2, -1, 0, 1, 2, 3]
    for arg in GenArgList(arg_dict):
        if arg[0] == "cpu" and arg[2] == "float16":
            continue
        compare_with_tensorflow(*arg)
def unpack(input, unpack_num, name=None):
    assert not oneflow.eager_execution_enabled()
    op_conf = op_conf_util.OperatorConf()
    setattr(
        op_conf,
        "name",
        name if name is not None else id_util.UniqueStr("Unpack_"),
    )
    setattr(op_conf.unpack_conf, "in", input.unique_name)
    op_conf.unpack_conf.out = "out"
    op_conf.unpack_conf.unpack_num = unpack_num
    compile_context.CurJobAddOp(op_conf)
    lbi = logical_blob_id_util.LogicalBlobId()
    lbi.op_name = op_conf.name
    lbi.blob_name = "out"
    return remote_blob_util.RemoteBlob(lbi)
def test_1d_ring_buffer_Wm_assign_Wm_plus_1(test_case):
    if flow.eager_execution_enabled():
        return
    device_name = "0:0"
    flow.config.cpu_device_num(2)
    buffer_size = 4

    @flow.global_function()
    def Foo() -> tp.Numpy:
        with flow.scope.placement("cpu", device_name):
            w = flow.get_variable(
                "w",
                shape=(10,),
                dtype=flow.float,
                initializer=flow.constant_initializer(0),
            )
            ones = flow.constant_like(w, value=1.0, dtype=flow.float)
            ref, value = flow.experimental.ssp_variable_proxy(
                w, buffer_size=buffer_size
            )
            # do not use `w` again because it's delegated by `ref` and `value`
            # W_mutable = W_mutable + 1
            flow.assign(ref, ref + ones)
            return value

    checkpoint = flow.train.CheckPoint()
    checkpoint.init()
    zeros = np.zeros((10,)).astype(np.float32)
    ones = np.ones((10,)).astype(np.float32)
    # the first four results are always initialized with zeros
    for i in range(buffer_size):
        x = Foo()
        test_case.assertTrue(np.allclose(x, zeros))
    # ones, because the formula is W_mutable = W_mutable + 1
    x = Foo()
    test_case.assertTrue(np.allclose(x, ones))
    # twos, because the formula is W_mutable = W_mutable + 1
    x = Foo()
    test_case.assertTrue(np.allclose(x, ones + ones))
    # threes, because the formula is W_mutable = W_mutable + 1
    x = Foo()
    test_case.assertTrue(np.allclose(x, ones + ones + ones))
    # fours, because the formula is W_mutable = W_mutable + 1
    x = Foo()
    test_case.assertTrue(np.allclose(x, ones + ones + ones + ones))
def acc(one, max_acc_num, name=None):
    assert not oneflow.eager_execution_enabled()
    op_conf = op_conf_util.OperatorConf()
    setattr(
        op_conf,
        "name",
        name if name is not None else id_util.UniqueStr("Acc_"),
    )
    op_conf.acc_conf.one = one.unique_name
    op_conf.acc_conf.acc = "acc"
    op_conf.acc_conf.max_acc_num = max_acc_num
    compile_context.CurJobAddOp(op_conf)
    lbi = logical_blob_id_util.LogicalBlobId()
    lbi.op_name = op_conf.name
    lbi.blob_name = "acc"
    return remote_blob_util.RemoteBlob(lbi)
def decode_ofrecord(
    ofrecord_dir: str,
    blobs: Sequence[BlobConf],
    batch_size: int = 1,
    data_part_num: int = 1,
    part_name_prefix: str = "part-",
    part_name_suffix_length: int = -1,
    shuffle: bool = False,
    buffer_size: int = 1024,
    name: str = None,
) -> Tuple[remote_blob_util.BlobDef]:
    print(
        "WARNING:",
        "oneflow.data.decode_ofrecord is deprecated and does NOT work in eager mode; please use: \n",
        "  1) ofrecord = oneflow.data.ofrecord_reader(...) to read ofrecord; \n",
        "  2) image = oneflow.data.ofrecord_image_decoder(...) to decode image; \n",
        "  3) raw = oneflow.data.ofrecord_raw_decoder(...) to decode raw data like label; \n",
        traceback.format_stack()[-2],
    )
    assert not flow.eager_execution_enabled()

    if name is None:
        name = id_util.UniqueStr("Decode_")

    lbis = []
    op_conf = op_conf_util.OperatorConf()
    op_conf.name = name
    op_conf.decode_ofrecord_conf.data_dir = ofrecord_dir
    op_conf.decode_ofrecord_conf.data_part_num = data_part_num
    op_conf.decode_ofrecord_conf.batch_size = batch_size
    op_conf.decode_ofrecord_conf.part_name_prefix = part_name_prefix
    op_conf.decode_ofrecord_conf.part_name_suffix_length = part_name_suffix_length
    if shuffle:
        op_conf.decode_ofrecord_conf.random_shuffle_conf.buffer_size = buffer_size
    for blob_conf in blobs:
        op_conf.decode_ofrecord_conf.blob.extend([blob_conf.to_proto()])
        lbi = logical_blob_id_util.LogicalBlobId()
        lbi.op_name = name
        lbi.blob_name = blob_conf.name
        lbis.append(lbi)

    interpret_util.ConsistentForward(op_conf)
    return tuple(map(lambda x: remote_blob_util.RemoteBlob(x), lbis))
def test_fused_bias_add(test_case):
    if flow.eager_execution_enabled():
        print("\nSkip under eager mode!")
        return
    arg_dict = OrderedDict()
    arg_dict["device_type"] = ["gpu"]
    arg_dict["x_shape"] = [
        (10, 10),
        (10, 5),
        (1, 10, 10, 10),
        (2, 10, 10, 10),
    ]
    arg_dict["data_type"] = ["float16", "float32", "double"]
    arg_dict["data_format"] = ["NCHW"]
    for arg in GenArgList(arg_dict):
        if arg[0] == "cpu" and arg[2] == "float16":
            continue
        compare_with_not_fused(test_case, *arg)
def sync_dynamic_resize(
    inputs: remote_blob_util.BlobDef,
    size: remote_blob_util.BlobDef,
    name: Optional[str] = None,
) -> remote_blob_util.BlobDef:
    op_conf = op_conf_util.OperatorConf()
    setattr(
        op_conf,
        "name",
        name if name is not None else id_util.UniqueStr("SyncDynamicResize_"),
    )
    setattr(op_conf.sync_dynamic_resize_conf, "in", inputs.unique_name)
    setattr(op_conf.sync_dynamic_resize_conf, "size", size.unique_name)
    setattr(op_conf.sync_dynamic_resize_conf, "axis", 0)
    setattr(op_conf.sync_dynamic_resize_conf, "out", "out")
    setattr(op_conf.sync_dynamic_resize_conf, "eager", flow.eager_execution_enabled())
    interpret_util.Forward(op_conf)
    out_lbi = logical_blob_id_util.LogicalBlobId()
    setattr(out_lbi, "op_name", op_conf.name)
    setattr(out_lbi, "blob_name", "out")
    return remote_blob_util.RemoteBlob(out_lbi)
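A minimal usage sketch for the function above, assuming the legacy lazy-mode API; the shapes, the (1,) size placeholder, and the input values are illustrative assumptions. sync_dynamic_resize truncates the blob along axis 0 to the length given by size, yielding a dynamic-shaped output (hence tp.ListNumpy):

import numpy as np
import oneflow as flow
import oneflow.typing as tp

@flow.global_function()
def resize_job(
    x: tp.Numpy.Placeholder((8, 3)),
    size: tp.Numpy.Placeholder((1,), dtype=flow.int32),
) -> tp.ListNumpy:
    # keep only the first `size` rows of x
    return flow.sync_dynamic_resize(x, size)

out = resize_job(
    np.random.rand(8, 3).astype(np.float32),
    np.array([5], dtype=np.int32),
)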
def test_fused_scale_tril_softmax_dropout(test_case):
    if flow.eager_execution_enabled():
        print("\nSkip under eager mode!")
        return
    arg_dict = OrderedDict()
    arg_dict["device_type"] = ["gpu"]
    arg_dict["x_shape"] = [
        (2, 2, 5, 5),
        (10, 20),
        (32, 12, 128),
        (10, 960),
    ]
    arg_dict["data_type"] = ["float16", "float32", "double"]
    arg_dict["diagonal"] = [-1, 0]
    arg_dict["fill_value"] = [float("-inf"), 0]
    arg_dict["scale"] = [0.125]
    arg_dict["rate"] = [0.5]
    arg_dict["seed"] = [12345]
    for arg in GenArgList(arg_dict):
        if arg[0] == "cpu" and arg[2] == "float16":
            continue
        compare_with_not_fused(test_case, *arg)
def test_testsource(test_case):
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.default_logical_view(flow.scope.consistent_view())

    @flow.global_function(function_config=func_config)
    def TestSourceJob():
        with flow.scope.placement("cpu", "0:0"):
            ret = my_test_source("my_cc_test_source_op", 0)
        return ret

    y = TestSourceJob().get().numpy()
    rand_0_4 = np.array([0.5488136, 0.59284467, 0.7151894, 0.8442659, 0.6027634])
    test_case.assertTrue(np.allclose(y, rand_0_4, atol=1e-5, rtol=1e-5))
    y = TestSourceJob().get().numpy()
    if flow.eager_execution_enabled():
        rand_5_9 = rand_0_4
    else:
        rand_5_9 = np.array(
            [0.85794574, 0.54488325, 0.84725183, 0.42365485, 0.62356377]
        )
    test_case.assertTrue(np.allclose(y, rand_5_9, atol=1e-5, rtol=1e-5))
def decode_ofrecord(
    ofrecord_dir: str,
    blobs: Sequence[BlobConf],
    batch_size: int = 1,
    data_part_num: int = 1,
    part_name_prefix: str = "part-",
    part_name_suffix_length: int = -1,
    shuffle: bool = False,
    buffer_size: int = 1024,
    name: str = None,
) -> Tuple[oneflow_api.BlobDesc]:
    print(
        "WARNING:",
        "oneflow.data.decode_ofrecord is deprecated and does NOT work in eager mode; please use: \n",
        "  1) ofrecord = oneflow.data.ofrecord_reader(...) to read ofrecord; \n",
        "  2) image = oneflow.data.ofrecord_image_decoder(...) to decode image; \n",
        "  3) raw = oneflow.data.ofrecord_raw_decoder(...) to decode raw data like label; \n",
        traceback.format_stack()[-2],
    )
    assert not flow.eager_execution_enabled()

    ofrecord = flow.data.ofrecord_reader(
        ofrecord_dir=ofrecord_dir,
        batch_size=batch_size,
        data_part_num=data_part_num,
        part_name_prefix=part_name_prefix,
        part_name_suffix_length=part_name_suffix_length,
        random_shuffle=shuffle,
        shuffle_buffer_size=buffer_size,
        name=name,
    )
    result_blob_list = []
    for blob_conf in blobs:
        result_blob_list.append(
            blob_conf.decode_blob(input_blob=ofrecord, batch_size=batch_size)
        )
    return tuple(result_blob_list)
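A hedged sketch of the replacement pipeline that the deprecation warning points to; the directory, batch size, and OFRecord field names ("encoded", "class/label") are illustrative assumptions. These calls build blobs, so they are meant to run inside a @flow.global_function job:

import oneflow as flow

def build_input_pipeline():
    # 1) read raw OFRecord examples
    ofrecord = flow.data.ofrecord_reader(
        "/path/to/ofrecord", batch_size=32, data_part_num=1
    )
    # 2) decode an image field
    image = flow.data.ofrecord_image_decoder(ofrecord, "encoded", color_space="RGB")
    # 3) decode a raw field such as the label
    label = flow.data.ofrecord_raw_decoder(
        ofrecord, "class/label", shape=(), dtype=flow.int32
    )
    return image, label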
def test_softmax(test_case):
    if flow.eager_execution_enabled():
        print("\nSkip under eager mode!")
        return
    arg_dict = OrderedDict()
    arg_dict["device_type"] = ["gpu", "cpu"]
    arg_dict["x_shape"] = [
        (10, 10, 20, 30),
        (10, 20, 30),
        (10, 20),
        (10, 960),
        (10, 4096),
        (10, 8092),
        (256, 1001),
    ]
    arg_dict["data_type"] = ["float32", "double", "float16"]
    arg_dict["axis"] = [-1, 1, 2, 3]
    for arg in GenArgList(arg_dict):
        if arg[0] == "cpu" and arg[2] == "float16":
            continue
        if arg[3] >= len(arg[1]):
            continue
        compare_with_tensorflow(*arg)
def test_repeat_acc(test_case, device_type, shape, dtype, acc_num):
    flow.clear_default_session()
    if flow.eager_execution_enabled():
        return

    @flow.global_function(function_config=func_config)
    def RepeatAccJob(a: oft.Numpy.Placeholder(shape)):
        if dtype == "float16":
            return flow.cast(
                flow.acc(flow.repeat(flow.cast(a, flow.float16), acc_num), acc_num),
                flow.float,
            )
        else:
            return flow.acc(flow.repeat(a, acc_num), acc_num)

    x = np.random.rand(*shape).astype(np.float32)
    y = RepeatAccJob(x).get().numpy()
    z = x * acc_num
    if dtype == "float16":
        z = x.astype(np.float16) * acc_num
        z = z.astype(np.float32)
    test_case.assertTrue(np.allclose(y, z, rtol=1e-5, atol=1e-5))
def RemoteBlobList(self):
    remote_blob_list = []
    for k in self.op_conf_.user_conf.output:
        if k not in self.output_arg_key_list_:
            raise ValueError(
                "output_arg_name {} of {} op is not set in python op builder".format(
                    k, self.op_conf_.name
                )
            )
    for output_arg_name in self.output_arg_key_list_:
        assert output_arg_name in self.op_conf_.user_conf.output
        for i in range(len(self.op_conf_.user_conf.output[output_arg_name].s)):
            lbi = logical_blob_id_util.LogicalBlobId()
            lbi.op_name = self.op_conf_.name
            lbi.blob_name = "{}_{}".format(output_arg_name, i)
            remote_blob_obj = self.MakeRemoteBlob(lbi)
            remote_blob_list.append(remote_blob_obj)
            if flow.eager_execution_enabled():
                gradient_util.GetDefaultBackwardBlobRegister().TrySetObject4BlobName(
                    remote_blob_obj.logical_blob_name, remote_blob_obj.blob_object
                )
    return tuple(remote_blob_list)
def test_dynamic_dim_gather(test_case):
    if flow.eager_execution_enabled():
        print("\nSkip under eager mode!")
        return
    _compare_dim_gather_with_samples(
        test_case, inputshape=(2, 2), indexshape=(2, 2), dim=1, maxshape=(10, 10)
    )
    _compare_dim_gather_with_samples(
        test_case, inputshape=(2, 2), indexshape=(2, 2), dim=0, maxshape=(10, 10)
    )
    _compare_dim_gather_with_samples(
        test_case,
        inputshape=(4, 4, 3),
        indexshape=(4, 1, 3),
        dim=1,
        maxshape=(10, 10, 10),
    )
def test(test_case):
    flow.config.gpu_device_num(2)
    if flow.eager_execution_enabled():
        print("\nSkip under eager mode!")
        return

    @flow.global_function()
    def add() -> tp.Numpy:
        with flow.scope.placement("gpu", "0:0-1"):
            x = flow.get_variable(
                name="x",
                shape=(2, 3),
                initializer=flow.random_uniform_initializer(),
            )
            y = flow.get_variable(
                name="y",
                shape=(2, 3),
                initializer=flow.random_uniform_initializer(),
            )
            return flow.math.add_n([x, y])

    # NOTE(chengcheng): Should retain for session init before set_interface_blob_value
    flow.train.CheckPoint().init()

    x_value = np.random.random((2, 3)).astype(np.float32)
    y_value = np.random.random((2, 3)).astype(np.float32)
    flow.experimental.set_interface_blob_value("x", x_value)
    flow.experimental.set_interface_blob_value("y", y_value)
    test_case.assertTrue(
        np.array_equal(x_value, flow.experimental.get_interface_blob_value("x"))
    )
    test_case.assertTrue(
        np.array_equal(y_value, flow.experimental.get_interface_blob_value("y"))
    )
    test_case.assertTrue(np.array_equal(add(), x_value + y_value))
def test_fp16(self):
    if flow.eager_execution_enabled():
        print("\nSkip under eager mode!")
        return
    compare_fused_with_no_fused(self, 4, 1024, 12, 64, True)
def test_model_io_case_0(test_case):
    if flow.eager_execution_enabled():
        print("\nSkip under eager mode!")
        return
    # _test_model_io(test_case, (10, 5, 7), flow.float32, 1e-2, 10)
    _test_model_io(test_case, (2, 2), flow.float32, 1e-2, 10)
def get_checkpoint_ready_model(model_getter, dtype):
    model = model_getter(dtype)
    if flow.eager_execution_enabled():
        # in eager mode, run the model once so its variables are
        # materialized before checkpoint loading
        model()
    return model