Exemplo n.º 1
0
 def BuildAssignInstruction(builder):
     """Issue a stateless ``logical_slice_assign`` call through ``builder``.

     ``ref_blob_object``, ``value_blob_object``, ``ref_blob``, ``start`` and
     ``stop`` are not parameters; they are read from the enclosing scope.
     The op receives ``start`` and ``stop`` unchanged and a step of 1 for
     every entry of ``start``.
     """
     conf = op_conf_pb.OperatorConf()
     # device_tag doesn't matter for logical_slice_assign op
     current_scope = oneflow.current_scope()
     conf.device_tag = current_scope.device_parallel_desc_symbol.device_tag
     unique_name = id_util.UniqueStr(OP_PREFIX)
     conf.name = unique_name

     user_conf = conf.user_conf
     user_conf.op_type_name = "logical_slice_assign"
     user_conf.input["value"].s.append("{}/value_0".format(unique_name))
     user_conf.input["ref"].s.append("{}/ref_0".format(unique_name))

     # The op runs under the placement of the blob being assigned to.
     parallel_conf = ref_blob_object.parallel_desc_symbol.parallel_conf
     attrs = user_conf.attr
     attrs["parallel_conf"].at_string = str(parallel_conf)
     attrs["start"].at_list_int64.val[:] = start
     attrs["stop"].at_list_int64.val[:] = stop
     attrs["step"].at_list_int64.val[:] = [1] * len(start)

     blob_objects = oneflow_api.deprecated.BnInOp2BlobObject()
     blob_objects["ref_0"] = ref_blob_object
     blob_objects["value_0"] = value_blob_object

     scope_id = _GetScopeSymbolIdFromEagerBlob(ref_blob)
     inferred = op_infer_util.Infer(conf, blob_objects, scope_id)
     # StatelessCall is handed the cfg attribute rebuilt from the text form.
     cfg_attribute = oneflow_api.deprecated.MakeOpAttributeByString(
         str(inferred))
     builder.StatelessCall(
         cfg_attribute,
         parallel_conf,
         blob_objects,
         boxing_util.BoxingTo,
         vm_util._FindOrCreateDelegateBlobObject,
     )
Exemplo n.º 2
0
 def build(builder):
     """Issue a stateless ``logical_slice`` call and yield its ``y_0`` blob.

     ``op_name``, ``input_blob``, ``start``, ``stop`` and ``Yield`` are read
     from the enclosing scope. The op receives ``start`` and ``stop``
     unchanged and a step of 1 for every entry of ``start``.
     """
     conf = op_conf_pb.OperatorConf()
     # device_tag doesn't matter for logical_slice op
     current_scope = oneflow.current_scope()
     conf.device_tag = current_scope.device_parallel_desc_symbol.device_tag
     conf.name = op_name

     user_conf = conf.user_conf
     user_conf.op_type_name = "logical_slice"
     user_conf.input["x"].s.append("{}/x_0".format(op_name))
     user_conf.output["y"].s.append("{}/y_0".format(op_name))

     # The op runs under the placement of the blob being sliced.
     source_blob_object = input_blob.blob_object
     parallel_conf = source_blob_object.parallel_desc_symbol.parallel_conf
     attrs = user_conf.attr
     attrs["parallel_conf"].at_string = str(parallel_conf)
     attrs["start"].at_list_int64.val[:] = start
     attrs["stop"].at_list_int64.val[:] = stop
     attrs["step"].at_list_int64.val[:] = [1] * len(start)

     blob_objects = oneflow_api.deprecated.BnInOp2BlobObject()
     blob_objects["x_0"] = source_blob_object

     scope_id = _GetScopeSymbolIdFromEagerBlob(input_blob)
     inferred = op_infer_util.Infer(conf, blob_objects, scope_id)
     cfg_attribute = oneflow_api.deprecated.MakeOpAttributeByString(
         str(inferred))
     builder.StatelessCall(
         cfg_attribute,
         parallel_conf,
         blob_objects,
         boxing_util.BoxingTo,
         vm_util._FindOrCreateDelegateBlobObject,
     )
     # "y_0" is never stored by this function; presumably StatelessCall
     # adds the output entry to the map (confirm against the builder).
     Yield(blob_objects["y_0"])
Exemplo n.º 3
0
 def BuildModelLoadPathInputInstruction(builder):
     """Infer the path-input op and issue it as a stateless call.

     ``path_input_op_conf`` and ``path_input_blob_objects`` are read from
     the enclosing scope. The op is inferred with an empty input map and
     issued under the current placement scope's default parallel conf.
     """
     inferred_attribute = op_infer_util.Infer(path_input_op_conf,
                                              ibn2blob_object={})
     placement_scope = oneflow.placement.current_scope()
     default_conf = placement_scope.default_parallel_conf
     builder.StatelessCall(
         inferred_attribute,
         default_conf,
         bn_in_op2blob_object=path_input_blob_objects,
     )
Exemplo n.º 4
0
def _BuildCopyInstruction(builder, produced_blob_object, op_conf,
                          to_device_tag):
    """Issue a cross-device copy of ``produced_blob_object`` via ``builder``.

    Only two directions are handled: "gpu" -> "cpu" and "cpu" -> "gpu".

    Args:
        builder: instruction builder providing
            ``NoBoxingCudaD2HStatelessCall`` and
            ``NoBoxingCudaH2DStatelessCall``.
        produced_blob_object: blob object to copy; its
            ``parallel_desc_symbol`` supplies the source device tag.
        op_conf: operator conf handed to ``op_infer_util.Infer``.
        to_device_tag: destination device tag; must differ from the source
            device tag.

    Returns:
        The blob object stored under ``"out"`` in the blob-object map after
        the call, with its ``sbp_parallel`` copied from
        ``produced_blob_object``.

    Raises:
        AssertionError: if the source and destination device tags are equal.
        NotImplementedError: if the tag pair is neither cpu/gpu nor gpu/cpu.
    """
    source_parallel_desc_symbol = produced_blob_object.parallel_desc_symbol
    x_device_tag = source_parallel_desc_symbol.device_tag
    bn_in_op2blob_object = oneflow_api.deprecated.BnInOp2BlobObject()
    bn_in_op2blob_object["in"] = produced_blob_object
    op_attribute = op_infer_util.Infer(op_conf, bn_in_op2blob_object)
    assert to_device_tag != x_device_tag, (to_device_tag, x_device_tag)
    cfg_op_attribute = oneflow_api.deprecated.MakeOpAttributeByString(
        str(op_attribute))
    if to_device_tag == "cpu" and x_device_tag == "gpu":
        # Device -> host: issued under the source (gpu) placement.
        x_parallel_conf = source_parallel_desc_symbol.parallel_conf
        builder.NoBoxingCudaD2HStatelessCall(cfg_op_attribute, x_parallel_conf,
                                             bn_in_op2blob_object,
                                             TryReplaceDeviceTag)
    elif to_device_tag == "gpu" and x_device_tag == "cpu":
        # Host -> device: issued under the source placement with its device
        # tag replaced by the destination tag.
        out_parallel_desc_symbol = TryReplaceDeviceTag(
            builder, source_parallel_desc_symbol, to_device_tag)
        out_parallel_conf = out_parallel_desc_symbol.parallel_conf
        with _CudaHostPinBlob(builder, produced_blob_object):
            builder.NoBoxingCudaH2DStatelessCall(
                cfg_op_attribute,
                out_parallel_conf,
                bn_in_op2blob_object,
            )
    else:
        raise NotImplementedError(
            "invalid device found. to_device_tag: %s, x_device_tag: %s" %
            (to_device_tag, x_device_tag))
    # "out" is never stored by this function; presumably the stateless call
    # above adds it to the map (confirm against the builder).
    out_blob_object = bn_in_op2blob_object["out"]
    out_blob_object.op_arg_parallel_attr.sbp_parallel.CopyFrom(
        produced_blob_object.op_arg_parallel_attr.sbp_parallel)
    return out_blob_object
Exemplo n.º 5
0
 def BuildModelIOPathInputInstruction(builder):
     """Infer the model-IO path-input op and issue it as a stateless call.

     ``op_conf`` and ``bn_in_op2blob_object`` are read from the enclosing
     scope. The op is inferred with an empty input map and issued under the
     current scope's device parallel conf.
     """
     inferred_attribute = op_infer_util.Infer(op_conf, ibn2blob_object={})
     current_scope = oneflow.current_scope()
     device_parallel_conf = (
         current_scope.device_parallel_desc_symbol.parallel_conf)
     builder.StatelessCall(
         inferred_attribute,
         device_parallel_conf,
         bn_in_op2blob_object=bn_in_op2blob_object,
     )
Exemplo n.º 6
0
 def build(builder):
     """Issue a stateless ``logical_slice`` call and yield its ``y_0`` blob.

     ``op_name``, ``input_blob``, ``start``, ``stop`` and ``Yield`` are read
     from the enclosing scope. The op receives ``start`` and ``stop``
     unchanged and a step of 1 for every entry of ``start``.
     """
     conf = op_conf_pb.OperatorConf()
     # device_tag doesn't matter for logical_slice op
     current_scope = oneflow.current_scope()
     conf.device_tag = current_scope.device_parallel_desc_symbol.device_tag
     conf.name = op_name

     user_conf = conf.user_conf
     user_conf.op_type_name = "logical_slice"
     user_conf.input["x"].s.append("{}/x_0".format(op_name))
     user_conf.output["y"].s.append("{}/y_0".format(op_name))

     # The op runs under the placement of the blob being sliced.
     source_blob_object = input_blob.blob_object
     parallel_conf = source_blob_object.parallel_desc_symbol.parallel_conf
     attrs = user_conf.attr
     attrs["parallel_conf"].at_string = str(parallel_conf)
     attrs["start"].at_list_int64.val[:] = start
     attrs["stop"].at_list_int64.val[:] = stop
     attrs["step"].at_list_int64.val[:] = [1] * len(start)

     blob_objects = {}
     blob_objects["x_0"] = source_blob_object
     scope_id = _GetScopeSymbolIdFromEagerBlob(input_blob)
     inferred = op_infer_util.Infer(conf, blob_objects, scope_id)
     builder.StatelessCall(
         inferred,
         parallel_conf=parallel_conf,
         bn_in_op2blob_object=blob_objects,
     )
     # "y_0" is never stored by this function; presumably StatelessCall
     # adds the output entry to the dict (confirm against the builder).
     Yield(blob_objects["y_0"])
Exemplo n.º 7
0
 def BuildAssignInstruction(builder):
     """Issue a stateless ``logical_slice_assign`` call through ``builder``.

     ``ref_blob_object``, ``value_blob_object``, ``ref_blob``, ``start`` and
     ``stop`` are read from the enclosing scope. The op receives ``start``
     and ``stop`` unchanged and a step of 1 for every entry of ``start``.
     """
     conf = op_conf_pb.OperatorConf()
     # device_tag doesn't matter for logical_slice_assign op
     current_scope = oneflow.current_scope()
     conf.device_tag = current_scope.device_parallel_desc_symbol.device_tag
     unique_name = id_util.UniqueStr(OP_PREFIX)
     conf.name = unique_name

     user_conf = conf.user_conf
     user_conf.op_type_name = "logical_slice_assign"
     user_conf.input["value"].s.append("{}/value_0".format(unique_name))
     user_conf.input["ref"].s.append("{}/ref_0".format(unique_name))

     # The op runs under the placement of the blob being assigned to.
     parallel_conf = ref_blob_object.parallel_desc_symbol.parallel_conf
     attrs = user_conf.attr
     attrs["parallel_conf"].at_string = str(parallel_conf)
     attrs["start"].at_list_int64.val[:] = start
     attrs["stop"].at_list_int64.val[:] = stop
     attrs["step"].at_list_int64.val[:] = [1] * len(start)

     blob_objects = {}
     blob_objects["ref_0"] = ref_blob_object
     blob_objects["value_0"] = value_blob_object
     scope_id = _GetScopeSymbolIdFromEagerBlob(ref_blob)
     inferred = op_infer_util.Infer(conf, blob_objects, scope_id)
     builder.StatelessCall(
         inferred,
         parallel_conf=parallel_conf,
         bn_in_op2blob_object=blob_objects,
     )
Exemplo n.º 8
0
def ConstructNaiveBoxingOpConf(
    produced_blob_object,
    consumer_op_arg_parallel_attr,
    in_parallel_num,
    out_parallel_num,
):
    """Build a boxing operator conf and return its inferred op attribute.

    Input side: ``concat_box`` on the producer's split axis when the
    producer's sbp is split, ``concat_box`` on axis 0 when there is exactly
    one input, otherwise ``add_box`` (the producer must then be
    partial-sum). Output side: ``split_box`` on the consumer's split axis,
    or on axis 0 when the consumer is not split (which requires
    ``out_parallel_num == 1``).

    Args:
        produced_blob_object: blob object feeding every ``in_<i>`` input.
        consumer_op_arg_parallel_attr: parallel attribute whose
            ``sbp_parallel`` selects the output split axis.
        in_parallel_num: number of boxing inputs.
        out_parallel_num: number of boxing outputs.

    Returns:
        The result of ``op_infer_util.Infer`` for the constructed conf.
    """
    op_conf = op_conf_pb.OperatorConf()
    op_conf.name = "undefined_boxing_op_name"
    op_conf.device_tag = "cpu"
    boxing = op_conf.boxing_conf
    boxing.lbi.op_name = "undefined_boxing_op_name"
    boxing.lbi.blob_name = "undefined_boxing_blob_name"
    boxing.in_num = in_parallel_num
    boxing.out_num = out_parallel_num

    producer_sbp = produced_blob_object.op_arg_parallel_attr.sbp_parallel
    if producer_sbp.HasField("split_parallel"):
        boxing.concat_box.axis = producer_sbp.split_parallel.axis
    elif in_parallel_num == 1:
        boxing.concat_box.axis = 0
    else:
        assert producer_sbp.HasField("partial_sum_parallel")
        boxing.add_box.SetInParent()

    consumer_sbp = consumer_op_arg_parallel_attr.sbp_parallel
    if consumer_sbp.HasField("split_parallel"):
        split_axis = consumer_sbp.split_parallel.axis
    else:
        assert out_parallel_num == 1
        split_axis = 0
    boxing.split_box.axis = split_axis

    blob_shape = produced_blob_object.op_arg_blob_attr.shape
    part_nums = balanced_splitter.BalancedPartNums(blob_shape[split_axis],
                                                   out_parallel_num)
    boxing.split_box.part_num.extend(part_nums)

    # Every boxing input slot refers to the same produced blob object.
    input_blob_objects = {}
    for index in range(in_parallel_num):
        input_blob_objects["in_%s" % index] = produced_blob_object
    return op_infer_util.Infer(op_conf, input_blob_objects)
Exemplo n.º 9
0
def _BuildCopyInstruction(builder, produced_blob_object, op_conf,
                          to_device_tag):
    """Issue a cross-device copy of ``produced_blob_object`` via ``builder``.

    Only two directions are handled: "gpu" -> "cpu" and "cpu" -> "gpu".

    Args:
        builder: instruction builder providing ``BoxingCudaD2HStatelessCall``,
            ``BoxingCudaH2DStatelessCall`` and ``CudaHostPinBlob``.
        produced_blob_object: blob object to copy; its
            ``parallel_desc_symbol`` supplies the source device tag.
        op_conf: operator conf handed to ``op_infer_util.Infer``.
        to_device_tag: destination device tag; must differ from the source
            device tag.

    Returns:
        The blob object stored under ``"out"`` in the blob-object dict after
        the call, with its ``sbp_parallel`` copied from
        ``produced_blob_object``.

    Raises:
        AssertionError: if the source and destination device tags are equal.
        NotImplementedError: if the tag pair is neither cpu/gpu nor gpu/cpu.
    """
    source_parallel_desc_symbol = produced_blob_object.parallel_desc_symbol
    x_device_tag = source_parallel_desc_symbol.device_tag
    bn_in_op2blob_object = {"in": produced_blob_object}
    op_attribute = op_infer_util.Infer(op_conf, bn_in_op2blob_object)
    assert to_device_tag != x_device_tag, (to_device_tag, x_device_tag)
    if to_device_tag == "cpu" and x_device_tag == "gpu":
        # Device -> host: issued under the source (gpu) placement.
        x_parallel_conf = source_parallel_desc_symbol.parallel_conf
        builder.BoxingCudaD2HStatelessCall(
            op_attribute,
            x_parallel_conf,
            bn_in_op2blob_object=bn_in_op2blob_object)
    elif to_device_tag == "gpu" and x_device_tag == "cpu":
        # Host -> device: issued under the source placement with its device
        # tag replaced by the destination tag.
        out_parallel_desc_symbol = TryReplaceDeviceTag(
            builder, source_parallel_desc_symbol, to_device_tag)
        out_parallel_conf = out_parallel_desc_symbol.parallel_conf
        with builder.CudaHostPinBlob(produced_blob_object):
            builder.BoxingCudaH2DStatelessCall(
                op_attribute,
                out_parallel_conf,
                bn_in_op2blob_object=bn_in_op2blob_object,
            )
    else:
        raise NotImplementedError(
            "invalid device found. to_device_tag: %s, x_device_tag: %s" %
            (to_device_tag, x_device_tag))
    # "out" is never stored by this function; presumably the stateless call
    # above adds it to the dict (confirm against the builder).
    out_blob_object = bn_in_op2blob_object["out"]
    out_blob_object.op_arg_parallel_attr.sbp_parallel.CopyFrom(
        produced_blob_object.op_arg_parallel_attr.sbp_parallel)
    return out_blob_object
Exemplo n.º 10
0
def _GetCpu0VariableBlobFromNumpy(
        np_array: np.ndarray,
        dtype: dtype_util.dtype) -> oneflow_api.EagerConsistentBlob:
    """
    Create a variable under the ("cpu", "0:0") placement and feed it
    the contents of `np_array`.

    The variable is non-trainable, takes its shape from `np_array`, and is
    zero-initialized before the value is fed in.

    Note: the `dtype` argument cannot be replaced by
    convert_numpy_dtype_to_oneflow_dtype(np_array.dtype),
    because np.int8 == np.char, so
    numpy_dtype_to_oneflow_dtype(oneflow_dtype_to_numpy_dtype(flow.int8))
    may be flow.char
    """
    with oneflow.scope.placement("cpu", "0:0"):
        variable_name = id_util.UniqueStr(OP_PREFIX)
        array_shape = np_array.shape
        zeros = initializer_util.zeros_initializer(dtype=dtype)
        variable_conf = get_variable.GenerateVariableOpConf(
            name=variable_name,
            shape=array_shape,
            dtype=dtype,
            initializer=zeros,
            trainable=False,
        )
        # Use the device tag of the scope opened by the placement above.
        scope = oneflow.current_scope()
        variable_conf.device_tag = scope.device_parallel_desc_symbol.device_tag
        inferred_attribute = op_infer_util.Infer(variable_conf, {})
        variable_blob = get_variable.CreateEagerVariableBlob(
            inferred_attribute, job_name=FAKE_JOB_NAME)

        interface_op_read_and_write.FeedValueToInterfaceBlobObject(
            variable_blob.blob_object, np_array)
        return variable_blob
Exemplo n.º 11
0
 def BuildModelLoadInstruction(builder):
     """Infer the model-load op and issue it as a stateless call.

     ``path_input_blob_objects``, ``model_load_blob_objects`` and
     ``model_load_op_conf`` are read from the enclosing scope. The "out"
     blob of the path-input op becomes the "path" input of the load op,
     and the call is issued under that blob's parallel conf.
     """
     path_object = path_input_blob_objects["out"]
     model_load_blob_objects["path"] = path_object
     inferred_attribute = op_infer_util.Infer(
         model_load_op_conf, ibn2blob_object=model_load_blob_objects)
     path_parallel_conf = path_object.parallel_desc_symbol.parallel_conf
     builder.StatelessCall(
         inferred_attribute,
         path_parallel_conf,
         bn_in_op2blob_object=model_load_blob_objects,
     )
Exemplo n.º 12
0
def _GetEagerNcclAllReduce(parallel_conf, ibn2blob_object):
    """Build the ``eager_nccl_all_reduce`` user op and return its inference.

    Args:
        parallel_conf: stored, stringified, in the op's ``parallel_conf``
            attribute.
        ibn2blob_object: input blob-object map handed to
            ``op_infer_util.Infer``.

    Returns:
        The result of ``op_infer_util.Infer`` for the constructed conf.
    """
    # The op name and the op type name are the same string.
    op_type = "eager_nccl_all_reduce"
    conf = op_conf_pb.OperatorConf()
    conf.device_tag = "gpu"
    conf.name = op_type
    user_conf = conf.user_conf
    user_conf.op_type_name = op_type
    user_conf.input["in"].s.append("eager_nccl_all_reduce/in_0")
    user_conf.output["out"].s.append("eager_nccl_all_reduce/out_0")
    user_conf.attr["parallel_conf"].at_string = str(parallel_conf)
    return op_infer_util.Infer(conf, ibn2blob_object)
Exemplo n.º 13
0
 def BuildModelIOPathInputInstruction(builder):
     """Infer the model-IO path-input op and issue it as a stateless call.

     ``op_conf`` and ``bn_in_op2blob_object`` are read from the enclosing
     scope. The op is inferred with an empty input map, converted to a cfg
     op attribute, and issued under the current scope's device parallel
     conf.
     """
     inferred_attribute = op_infer_util.Infer(op_conf, ibn2blob_object={})
     current_scope = oneflow.current_scope()
     device_parallel_conf = (
         current_scope.device_parallel_desc_symbol.parallel_conf)
     inferred_text = str(inferred_attribute)
     cfg_attribute = oneflow_api.deprecated.MakeOpAttributeByString(
         inferred_text)
     builder.StatelessCall(
         cfg_attribute,
         device_parallel_conf,
         bn_in_op2blob_object,
         boxing_util.BoxingTo,
     )
Exemplo n.º 14
0
    def BuildModelSaveInstruction(builder):
        """Infer the model-save op and issue it as a stateless call.

        ``path_input_blob_objects``, ``model_save_blob_objects``,
        ``var_blobs`` and ``model_save_op_conf`` are read from the enclosing
        scope. The "out" blob of the path-input op becomes the "path" input,
        and each variable blob is registered as ``in_<index>``.
        """
        path_object = path_input_blob_objects["out"]
        model_save_blob_objects["path"] = path_object
        for index, var_blob in enumerate(var_blobs):
            input_name = "in_{}".format(index)
            model_save_blob_objects[input_name] = var_blob.blob_object

        inferred_attribute = op_infer_util.Infer(
            model_save_op_conf, ibn2blob_object=model_save_blob_objects)
        path_parallel_conf = path_object.parallel_desc_symbol.parallel_conf
        builder.StatelessCall(
            inferred_attribute,
            path_parallel_conf,
            bn_in_op2blob_object=model_save_blob_objects,
        )
Exemplo n.º 15
0
 def BuildModelLoadInstruction(builder):
     """Infer the model-load op and issue it as a stateless call.

     ``path_input_blob_objects``, ``model_load_blob_objects`` and
     ``model_load_op_conf`` are read from the enclosing scope. The "out"
     blob of the path-input op becomes the "path" input of the load op; the
     inferred attribute is converted to a cfg op attribute before the call.
     """
     path_object = path_input_blob_objects["out"]
     model_load_blob_objects["path"] = path_object
     inferred_attribute = op_infer_util.Infer(
         model_load_op_conf, ibn2blob_object=model_load_blob_objects)
     path_parallel_conf = path_object.parallel_desc_symbol.parallel_conf
     inferred_text = str(inferred_attribute)
     cfg_attribute = oneflow_api.deprecated.MakeOpAttributeByString(
         inferred_text)
     builder.StatelessCall(
         cfg_attribute,
         path_parallel_conf,
         model_load_blob_objects,
         boxing_util.BoxingTo,
     )
Exemplo n.º 16
0
    def BuildModelSaveInstruction(builder):
        """Infer the model-save op and issue it as a stateless call.

        ``path_input_blob_objects``, ``model_save_blob_objects``,
        ``var_blobs`` and ``model_save_op_conf`` are read from the enclosing
        scope. The "out" blob of the path-input op becomes the "path" input,
        each variable blob is registered as ``in_<index>``, and the inferred
        attribute is converted to a cfg op attribute before the call.
        """
        path_object = path_input_blob_objects["out"]
        model_save_blob_objects["path"] = path_object
        for index, var_blob in enumerate(var_blobs):
            input_name = "in_{}".format(index)
            model_save_blob_objects[input_name] = var_blob.blob_object

        inferred_attribute = op_infer_util.Infer(
            model_save_op_conf, ibn2blob_object=model_save_blob_objects)
        path_parallel_conf = path_object.parallel_desc_symbol.parallel_conf
        inferred_text = str(inferred_attribute)
        cfg_attribute = oneflow_api.deprecated.MakeOpAttributeByString(
            inferred_text)
        builder.StatelessCall(
            cfg_attribute,
            path_parallel_conf,
            model_save_blob_objects,
            boxing_util.BoxingTo,
        )
Exemplo n.º 17
0
def BuildAssignInstruction(builder, ref_blob_object, value_blob_object,
                           op_conf):
    """Issue the assign op that feeds ``value_blob_object`` into the ref blob.

    Both blobs must report the same ``machine_id2device_id_list``. The
    builder call is chosen from the two device tags: same tag, value on gpu
    with ref on cpu, or value on cpu with ref on gpu.

    Args:
        builder: instruction builder providing the ``NoBoxing*StatelessCall``
            methods.
        ref_blob_object: blob object bound to the op's "ref" input.
        value_blob_object: blob object bound to the op's "value" input.
        op_conf: operator conf handed to ``op_infer_util.Infer``.

    Raises:
        AssertionError: if the two blobs report different device lists.
        NotImplementedError: for any other combination of device tags.
    """
    oneflow_api.TryDisableBlobCache(ref_blob_object)
    ref_symbol = ref_blob_object.parallel_desc_symbol
    ref_parallel_conf = ref_symbol.parallel_conf
    ref_devices = ref_blob_object.parallel_desc_symbol.machine_id2device_id_list
    value_devices = value_blob_object.parallel_desc_symbol.machine_id2device_id_list
    assert ref_devices == value_devices, "\nref_devices: %s\nvalue_devices: %s" % (
        ref_devices,
        value_devices,
    )
    ref_tag = ref_blob_object.parallel_desc_symbol.device_tag
    value_tag = value_blob_object.parallel_desc_symbol.device_tag

    blob_objects = oneflow_api.deprecated.BnInOp2BlobObject()
    blob_objects["ref"] = ref_blob_object
    blob_objects["value"] = value_blob_object
    inferred = op_infer_util.Infer(op_conf, blob_objects)
    cfg_attribute = oneflow_api.deprecated.MakeOpAttributeByString(
        str(inferred))

    if ref_tag == value_tag:
        builder.NoBoxingStatelessCall(cfg_attribute, ref_parallel_conf,
                                      blob_objects)
        return
    if ref_tag == "cpu" and value_tag == "gpu":
        # Device -> host: issued under the value blob's placement.
        value_parallel_conf = value_blob_object.parallel_desc_symbol.parallel_conf
        builder.NoBoxingCudaD2HStatelessCall(cfg_attribute,
                                             value_parallel_conf,
                                             blob_objects,
                                             TryReplaceDeviceTag)
        return
    if ref_tag == "gpu" and value_tag == "cpu":
        # Host -> device: issued under the ref blob's placement.
        with _CudaHostPinBlob(builder, value_blob_object):
            builder.NoBoxingCudaH2DStatelessCall(cfg_attribute,
                                                 ref_parallel_conf,
                                                 blob_objects)
        return
    raise NotImplementedError(
        "invalid device found. ref_device_tag: %s, value_device_tag: %s" %
        (ref_tag, value_tag))
Exemplo n.º 18
0
def ConstructNaiveBoxingOpConf(
    produced_blob_object,
    consumer_op_arg_parallel_attr,
    in_parallel_num,
    out_parallel_num,
):
    """Build a boxing operator conf and return its inferred op attribute.

    Input side: ``concat_box`` on the producer's split axis when the
    producer's sbp is split, ``concat_box`` on axis 0 when there is exactly
    one input, otherwise ``add_box`` (the producer must then be
    partial-sum). Output side: ``split_box`` on the consumer's split axis,
    or on axis 0 when the consumer is not split (which requires
    ``out_parallel_num == 1``).

    Args:
        produced_blob_object: blob object feeding every ``in_<i>`` input.
        consumer_op_arg_parallel_attr: parallel attribute whose
            ``sbp_parallel`` selects the output split axis.
        in_parallel_num: number of boxing inputs.
        out_parallel_num: number of boxing outputs.

    Returns:
        The result of ``op_infer_util.Infer`` for the constructed conf.
    """
    op_conf = op_conf_pb.OperatorConf()
    op_conf.name = "undefined_boxing_op_name"
    op_conf.device_tag = "cpu"
    boxing = op_conf.boxing_conf
    boxing.lbi.op_name = "undefined_boxing_op_name"
    boxing.lbi.blob_name = "undefined_boxing_blob_name"
    boxing.in_num = in_parallel_num
    boxing.out_num = out_parallel_num

    producer_sbp = produced_blob_object.op_arg_parallel_attr.sbp_parallel
    if producer_sbp.has_split_parallel():
        boxing.concat_box.axis = producer_sbp.split_parallel().axis()
    elif in_parallel_num == 1:
        boxing.concat_box.axis = 0
    else:
        assert producer_sbp.has_partial_sum_parallel()
        boxing.add_box.SetInParent()

    consumer_sbp = consumer_op_arg_parallel_attr.sbp_parallel
    if consumer_sbp.has_split_parallel():
        split_axis = consumer_sbp.split_parallel().axis()
    else:
        assert out_parallel_num == 1
        split_axis = 0
    boxing.split_box.axis = split_axis

    blob_shape = produced_blob_object.op_arg_blob_attr.shape
    part_nums = balanced_splitter.BalancedPartNums(blob_shape[split_axis],
                                                   out_parallel_num)
    boxing.split_box.part_num.extend(part_nums)

    # Every boxing input slot refers to the same produced blob object.
    input_blob_objects = oneflow._oneflow_internal.deprecated.BnInOp2BlobObject()
    for index in range(in_parallel_num):
        input_blob_objects["in_%s" % index] = produced_blob_object
    return op_infer_util.Infer(op_conf, input_blob_objects)
Exemplo n.º 19
0
def BuildAssignInstruction(builder, ref_blob_object, value_blob_object,
                           op_conf):
    """Issue the assign op that feeds ``value_blob_object`` into the ref blob.

    Both blobs must report the same ``machine_id2device_id_list``. The
    builder call is chosen from the two device tags: same tag, value on gpu
    with ref on cpu, or value on cpu with ref on gpu.

    Args:
        builder: instruction builder providing the ``Boxing*StatelessCall``
            methods and ``CudaHostPinBlob``.
        ref_blob_object: blob object bound to the op's "ref" input.
        value_blob_object: blob object bound to the op's "value" input.
        op_conf: operator conf handed to ``op_infer_util.Infer``.

    Raises:
        AssertionError: if the two blobs report different device lists.
        NotImplementedError: for any other combination of device tags.
    """
    blob_cache_util.TryDisableBlobCache(ref_blob_object)
    ref_symbol = ref_blob_object.parallel_desc_symbol
    ref_parallel_conf = ref_symbol.parallel_conf
    ref_devices = ref_blob_object.parallel_desc_symbol.machine_id2device_id_list
    value_devices = value_blob_object.parallel_desc_symbol.machine_id2device_id_list
    assert ref_devices == value_devices, "\nref_devices: %s\nvalue_devices: %s" % (
        ref_devices,
        value_devices,
    )
    ref_tag = ref_blob_object.parallel_desc_symbol.device_tag
    value_tag = value_blob_object.parallel_desc_symbol.device_tag

    blob_objects = {}
    blob_objects["ref"] = ref_blob_object
    blob_objects["value"] = value_blob_object
    inferred = op_infer_util.Infer(op_conf, blob_objects)

    if ref_tag == value_tag:
        builder.BoxingStatelessCall(
            inferred,
            parallel_conf=ref_parallel_conf,
            bn_in_op2blob_object=blob_objects,
        )
        return
    if ref_tag == "cpu" and value_tag == "gpu":
        # Device -> host: issued under the value blob's placement.
        value_parallel_conf = value_blob_object.parallel_desc_symbol.parallel_conf
        builder.BoxingCudaD2HStatelessCall(
            inferred,
            value_parallel_conf,
            bn_in_op2blob_object=blob_objects,
        )
        return
    if ref_tag == "gpu" and value_tag == "cpu":
        # Host -> device: issued under the ref blob's placement.
        with builder.CudaHostPinBlob(value_blob_object):
            builder.BoxingCudaH2DStatelessCall(
                inferred,
                ref_parallel_conf,
                bn_in_op2blob_object=blob_objects,
            )
        return
    raise NotImplementedError(
        "invalid device found. ref_device_tag: %s, value_device_tag: %s" %
        (ref_tag, value_tag))