# Example #1
# 0
 def create_mobilenet_model_ops(model, loss_scale):
     """Build the MobileNet forward pass (plus accuracy op) on ``model``.

     Intended as a ``forward_pass_builder_fun`` for
     ``data_parallel_model.Parallelize_GPU`` (see the commented call below),
     whose contract requires the list of losses to be returned.

     Args:
         model: Caffe2 ModelHelper the ops are added to.
         loss_scale: per-device loss multiplier so gradients average
             correctly across GPUs.

     Returns:
         ``[loss]`` — single-element list of the scaled loss blob.

     NOTE(review): ``args`` is read as a module-level global — confirm it
     is populated before this builder runs.
     """
     [softmax, loss] = mobilenet.create_mobilenet(
         model,
         "data",
         num_input_channels=args.num_channels,
         num_labels=args.num_labels,
         label="label")
     # Fix: the original commented out the scaling and returned nothing,
     # silently dropping the loss. Restore the canonical builder shape
     # (matches the reference copy kept in Train's comments).
     loss = model.Scale(loss, scale=loss_scale)
     brew.accuracy(model, [softmax, "label"], "accuracy")
     return [loss]
def Train(args):
    """Export a trained MobileNet as Caffe2 ``init_net``/``predict_net`` protobufs.

    Despite the name, the large commented-out sections show this began as a
    multi-GPU trainer; what remains (1) builds a test model, (2) loads trained
    weights from ``args.load_model_path``, (3) copies them from the ``gpu_0/``
    scope into a single-device deploy model, and (4) serializes the init and
    predict nets to disk.

    Args:
        args: parsed CLI namespace; fields read here: ``gpus``, ``num_gpus``,
            ``batch_size``, ``num_shards``, ``epoch_size``, ``num_channels``,
            ``num_labels``, ``load_model_path``.

    NOTE(review): ``log``, ``output_predict_net`` and ``output_init_net`` are
    not defined in this function — presumably module-level globals; confirm
    they exist before running.
    """
    # Either use specified device list or generate one
    if args.gpus is not None:
        gpus = [int(x) for x in args.gpus.split(',')]
        num_gpus = len(gpus)
    else:
        gpus = list(range(args.num_gpus))
        num_gpus = args.num_gpus

    log.info("Running on GPUs: {}".format(gpus))

    # Verify valid batch size
    total_batch_size = args.batch_size
    batch_per_device = total_batch_size // num_gpus
    assert \
        total_batch_size % num_gpus == 0, \
        "Number of GPUs must divide batch size"

    # Round down epoch size to closest multiple of batch size across machines
    global_batch_size = total_batch_size * args.num_shards
    epoch_iters = int(args.epoch_size / global_batch_size)
    args.epoch_size = epoch_iters * global_batch_size
    log.info("Using epoch size: {}".format(args.epoch_size))

    # Create ModelHelper object
    # train_arg_scope = {
    #     'order': 'NCHW',
    #     'use_cudnn': True,
    #     'cudnn_exhaustice_search': True,
    #     'ws_nbytes_limit': (args.cudnn_workspace_limit_mb * 1024 * 1024),
    # }
    # train_model = model_helper.ModelHelper(
    #     name="mobilenet", arg_scope=train_arg_scope
    # )

    num_shards = args.num_shards

    rendezvous = None

    # Model building functions
    # def create_mobilenet_model_ops(model, loss_scale):
    #     [softmax, loss] = mobilenet.create_mobilenet(
    #         model,
    #         "data",
    #         num_input_channels=args.num_channels,
    #         num_labels=args.num_labels,
    #         label="label",
    #         is_test=True,
    #     )
    #     loss = model.Scale(loss, scale=loss_scale)
    #     brew.accuracy(model, [softmax, "label"], "accuracy")
    #     return [loss]

    # def add_optimizer(model):
    #     stepsz = int(30 * args.epoch_size / total_batch_size / num_shards)
    #     optimizer.add_weight_decay(model, args.weight_decay)
    #     optimizer.build_sgd(
    #         model,
    #         args.base_learning_rate,
    #         momentum=0.9,
    #         nesterov=1,
    #         policy="step",
    #         stepsize=stepsz,
    #         gamma=0.1
    #     )

    # def add_image_input(model):
    #     AddImageInput(
    #         model,
    #         reader,
    #         batch_size=batch_per_device,
    #         img_size=args.image_size,
    #     )
    # def add_post_sync_ops(model):
    #     for param_info in model.GetOptimizationParamInfo(model.GetParams()):
    #         if param_info.blob_copy is not None:
    #             model.param_init_net.HalfToFloat(
    #                 param_info.blob,
    #                 param_info.blob_copy[core.DataType.FLOAT]
    #             )

    # Test model: single ModelHelper in NCHW order (cuDNN options disabled).
    test_arg_scope = {
        'order': "NCHW",
        # 'use_cudnn': True,
        # 'cudnn_exhaustive_search': True,
    }
    test_model = model_helper.ModelHelper(name="mobilenet_test",
                                          arg_scope=test_arg_scope)

    # Deploy model: forward-only MobileNet (is_test=True) used for export.
    deploy_arg_scope = {'order': "NCHW"}
    deploy_model = model_helper.ModelHelper(name="mobilenet_deploy",
                                            arg_scope=deploy_arg_scope)
    mobilenet.create_mobilenet(
        deploy_model,
        "data",
        num_input_channels=args.num_channels,
        num_labels=args.num_labels,
        is_test=True,
    )

    # raw_data = np.random.randn(1, 3, 224, 224).astype(np.float32)
    # workspace.FeedBlob("data", raw_data)

    # workspace.RunNetOnce(deploy_model.param_init_net)
    # workspace.CreateNet(deploy_model.net)
    # mobilenet.create_mobilenet(
    #     test_model,
    #     "gpu_0/data",
    #     num_input_channels=args.num_channels,
    #     num_labels=args.num_labels,
    #     is_test=True,
    # )
    # test_reader = test_model.CreateDB(
    #     "test_reader",
    #     db=args.test_data,
    #     db_type=args.db_type,
    # )

    # def test_input_fn(model):
    #     AddImageInput(
    #         model,
    #         test_reader,
    #         batch_size=batch_per_device,
    #         img_size=args.image_size,
    #     )

    # data_parallel_model.Parallelize_GPU(
    #     test_model,
    #     input_builder_fun=test_input_fn,
    #     forward_pass_builder_fun=create_mobilenet_model_ops,
    #     post_sync_builder_fun=add_post_sync_ops,
    #     param_update_builder_fun=None,
    #     devices=gpus,
    # )

    # inputs = np.zeros((32,3,224,224), dtype='f')
    # labels = np.zeros((32,), dtype='f')
    # workspace.FeedBlob("gpu_0/data", inputs)
    # workspace.FeedBlob("gpu_0/label", labels)

    # Materialize the test model's parameter blobs in the global workspace,
    # then load the trained checkpoint into them.
    workspace.RunNetOnce(test_model.param_init_net)
    workspace.CreateNet(test_model.net)

    LoadModel(args.load_model_path, test_model)

    # Trained blobs live under the "gpu_0/" device scope; copy each one into
    # the unscoped deploy-model blob of the same name.
    prefix = "gpu_0/"
    for value in deploy_model.params:
        workspace.FeedBlob(value, workspace.FetchBlob(prefix + value))
    # SaveModel(args, test_model)

    # workspace.ResetWorkspace()
    # print(workspace.Blobs())
    # print(deploy_model.params)
    # print("=====================")
    # print(test_model.params)
    # print("=====================")
    # print(workspace.FetchBlob("gpu_0/comp_11_spatbn_2_rm"))
    # print(workspace.FetchBlob("comp_11_spatbn_2_rm"))
    # print(deploy_model.net.Proto())
    # print(deploy_model.param_init_net.Proto())
    # exit(0)

    # Build the init net by hand: one GivenTensorFill per trained parameter.
    init_net = caffe2_pb2.NetDef()

    # # print(len(deploy_model.params))
    # # print(deploy_model.param_init_net.Proto())
    # with open("params", 'wb') as f:
    #     f.write(str(deploy_model.param_init_net.Proto()))
    # Placeholder (1, 1) float32 buffer used to seed spatial-BN running
    # mean/var ("rm"/"riv") blobs below.
    # NOTE(review): shape (1, 1) looks like a stub — real running stats are
    # per-channel; confirm intended (the rm_riv ops are never added anyway).
    tmp_o = np.zeros((1, 1)).astype(np.float32)
    # print(tmp_o.shape)
    # print(type(tmp_o))
    # exit(0)
    init_net.name = "mobilenet_init"
    rm_riv = []  # collected but never extended into init_net (see commented loop below)
    for value in deploy_model.params:
        tmp = workspace.FetchBlob(prefix + value)
        # print(type(tmp.shape), type(tmp))

        # Heuristic: matches param names ending "...spatbn_<d>_<suffix>"
        # (single-digit BN index); the trailing "s" picks the scale param.
        if "spatbn" == str(value)[-10:-4]:
            # print(value)
            if "s" == str(value)[-1]:
                # print(str(value)[:-1] + "rm")
                # init_net.op.extend([core.CreateOperator("GivenTensorFill", [], [str(value)[:-1] + "rm"], arg=[utils.MakeArgument("shape", tmp_o.shape), utils.MakeArgument("values", tmp_o)])])
                rm_riv.append(
                    core.CreateOperator(
                        "GivenTensorFill", [], [str(value)[:-1] + "rm"],
                        arg=[
                            utils.MakeArgument("shape", tmp_o.shape),
                            utils.MakeArgument("values", tmp_o)
                        ]))
                rm_riv.append(
                    core.CreateOperator(
                        "GivenTensorFill", [], [str(value)[:-1] + "riv"],
                        arg=[
                            utils.MakeArgument("shape", tmp_o.shape),
                            utils.MakeArgument("values", tmp_o)
                        ]))
            # elif "b" == str(value)[-1]:
            #     # print(str(value)[:-1] + "riv")
            #     init_net.op.extend([core.CreateOperator("GivenTensorFill", [], [str(value)[:-1] + "riv"], arg=[utils.MakeArgument("shape", tmp_o.shape), utils.MakeArgument("values", tmp_o)])])
        # Every parameter becomes a GivenTensorFill carrying its trained values.
        init_net.op.extend([
            core.CreateOperator("GivenTensorFill", [], [value],
                                arg=[
                                    utils.MakeArgument("shape", tmp.shape),
                                    utils.MakeArgument("values", tmp)
                                ])
        ])
    # Dummy "data" blob so the init net also defines the network input
    # (batch 1, 3x224x224).
    init_net.op.extend([
        core.CreateOperator("ConstantFill", [], ["data"],
                            shape=(1, 3, 224, 224))
    ])
    # exit(0)
    # for value in rm_riv:
    # init_net.op.extend([value])

    # Expose softmax as the predict net's external output for inference.
    deploy_model.net._net.external_output.extend(["softmax"])
    predict_net = deploy_model.net._net

    # print(dir(deploy_model.net._net))

    # with open("pparams", 'wb') as f:
    #     f.write(str(deploy_model.param_init_net.Proto()))
    # print(workspace.Blobs())
    # for k, value in enumerate(deploy_model.params):
    #     # print(k,value)
    #     name = k + value
    #     name = workspace.FetchBlob(prefix + value)

    # tmp_work = {value: workspace.FetchBlob(prefix + value) for value in deploy_model.params}
    # # tmp_params = (str(deploy_model.params)

    # workspace.ResetWorkspace()
    # # print(workspace.Blobs())
    # # exit(0)
    # for value in deploy_model.params:
    #     workspace.FeedBlob(value, tmp_work[value])

    # # print(workspace.Blobs())
    # print(workspace.FetchBlob("last_out_b"))
    # exit(0)

    # deploy_model.net._net.external_output.extend(["softmax"])

    # #====================================================================
    # init_net, predict_net = me.Export(workspace, deploy_model.net, deploy_model.params)
    # # print(dir(predict_net.op.remove))
    # # # print(dir(caffe2_pb2.NetDef))
    # # print("===========")
    # # init_net.op.pop(0)
    # flag_di = []
    # print(len(init_net.op))
    # for k, value in enumerate(init_net.op):
    #     for x in value.output:
    #         if ("data" == str(x)) and ("GivenTensorFill" == str(value.type)):
    #             flag_di.append(k)

    # flag_di = sorted(flag_di)
    # for k, v in enumerate(flag_di):
    #     init_net.op.pop(v - k)
    # print(len(init_net.op))

    # flag_dp = []
    # print(len(predict_net.external_input))
    # for k, value in enumerate(predict_net.external_input):
    #     if "data" == str(value):
    #         flag_dp.append(k)

    # flag_dp = sorted(flag_dp)
    # for k, v in enumerate(flag_dp):
    #     predict_net.external_input.pop(v - k)

    # print(len(predict_net.external_input))

    # predict_net.external_input.extend(["data"])
    # init_net.op.extend([core.CreateOperator("ConstantFill", [], ["data"], shape=(1, 3, 224, 224))])
    # #==============================================

    # Human-readable text dumps of both nets.
    # NOTE(review): files are opened 'wb' but written with str(); this only
    # works on Python 2 (Python 3 raises TypeError) — confirm target runtime.
    with open("pred_net", 'wb') as f:
        f.write(str(predict_net))
    # with open("e_pred_net", 'wb') as f:
    # f.write(str(e_predict_net))
    with open("init_net", 'wb') as f:
        f.write(str(init_net))

    # Binary protobuf exports — the artifacts consumed at inference time.
    with open(output_predict_net, 'wb') as f:
        f.write(predict_net.SerializeToString())
    print(output_predict_net)

    with open(output_init_net, 'wb') as f:
        f.write(init_net.SerializeToString())
    print(output_init_net)

    print("OK!")