Exemplo n.º 1
0
def elastic_example(args):
    """Walk the elastic MNIST training set and report every element read.

    The dataset is created from ``<args.data_path>/mnist/train`` using
    ``args.batch_size``.  After printing the dataset size, the function
    enters a KungFu context on ``args.device`` and consumes at most
    ``args.max_step`` elements (never more than the dataset size), writing
    the index, dtype and shape of each ``(x, y)`` pair to stderr.
    """
    train_dir = os.path.join(args.data_path, 'mnist', 'train')
    dataset = create_elastic_mnist(data_path=train_dir,
                                   batch_size=args.batch_size)
    total = dataset.get_dataset_size()
    print('total steps: %d when using batch size: %d' %
          (total, args.batch_size))

    with kfops.KungFuContext(device=args.device):
        # NOTE: an elastic State(0, 60000) with sync()/global_offset()
        # bookkeeping was sketched here previously but is not enabled.
        stream = enumerate(dataset)
        step_count = min(args.max_step, total)

        for _ in range(step_count):
            idx, (x, y) = next(stream)
            report = ('data consumed: %d/%d %s%s %s%s' %
                      (idx, total, x.dtype, x.shape, y.dtype, y.shape))
            print(report, file=sys.stderr)
Exemplo n.º 2
0
def train(args):
    """Apply a KungFu all-reduce to a fixed float32 tensor and print it.

    Inside a KungFu context on ``args.device`` the tensor ``[1.0, 2.0, 3.0]``
    is printed, passed through ``KungFuAllReduce`` and the result is printed.
    """
    with kfops.KungFuContext(device=args.device):
        reduce_op = kfops.KungFuAllReduce()
        values = np.array([1.0, 2.0, 3.0]).astype(np.float32)
        tensor_in = ms.Tensor(values)
        print(tensor_in)
        tensor_out = reduce_op(tensor_in)
        print(tensor_out)
Exemplo n.º 3
0
def main():
    """Parse and log the arguments, then time ``run``.

    When ``args.use_kungfu`` is set, the timed run happens inside a KungFu
    context on ``args.device``; otherwise it runs directly.
    """
    cli_args = parse_args()
    log_args(cli_args)

    # Plain path first: no KungFu context required.
    if not cli_args.use_kungfu:
        log_duration(run, cli_args)
        return

    with kfops.KungFuContext(device=cli_args.device):
        log_duration(run, cli_args)
Exemplo n.º 4
0
def main():
    """Configure MindSpore graph mode and run the KungFu NCCL debug hook.

    Arguments are parsed and logged, the MindSpore context is set to graph
    mode on ``args.device`` with graph saving turned off, and
    ``kungfu_debug_nccl`` is called inside a KungFu context.
    """
    cli_args = parse_args()
    log_args(cli_args)

    context_options = {
        'mode': ms.context.GRAPH_MODE,
        'device_target': cli_args.device,
        'save_graphs': False,
    }
    ms.context.set_context(**context_options)

    # The KungFu context is pinned to 'CPU' so that KungFu NCCL is not
    # initialised by the context itself.
    with kfops.KungFuContext(device='CPU'):
        kfops.kungfu_debug_nccl()
Exemplo n.º 5
0
def main():
    """Run a KungFu all-reduce on a fixed tensor under MindSpore graph mode.

    The MindSpore context is set to graph mode on ``args.device``; then,
    inside a KungFu context on the same device, the float32 tensor
    ``[1.0, 2.0, 3.0]`` is printed before and after ``KungFuAllReduce``.
    """
    cli_args = parse_args()
    context_options = {
        'mode': ms.context.GRAPH_MODE,
        'device_target': cli_args.device,
    }
    ms.context.set_context(**context_options)

    with kfops.KungFuContext(device=cli_args.device):
        reduce_op = kfops.KungFuAllReduce()
        values = np.array([1.0, 2.0, 3.0]).astype(np.float32)
        tensor_in = ms.Tensor(values)
        print(tensor_in)
        tensor_out = reduce_op(tensor_in)
        print(tensor_out)
Exemplo n.º 6
0
def main():
    """Parse the arguments and call ``run`` inside a KungFu context."""
    cli_args = parse_args()
    kungfu_ctx = kfops.KungFuContext(device=cli_args.device)
    with kungfu_ctx:
        run(cli_args)
Exemplo n.º 7
0
    download_dataset(args.data_dir)

    # define the loss function
    net_loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

    # create the network
    network = LeNet5()
    # define the optimizer
    net_opt = build_optimizer(args, network)
    config_ck = CheckpointConfig(save_checkpoint_steps=1875,
                                 keep_checkpoint_max=10)
    # save the network model and parameters for subsequence fine-tuning
    ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck)
    # group layers into an object with training and evaluation features
    model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})

    train_net(model, args.epoch_size, args.data_dir, args.repeat_size,
              ckpoint_cb, dataset_sink_mode)
    # TODO: test
    # test_net(network, model, args.data_dir)


if __name__ == "__main__":
    # Script entry point: main() runs inside a KungFu context only when
    # the parsed arguments request it via use_kungfu.
    args = parse_args()
    if not args.use_kungfu:
        main(args)
    else:
        with kfops.KungFuContext(device=args.device):
            main(args)