Пример #1
0
def main(epochs, buffer_size, batch_size, train_mode,
         distribution_strategy, num_gpus,
         workers, w_type, w_index):
    """Train the 'resnet56' benchmark under a distribution strategy.

    Args:
        epochs: Forwarded to ``Benchmark`` as its first argument.
        buffer_size: Not used by this function.
        batch_size: Batch size handed to ``Dataset`` and used as the divisor
            when deriving the number of steps per epoch.
        train_mode: Forwarded to ``Benchmark.run``.
        distribution_strategy: Passed to ``get_distribution_strategy`` as
            ``strategy``.
        num_gpus: Passed to ``get_distribution_strategy`` as ``num_gpus``.
        workers: Passed to ``get_distribution_strategy`` as ``workers``.
        w_type: Passed to ``get_distribution_strategy`` as ``typ``.
        w_index: Passed to ``get_distribution_strategy`` as ``index``.
    """
    model_name = 'resnet56'

    strategy = get_distribution_strategy(strategy=distribution_strategy,
                                         num_gpus=num_gpus,
                                         workers=workers,
                                         typ=w_type,
                                         index=w_index)
    print_msg('Number of devices: {}'.format(strategy.num_replicas_in_sync),
              'info')

    # Use the caller's batch size for the dataset as well: steps_per_epoch
    # below divides by the same value, so the two must agree (previously the
    # dataset was always built with a hard-coded batch size of 128).
    data_obj = Dataset(batch_size=batch_size)
    train_dataset, test_dataset = data_obj.create_dataset()
    steps_per_epoch = data_obj.get_buffer_size() // batch_size
    train_obj = Benchmark(epochs, steps_per_epoch, model_name)

    with strategy.scope():
        # Create and compile model within strategy scope
        train_obj.create_model(model_name)
        train_obj.compile_model()

    print_msg('Training...', 'info')
    train_obj.run(train_dataset, test_dataset, train_mode)
    print_msg('Training Done.', 'succ')
Пример #2
0
def main(epochs, buffer_size, batch_size, train_mode, display_every,
         distribution_strategy, num_gpus, workers, w_type, w_index,
         setup_cluster, verbose):
    """Set up a distribution strategy and run the 'resnet56' benchmark.

    Args:
        epochs: Forwarded to ``Benchmark``.
        buffer_size: Not used by this function.
        batch_size: Given to ``Dataset`` and ``Benchmark``; also the divisor
            for the steps-per-epoch computation.
        train_mode: Passed to ``get_distribution_strategy`` and to
            ``Benchmark.run``.
        display_every: Forwarded to ``Benchmark``.
        distribution_strategy: Passed to ``get_distribution_strategy`` as
            ``strategy``.
        num_gpus: Passed to ``get_distribution_strategy``; when it equals 1
            it is replaced by ``strategy.num_replicas_in_sync`` before being
            logged and forwarded to ``Benchmark``.
        workers: Passed to ``get_distribution_strategy`` as ``workers``.
        w_type: Passed to ``get_distribution_strategy`` as ``typ``.
        w_index: Passed to ``get_distribution_strategy`` as ``index``.
        setup_cluster: Passed to ``get_distribution_strategy`` as ``setup``.
        verbose: When truthy, its string form is written to the
            ``TF_CPP_MIN_LOG_LEVEL`` environment variable.
    """
    if verbose:
        os.environ["TF_CPP_MIN_LOG_LEVEL"] = str(verbose)

    strategy_kwargs = {
        'strategy': distribution_strategy,
        'train_mode': train_mode,
        'num_gpus': num_gpus,
        'workers': workers,
        'typ': w_type,
        'index': w_index,
        'setup': setup_cluster,
    }
    strategy = get_distribution_strategy(**strategy_kwargs)

    # A request for a single GPU is replaced by the replica count that the
    # strategy reports.
    if num_gpus == 1:
        num_gpus = strategy.num_replicas_in_sync
    devices_message = 'Number of devices: {}'.format(num_gpus)
    print_msg(devices_message, 'info')

    dataset_builder = Dataset(batch_size)
    train_dataset, test_dataset = dataset_builder.create_dataset()
    steps_per_epoch = dataset_builder.get_buffer_size() // batch_size

    benchmark = Benchmark(
        epochs,
        steps_per_epoch,
        batch_size,
        display_every,
        num_gpus,
        'resnet56',
        strategy,
    )

    print_msg('Training...', 'info')
    benchmark.run(train_dataset, test_dataset, train_mode)
    print_msg('Training Done.', 'succ')