示例#1
0
def main(epochs, buffer_size, batch_size, train_mode,
        distribution_strategy, num_gpus,
        workers, w_type, w_index):
    """Train the resnet56 benchmark model under the requested distribution strategy.

    Args:
        epochs: number of training epochs.
        buffer_size: dataset buffer size. NOTE(review): currently unused —
            kept for interface compatibility; confirm whether Dataset
            should consume it.
        batch_size: batch size used for both the dataset and steps/epoch.
        train_mode: training mode forwarded to Benchmark.run().
        distribution_strategy: strategy name for get_distribution_strategy().
        num_gpus: GPU count forwarded to get_distribution_strategy().
        workers, w_type, w_index: multi-worker cluster settings forwarded
            to get_distribution_strategy().
    """
    strategy = get_distribution_strategy(strategy=distribution_strategy,
                                         num_gpus=num_gpus,
                                         workers=workers,
                                         typ=w_type,
                                         index=w_index)
    print_msg('Number of devices: {}'.format(strategy.num_replicas_in_sync), 'info')

    # BUG FIX: the dataset previously hard-coded batch_size=128, ignoring the
    # caller-supplied `batch_size` even though steps_per_epoch divides by it.
    # Use the same value for both so batching and steps/epoch stay consistent.
    data_obj = Dataset(batch_size=batch_size)
    train_dataset, test_dataset = data_obj.create_dataset()
    steps_per_epoch = data_obj.get_buffer_size() // batch_size
    train_obj = Benchmark(epochs, steps_per_epoch, 'resnet56')

    with strategy.scope():
        # Model must be created and compiled inside the strategy scope so its
        # variables are placed/mirrored across replicas.
        train_obj.create_model('resnet56')
        train_obj.compile_model()

    print_msg('Training...', 'info')
    train_obj.run(train_dataset, test_dataset, train_mode)
    print_msg('Training Done.', 'succ')
示例#2
0
def main(epochs, buffer_size, batch_size, train_mode, display_every,
         distribution_strategy, num_gpus, workers, w_type, w_index,
         setup_cluster, verbose):
    """Run the resnet56 training benchmark with the chosen distribution strategy."""
    # Raise TensorFlow's C++ log threshold when a verbosity level is given.
    if verbose:
        os.environ["TF_CPP_MIN_LOG_LEVEL"] = str(verbose)

    strategy = get_distribution_strategy(
        strategy=distribution_strategy, train_mode=train_mode,
        num_gpus=num_gpus, workers=workers, typ=w_type,
        index=w_index, setup=setup_cluster)

    # For a single requested GPU, report the replica count the strategy
    # actually created instead of the requested number.
    num_gpus = strategy.num_replicas_in_sync if num_gpus == 1 else num_gpus
    print_msg('Number of devices: {}'.format(num_gpus), 'info')

    pipeline = Dataset(batch_size)
    train_dataset, test_dataset = pipeline.create_dataset()
    steps_per_epoch = pipeline.get_buffer_size() // batch_size
    benchmark = Benchmark(epochs, steps_per_epoch, batch_size, display_every,
                          num_gpus, 'resnet56', strategy)

    print_msg('Training...', 'info')
    benchmark.run(train_dataset, test_dataset, train_mode)
    print_msg('Training Done.', 'succ')
示例#3
0
    # --- CLI arguments ---------------------------------------------------
    # NOTE(review): argparse's type=bool is a known pitfall — bool("False")
    # is True, so ANY non-empty positional value enables training. Consider
    # an explicit str-to-bool converter or a --train store_true flag.
    parser = argparse.ArgumentParser()
    parser.add_argument("train", help="training", type=bool)
    parser.add_argument("config", help="config file path", type=str)
    args = parser.parse_args()

    # Load the YAML config and keep only the section for the selected mode.
    # NOTE(review): yaml.load without an explicit Loader is deprecated and
    # unsafe on untrusted files — prefer yaml.safe_load here.
    with open(args.config) as f:
        config = yaml.load(f)
        if args.train:
            config = config["train"]
        else:
            config = config["test"]

    if args.train:
        # Build the parallel corpus from the configured source/target paths
        # (presumably English/Korean, given the en/ko names — confirm with
        # Dataset's implementation).
        dataset = Dataset(config["source_data_path"],
                          config["target_data_path"])
        en, ko = dataset.create_dataset()
        en_tensor, en_tokenizer, ko_tensor, ko_tokenizer = dataset.load_dataset(
            config["num_words"])
        # +1 on the vocabulary sizes — presumably because tokenizer word
        # indices start at 1, reserving 0 for padding; verify against the
        # tokenizer used by load_dataset.
        en_words_count = len(en_tokenizer.word_index) + 1
        ko_words_count = len(ko_tokenizer.word_index) + 1

        # tf.data pipeline over (source, target) pairs: shuffle with a
        # 10000-element buffer, batch per config, prefetch up to 1024.
        train_ds = tf.data.Dataset.from_tensor_slices(
            (en_tensor, ko_tensor)).shuffle(10000).batch(
                config["batch_size"]).prefetch(1024)
        # Seq2seq model sized by both vocabularies; start/end token ids are
        # looked up in the target tokenizer's vocabulary.
        model = Seq2seq(source_words_count=en_words_count,
                        target_words_count=ko_words_count,
                        sos=ko_tokenizer.word_index["<start>"],
                        eos=ko_tokenizer.word_index["<end>"])

        # Loss/optimizer for the training loop (continues past this view).
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
        optimizer = tf.keras.optimizers.Adam()