Example #1
def main():
    global DEFAULT_FLOATX
    DEFAULT_FLOATX = tf.float32

    max_lr = 1e-3
    min_lr = 3e-4
    warmup_steps = 5000
    bs = 32

    def lr_schedule(step, *, decay=0.99995, min_lr=3e-4):
        """Linear warmup to max_lr over warmup_steps, hold until 10x warmup_steps,
           then geometric decay toward min_lr."""
        global curr_lr
        if step < warmup_steps:
            curr_lr = max_lr * step / warmup_steps
            return curr_lr
        elif step > (warmup_steps * 10) and curr_lr > min_lr:
            curr_lr *= decay
            return curr_lr
        return curr_lr

    dropout_p = 0.
    filters = 128
    blocks = 20
    components = 32  # logistic mixture components
    attn_heads = 4
    use_ln = True

    floatx_str = {tf.float32: 'fp32', tf.float16: 'fp16'}[DEFAULT_FLOATX]
    flow_training_imagenet.train(
        flow_constructor=lambda: construct(filters=filters,
                                           components=components,
                                           attn_heads=attn_heads,
                                           blocks=blocks,
                                           use_nin=True,
                                           use_ln=use_ln),
        logdir=
        f'~/logs/2018-10-26/imagenet32_ELU_WN_code_release_mix{components}_b{blocks}_f{filters}_h{attn_heads}_ln{int(use_ln)}_lr{max_lr}_bs{bs}_drop{dropout_p}_{floatx_str}',
        lr_schedule=lr_schedule,
        dropout_p=dropout_p,
        seed=0,
        init_bs=32,
        dataset='imagenet32',
        total_bs=bs,
        ema_decay=.999222,
        steps_per_log=100,
        steps_per_val=50000,
        steps_per_dump=5000,
        steps_per_samples=5000,
        max_grad_norm=1.,
        dtype=DEFAULT_FLOATX,
        scale_loss=1e-2 if DEFAULT_FLOATX == tf.float16 else None,
        n_epochs=2,
        restore_checkpoint=None,  # path to a checkpoint in the format path_to_checkpoint/model (no .meta / .ckpt suffix)
        dump_samples_to_tensorboard=False,  # set to True to push the tiled samples to TensorBoard
    )
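A quick way to see where this schedule ends up: after the 50K-step hold, the learning rate is multiplied by 0.99995 once per step until it reaches the 3e-4 floor. A back-of-the-envelope check (this is just the arithmetic implied by the constants above, not output from the training code):

import math

max_lr, min_lr, decay = 1e-3, 3e-4, 0.99995
decay_steps = math.log(min_lr / max_lr) / math.log(decay)
print(round(decay_steps))  # ~24079 steps, so the 3e-4 floor is reached around step 50000 + 24000 ≈ 74K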
Example #2
def main():
    global DEFAULT_FLOATX
    DEFAULT_FLOATX = tf.float32

    max_lr = 4e-4
    warmup_steps = 10000
    # Batch size. This setting works on a V100; for smaller GPUs, a batch size of 56 or 60 is advised.
    # Make sure bs % num_mpi_processes == 0, otherwise an assertion error is raised.
    bs = 60

    def lr_schedule(step, *, decay=0.9995):
        """Ramp up to 4e-4 over 10K steps, hold there until 50K, geometric decay to 3e-4 by 55K steps,
           hold at 3e-4 until 110K steps, then warm up from 0 to 1e-5 over 20K steps,
           and stay constant at 1e-5 for the rest of training."""
        global curr_lr
        if step < warmup_steps:
            return max_lr * step / warmup_steps
        elif step < (5 * warmup_steps):
            curr_lr = max_lr
            return max_lr
        elif curr_lr > 3e-4:
            curr_lr *= decay
            return curr_lr
        elif step < 110000:
            return 3e-4
        elif step < 130000:
            return (1e-5) * ((step - 110000) / (2 * warmup_steps))
        else:
            return 1e-5

    dropout_p = 0.
    filters = 96
    blocks = 16
    components = 4  # logistic mixture components
    attn_heads = 4
    use_ln = True

    floatx_str = {tf.float32: 'fp32', tf.float16: 'fp16'}[DEFAULT_FLOATX]
    flow_training_imagenet.train(
        flow_constructor=lambda: construct(filters=filters,
                                           components=components,
                                           attn_heads=attn_heads,
                                           blocks=blocks,
                                           use_nin=True,
                                           use_ln=use_ln),
        logdir=
        f'~/logs/2018-11-12/imagenet64_5bit_ELU_code_release_mix{components}_b{blocks}_f{filters}_h{attn_heads}_ln{int(use_ln)}_lr{max_lr}_bs{bs}_drop{dropout_p}_{floatx_str}',
        lr_schedule=lr_schedule,
        dropout_p=dropout_p,
        seed=0,
        init_bs=60,  # works on a V100; set a smaller value (56 is advised) if it doesn't fit on your GPU
        dataset='imagenet64_5bit',
        total_bs=bs,
        ema_decay=.999,
        steps_per_log=100,
        steps_per_val=5000000,  # effectively no validation during training; set lower to validate more frequently
        steps_per_dump=5000,
        steps_per_samples=5000,
        max_grad_norm=1.,
        dtype=DEFAULT_FLOATX,
        scale_loss=1e-2 if DEFAULT_FLOATX == tf.float16 else None,
        n_epochs=2,
        restore_checkpoint=None,  # path to a checkpoint in the format path_to_checkpoint/model (no .meta / .ckpt suffix)
        dump_samples_to_tensorboard=False,  # set to True to push the tiled samples to TensorBoard
    )
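The batch-size comment above assumes a multi-process MPI launch. A minimal pre-flight check of the divisibility constraint, assuming mpi4py is available (the variable names here are illustrative; the training script may perform this assertion internally with its own names):

from mpi4py import MPI

bs = 60
num_mpi_processes = MPI.COMM_WORLD.Get_size()  # e.g. 4 when launched with mpirun -n 4
assert bs % num_mpi_processes == 0, \
    f'total batch size {bs} must be divisible by the {num_mpi_processes} MPI processes'
print(f'{bs // num_mpi_processes} examples per process')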
Example #3
def main():
    global DEFAULT_FLOATX
    DEFAULT_FLOATX = tf.float32

    max_lr = 4e-4
    warmup_steps = 10000
    bs = 72  # works on a V100; set a smaller value (56 is advised) if it doesn't fit on your GPU

    def lr_schedule(step, *, decay=0.99995):
        """Ramp up to 4e-4 over 10K steps, hold at 4e-4 until 50K steps, geometric decay to 3e-4 by 55K steps,
           hold at 3e-4 until 230K steps, then warm up from 0 to 1e-5 over 10K steps, hold at 1e-5 for 40K steps,
           warm up again from 0 to 1e-6 over 10K steps and stay constant. This is done to prevent NaNs.
           The schedule works fine for training up to 300K iterations; for good samples, 200K iterations are sufficient."""
        global curr_lr
        if step < warmup_steps:
            return max_lr * step / warmup_steps
        elif step < (warmup_steps * 5):
            curr_lr = max_lr
            return max_lr
        elif step < 55000:
            curr_lr *= decay
            return curr_lr
        elif step < 230000:
            return 3e-4
        elif step < 240000:
            return (1e-5) * (step - 230000) / warmup_steps
        elif step < 280000:
            return 1e-5
        elif step < 290000:
            return (1e-6) * (step - 280000) / warmup_steps
        else:
            return 1e-6

    dropout_p = 0.
    filters = 96
    blocks = 16
    components = 4  # logistic mixture components
    attn_heads = 4
    use_ln = True

    floatx_str = {tf.float32: 'fp32', tf.float16: 'fp16'}[DEFAULT_FLOATX]
    flow_training_imagenet.train(
        flow_constructor=lambda: construct(filters=filters,
                                           components=components,
                                           attn_heads=attn_heads,
                                           blocks=blocks,
                                           use_nin=True,
                                           use_ln=use_ln),
        logdir=
        f'~/logs/2018-11-12/imagenet64_ELU_code_release_mix{components}_b{blocks}_f{filters}_h{attn_heads}_ln{int(use_ln)}_lr{max_lr}_bs{bs}_drop{dropout_p}_{floatx_str}',
        lr_schedule=lr_schedule,
        dropout_p=dropout_p,
        seed=0,
        init_bs=72,  # works on a V100; set a smaller value (56 is advised) if it doesn't fit on your GPU
        dataset='imagenet64',
        total_bs=bs,
        ema_decay=.999,
        steps_per_log=100,
        steps_per_val=5000000,  # effectively no validation during training; set lower to validate more frequently
        steps_per_dump=5000,
        steps_per_samples=5000,
        max_grad_norm=1.,
        dtype=DEFAULT_FLOATX,
        scale_loss=1e-2 if DEFAULT_FLOATX == tf.float16 else None,
        n_epochs=2,
        restore_checkpoint=None,  # path to a checkpoint in the format path_to_checkpoint/model (no .meta / .ckpt suffix)
        dump_samples_to_tensorboard=False,  # set to True to push the tiled samples to TensorBoard
    )
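With this many piecewise segments, it is easy for a schedule like the one above to leave a gap (a branch that returns None) or a ramp that goes negative. A small standalone checker, written here as a generic helper rather than part of the release code, can scan the whole range before training starts:

def check_schedule(schedule, total_steps, log_every=50000):
    """Scan an lr schedule step by step for gaps (None) or negative values."""
    for step in range(total_steps + 1):
        lr = schedule(step)
        assert lr is not None, f'schedule returns None at step {step}'
        assert lr >= 0, f'negative lr {lr} at step {step}'
        if step % log_every == 0:
            print(step, lr)

# inside main(), once lr_schedule is defined:
# check_schedule(lr_schedule, 300000)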