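# The function below relies on a few names defined elsewhere in the project:
# the Keras imports and an Args configuration holder. The sketch here is an
# assumption for self-containedness, not the project's actual config module.
# The values are illustrative: 64x64x3 images, beta_1 = 0.5 as recommended by
# the DCGAN paper, and a (1, 1, 100) noise tensor, presumably so the generator
# can start from a 1x1 spatial map and upsample with transposed convolutions.
# Standalone Keras 2 API is assumed (where Adam still takes `lr=`).
from keras import models, optimizers
from keras.layers import Input
from keras.optimizers import Adam


class Args:
    # Output image size: images are (sz, sz, 3).
    sz = 64
    # beta_1 for both Adam optimizers.
    adam_beta = 0.5
    # Latent noise shape fed to the generator.
    noise_shape = (1, 1, 100)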
def build_networks():
    shape = (Args.sz, Args.sz, 3)
    print("Args.sz:", Args.sz)

    # The learning rate is important.
    # Optimizers matter too; experiment with them yourself to fit your dataset.
    # I recommend reading the DCGAN paper.
    # Unlike what ganhacks suggests, SGD doesn't seem to work well here.
    # The DCGAN paper states that they used Adam for both G and D.
    # opt  = optimizers.SGD(lr=0.0001, decay=0.0, momentum=0.9, nesterov=True)
    # dopt = optimizers.SGD(lr=0.0001, decay=0.0, momentum=0.9, nesterov=True)

    # lr=0.010 looks good statistically (low D loss, higher G loss),
    # but it is too much for G to form a face.
    # If you see only one color ('flood fill') during training for about 10 batches or so,
    # training is failing. If you see only a few colors (instead of colorful noise),
    # then the lr is too high for the optimizer and G will not get a chance to form a face.
    # dopt = Adam(lr=0.010, beta_1=0.5)
    # opt  = Adam(lr=0.001, beta_1=0.5)

    # Vague faces @ 500. Still can't get the higher-frequency components.
    # dopt = Adam(lr=0.0010, beta_1=0.5)
    # opt  = Adam(lr=0.0001, beta_1=0.5)

    # Better faces @ 500, but mode collapse after that,
    # probably due to the learning rate being too high.
    # opt.lr = dopt.lr / 10 works nicely; I found this by trial and error.
    # Now the learning rates are similar, as we use the history buffer to train D multiple times.
    # I don't exactly understand how the decay parameter in Adam works.
    # It is certainly not exponential; it actually decays faster than exponential,
    # judging from the code and plotting it in Excel.
    dopt = Adam(lr=0.0002, beta_1=Args.adam_beta)
    opt  = Adam(lr=0.0001, beta_1=Args.adam_beta)

    # Too slow.
    # Another thing about the lr: if you make it small, it will only optimize slowly.
    # The lr only has to be smaller than a certain data-dependent threshold
    # (related to the largest gradient that prevents optimization).
    # dopt = Adam(lr=0.000010, beta_1=0.5)
    # opt  = Adam(lr=0.000001, beta_1=0.5)

    # Generator part.
    gen = build_gen(shape)
    # The loss function doesn't seem to matter for this one, as it is not directly trained.
    gen.compile(optimizer=opt, loss='binary_crossentropy')
    gen.summary()

    # Discriminator part.
    disc = build_discriminator(shape)
    disc.compile(optimizer=dopt, loss='binary_crossentropy')
    disc.summary()

    # GAN stack.
    # https://ctmakro.github.io/site/on_learning/fast_gan_in_keras.html is the faster way.
    # Here, for simplicity, I use the slower way (slower due to duplicate computation).
    noise = Input(shape=Args.noise_shape)
    gened = gen(noise)
    result = disc(gened)
    gan = models.Model(inputs=noise, outputs=result)
    gan.compile(optimizer=opt, loss='binary_crossentropy')
    gan.summary()

    return gen, disc, gan
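# A minimal sketch of how the three models returned above are typically driven
# in one training step. This is not the project's actual training loop: the
# helper name `train_step`, the uniform noise sampling, and the assumption that
# the discriminator ends in a single sigmoid unit (so targets have shape
# (batch_size, 1)) are all illustrative. Note that when training G through
# `gan`, D's weights are usually frozen (e.g. by toggling `disc.trainable`),
# which the build code above leaves to the training loop.
import numpy as np


def train_step(gen, disc, gan, real_images):
    batch_size = real_images.shape[0]

    # Sample latent noise and generate a batch of fake images.
    noise = np.random.uniform(-1.0, 1.0, size=(batch_size,) + Args.noise_shape)
    fakes = gen.predict(noise)

    # Train D on real images (label 1) and generated images (label 0).
    d_loss_real = disc.train_on_batch(real_images, np.ones((batch_size, 1)))
    d_loss_fake = disc.train_on_batch(fakes, np.zeros((batch_size, 1)))

    # Train G through the stacked model: the target is 1 because G wants D
    # to classify its output as real.
    g_loss = gan.train_on_batch(noise, np.ones((batch_size, 1)))

    return d_loss_real, d_loss_fake, g_loss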