def save_model():

  f_q = Encoder(input_shape)
  f_k = Encoder(input_shape)
  optimizer = tf.keras.optimizers.Adam(0.01, decay=0.0001)
  # restore the latest checkpoint and export the key encoder
  checkpoint = tf.train.Checkpoint(f_q=f_q, f_k=f_k, optimizer=optimizer)
  checkpoint.restore(tf.train.latest_checkpoint('checkpoints'))
  if not os.path.exists('models'):
    os.mkdir('models')
  f_k.save(os.path.join('models', 'resnet50.h5'))
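# Encoder is defined elsewhere in this project. Judging from the exported file
# name (resnet50.h5) and the 128-dimensional features used in main(), it is
# presumably a ResNet50 backbone with a small projection head. The sketch below
# is only an assumption of that structure, not the project's actual Encoder;
# the name EncoderSketch and the l2-normalisation are hypothetical.
def EncoderSketch(input_shape, feature_dim = 128):
  inputs = tf.keras.Input(shape = input_shape)
  backbone = tf.keras.applications.ResNet50(include_top = False, weights = None, pooling = 'avg')
  features = tf.keras.layers.Dense(feature_dim)(backbone(inputs))
  # l2-normalize so dot products behave like cosine similarities
  outputs = tf.math.l2_normalize(features, axis = -1)
  return tf.keras.Model(inputs = inputs, outputs = outputs)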
def main():

  # query and key feature extractors
  f_q = Encoder(input_shape)  # updated every step by gradient descent
  f_k = Encoder(input_shape)  # updated slowly via momentum from f_q
  f_k.set_weights(f_q.get_weights())
  # utilities for training
  optimizer = tf.keras.optimizers.SGD(0.001, momentum = 0.9, decay = 0.0001)
  trainset = iter(tfds.load(name = 'imagenet_resized/64x64', split = tfds.Split.TRAIN, download = False).repeat(-1).map(parse_function).shuffle(batch_size).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE))
  checkpoint = tf.train.Checkpoint(f_q = f_q, f_k = f_k, optimizer = optimizer)
  checkpoint.restore(tf.train.latest_checkpoint('checkpoints'))
  log = tf.summary.create_file_writer('checkpoints')
  avg_loss = tf.keras.metrics.Mean(name = 'loss', dtype = tf.float32)
  # pre-fill the dictionary queue with key features from 10 batches
  queue = Queue(trainset, f_k, 10)
  augmentation = RandomAugmentation(input_shape, rotation_range = (-10, 10))
  while True:
    x, label = next(trainset)
    # two augmented views of the same batch
    x_q = augmentation(x)  # x_q.shape = (batch, 64, 64, 3)
    x_k = augmentation(x)  # x_k.shape = (batch, 64, 64, 3)
    with tf.GradientTape() as tape:
      q = f_q(x_q)  # q.shape = (batch, 128)
      k = f_k(x_k)  # k.shape = (batch, 128)
      # positive logits: similarity between each query and its matching key
      l_pos = tf.reshape(tf.linalg.matmul(tf.reshape(q, (-1, 1, 128)), tf.reshape(k, (-1, 128, 1))), (-1, 1))  # l_pos.shape = (batch, 1)
      # negative logits: similarity between each query and the queued keys
      l_neg = tf.reshape(tf.linalg.matmul(tf.reshape(q, (-1, 1, 128)), queue.get()), (-1, 10))  # l_neg.shape = (batch, 10)
      logits = tf.concat([l_pos, l_neg], axis = 1)  # logits.shape = (batch, 11)
      # contrastive loss: the positive is always at index 0
      loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True)(tf.zeros((batch_size,)), logits / temp)
    grads = tape.gradient(loss, f_q.trainable_variables)
    avg_loss.update_state(loss)
    # sanity checks: gradients and query encoder outputs must stay finite
    for grad in grads:
      tf.debugging.Assert(tf.math.logical_not(tf.math.reduce_any(tf.math.is_nan(grad))), grads + [optimizer.iterations,])
      tf.debugging.Assert(tf.math.logical_not(tf.math.reduce_any(tf.math.is_inf(grad))), grads + [optimizer.iterations,])
    tf.debugging.Assert(tf.math.logical_not(tf.math.reduce_any(tf.math.is_nan(f_q(tf.constant(np.random.normal(size = (1, 64, 64, 3)), dtype = tf.float32))))), [optimizer.iterations])
    optimizer.apply_gradients(zip(grads, f_q.trainable_variables))
    tf.debugging.Assert(tf.math.logical_not(tf.math.reduce_any(tf.math.is_nan(f_q(tf.constant(np.random.normal(size = (1, 64, 64, 3)), dtype = tf.float32))))), [optimizer.iterations])
    # momentum update of the key encoder weights
    for w_k, w_q in zip(f_k.trainable_variables, f_q.trainable_variables):
      w_k.assign(beta * w_k + (1 - beta) * w_q)
    # update the dictionary with the newest keys
    queue.update(k)
    # write log
    if tf.equal(optimizer.iterations % 500, 0):
      with log.as_default():
        tf.summary.scalar('loss', avg_loss.result(), step = optimizer.iterations)
      print('Step #%d Loss: %.6f' % (optimizer.iterations, avg_loss.result()))
      avg_loss.reset_states()
    if tf.equal(optimizer.iterations % 5000, 0):
      # save checkpoint and export the key encoder
      checkpoint.save(os.path.join('checkpoints', 'ckpt'))
      if not os.path.exists('models'):
        os.mkdir('models')
      f_k.save(os.path.join('models', 'model.h5'))
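# The Queue and RandomAugmentation helpers used in main() are defined elsewhere
# in this project. As an illustration only, below is a minimal sketch of the
# dictionary-queue interface that main() assumes: the constructor pre-fills the
# queue from the dataset, get() returns the stored keys as a (feature_dim, K)
# tensor used for the negative logits, and update() enqueues the newest keys
# while dropping the oldest. The class name FeatureQueueSketch and all details
# are hypothetical and may differ from the real Queue implementation.
class FeatureQueueSketch:

  def __init__(self, dataset, f_k, num_batches):
    # pre-fill with key features from a few batches
    self.feats = list()
    for _ in range(num_batches):
      x, _ = next(dataset)
      self.feats.append(f_k(x))  # each entry has shape (batch, feature_dim)

  def get(self):
    # concatenate stored keys and transpose to shape (feature_dim, K)
    return tf.transpose(tf.concat(self.feats, axis = 0))

  def update(self, k):
    # enqueue the newest keys, dequeue the oldest batch (FIFO)
    self.feats.append(k)
    self.feats.pop(0)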
def aae(batch_size=128, dim_list=[100, 100], latent_dim=32, nb_epoch=1, nb_epoch_ae=1, plt_frq=50, saved_model='', dataDirName=dataDirName, max_len=20, saved='', activation=['tanh', 'tanh']):

    # get date
    now = dt.datetime.now()
    date = now.strftime("%d%m_%H%M%S")

    # build the output file name from the hyper-parameters
    interm = ''
    for i in dim_list:
        interm += str(i)
    name = 'e' + str(nb_epoch) + '_a' + str(nb_epoch_ae) + '_l' + str(latent_dim) + '_i' + interm + '_o' + str(max_len)

    # import data (only the targets are needed here)
    _, _, Y_train, Y_test = importDataSet(dataDirName, max_len)
    dof = Y_train.shape[2]
    activation_list = activation
    output_length = Y_train.shape[1]

    # define the different models
    encoder = Encoder(latent_dim, output_length, dof, activation_list, dim_list)
    decoder = Decoder(latent_dim, output_length, dim_list, activation_list, dof)
    autoencoder = Autoencoder(encoder, decoder)
    discriminator = Discriminator(latent_dim)
    generator = Generator(encoder, discriminator)

    # compile every model that needs training
    compile(generator, discriminator, autoencoder)

    # summary of the different models
    encoder.summary()
    decoder.summary()
    discriminator.summary()

    # load a previously saved autoencoder if requested
    if saved != '':
        autoencoder = load_model(saved)
        name += '_s'

    # pre-train the autoencoder
    print('pre-training autoencoder')
    autoencoder.fit(Y_train, Y_train, nb_epoch=nb_epoch_ae, verbose=2, batch_size=batch_size, shuffle=True, validation_data=(Y_test, Y_test))

    # pre-train the discriminator network
    ntrain = 1000
    XT = np.random.permutation(Y_train)
    # generate latent vectors (encoder outputs and samples from the prior)
    zfake = np.random.uniform(low=-1.0, high=1.0, size=[XT.shape[0], latent_dim])
    zreal = encoder.predict(XT)
    X = np.concatenate((zreal, zfake))
    # label the vectors: encoder outputs -> 1, prior samples -> 0
    n = XT.shape[0]
    y = np.zeros(2 * n)
    y[:n] = 1
    y = np_utils.to_categorical(y, 2)
    # train the discriminator
    discriminator.fit(X, y, nb_epoch=1, verbose=0, batch_size=batch_size)
    y_hat = discriminator.predict(X)
    y_hat_idx = np.argmax(y_hat, axis=1)
    y_idx = np.argmax(y, axis=1)
    diff = y_idx - y_hat_idx
    n_tot = y.shape[0]
    n_rig = (diff == 0).sum()
    acc = n_rig * 100.0 / n_tot
    print("Discriminator accuracy pretrain: %0.02f pct (%d of %d) right on %d epoch" % (acc, n_rig, n_tot, 1))

    # set up loss storage
    losses = {"discriminator": [], "generator": []}

    # function that trains the adversarial autoencoder
    def train_for_n(nb_epoch=5, plt_frq=plt_frq, BATCH_SIZE=32):
        count = 0
        for e in range(nb_epoch):
            print("epoch %d" % (e + 1))
            # first train the autoencoder on the reconstruction task
            autoencoder_losses = autoencoder.fit(Y_train, Y_train, shuffle=True, nb_epoch=1, batch_size=BATCH_SIZE, verbose=2, validation_data=(Y_test, Y_test))

            # make latent vectors: encoder outputs and samples from the prior
            music_batch = Y_train[np.random.randint(Y_train.shape[0], size=BATCH_SIZE)]
            noise = np.random.uniform(low=-1.0, high=1.0, size=[BATCH_SIZE, latent_dim])
            zreal = encoder.predict(music_batch)

            # train the discriminator on these latent vectors
            nb_misclassified = np.random.randint(BATCH_SIZE)
            X0 = np.concatenate((zreal, noise))
            y0 = np.zeros(BATCH_SIZE)
            y0 = np_utils.to_categorical(y0, 2)
            # noise the labels: encoder outputs are labelled 1, except for a
            # random number of them that are deliberately mislabelled as 0
            misclass = np.ones(BATCH_SIZE)
            misclass[:nb_misclassified] = 0
            misclass = np_utils.to_categorical(misclass, 2)
            y0 = np.concatenate((misclass, y0))
            # shuffle vectors and labels together
            zipped = list(zip(X0, y0))
            shuffle(zipped)
            X0, y0 = zip(*zipped)
            X0 = np.array(X0)
            y0 = np.array(y0)
            # then train the discriminator
            # make_trainable(discriminator, True)
            d_loss, d_lab = discriminator.train_on_batch(X0, y0)
            losses["discriminator"].append(float(d_loss))

            # train the generator-discriminator stack on input data toward the non-generated output class
            y2 = np.ones(BATCH_SIZE)
            y2 = np_utils.to_categorical(y2, 2)
            # make_trainable(discriminator, False)
            # train the generator twice, on two different batches
            g_loss, g_lab = generator.train_on_batch(music_batch, y2)
            image_batch = Y_train[np.random.randint(0, Y_train.shape[0], size=BATCH_SIZE)]
            g_loss, g_lab = generator.train_on_batch(image_batch, y2)
            losses["generator"].append(float(g_loss))

            # report progress (discriminator accuracy on the pretraining latent vectors)
            d_acc = discriminator.evaluate(X, y, batch_size=BATCH_SIZE, verbose=0)
            print("\ndiscriminator loss: %s" % losses["discriminator"][-1])
            print("discriminator acc: %s" % d_acc[1])
            print("generator loss: %s" % losses["generator"][-1])
            # print("autoencoder loss:", autoencoder_losses)
            count += 1

    # train the adversarial autoencoder
    train_for_n(nb_epoch=nb_epoch, plt_frq=plt_frq, BATCH_SIZE=batch_size)

    # save the final models
    autoencoder.save(name + '_autoencoder.h5')
    encoder.save(name + '_encoder_save.h5')
    decoder.save(name + '_decoder_save.h5')

    # write the losses to a csv so they can be plotted via plot_loss.py
    with open('loss.csv', 'w') as csvfile:
        fieldnames = ['discriminator', 'generator']
        w = csv.DictWriter(csvfile, fieldnames=fieldnames)
        w.writeheader()
        w.writerow(losses)

    # plot the losses
    plot_loss('loss.csv')
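# plot_loss is imported from plot_loss.py in this project. As an illustration
# only, here is a minimal sketch of what such a helper could look like, under
# the assumption that it reads the loss.csv written above (a single row whose
# cells hold the stringified loss lists) and plots both curves with matplotlib;
# the name plot_loss_sketch and all details are hypothetical and may differ
# from the real plot_loss implementation.
import ast
import csv
import matplotlib.pyplot as plt

def plot_loss_sketch(path):
    with open(path) as csvfile:
        row = next(csv.DictReader(csvfile))
    for model_name, values in row.items():
        # each cell is the string repr of a python list, e.g. "[0.7, 0.64, ...]"
        plt.plot(ast.literal_eval(values), label=model_name)
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend()
    plt.savefig('loss.png')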