def __init__(self, output_dir, data_loader, n_words, ixtoword, log): if cfg.TRAIN.FLAG: self.model_dir = os.path.join(output_dir, 'model') self.image_dir = os.path.join(output_dir, 'image') mkdir_p(self.model_dir) mkdir_p(self.image_dir) self.log = log self.update_interval = 100 self.img_save_interval = 400 self.batch_size = cfg.TRAIN.BATCH_SIZE self.max_epoch = cfg.TRAIN.MAX_EPOCH self.D_lr = cfg.TRAIN.DISCRIMINATOR_LR self.G_lr = cfg.TRAIN.GENERATOR_LR self.snapshot_interval = cfg.TRAIN.SNAPSHOT_INTERVAL self.n_words = n_words self.ixtoword = ixtoword self.data_loader = data_loader self.num_batches = len(self.data_loader) self.train_log = np.empty((0, 3)) self.log.add("CUDA status: {}".format(cfg.CUDA)) self.log.add("GPU ID: {}".format(cfg.GPU_ID)) self.log.add("Init condGAN ... ")
def __init__(self, output_dir, data_loader, data_loader_val, n_words, ixtoword, log): if cfg.TRAIN.FLAG: self.model_dir = os.path.join(output_dir, 'model') self.image_dir = os.path.join(output_dir, 'image') mkdir_p(self.model_dir) mkdir_p(self.image_dir) if cfg.GPU_ID >= 0: torch.cuda.set_device(cfg.GPU_ID) cudnn.benchmark = True self.log = log self.update_interval = 200 self.batch_size = cfg.TRAIN.BATCH_SIZE self.max_epoch = cfg.TRAIN.MAX_EPOCH #self.snapshot_interval = cfg.TRAIN.SNAPSHOT_INTERVAL self.n_words = n_words self.ixtoword = ixtoword self.data_loader = data_loader self.data_loader_val = data_loader_val self.text_encoder, self.image_encoder, self.labels, self.start_epoch = self.build_models() self.train_log = np.empty((0,5)) self.log.add("CUDA status: {}".format(cfg.CUDA)) self.log.add("GPU ID: {}".format(cfg.GPU_ID)) self.log.add("Init DAMSM ... ")
def generate_images(self, data_dic, tries, threshold): '''Generate examples''' if cfg.TRAIN.NET_G == '': self.log.add('Error: model not found!') else: s_tmp = cfg.SAVE_DIR text_encoder, image_encoder, netG, netsD, _ = self.build_models() netG.eval() netsD[2].eval() generated_images = [] with torch.no_grad(): for key in data_dic: save_dir = '%s/%s' % (s_tmp, key) mkdir_p(save_dir) captions, cap_lens, sorted_indices = data_dic[key] batch_size = captions.shape[0] nz = cfg.GAN.Z_DIM captions = Variable(torch.from_numpy(captions)) cap_lens = Variable(torch.from_numpy(cap_lens)) if cfg.CUDA: captions = captions.cuda() cap_lens = cap_lens.cuda() for i in range(1): # 16 noise = Variable(torch.FloatTensor(batch_size, nz)) if cfg.CUDA: noise = noise.cuda() image_name = '%s/%d_s' % (save_dir, i) images = self.generate_image(text_encoder, netG, netsD, batch_size, captions, cap_lens, sorted_indices, noise, image_name, tries, threshold) generated_images.append(images) return generated_images
def save_singleimages(self, images, filenames, save_dir, split_dir, sentenceID=0): for i in range(images.size(0)): s_tmp = '%s/single_samples/%s/%s' %\ (save_dir, split_dir, filenames[i]) folder = s_tmp[:s_tmp.rfind('/')] if not os.path.isdir(folder): print('Make a new folder: ', folder) mkdir_p(folder) fullpath = '%s_%d.jpg' % (s_tmp, sentenceID) # range from [-1, 1] to [0, 1] # img = (images[i] + 1.0) / 2 img = images[i].add(1).div(2).mul(255).clamp(0, 255).byte() # range from [0, 1] to [0, 255] ndarr = img.permute(1, 2, 0).data.cpu().numpy() im = Image.fromarray(ndarr) im.save(fullpath)
def __init__(self, output_dir, data_loader, n_words, ixtoword): if cfg.TRAIN.FLAG: self.model_dir = os.path.join(output_dir, 'Model') self.image_dir = os.path.join(output_dir, 'Image') mkdir_p(self.model_dir) mkdir_p(self.image_dir) if cfg.GPU_ID >= 0: torch.cuda.set_device(cfg.GPU_ID) cudnn.benchmark = True self.batch_size = cfg.TRAIN.BATCH_SIZE self.max_epoch = cfg.TRAIN.MAX_EPOCH self.snapshot_interval = cfg.TRAIN.SNAPSHOT_INTERVAL self.n_words = n_words self.ixtoword = ixtoword self.data_loader = data_loader self.num_batches = len(self.data_loader) print("Starting trainer ... ") print("CUDA status: {}".format(cfg.CUDA)) print("GPU ID: {}".format(cfg.GPU_ID))
def gen_example(self, data_dic): if cfg.TRAIN.NET_G == '': print('Error: the path for morels is not found!') else: # Build and load the generator text_encoder = RNNEncoder(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM, nlayers=cfg.TEXT.RNN_LAYERS) state_dict = torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage) text_encoder.load_state_dict(state_dict) print('Load text encoder from:', cfg.TRAIN.NET_E) text_encoder = text_encoder.cuda() text_encoder.eval() # the path to save generated images if cfg.GAN.B_DCGAN: netG = G_DCGAN() else: netG = G_NET() s_tmp = cfg.TRAIN.NET_G[:cfg.TRAIN.NET_G.rfind('.pth')] model_dir = cfg.TRAIN.NET_G state_dict = \ torch.load(model_dir, map_location=lambda storage, loc: storage) netG.load_state_dict(state_dict) print('Load G from: ', model_dir) netG.cuda() netG.eval() for key in data_dic: save_dir = '%s/%s' % (s_tmp, key) mkdir_p(save_dir) captions, cap_lens, sorted_indices = data_dic[key] batch_size = captions.shape[0] nz = cfg.GAN.Z_DIM captions = Variable(torch.from_numpy(captions), volatile=True) cap_lens = Variable(torch.from_numpy(cap_lens), volatile=True) captions = captions.cuda() cap_lens = cap_lens.cuda() for i in range(1): # 16 noise = Variable(torch.FloatTensor(batch_size, nz), volatile=True) noise = noise.cuda() ####################################################### # (1) Extract text embeddings ###################################################### hidden = text_encoder.init_hidden(batch_size) # words_embs: batch_size x nef x seq_len # sent_emb: batch_size x nef words_embs, sent_emb = text_encoder( captions, cap_lens, hidden) mask = (captions == 0) ####################################################### # (2) Generate fake images ###################################################### noise.data.normal_(0, 1) fake_imgs, attention_maps, _, _ = netG( noise, sent_emb, words_embs, mask) # G attention cap_lens_np = cap_lens.cpu().data.numpy() for j in range(batch_size): save_name = '%s/%d_s_%d' % (save_dir, i, sorted_indices[j]) for k in range(len(fake_imgs)): im = fake_imgs[k][j].data.cpu().numpy() im = (im + 1.0) * 127.5 im = im.astype(np.uint8) # print('im', im.shape) im = np.transpose(im, (1, 2, 0)) # print('im', im.shape) im = Image.fromarray(im) fullpath = '%s_g%d.png' % (save_name, k) im.save(fullpath) for k in range(len(attention_maps)): if len(fake_imgs) > 1: im = fake_imgs[k + 1].detach().cpu() else: im = fake_imgs[0].detach().cpu() attn_maps = attention_maps[k] att_sze = attn_maps.size(2) img_set, sentences = \ build_super_images2(im[j].unsqueeze(0), captions[j].unsqueeze(0), [cap_lens_np[j]], self.ixtoword, [attn_maps[j]], att_sze) if img_set is not None: im = Image.fromarray(img_set) fullpath = '%s_a%d.png' % (save_name, k) im.save(fullpath)
def sampling(self, split_dir): if cfg.TRAIN.NET_G == '': print('Error: the path for morels is not found!') else: if split_dir == 'test': split_dir = 'valid' # Build and load the generator if cfg.GAN.B_DCGAN: netG = G_DCGAN() else: netG = G_NET() netG.apply(weights_init) netG.cuda() netG.eval() # text_encoder = RNNEncoder(self.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM, nlayers=cfg.TEXT.RNN_LAYERS) state_dict = torch.load(cfg.TRAIN.NET_E, map_location=lambda storage, loc: storage) text_encoder.load_state_dict(state_dict) print('Load text encoder from:', cfg.TRAIN.NET_E) text_encoder = text_encoder.cuda() text_encoder.eval() batch_size = self.batch_size nz = cfg.GAN.Z_DIM noise = Variable(torch.FloatTensor(batch_size, nz), volatile=True) noise = noise.cuda() model_dir = cfg.TRAIN.NET_G state_dict = \ torch.load(model_dir, map_location=lambda storage, loc: storage) # state_dict = torch.load(cfg.TRAIN.NET_G) netG.load_state_dict(state_dict) print('Load G from: ', model_dir) # the path to save generated images s_tmp = model_dir[:model_dir.rfind('.pth')] save_dir = '%s/%s' % (s_tmp, split_dir) mkdir_p(save_dir) cnt = 0 for _ in range(1): # (cfg.TEXT.CAPTIONS_PER_IMAGE): for step, data in enumerate(self.data_loader, 0): cnt += batch_size if step % 100 == 0: print('step: ', step) # if step > 50: # break imgs, captions, cap_lens, class_ids, keys = prepare_data( data) hidden = text_encoder.init_hidden(batch_size) # words_embs: batch_size x nef x seq_len # sent_emb: batch_size x nef words_embs, sent_emb = text_encoder( captions, cap_lens, hidden) words_embs, sent_emb = words_embs.detach( ), sent_emb.detach() mask = (captions == 0) num_words = words_embs.size(2) if mask.size(1) > num_words: mask = mask[:, :num_words] ####################################################### # (2) Generate fake images ###################################################### noise.data.normal_(0, 1) fake_imgs, _, _, _ = netG(noise, sent_emb, words_embs, mask) for j in range(batch_size): s_tmp = '%s/single/%s' % (save_dir, keys[j]) folder = s_tmp[:s_tmp.rfind('/')] if not os.path.isdir(folder): print('Make a new folder: ', folder) mkdir_p(folder) k = -1 # for k in range(len(fake_imgs)): im = fake_imgs[k][j].data.cpu().numpy() # [-1, 1] --> [0, 255] im = (im + 1.0) * 127.5 im = im.astype(np.uint8) im = np.transpose(im, (1, 2, 0)) im = Image.fromarray(im) fullpath = '%s_s%d.png' % (s_tmp, k) im.save(fullpath)
def __init__(self, output_dir): self.allow_print = True self.log_dir = os.path.join(output_dir, 'log') mkdir_p(self.log_dir) log_path = os.path.join(self.log_dir, 'log.txt') self.log_file = open(log_path, 'w')