args = parser.parse_args()

# import tensorflow
if args.verbosity >= 2:
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
else:
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf

# import tf_util for TFRecords
if args.tf_records:
    from shapeworld import tf_util

# dataset
dataset = dataset(dtype=args.type, name=args.name, language=args.language, config=args.config)

# information about dataset and model
if args.verbosity >= 1:
    sys.stdout.write('{time} train {model} on {dataset}\n'.format(
        time=datetime.now().strftime('%H:%M:%S'), model=args.model, dataset=dataset))
    sys.stdout.write(' config: {}\n'.format(args.config))
    sys.stdout.write(' hyperparameters: {}\n'.format(args.hyperparams_file))
    sys.stdout.flush()

if args.type == 'agreement':
    parameters = dict(
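# A minimal sketch of the argument parser this snippet assumes. The flag names
# below are inferred from the args.* attributes used above; the short options
# and help strings are illustrative assumptions, not taken from the source.
import argparse

parser = argparse.ArgumentParser(description='Train a model on a ShapeWorld dataset')
parser.add_argument('-t', '--type', help='Dataset type, e.g. agreement')
parser.add_argument('-n', '--name', help='Dataset name')
parser.add_argument('-l', '--language', default=None, help='Dataset language')
parser.add_argument('-c', '--config', default=None, help='Dataset configuration')
parser.add_argument('-m', '--model', help='Model name')
parser.add_argument('-H', '--hyperparams-file', default=None, help='Model hyperparameters file')
parser.add_argument('-v', '--verbosity', type=int, default=1, help='Verbosity (0, 1, 2)')
parser.add_argument('-T', '--tf-records', action='store_true', help='Use the TFRecords input pipeline')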
def load_shapeworld_dataset(data_path, embed_path, mode, size, ds_type, name, batch_size,
                            random_seed, shuffle, img_feats, cuda, truncate_final_batch=False):
    """
    Reads a ShapeWorld dataset and yields it in batches.

    Args:
    - data_path: path to folder containing the ShapeWorld data
    - embed_path: path to folder containing pretrained word vectors
    - mode: 'train', 'eval', or 'test'
    - size: size of dataset
    - ds_type: problem type, e.g. 'agreement'
    - name: name of dataset, e.g. 'oneshape_simple_textselect'
    - batch_size: size of each batch
    - random_seed: int used to set the random seed
    - shuffle: whether to shuffle the dataset
    - img_feats: which image features to use, e.g. 'avgpool_512', 'layer4_2'
    - cuda: whether to use CUDA
    - truncate_final_batch: whether to allow a smaller final batch

    Each batch is a dict consisting of:
        batch = {
            "im_feats_1": im_feats_1,
            "im_feats_2": im_feats_2,
            "masked_im_1": masked_im_1,
            "masked_im_2": masked_im_2,
            "p": p,
            "texts_str": natural_lang_desc_texts,
            "texts_vec": texts_vec,
            "texts_int": texts_int,
            "texts_extra": texts_extra,
            "target": targets,
            "shapes": shapes,
            "colors": colors,
            "caption_str": caption_str,
        }
    im_feats_1: image features for agent 1
    im_feats_2: image features for agent 2
    masked_im_1: masked input image received by agent 1
    masked_im_2: masked input image received by agent 2
    p: fraction of the input image received by agent 1; agent 2 receives (1 - p)
    texts_str: set of natural language descriptions of the image (only one is correct)
    texts_int: set of integer-encoded descriptions of the image (only one is correct)
    texts_vec: vector representation of the set of natural language image descriptions for each example
    texts_extra: dict of individual word vectors for each description for each example and their corresponding lengths
    target: index of the correct textual description
    shapes: shape of the object in the correct caption, None if there is no explicit shape in the caption
    colors: color of the object in the correct caption, None if there is no explicit color in the caption
    caption_str: correct natural language description of the image
    """
    # Read data
    debuglogger.debug('Reading in dataset...')
    load_cmd = 'load(' + data_path + ')'
    data = dataset(dtype=ds_type, name=name, config=load_cmd)
    generated = data.generate(n=size, mode=mode)
    debuglogger.debug('Dataset read...')
    order = list(range(size))
    assert len(generated['texts_str']) == size

    # Convert texts to vector
    texts_str = generated['texts_str']
    texts_int, word2id, id2word = convert_texts(texts_str)
    word2id = embed(word2id, embed_path)

    # Create feature extraction model
    model = FeatureModel()
    model.fn.eval()
    model.eval()
    if cuda:
        model.fn.cuda()
        model.cuda()

    # Shuffle (seed first so the order is reproducible, as the docstring promises)
    if shuffle:
        random.seed(random_seed)
        random.shuffle(order)

    # Generate batches
    num_batches = size // batch_size
    if truncate_final_batch:
        if size - (num_batches * batch_size) > 0:
            num_batches = num_batches + 1

    for i in range(num_batches):
        batch_indices = sorted(order[i * batch_size:(i + 1) * batch_size])
        batch = dict()
        debuglogger.debug(f'batch idxs: {batch_indices}')

        # Upscale images and convert to tensors
        ims = generated['world'][batch_indices]
        if FLAGS.improc_from_scratch:
            ims = downsize(ims, FLAGS.image_size)
        else:
            ims = upscale(ims)
        batch['images'] = torch.from_numpy(ims).float().permute(0, 3, 1, 2)

        # Extract target and texts
        batch['target'] = torch.from_numpy(generated['target'][batch_indices]).long()
        batch["texts_str"] = [generated['texts_str'][j] for j in batch_indices]
        batch["caption_str"] = [generated['caption_str'][j] for j in batch_indices]
        batch["texts_int"] = [texts_int[j] for j in batch_indices]

        # Get shape and color for batch
        batch["shapes"] = []
        batch["colors"] = []
        for cap in batch["caption_str"]:
            cap = cap.split()
            color = None
            shape = None
            for w in cap:
                if w in SHAPES:
                    shape = w
                if w in COLORS:
                    color = w
            batch["shapes"].append(shape)
            batch["colors"].append(color)
        assert len(batch["shapes"]) == batch_size
        assert len(batch["colors"]) == batch_size

        # Get shape and color for texts
        batch["texts_shapes"] = []
        batch["texts_colors"] = []
        for t in batch["texts_str"]:
            s = []
            c = []
            for cap in t:
                cap = cap.split()
                color = None
                shape = None
                for w in cap:
                    if w in SHAPES:
                        shape = w
                    if w in COLORS:
                        color = w
                s.append(shape)
                c.append(color)
            batch["texts_shapes"].append(s)
            batch["texts_colors"].append(c)
        assert len(batch["texts_shapes"]) == batch_size
        assert len(batch["texts_colors"]) == batch_size

        # Generate p
        batch['p'] = torch.from_numpy(np.random.rand(batch_size))

        # Mask images (after permute the layout is NCHW, so the last dim is width)
        debuglogger.debug(f'Image dims: {batch["images"].shape}')
        (bs, ch, height, width) = batch['images'].shape
        mask = torch.ones(bs, ch, height, width)
        # Vertical mask
        if FLAGS.vertical_mask:
            cutoffs = (width * batch["p"]).int().clamp(0, width - 1).numpy().tolist()
            debuglogger.debug(f'cutoffs: {cutoffs}')
            for i_c, c in enumerate(cutoffs):
                mask[i_c, :, :, c:] = 0
        else:
            # Random mask
            for i_m in range(bs):
                mask[i_m] = generate_mask(batch['images'][i_m])
        batch['masked_im_1'] = torch.mul(mask, batch['images']) + (1 - mask)
        batch['masked_im_2'] = torch.mul(1 - mask, batch['images']) + mask
        if i == 0:
            # Save example batch
            save_image(batch['images'], data_path + '/example_ims_orig.png', pad_value=0.5)
            save_image(batch['masked_im_1'], data_path + '/example_ims_1.png', pad_value=0.5)
            save_image(batch['masked_im_2'], data_path + '/example_ims_2.png', pad_value=0.5)

        # Build descriptions
        desc_cbow, desc_set, desc_set_lens = cbow_general(batch["texts_int"], word2id, id2word)
        batch["texts_vec"] = desc_cbow
        batch["texts_extra"] = {"desc_set": desc_set, "desc_set_lens": desc_set_lens}

        # Extract image feats
        m_im_1 = Variable(batch['masked_im_1'])
        m_im_2 = Variable(batch['masked_im_2'])
        if cuda:
            m_im_1 = m_im_1.cuda()
            m_im_2 = m_im_2.cuda()
        if FLAGS.improc_from_scratch:
            batch["im_feats_1"] = m_im_1
            batch["im_feats_2"] = m_im_2
        else:
            batch["im_feats_1"] = (model(m_im_1, request=img_feats)[0]).detach()
            batch["im_feats_2"] = (model(m_im_2, request=img_feats)[0]).detach()

        # Identify non blank partition
        non_blank_partition = []
        for j in range(batch_size):
            idx = get_non_blank_partition(batch['masked_im_1'][j], batch['masked_im_2'][j])
            non_blank_partition.append(idx)
        batch['non_blank_partition'] = non_blank_partition

        yield batch
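# A minimal usage sketch (not from the source): iterate one pass of batches.
# The paths, dataset name, and argument values are illustrative assumptions,
# and module-level dependencies such as FLAGS are presumed configured.
loader = load_shapeworld_dataset(
    data_path='../data/oneshape_simple_textselect',
    embed_path='../data/glove.6B.100d',
    mode='train',
    size=250,
    ds_type='agreement',
    name='oneshape_simple_textselect',
    batch_size=32,
    random_seed=0,
    shuffle=True,
    img_feats='avgpool_512',
    cuda=False)
for batch in loader:
    # Each batch dict holds the masked images, their extracted features,
    # the candidate texts, and the index of the correct description.
    print(batch['target'].shape, len(batch['texts_str']), batch['p'][0])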
from shapeworld import dataset

dataset = dataset(dtype='agreement', name='oneshape_simple_textselect',
                  config='load(../data/oneshape_simple_textselect)')
generated = dataset.generate(n=250, mode='train')
k = ['caption_str', 'texts_str', 'pred_items']
for l in generated:
    print(l, type(generated[l]))
    if l == 'target':
        print(generated[l].shape)
for i in range(10):
    print(f'Prediction items: {generated[k[2]][i]}, caption: {generated[k[0]][i]}, texts: {generated[k[1]][i]}')
from shapeworld import dataset
import pprint

dataset = dataset(
    dtype='agreement',
    name='oneshape_simple_textselect',
)
generated = dataset.generate(n=30, mode='train', noise_range=0.1, include_model=True)
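# A hedged follow-up sketch: with include_model=True the generated dict should
# also carry symbolic world/caption models. The key name 'world_model' is an
# assumption about the ShapeWorld output, so it is checked before use.
for key in generated:
    print(key, type(generated[key]))
if 'world_model' in generated:
    pprint.pprint(generated['world_model'][0])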
N_VAL = 500
N_TEST = 500
#N_CAPTIONS = 100
#N_TRAIN = 50
#N_VAL = 25
#N_TEST = 25
assert N_TRAIN + N_VAL + N_TEST == N_CAPTIONS

WIDTH = 64
HEIGHT = 64
CHANNELS = 3
EXAMPLES = 4

DATASET = dataset(dtype="agreement", name="spatial_jda")
random = np.random.RandomState(0)

all_captions = {}
while len(all_captions) < N_CAPTIONS:
    if len(all_captions) % 500 == 0:
        print("%d / %d captions" % (len(all_captions), N_CAPTIONS))
    DATASET.world_generator.sample_values(mode="train")
    DATASET.world_captioner.sample_values(mode="train", correct=True)
    while True:
        world = DATASET.world_generator()
        if world is None:
            continue
        caption = DATASET.world_captioner(entities=world.entities)
        if caption is None:
    '--restore', action='store_true', help='Restore model (requires --model-file)')
parser.add_argument(
    '-E', '--evaluate', action='store_true',
    help='Evaluate model without training (requires --model-file)')
parser.add_argument('-V', '--verbose-tensorflow', action='store_true', help='TensorFlow verbosity')
args = parser.parse_args()

# dataset
dataset = dataset(dtype=args.type, name=args.name, config=args.config)
sys.stdout.write('{} {} dataset: {}\n'.format(
    datetime.now().strftime('%H:%M:%S'), dataset.type, dataset.name))
sys.stdout.write(' config: {}\n'.format(args.config))
sys.stdout.flush()

# import tensorflow
if args.verbose_tensorflow:
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
else:
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf

# model
module = import_module('models.{}.{}'.format(args.type, args.model))
sys.stdout.write('{} {} model: {}\n'.format(