def _check_img_exists():
    logger.info('Listing existing image files.')
    img_all = train_set.union(valid_set).union(test_set)
    ex = []
    if os.path.exists(pjoin(dset_dir, 'images')):
        ex = os.listdir(pjoin(dset_dir, 'images'))
        ex = [pjoin('images', i) for i in ex]
    ex = set(ex)
    return len(ex.intersection(img_all)) == len(img_all), list(ex)
def _save_files(caption_type, img, caption, composite, sort_metric):
    img_id = caption['image_id']
    base_score = caption[baseline_name][sort_metric]
    model_score = [caption[n][sort_metric] for n in model_names]
    base_out = '{} ({}): {}'.format(baseline_name, base_score,
                                    caption[baseline_name]['caption'])
    model_out = [
        '{} ({}): {}'.format(n, model_score[i], caption[n]['caption'])
        for i, n in enumerate(model_names)
    ]

    # Save image
    score = '{:1.3f}'.format(model_score[-1]).replace('.', '-')
    type_short = {v: k for k, v in CATEGORIES.items()}
    if type(img_id) == str:
        img_out_name = '{}_{}_{}.jpg'.format(type_short[caption_type], score, img_id)
    else:
        img_out_name = '{}_{}_{:012d}.jpg'.format(type_short[caption_type], score, img_id)
    img.save(pjoin(output_dir, img_out_name))
    draw = ImageDraw.Draw(composite)
    offset = (IMG_RESIZE - TEXT_SIZE) // 2
    draw.text((IMG_RESIZE, offset), img_out_name, font=font)
    draw.text((IMG_RESIZE, offset + TEXT_SIZE), 'Type: ' + caption_type, font=font)
    composite.save(pjoin(output_dir, 'comp_' + img_out_name))

    # Write captions
    out_str = '{}\r\n{}\r\n\r\n'.format(base_out, '\r\n'.join(model_out))
    with open(pjoin(output_dir, 'captions_{}.txt'.format(caption_type)), 'a') as f:
        f.write('{}\r\n{}'.format(img_out_name, out_str))

    # Write captions in LaTeX format
    modcap = ' \\begin{{modcap}}\n'
    modcap += ' {}\n'
    modcap += ' \\end{{modcap}} \\\\\n'
    out_str = [
        ' \\gph{{1.0}}{{resources/xxx/{}}} &'.format(img_out_name),
        ' \\begin{tabular}{M{\\linewidth}}',
        ' \\begin{basecap}',
        ' {}'.format(caption[baseline_name]['caption']),
        ' \\end{basecap} \\\\',
    ]
    for n in model_names:
        out_str += [modcap.format(caption[n]['caption'])]
    out_str += [
        ' \\end{tabular} &',
        ' ',
    ]
    with open(pjoin(output_dir, 'captions_latex_{}.txt'.format(caption_type)), 'a') as f:
        f.write('\n'.join(out_str) + '\n')
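# Illustrative note (not part of the original script): for one image and a single
# model, the LaTeX fragment written above is expected to look roughly like the
# sketch below, assuming `basecap`/`modcap` environments and the `\gph` macro are
# defined in the paper preamble; the file name and caption text are made up.
#
#      \gph{1.0}{resources/xxx/x_0-123_000000123456.jpg} &
#      \begin{tabular}{M{\linewidth}}
#      \begin{basecap}
#      a man riding a wave on a surfboard
#      \end{basecap} \\
#      \begin{modcap}
#      a man riding a wave on top of a surfboard
#      \end{modcap} \\
#      \end{tabular} &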
def _get_caption_statistics(train_caption_set, curr_score_dir,
                            checkpoint_num=None, default_vocab_size=9962):
    # float_str = '{:.3f}'
    # assert isinstance(train_caption_list, list)
    assert isinstance(train_caption_set, set)

    # Try to load vocab size from config
    c = cfg.load_config(
        pjoin(os.path.dirname(curr_score_dir), 'run_01', 'config.pkl'))
    try:
        vocab_size = len(c.itow)
    except AttributeError:
        vocab_size = default_vocab_size

    # Find caption file
    if checkpoint_num is None:
        jsons = [f for f in os.listdir(curr_score_dir) if 'captions___' in f]
        jsons = [j for j in sorted(jsons, key=nat_key)]
        caption_json = pjoin(curr_score_dir, jsons[-1])
    else:
        caption_json = pjoin(curr_score_dir,
                             'captions___{}.json'.format(checkpoint_num))

    # Load captions
    with open(caption_json, 'r') as f:
        captions = json.load(f)
    captions_list = [d['caption'] for d in captions]

    # Calculate stats
    appear_in_train = 0
    counts = {}
    caption_length = []
    for caption in captions_list:
        # Unique
        if caption in train_caption_set:
            appear_in_train += 1
            # appear_in_train += binary_search(data_list=train_caption_list, query=caption)
        # Vocab
        caption = caption.split(' ')
        for w in caption:
            counts[w] = counts.get(w, 0) + 1
        # Length
        caption_length.append(len(caption))
    vocab_coverage = (len(counts) / (vocab_size - 2)) * 100.  # Exclude <GO> and <EOS>
    average_length = np.mean(caption_length)
    percent_unique = (1. - (appear_in_train / len(captions_list))) * 100.
    return np.array([vocab_coverage, percent_unique, average_length])
def main():
    if len(SHORTLISTED_IMGS) > 0:
        global JUMP_TO_IDX
        JUMP_TO_IDX = 0
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    id_to_results = {}
    config = dict(
        sort_by_metric=SORT_BY_METRIC,
        baseline_json=BASELINE_JSON,
        model_json=MODEL_JSON,
    )
    with open(pjoin(OUTPUT_DIR, 'config.json'), 'w') as f:
        json.dump(config, f)

    # Load captions
    for j, n in zip(MODEL_JSON, MODEL_NAMES):
        _load_caption_json(id_to_results, j, n)
    _load_caption_json(id_to_results, BASELINE_JSON, BASELINE_NAME)

    # Load scores
    for j, n in zip(MODEL_SCORES_JSON, MODEL_NAMES):
        _load_score_json(id_to_results, j, n)
    _load_score_json(id_to_results, BASELINE_SCORES_JSON, BASELINE_NAME)

    # Sort captions
    caption_list = _sort_captions(id_to_results,
                                  sort_metric=SORT_BY_METRIC,
                                  sort_model=MODEL_NAMES,
                                  use_diff=not VISUALISE_ATTENTION)
    if VISUALISE_ATTENTION:
        _display_attention(caption_list, SORT_BY_METRIC, RADIX_SAMPLE_TIMESTEP)
    else:
        _display_captions(caption_list, SORT_BY_METRIC)
def main():
    os.makedirs(output_dir, exist_ok=True)
    id_to_results = {}
    config = dict(
        sort_by_metric=sort_by_metric,
        baseline_json=baseline_json,
        model_json=model_json,
    )
    with open(pjoin(output_dir, 'config.json'), 'w') as f:
        json.dump(config, f)

    # Load captions
    for j, n in zip(model_json, model_names):
        _load_caption_json(id_to_results, j, n)
    _load_caption_json(id_to_results, baseline_json, baseline_name)

    # Load scores
    for j, n in zip(model_scores_json, model_names):
        _load_score_json(id_to_results, j, n)
    _load_score_json(id_to_results, baseline_scores_json, baseline_name)

    # Sort captions
    caption_list = _sort_captions(id_to_results,
                                  sort_metric=sort_by_metric,
                                  sort_model=model_names,
                                  use_diff=True)
    _display_captions(caption_list, sort_by_metric)
def _get_model_size(curr_score_dir):
    # Try to load model size file
    msfp = pjoin(os.path.dirname(curr_score_dir), 'run_01', 'model_size.txt')
    with open(msfp, 'r') as f:
        line = f.readlines()[1]
    model_size = P_NUM.findall(line)
    assert isinstance(model_size, list) and len(model_size) == 1
    return model_size[0].replace(',', '')
def _get_ckpt(score_file, sort_checkpoints=True, get_checkpoint_num=None):
    scores = np.genfromtxt(score_file, delimiter=',')
    if scores.shape[1] > 3:
        # MNIST files have only 3 columns
        scores = scores[:, :-2]
    ckpt_nums, scores = scores[:, 0].astype(np.int64), scores[:, 1:].astype(np.float64)

    # Calculate weighted average
    # 2x weightage for B-4, CIDEr, SPICE
    wg = np.array([[1, 1, 1, 2, 1, 1, 2, 2]]).astype(np.float64)
    try:
        scores_wg_av = np.mean(scores * wg, axis=1)
    except ValueError:
        # Give up, take first value lol
        scores_wg_av = scores[:, 0]

    if get_checkpoint_num:
        max_idx = int(np.where(ckpt_nums == int(get_checkpoint_num))[0])
        # max_idx = np.where(ckpt_nums == int(get_checkpoint_num))[0]
    else:
        if sort_checkpoints:
            # Get best checkpoint
            max_idx = np.where(scores_wg_av == np.amax(scores_wg_av))
            if len(max_idx[0]) > 1:
                if scores.shape[1] > 6:
                    # For ties, sort by CIDEr
                    max_idx = int(np.argmax(scores[:, 6]))
                else:
                    # MNIST, take last checkpoint
                    max_idx = max_idx[0][-1]
            else:
                max_idx = int(max_idx[0])
        else:
            # Get final checkpoint
            max_idx = ckpt_nums.shape[0] - 1

    sparsity_file = pjoin(os.path.split(score_file)[0], 'sparsity_values.csv')
    if os.path.isfile(sparsity_file):
        sparsity = np.genfromtxt(sparsity_file, delimiter=',', skip_header=1)

        def _check():
            # noinspection PyTypeChecker
            return sparsity.shape[0] != ckpt_nums.shape[0] or sparsity[
                max_idx, 0] != ckpt_nums[max_idx]

        if _check():
            # Try again without skipping header
            sparsity = np.genfromtxt(sparsity_file, delimiter=',')
            if _check():
                raise ValueError(
                    'Checkpoint check failed. {} vs {} for idx {}'.format(
                        sparsity[max_idx, 0], ckpt_nums[max_idx], max_idx))
        sparsity = sparsity[max_idx, 1:2]
    else:
        sparsity = [-1]
    score = np.concatenate(
        [sparsity, scores[max_idx], [scores_wg_av[max_idx]]])
    return ckpt_nums[max_idx], score
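# Sketch of the assumed CSV layout (inferred from the weighting above, not taken
# from the original file): each `metric_scores.csv` row is expected to hold the
# checkpoint number in column 0 followed by eight metric columns in the order
# B-1, B-2, B-3, B-4, METEOR, ROUGE-L, CIDEr, SPICE, which makes
# `wg = [1, 1, 1, 2, 1, 1, 2, 2]` a 2x weighting of B-4, CIDEr and SPICE, and
# `scores[:, 6]` the CIDEr column used as the tie-breaker. A minimal standalone
# illustration of the ranking score for one checkpoint:
#
#     import numpy as np
#     row = np.array([0.75, 0.59, 0.45, 0.34, 0.27, 0.56, 1.10, 0.20])  # toy values
#     wg = np.array([1, 1, 1, 2, 1, 1, 2, 2], dtype=np.float64)
#     weighted_average = np.mean(row * wg)  # scalar used to rank checkpoints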
def main(args):
    print('')
    a = args
    default_exp_dir = pjoin(BASE_DIR, 'experiments')
    if a.log_dir == '':
        a.log_dir = default_exp_dir
    if a.inspect_attributes == '':
        print('\nAttribute list is empty.\n')
        return None
    else:
        inspect_attributes = a.inspect_attributes.split(',')

    # List experiments
    exp_names = os.listdir(a.log_dir)
    all_run_dirs = []
    for n in exp_names:
        exp_dir = pjoin(a.log_dir, n)
        if os.path.isdir(exp_dir):
            sub_dirs = [pjoin(a.log_dir, n, d) for d in os.listdir(exp_dir)]
            run_dirs = [d for d in sub_dirs if 'infer' not in os.path.split(d)[1]]
            all_run_dirs += run_dirs

    # List config files
    # all_cfg_files = []
    # for d in all_run_dirs:
    #     cfg_file = [f for f in os.listdir(d) if 'config' and '.pkl' in f]
    #     assert len(cfg_file) == 1
    #     all_cfg_files.append(pjoin(d, cfg_file[0]))
    all_cfg_files = [pjoin(d, 'config.pkl') for d in all_run_dirs]

    # Inspect
    for attr in inspect_attributes:
        print('\nInspecting attribute: `{}`\n'.format(attr))
        for cpath in all_cfg_files:
            try:
                c = vars(load_config(cpath))
            except IOError:
                continue
            print(os.path.sep.join(cpath.split(os.path.sep)[-3:-1]))
            if attr in c:
                print(c[attr])
            else:
                print('`{}` not found.'.format(attr))
    print('\nAttribute inspection completed.\n')
def tokenize_all(train_json, test1_json):
    """
    Tokenize sentences in raw dataset

    Args:
        train_json, test1_json: raw json object
        key: 'caption' or 'tags'
    """
    # print("\nINFO: Tokenising captions.\n")
    tokenised_data = []

    # Train data
    for user_id, posts in tqdm(sorted(train_json.items()),
                               ncols=100, desc='Tokenising train data'):
        for post_id, post in sorted(posts.items()):
            img_id = '{}_@_{}'.format(user_id, post_id)
            temp_dict = dict(split='train',
                             filepath=pjoin('images', img_id),
                             image_id=img_id,
                             raw=[post['caption']],
                             tokens=[tokenize(post['caption'])])
            tokenised_data.append(temp_dict)

    # Validation data
    random.seed(4896)
    random.shuffle(tokenised_data)
    for i in range(2000):
        tokenised_data[i]['split'] = 'val'

    # Test1 data
    for user_id, posts in tqdm(sorted(test1_json.items()),
                               ncols=100, desc='Tokenising test1 data'):
        for post_id, post in sorted(posts.items()):
            img_id = '{}_@_{}'.format(user_id, post_id)
            temp_dict = dict(split='test',
                             filepath=pjoin('images', img_id),
                             image_id=img_id,
                             raw=[post['caption']],
                             tokens=[tokenize(post['caption'])])
            tokenised_data.append(temp_dict)
    return tokenised_data
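# Illustrative example (hypothetical values, not from the original file): each
# entry appended above is a plain dict of the form
#
#     {'split': 'train',
#      'filepath': 'images/userA_@_post001',
#      'image_id': 'userA_@_post001',
#      'raw': ['sunset at the beach #nofilter'],
#      'tokens': [['sunset', 'at', 'the', 'beach']]}
#
# The exact token list depends on the `tokenize` helper defined elsewhere in
# this repository.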
def main(args):
    args = deepcopy(args)
    os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    default_exp_dir = pjoin(os.path.dirname(CURR_DIR), 'experiments')
    args.infer_checkpoints_dir = pjoin(default_exp_dir, args.infer_checkpoints_dir)
    args.annotations_file = pjoin(BASE_DIR, 'common', 'coco_caption',
                                  'annotations', args.annotations_file)
    if args.dataset_dir == '':
        args.dataset_dir = pjoin(BASE_DIR, 'datasets', 'mscoco')

    ckpt_prefix = args.ckpt_prefix
    if args.infer_checkpoints == 'all':
        ckpt_files = sorted(os.listdir(args.infer_checkpoints_dir), key=nat_key)
        ckpt_files = [f for f in ckpt_files if ckpt_prefix in f]
        ckpt_files = [f.replace('.index', '') for f in ckpt_files if '.index' in f]
        ckpt_files = [f.replace(ckpt_prefix, '') for f in ckpt_files]
        # if len(ckpt_files) > 20:
        ckpt_files = ckpt_files[-12:]
        args.infer_checkpoints = ckpt_files
    else:
        args.infer_checkpoints = args.infer_checkpoints.split(',')
        if len(args.infer_checkpoints) < 1:
            raise ValueError('`infer_checkpoints` must be either `all` or '
                             'a list of comma-separated checkpoint numbers.')

    ###
    c = cfg.load_config(pjoin(args.infer_checkpoints_dir, 'config.pkl'))
    c = update_config(c)
    c.__dict__.update(args.__dict__)

    save_name = 'b{}_lp{:2.1f}___{}'.format(
        c.infer_beam_size,
        c.infer_length_penalty_weight,
        strftime('%m-%d_%H-%M', localtime()))
    set_name = c.infer_set[0] + ''.join(
        x.title() for x in c.infer_set.split('_'))[1:]  # camelCase
    c.infer_save_path = '_'.join(
        [c.infer_checkpoints_dir, '__infer', set_name, save_name])
    # c.infer_save_path = pjoin(c.infer_checkpoints_dir, '_'.join(['infer', set_name, save_name])

    ###
    if not os.path.exists(c.infer_save_path):
        os.mkdir(c.infer_save_path)

    # Loop through the checkpoint files
    scores_combined = {}
    for ckpt_num in c.infer_checkpoints:
        curr_ckpt_path = pjoin(c.infer_checkpoints_dir, ckpt_prefix + ckpt_num)
        infer.evaluate_model(config=c,
                             curr_ckpt_path=curr_ckpt_path,
                             scores_combined=scores_combined)
        print('\n')
def main(args):
    print('')
    a = args
    default_exp_dir = pjoin(BASE_DIR, 'experiments')
    if a.log_dir == '':
        a.log_dir = default_exp_dir

    def _should_add(path):
        _is_infer = 'infer' in os.path.split(path)[1]
        if args.collect_runs == 'all':
            return _is_infer
        else:
            runs = [
                'run_{:02d}'.format(int(r))
                for r in args.collect_runs.split(',')
            ]
            return _is_infer and _extract_keys(path)[1] in runs

    # List experiments
    exp_dirs = [pjoin(a.log_dir, n) for n in os.listdir(a.log_dir)]
    all_score_dirs = []
    for exp_dir in exp_dirs:
        if not os.path.isdir(exp_dir):
            continue
        sub_dirs = [pjoin(exp_dir, d) for d in os.listdir(exp_dir)]
        score_dirs = [d for d in sub_dirs if _should_add(d)]
        all_score_dirs += score_dirs

    # Extract scores
    for sort_checkpoints in [True, False]:
        score_dict = {}
        _loop(all_score_dirs, score_dict, 'valid', sort_checkpoints, args)
        _loop(all_score_dirs, score_dict, 'test', sort_checkpoints, args)
        _write_output_csv(score_dict, args.log_dir, sort_checkpoints,
                          reverse_exps=a.reverse_sort_dirs)
    print('\nScore collection completed.\n')
def _write_output_csv(sc_dict, log_dir, sort_checkpoints, reverse_exps=False):
    if sort_checkpoints:
        sfx = 'sorted'
    else:
        sfx = 'last'
    for infer_name in sorted(sc_dict):
        datetime = strftime('%m-%d_%H-%M-%S', localtime())
        fname = infer_name.replace('infer_', '') + '___{}___{}.csv'.format(sfx, datetime)
        lines = []
        for exp_name in sorted(sc_dict[infer_name], reverse=reverse_exps):
            runs = [
                sc_dict[infer_name][exp_name][r]
                for r in sorted(sc_dict[infer_name][exp_name])
            ]
            mean_stats = []
            for i, r in enumerate(runs):
                mean_stats.append(r['caption_stats'])
                if i == 0:
                    name = r['name']
                else:
                    name = '-'
                line = ','.join([
                    name, r['run'], str(r['best_ckpt']),
                    _score_to_string(r['best_score']), r['model_size'],
                    _score_to_string(r['caption_stats']),
                    r['infer_name'], r['datetime']
                ])
                lines.append(line)
            if len(runs) > 1:
                mean_score = _score_to_string(
                    _get_average([r['best_score'] for r in runs]))
                mean_stats = _score_to_string(_get_average(mean_stats))
                line = ','.join(
                    ['-', 'mean', '0', mean_score, mean_stats, 'N/A', 'N/A'])
                lines.append(line)
        with open(pjoin(log_dir, fname), 'w') as f:
            f.write('\r\n'.join(lines))
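# Sketch of the resulting CSV layout (reconstructed from the joins above, not
# verified against real output): each per-run row is roughly
#
#     name, run, best_ckpt, best_score..., model_size, caption_stats..., infer_name, datetime
#
# where `best_score...` and `caption_stats...` expand into several comma-separated
# values via `_score_to_string`, and a trailing '-,mean,0,...' row is appended
# whenever an experiment has more than one run.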
def _check_img_exists():
    logger.info('Listing existing image files.')
    img_all = train_set.union(valid_set).union(test_set)
    trpath = pjoin(dset_dir, 'train2014')
    vpath = pjoin(dset_dir, 'val2014')
    ttpath = pjoin(dset_dir, 'test2014')
    extr = exv = extt = []
    if os.path.exists(trpath):
        extr = os.listdir(trpath)
        extr = [pjoin('train2014', i) for i in extr]
    if os.path.exists(vpath):
        exv = os.listdir(vpath)
        exv = [pjoin('val2014', i) for i in exv]
    if os.path.exists(ttpath):
        extt = os.listdir(ttpath)
        extt = [pjoin('test2014', i) for i in extt]
    ex = set(extr + exv)
    exists = len(ex.intersection(img_all)) == len(img_all)
    exists = exists and len(extt) == 40775
    return exists, list(ex) + extt
def _display_attention(captions_list, sort_metric, radix_sample_timestep):
    # Display captions
    print('')
    instructions = [
        '"y" to save',
        '"r" to repeat',
        '"e" to exit',
        'other keys to skip.',
        '---\n',
    ]
    instructions = '\n'.join(instructions)
    global JUMP_TO_IDX
    if JUMP_TO_IDX < 0 or JUMP_TO_IDX >= len(captions_list):
        JUMP_TO_IDX = 0
    model_name = MODEL_NAMES[0]
    img_plot = None
    fig = plt.figure(figsize=(20, 10))

    for cap_idx, cap in enumerate(captions_list[JUMP_TO_IDX:]):
        if len(SHORTLISTED_IMGS) > 0 and not any(str(cap['image_id']) in _
                                                 for _ in SHORTLISTED_IMGS):
            # Skip if no partial match with any shortlisted images
            continue
        # Draw attention maps if available (only for the 1st model)
        att_dict = cap[MODEL_NAMES[0]]['attention']
        if att_dict is None:
            continue
        img = _prepare_img(pjoin(IMAGE_DIR, cap['image_name']))

        # Collect info
        model_cap = '{} ({:.2f}): {}'.format(
            model_name, cap[model_name][sort_metric], cap[model_name]['caption'])
        sent_len = len(cap[model_name]['caption'].split(' '))

        # Visualise
        bg_big = Image.new('RGB', [IMG_CROP * 6, IMG_CROP * 4])
        bg_big.paste(img, (BORDER, BORDER))
        draw = ImageDraw.Draw(bg_big)
        draw.text(
            (IMG_CROP + BORDER * 2, BORDER),
            '# {} / {}'.format(JUMP_TO_IDX + cap_idx + 1, len(captions_list)),
            font=font)
        text_group = textwrap.wrap(model_cap, width=45)
        print(model_cap + '\n')
        for i, text in enumerate(text_group):
            draw.text((IMG_CROP + BORDER * 2,
                       BORDER * 2 + int(TEXT_SIZE * 1.05) * (i + 1)),
                      text, font=font)

        # assert isinstance(att_map_list, list)
        # atts = [_[img_name] for _ in att_map_list]
        # max_len = max(_.shape[1] for _ in atts) + 2
        # bgs = [Image.new('RGB', [IMG_CROP * 6, IMG_CROP * 4]) for _ in range(max_len)]
        # for i, att in enumerate(atts):
        assert isinstance(att_dict, dict)
        att = att_dict[cap['image_id']]
        hw = int(math.sqrt(att.shape[-1]))
        num_heads = att.shape[0]
        att = np.reshape(att, [num_heads, att.shape[1], hw, hw])
        ori_timesteps = att.shape[1]
        if radix_sample_timestep:
            att = att[:, ::RADIX_NUM_TOKENS, :, :]
        sampled_timesteps = att.shape[1]
        att = att[:, :sent_len, :]

        # Apply attention map
        bg = Image.new('RGB', [IMG_CROP, IMG_CROP])
        # border = int(IMG_CROP / 4)
        offset = IMG_CROP + BORDER
        att_comp = [bg_big.copy() for _ in range(att.shape[1] + 1)]
        all_comps = []
        for head in range(num_heads):
            maps = att[head, :, :, :]
            m_max = maps.max()
            # if m_max < 0.01:
            #     maps *= (255.0 / m_max / 5)
            # else:
            #     maps *= (255.0 / m_max)
            maps *= (255.0 / m_max)
            maps = maps.astype(np.uint8)
            comps = []
            for t, m in enumerate(maps):
                m = Image.fromarray(m)
                m = m.convert('L')
                m = m.resize([IMG_CROP, IMG_CROP], Image.BILINEAR)
                comp = Image.composite(img, bg, m)
                comp = ImageEnhance.Brightness(comp).enhance(2.0)
                comp = ImageEnhance.Contrast(comp).enhance(1.5)
                x = (head % 4) * offset + BORDER
                y = int(head / 4) * offset + BORDER * 2 + IMG_CROP
                att_comp[t].paste(comp, (x, y))
                comps.append(comp)
            all_comps.append(comps)

        key_input = 'r'
        while key_input == 'r':
            for comp in att_comp:
                if img_plot is None:
                    img_plot = plt.imshow(comp)
                else:
                    img_plot.set_data(comp)
                plt.show(block=False)
                fig.canvas.draw()
                plt.pause(.05)
            # Get key press
            # key_input = raw_input(instructions)
            key_input = input(instructions)
            fig.canvas.flush_events()

        if key_input == 'e':
            plt.close()
            break
        elif key_input == 'y':
            img_id = cap['image_id']
            score = '{:1.3f}'.format(cap[model_name][sort_metric]).replace('.', '-')
            if type(img_id) == str:
                output_dir = pjoin(OUTPUT_DIR, '{}_{}'.format(score, img_id))
            else:
                output_dir = pjoin(OUTPUT_DIR, '{}_{:012d}'.format(score, img_id))
            os.makedirs(output_dir, exist_ok=True)
            img.save(pjoin(output_dir, 'base.jpg'))
            footnote = [
                'Num words (including <EOS>): {}'.format(sent_len + 1),
                'Original attention time steps: {}'.format(ori_timesteps),
                'Sampled time steps before truncation: {}'.format(sampled_timesteps),
                'Sampled time steps after truncation: {}'.format(att.shape[1]),
            ]
            draw.text((BORDER, IMG_CROP + BORDER * 4),
                      '\n\n'.join(footnote), font=font)
            bg_big.save(pjoin(output_dir, 'comp.jpg'))
            with open(pjoin(output_dir, 'caption.txt'), 'w') as f:
                f.write(cap[model_name]['caption'])
            for i, h in enumerate(all_comps):
                for j, t in enumerate(h):
                    if radix_sample_timestep:
                        j *= RADIX_NUM_TOKENS
                    t.save(pjoin(output_dir, 'h{}_t{}.jpg'.format(i, j)))
    print('')
def parse_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument(
        '--infer_set', type=str, default='test',
        choices=['test', 'valid', 'coco_test', 'coco_valid'],
        help='The split to perform inference on.')
    parser.add_argument(
        '--infer_checkpoints_dir', type=str,
        default=pjoin('mscoco', 'radix_b256_add_LN_softmax_h8_tie_lstm_run_01'),
        help='The directory containing the checkpoint files.')
    parser.add_argument(
        '--infer_checkpoints', type=str, default='all',
        help='The checkpoint numbers to be evaluated. Comma-separated.')
    parser.add_argument(
        '--annotations_file', type=str, default='captions_val2014.json',
        help='The annotations / reference file for calculating scores.')
    parser.add_argument(
        '--dataset_dir', type=str, default='',
        help='Dataset directory.')
    parser.add_argument(
        '--ckpt_prefix', type=str, default='model_compact-',
        help='Prefix of checkpoint names.')
    parser.add_argument(
        '--run_inference', type=bool, default=True,
        help='Whether to perform inference.')
    parser.add_argument(
        '--get_metric_score', type=bool, default=True,
        help='Whether to perform metric score calculations.')
    parser.add_argument(
        '--save_attention_maps', type=bool, default=False,
        help='Whether to save attention maps to disk as pickle file.')
    parser.add_argument(
        '--gpu', type=str, default='0',
        help='The gpu number.')
    parser.add_argument(
        '--per_process_gpu_memory_fraction', type=float, default=0.75,
        help='The fraction of GPU memory allocated.')
    parser.add_argument(
        '--verbosity', type=int, default=10, choices=[10, 20])
    parser.add_argument(
        '--infer_beam_size', type=int, default=3,
        help='The beam size.')
    parser.add_argument(
        '--infer_length_penalty_weight', type=float, default=0.0,
        help='The length penalty weight used in beam search.')
    parser.add_argument(
        '--infer_max_length', type=int, default=30,
        help='The maximum caption length allowed during inference.')
    parser.add_argument(
        '--batch_size_infer', type=int, default=25,
        help='The batch size.')

    args = parser.parse_args()
    return args
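# Hypothetical invocation (the script name is an assumption; the flags are the
# ones defined above):
#
#     python infer.py --infer_set valid \
#         --infer_checkpoints_dir mscoco/radix_b256_add_LN_softmax_h8_tie_lstm_run_01 \
#         --infer_checkpoints all --gpu 0 --infer_beam_size 3
#
# Note that flags declared with `type=bool` (e.g. `--run_inference`) do not parse
# 'False' as False: argparse applies bool() to the string, so any non-empty value,
# including 'False', yields True.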
import textwrap
import matplotlib.pyplot as plt
import matplotlib.font_manager as fmng
from PIL import Image, ImageEnhance, ImageOps, ImageFont, ImageDraw

# Variables
sort_by_metric = 'CIDEr'
jump_to_idx = 0
model_names = ['sps_80.0', 'sps_97.5']
baseline_name = 'baseline'
output_dir = '/home/jiahuei/Documents/1_TF_files/prune/compiled_mscoco_val'
image_dir = '/master/datasets/mscoco/val2014'
JSON_ROOT = '/home/jiahuei/Documents/1_TF_files/prune'
baseline_json = pjoin(
    JSON_ROOT,
    'mscoco_v2/word_w256_LSTM_r512_cnnFT_SCST_b7C1.0B2.0/run_01___infer_valid_b3_lp0.0___08-27_18-10/captions___113287.json'
)
model_json = [
    pjoin(
        JSON_ROOT,
        'mscoco_v3/word_w256_LSTM_r512_h1_ind_xu_REG_1.0e+02_init_5.0_L1_wg_7.5_ann_sps_0.800_dec_prune_cnnFT_SCST_b7C1.0B2.0/run_01___infer_valid_b3_lp0.0___05-23_18-43/captions___113287.json'
    ),
    pjoin(
        JSON_ROOT,
        'mscoco_v3/word_w256_LSTM_r512_h1_ind_xu_REG_1.0e+02_init_5.0_L1_wg_60.0_ann_sps_0.975_dec_prune_cnnFT_SCST_b7C1.0B2.0/run_01___infer_valid_b3_lp0.0___05-24_07-15/captions___113287.json'
    )
]
baseline_scores_json = pjoin(
    JSON_ROOT,
    'mscoco_v2/word_w256_LSTM_r512_cnnFT_SCST_b7C1.0B2.0/run_01___infer_valid_b3_lp0.0___08-27_18-10/metric_scores_detailed_113287.json'
def _loop(all_score_dirs, score_dict, current_set, sort_checkpoints, args):
    desc = 'Collecting `{}` {} checkpoint sorting'.format(
        current_set, 'with' if sort_checkpoints else 'without')
    for d in tqdm(sorted(all_score_dirs), desc=desc):
        if current_set not in d:
            continue
        exp_name, run, infer_name, datetime = _extract_keys(d)
        score_file = pjoin(d, 'metric_scores.csv')
        if not os.path.isfile(score_file):
            print('WARNING: `{}` does not contain `metric_scores.csv` file.'.format(
                pjoin(exp_name, '___'.join([run, infer_name, datetime]))))
            continue
        if args.verbose:
            s = os.path.sep
            print('Processing dir: `{}`'.format(s.join(d.split(s)[-2:])))

        if current_set == 'test':
            valid_name = infer_name.replace('test', 'valid')
            try:
                best_ckpt_num = score_dict[valid_name][exp_name][run]['best_ckpt']
                _, best_score = _get_ckpt(score_file,
                                          get_checkpoint_num=best_ckpt_num)
            except KeyError:
                print('WARNING: Validation results not found for: `{}`'.format(
                    pjoin(exp_name, '___'.join([run, infer_name]))))
                continue
        else:
            best_ckpt_num, best_score = _get_ckpt(score_file, sort_checkpoints)

        # Get captions stats
        if args.caption_statistics:
            with open(args.train_caption_txt, 'r') as f:
                train_caption = [
                    l.strip().split(',')[1].replace('<GO> ', '').replace(' <EOS>', '')
                    for l in f.readlines()
                ]
            train_caption = set(train_caption)
            # train_caption.sort()
            stats = _get_caption_statistics(train_caption_set=train_caption,
                                            curr_score_dir=d,
                                            checkpoint_num=best_ckpt_num)
        else:
            stats = np.array([-1, -1, -1])
        model_size = _get_model_size(curr_score_dir=d)

        val = dict(name=exp_name,
                   run=run,
                   infer_name=infer_name,
                   datetime=datetime,
                   best_ckpt=best_ckpt_num,
                   best_score=best_score,
                   caption_stats=stats,
                   model_size=model_size)
        if infer_name not in score_dict:
            score_dict[infer_name] = {}
        if exp_name not in score_dict[infer_name]:
            score_dict[infer_name][exp_name] = {}
        if run in score_dict[infer_name][exp_name]:
            print('WARNING: `{}` has more than 1 eval results. Keeping latest one.'.format(
                pjoin(exp_name, '___'.join([run, infer_name]))))
        score_dict[infer_name][exp_name][run] = val
def main(args, run_inference=None):
    os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    rand_seed = [0, 48964896, 88888888, 123456789]
    try:
        rand_seed = rand_seed[args.run]
    except IndexError:
        rand_seed = args.run

    if args.attn_size is None or not isinstance(
            args.attn_size, (float, int)) or args.attn_size <= 1:
        args.attn_size = args.rnn_size

    if args.legacy:
        logger.info('LEGACY mode enabled. Some arguments will be overridden.')
        args.cnn_name = 'inception_v1'
        args.cnn_input_size = '224,224'
        args.cnn_input_augment = True
        args.cnn_fm_attention = 'Mixed_4f'
        args.rnn_name = 'LSTM'
        args.rnn_size = 512
        args.rnn_word_size = 256
        args.rnn_init_method = 'project_hidden'
        args.rnn_keep_prob = 0.65
        args.rnn_recurr_dropout = False
        args.attn_context_layer = False
        args.attn_alignment_method = 'add_LN'
        args.attn_probability_fn = 'softmax'
        args.attn_keep_prob = 1.0
        args.lr_start = 1e-3
        args.lr_end = 2e-4
        args.lr_reduce_every_n_epochs = 4
        args.cnn_grad_multiplier = 1.0
        args.initialiser = 'xavier'
        args.optimiser = 'adam'
        args.batch_size_train = 32
        args.adam_epsilon = 1e-6

    ###
    ## Log name
    dataset = args.dataset_file_pattern.split('_')[0]
    if args.log_root == '':
        args.log_root = pjoin(BASE_DIR, 'experiments')
    log_root = pjoin(args.log_root, dataset + '_v3')
    if args.dataset_dir == '':
        args.dataset_dir = pjoin(BASE_DIR, 'datasets', dataset)
    if not os.path.isfile(args.glove_filepath):
        args.glove_filepath = pjoin(BASE_DIR, 'pretrained', 'glove.6B.300d.txt')

    if args.supermask_type:
        if args.supermask_sparsity_weight < 0:
            if 'masked' in args.cnn_name:
                args.supermask_sparsity_weight = max(
                    5., 1.5 / (1 - args.supermask_sparsity_target))
            else:
                args.supermask_sparsity_weight = max(
                    5., 0.5 / (1 - args.supermask_sparsity_target))
    if args.supermask_type and args.supermask_sparsity_target > 0:
        args.rnn_keep_prob = 0.89
        args.attn_keep_prob = 0.97
    if args.supermask_type in masked_layer.MAG_HARD:
        args.max_epoch = 10

    name = train.get_log_name(args)
    if args.name:
        name = '{}_{}'.format(name, args.name)
    dec_dir = pjoin(log_root, '{}'.format(name), 'run_{:02d}'.format(args.run))
    cnnft_dir = pjoin(log_root, '{}_cnnFT'.format(name), 'run_{:02d}'.format(args.run))
    log_path = dec_dir
    train_fn = train.train_caption_xe

    if args.train_mode == 'cnn_freeze':
        assert args.freeze_scopes == ['Model/encoder/cnn']
        _ckpt = args.checkpoint_path
        if os.path.isfile(_ckpt + '.index') or os.path.isfile(
                _ckpt) or tf.train.latest_checkpoint(_ckpt):
            pass
        else:
            # Maybe download weights
            net = net_params.get_net_params(
                args.cnn_name, ckpt_dir_or_file=args.checkpoint_path)
            utils.maybe_get_ckpt_file(net)
            args.checkpoint_path = net['ckpt_path']

    elif args.train_mode == 'cnn_finetune':
        # CNN fine-tune
        if args.legacy:
            raise NotImplementedError
        # if not os.path.exists(dec_dir):
        #     raise ValueError('Decoder training log path not found: {}'.format(dec_dir))
        args.lr_start = 1e-3
        args.max_epoch = 10
        args.freeze_scopes = None
        # args.checkpoint_path = dec_dir
        log_path = cnnft_dir

    elif args.train_mode == 'scst':
        # SCST fine-tune (after CNN fine-tune)
        if args.legacy:
            raise NotImplementedError
        # if not os.path.exists(cnnft_dir):
        #     raise ValueError('CNN finetune log path not found: {}'.format(cnnft_dir))
        args.batch_size_train = 10
        args.lr_start = 1e-3
        args.max_epoch = 10 if dataset == 'mscoco' else 3
        args.freeze_scopes = ['Model/encoder/cnn']
        # args.checkpoint_path = cnnft_dir
        scst = 'b{}C{}B{}'.format(args.scst_beam_size,
                                  args.scst_weight_ciderD,
                                  args.scst_weight_bleu[-1])
        scst_dir = pjoin(log_root, '{}_cnnFT_SCST_{}'.format(name, scst),
                         'run_{:02d}'.format(args.run))
        log_path = scst_dir
        train_fn = train.train_caption_scst

    elif args.train_mode == 'mnist':
        train_fn = train.train_rnn_mnist
        args.batch_size_train = 100
        args.batch_size_eval = 100
        args.lr_start = 0.1
        args.max_epoch = 60
        args.optimiser = 'sgd'
        args.checkpoint_path = None
        log_path = dec_dir

    # elif args.train_mode == 'autoencoder':
    #     train_fn = train.train_caption_ae
    #     args.checkpoint_path = None
    #     log_path = dec_dir

    ###
    defaults = dict(
        rnn_layers=1,
        rnn_keep_in=args.rnn_keep_prob,
        rnn_keep_out=args.rnn_keep_prob,
        max_saves=12 if args.train_mode != 'mnist' else 3,
        num_logs_per_epoch=100 if args.train_mode != 'mnist' else 5,
        per_process_gpu_memory_fraction=None,
        rand_seed=rand_seed,
        add_image_summaries=False,
        add_vars_summaries=False,
        add_grad_summaries=False,
        log_path=log_path,
        save_path=pjoin(log_path, 'model'),
        resume_training=args.resume_training and os.path.exists(log_path),
    )
    del args.rnn_keep_prob
    defaults.update(vars(args))
    config = cfg.Config(**defaults)
    config.overwrite_safety_check(overwrite=args.resume_training)

    ###
    train.try_to_train(train_fn=train_fn, config=config, try_block=False)

    ###
    if (run_inference in ['test', 'valid', 'coco_test', 'coco_valid']
            and args.train_mode not in ['mnist', 'autoencoder']):
        args.infer_set = run_inference
        args.infer_checkpoints_dir = log_path
        args.infer_checkpoints = 'all'
        args.ckpt_prefix = 'model_compact-'
        if 'coco' in dataset:
            args.annotations_file = 'captions_val2014.json'
        elif 'insta' in dataset:
            args.annotations_file = 'insta_testval_clean.json'
        else:
            raise NotImplementedError('Invalid dataset: {}'.format(dataset))
        args.run_inference = True
        args.get_metric_score = True
        args.save_attention_maps = False
        args.per_process_gpu_memory_fraction = 0.75
        args.infer_beam_size = 3
        args.infer_length_penalty_weight = 0.
        args.infer_max_length = 30
        args.batch_size_infer = 25
        infer.main(args)
        args.infer_set = 'test'
        infer.main(args)
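# Hypothetical usage sketch (the script name and flag spellings are assumptions
# based on the argument names referenced above): the captioning models are
# typically trained in three stages that reuse this `main` function with
# different `train_mode` values, each stage writing to its own log directory
# (`dec_dir`, `cnnft_dir`, `scst_dir`):
#
#     python train.py --train_mode cnn_freeze   --dataset_file_pattern mscoco_... --gpu 0
#     python train.py --train_mode cnn_finetune --dataset_file_pattern mscoco_... --gpu 0
#     python train.py --train_mode scst         --dataset_file_pattern mscoco_... --gpu 0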
def _display_captions(captions_list, sort_metric):
    # Display captions
    print('')
    instructions = [
        '"x" if both are wrong',
        '"y" if both are correct',
        '"b" if baseline is correct',
        '"m" if model is correct',
        '"a" if ambiguous',
        '"e" to exit',
        'other keys to skip.\n',
    ]
    instructions = '\n'.join(instructions)
    global jump_to_idx
    if jump_to_idx < 0 or jump_to_idx >= len(captions_list):
        jump_to_idx = 0
    img_plot = None
    fig = plt.figure(figsize=(20, 10))

    for cap_idx, cap in enumerate(captions_list[jump_to_idx:]):
        if len(shortlisted_imgs) > 0 and not any(
                str(cap['image_id']) in _ for _ in shortlisted_imgs):
            # If there are shortlisted images, skip those without a partial match
            continue
        img = Image.open(pjoin(image_dir, cap['image_name']))
        img = ImageEnhance.Brightness(img).enhance(1.10)
        img = ImageEnhance.Contrast(img).enhance(1.050)
        # Resize to 512 x 512 instead of 256 x 256
        # Crop to 448 x 448 instead of 224 x 224
        img = img.resize([IMG_RESIZE, IMG_RESIZE], Image.BILINEAR)
        img = ImageOps.crop(img, (IMG_RESIZE - IMG_CROP) / 2)

        # Collect info
        base_score = cap[baseline_name][sort_metric]
        model_score = [cap[n][sort_metric] for n in model_names]
        base_cap = '{} ({:.2f}): {}'.format(baseline_name, base_score,
                                            cap[baseline_name]['caption'])
        model_cap = [
            '{} ({:.2f}): {}'.format(n, model_score[i], cap[n]['caption'])
            for i, n in enumerate(model_names)
        ]

        # Visualise
        border = int((IMG_RESIZE - IMG_CROP) / 2)
        bg_big = Image.new('RGB', DISPLAY_BG_SIZE)
        bg_big.paste(img, (border, border))
        draw = ImageDraw.Draw(bg_big)
        draw.text((IMG_RESIZE, border),
                  '# {} / {}'.format(jump_to_idx + cap_idx + 1, len(captions_list)),
                  font=font)

        # Draw captions
        texts_wrp = []
        for t in [base_cap] + model_cap:
            texts_wrp.append(textwrap.wrap(t, width=50))
        offset = 0.
        for text_group in texts_wrp:
            for text in text_group:
                draw.text((border, IMG_RESIZE + offset), text, font=font)
                offset += int(TEXT_SIZE * 1.05)
            offset += TEXT_SIZE
        # draw.text((10, int(IMG_RESIZE * 1.00)), base_cap, font=font)
        # for j, m_cap in enumerate(model_cap):
        #     offset = 0.2 * (j + 1)
        #     draw.text((10, int(IMG_RESIZE * (1. + offset))), m_cap, font=font)

        if img_plot is None:
            img_plot = plt.imshow(bg_big)
        else:
            img_plot.set_data(bg_big)
        plt.show(block=False)
        fig.canvas.draw()

        # Get key press
        # key_input = raw_input(instructions)
        key_input = input(instructions)
        fig.canvas.flush_events()
        if key_input == 'e':
            plt.close()
            break
        elif key_input in CATEGORIES:
            _save_files(CATEGORIES[key_input], img, cap, bg_big, sort_metric)
    print('')
    parser.add_argument('--word_count_thres', type=int, default=5)
    parser.add_argument('--caption_len_thres', type=int, default=20)
    parser.add_argument('--pad_value', type=int, default=-1)
    parser.add_argument('--wtoi_file', type=str, default=None)
    parser.add_argument('--itow_file', type=str, default=None)
    parser.add_argument('--verify_images', type=bool, default=False)

    return parser.parse_args()


if __name__ == '__main__':
    logger = logging.getLogger(__name__)
    args = parse_args()

    if args.dataset_dir == '':
        dset_dir = pjoin(os.path.dirname(CURR_DIR), 'datasets', 'mscoco')
    else:
        dset_dir = args.dataset_dir
    out_path = pjoin(dset_dir, 'captions')
    json_path = pjoin(dset_dir, JSON_FILE)

    ### Get the caption JSON files ###
    if os.path.isfile(json_path):
        logger.info('Found file: `{}`'.format(JSON_FILE))
    else:
        zip_path = utils.maybe_download_from_url(
            r'https://cs.stanford.edu/people/karpathy/deepimagesent/caption_datasets.zip',
            dset_dir)
        utils.extract_zip(zip_path)
        # os.remove(zip_path)
def _display_captions(captions_list, sort_metric):
    # Display captions
    print('')
    instructions = [
        '"x" if both are wrong',
        '"y" if both are correct',
        '"b" if baseline is correct',
        '"m" if model is correct',
        '"a" if ambiguous',
        '"e" to exit',
        'other keys to skip.',
        '---\n',
    ]
    instructions = '\n'.join(instructions)
    global JUMP_TO_IDX
    if JUMP_TO_IDX < 0 or JUMP_TO_IDX >= len(captions_list):
        JUMP_TO_IDX = 0
    img_plot = None
    fig = plt.figure(figsize=(20, 10))

    for cap_idx, cap in enumerate(captions_list[JUMP_TO_IDX:]):
        if len(SHORTLISTED_IMGS) > 0 and not any(str(cap['image_id']) in _
                                                 for _ in SHORTLISTED_IMGS):
            # Skip if no partial match with any shortlisted images
            continue
        img = _prepare_img(pjoin(IMAGE_DIR, cap['image_name']))

        # Collect info
        base_score = cap[BASELINE_NAME][sort_metric]
        model_score = [cap[n][sort_metric] for n in MODEL_NAMES]
        base_cap = '{} ({:.2f}): {}'.format(
            BASELINE_NAME, base_score, cap[BASELINE_NAME]['caption'])
        model_cap = ['{} ({:.2f}): {}'.format(
            n, model_score[i], cap[n]['caption'])
            for i, n in enumerate(MODEL_NAMES)]

        # Visualise
        bg_big = Image.new('RGB', DISPLAY_BG_SIZE)
        bg_big.paste(img, (BORDER, int(BORDER * 1.5)))
        draw = ImageDraw.Draw(bg_big)
        draw.text(
            (BORDER, int(BORDER * 0.5)),
            '# {} / {}'.format(JUMP_TO_IDX + cap_idx + 1, len(captions_list)),
            font=font)

        # Draw captions
        texts_wrp = []
        for t in [base_cap] + model_cap:
            print(t)
            texts_wrp.append(textwrap.wrap(t, width=45))
        print('')
        offset = int(BORDER * 1.5)
        for text_group in texts_wrp:
            for text in text_group:
                draw.text((BORDER, IMG_RESIZE + offset), text, font=font)
                offset += int(TEXT_SIZE * 1.05)
            offset += TEXT_SIZE

        if img_plot is None:
            img_plot = plt.imshow(bg_big)
        else:
            img_plot.set_data(bg_big)
        plt.show(block=False)
        fig.canvas.draw()

        # Get key press
        # key_input = raw_input(instructions)
        key_input = input(instructions)
        fig.canvas.flush_events()
        if key_input == 'e':
            plt.close()
            break
        elif key_input in CATEGORIES:
            _save_captions(CATEGORIES[key_input], img, cap, bg_big, sort_metric)
    print('')
    parser.add_argument('--itow_file', type=str, default=None)
    parser.add_argument('--verify_images', type=bool, default=False)

    return parser.parse_args()


if __name__ == '__main__':
    logger = logging.getLogger(__name__)
    args = parse_args()

    if args.vocab_size < 1 or isinstance(args.vocab_size, str):
        args.vocab_size = None
    if args.dataset_dir == '':
        dset_dir = pjoin(os.path.dirname(CURR_DIR), 'datasets', 'insta')
    else:
        dset_dir = args.dataset_dir
    out_path = pjoin(dset_dir, 'captions')
    cap_train_json_path = pjoin(dset_dir, 'json', 'insta-caption-train.json')
    cap_test1_json_path = pjoin(dset_dir, 'json', 'insta-caption-test1.json')

    ### Get the caption JSON files ###
    json_exists = (os.path.isfile(cap_train_json_path)
                   and os.path.isfile(cap_test1_json_path))
    tgz_path = pjoin(dset_dir, 'json.tar.gz')
    if json_exists:
        logger.info('Found existing json files.')
    else:
        utils.maybe_download_from_google_drive(
            r'0B3xszfcsfVUBdG0tU3BOQWV0a0E',
# Variables
SORT_BY_METRIC = 'CIDEr'
JUMP_TO_IDX = 4970
VISUALISE_ATTENTION = True
RADIX_SAMPLE_TIMESTEP = False
RADIX_NUM_TOKENS = 2
MODEL_NAMES = ['sps_80.0', 'sps_97.5']
BASELINE_NAME = 'baseline'
OUTPUT_DIR = '/home/jiahuei/Documents/1_TF_files/radix_v2/compiled_mscoco_test'
IMAGE_DIR = '/master/datasets/mscoco/val2014'
# OUTPUT_DIR = '/home/jiahuei/Documents/1_TF_files/radix_v2/compiled_insta_val'
# IMAGE_DIR = '/master/datasets/insta/images'
JSON_ROOT = '/home/jiahuei/Documents/1_TF_files/radix_v2'
BASELINE_JSON = pjoin(
    JSON_ROOT,
    '/home/jiahuei/Documents/1_TF_files/radix_v2/mscoco_v2/word_w256_LSTM_r512_h1_none_cnnFT_SCST_b7C1.0B0.0/run_01___infer_test_b1_lp0.0___08-11_14-58/captions___113287.json'
)
MODEL_JSON = [
    pjoin(JSON_ROOT,
          '/home/jiahuei/Documents/1_TF_files/radix_v2/mscoco_v2/word_w256_LSTM_r512_h1_none_cnnFT_SCST_b7C1.0B0.0/run_01___infer_test_b1_lp0.0___08-11_14-58/captions___113287.json'),
    pjoin(JSON_ROOT,
          '/home/jiahuei/Documents/1_TF_files/radix_v2/mscoco_v2/word_w256_LSTM_r512_h1_none_cnnFT_SCST_b7C1.0B0.0/run_01___infer_test_b1_lp0.0___08-11_14-58/captions___113287.json')
]
BASELINE_SCORES_JSON = pjoin(
    JSON_ROOT,
    '/home/jiahuei/Documents/1_TF_files/radix_v2/mscoco_v2/word_w256_LSTM_r512_h1_none_cnnFT_SCST_b7C1.0B0.0/run_01___infer_test_b1_lp0.0___08-11_14-58/metric_scores_detailed_113287.json'
)
MODEL_SCORES_JSON = [
    pjoin(JSON_ROOT,
          '/home/jiahuei/Documents/1_TF_files/radix_v2/mscoco_v2/word_w256_LSTM_r512_h1_none_cnnFT_SCST_b7C1.0B0.0/run_01___infer_test_b1_lp0.0___08-11_14-58/metric_scores_detailed_113287.json'),