Example #1
def get_conv_features(image_file, model_type, feature_layer):
    if model_type == "vgg":
        cnn_model = vgg16.create_vgg_model(448,
                                           only_conv=feature_layer != 'fc7')
    else:
        cnn_model = resnet.create_resnet_model(448)

    sess = cnn_model['session']
    images = cnn_model['images_placeholder']
    image_feature_layer = cnn_model[feature_layer]
    img_dim = 448

    if model_type == 'resnet':
        image_array = sess.run(cnn_model['processed_image'],
                               feed_dict={
                                   cnn_model['pre_image']:
                                   utils.load_image_array(image_file,
                                                          img_dim=None)
                               })
    else:
        image_array = utils.load_image_array(image_file, img_dim=img_dim)

    feed_dict = {images: [image_array]}
    conv_features_batch = sess.run(image_feature_layer, feed_dict=feed_dict)
    sess.close()

    return conv_features_batch
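Every example on this page leans on a load_image_array helper from each project's local utils module, whose implementation is not shown here (and whose signature varies slightly between projects: img_dim= in some, size= in others). A minimal sketch consistent with how it is called above (optional square resize, float pixels in [0, 1]) might look like the following; the use of scikit-image is an assumption:

# Hypothetical sketch of utils.load_image_array, inferred from the call sites
# on this page; not the actual implementation from any of these projects.
import numpy as np
from skimage.io import imread
from skimage.transform import resize


def load_image_array(image_file, img_dim=224):
    img = imread(image_file)
    if img.ndim == 2:  # grayscale: replicate to 3 channels
        img = np.stack([img] * 3, axis=-1)
    img = img.astype('float32') / 255.0
    if img_dim is not None:
        img = resize(img, (img_dim, img_dim))
    return img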
Example #2
def get_minibatches(input_set, batchsize):
    batch_image = np.ndarray((batchsize, 224, 224, 3))
    actual = 0
    count = 0
    for idx in input_set:
        image = os.path.join(args.data_dir, '%s2014/COCO_%s2014_%.12d.jpg' % (args.mode, args.mode, idx))
        batch_image[actual, :, :, :] = utils.load_image_array(image)
        actual += 1
        count += 1
        if actual >= batchsize or count >= len(input_set):
            yield batch_image[0: actual, :, :, :], actual
            actual = 0
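A brief usage sketch for the generator above: it reads args.data_dir and args.mode from module scope and yields (batch, batch_length) pairs, where the final batch may hold fewer than batchsize images (the ids below are placeholders):

# Usage sketch; 42, 73 and 128 stand in for real COCO image ids.
for batch, n in get_minibatches([42, 73, 128], batchsize=2):
    print(batch.shape, n)  # (2, 224, 224, 3) 2, then (1, 224, 224, 3) 1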
Example #3
def test():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, default='mask_rcnn_coco.h5')
    parser.add_argument('--image', type=str, default='example.jpg')
    args = parser.parse_args()

    detector = Detector(args.model)
    image = load_image_array(args.image)
    r = detector.detect(image)

    for i, (roi, class_id) in enumerate(zip(r['rois'], r['class_ids'])):
        print('Object #{}, roi: {}, class: {}'.format(i + 1, roi,
                                                      CLASS_NAMES[class_id]))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--split',
                        type=str,
                        default='train',
                        help='train/val/test')
    parser.add_argument('--batch_size',
                        type=int,
                        default=64,
                        help='Batch Size')
    parser.add_argument('--feature_layer',
                        type=str,
                        default="block4",
                        help='CONV FEATURE LAYER, fc7, pool5 or block4')
    parser.add_argument('--model', type=str, default="resnet", help='vgg/resnet')
    args = parser.parse_args()

    if args.split == "train":
        with open('Data/annotations/test.json') as f:
            images = json.loads(f.read())['images']
    else:
        with open('Data/annotations/captions_val2014.json') as f:
            images = json.loads(f.read())['images']

    image_ids = {image['image_id']: 1 for image in images}
    image_id_list = [img_id for img_id in image_ids]
    print("Total Images", len(image_id_list))

    try:
        shutil.rmtree('Data/conv_features_{}_{}'.format(
            args.split, args.model))
    except OSError:
        pass

    os.makedirs('Data/conv_features_{}_{}'.format(args.split, args.model))

    if args.model == "vgg":
        cnn_model = vgg16.create_vgg_model(
            448, only_conv=args.feature_layer != 'fc7')
    else:
        cnn_model = resnet.create_resnet_model(448)

    image_id_file_name = "Data/conv_features_{}_{}/image_id_list_{}.h5".format(
        args.split, args.model, args.feature_layer)
    h5f_image_id_list = h5py.File(image_id_file_name, 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()

    conv_file_name = "Data/conv_features_{}_{}/conv_features_{}.h5".format(
        args.split, args.model, args.feature_layer)
    hdf5_conv_file = h5py.File(conv_file_name, 'w')

    if args.feature_layer == "fc7":
        conv_features = None
        feature_shape = (len(image_id_list), 4096)
        img_dim = 224

    else:
        if args.model == "vgg":
            conv_features = None
            feature_shape = (len(image_id_list), 14, 14, 512)
            img_dim = 448
        else:
            conv_features = None
            feature_shape = (len(image_id_list), 14, 14, 2048)
            img_dim = 448
            print("it's done!!!")

    hdf5_data = hdf5_conv_file.create_dataset('conv_features',
                                              shape=feature_shape,
                                              dtype='f')

    sess = cnn_model['session']
    images = cnn_model['images_placeholder']
    image_feature_layer = cnn_model[args.feature_layer]

    idx = 0
    while idx < len(image_id_list):
        start = time.clock()

        image_batch = np.ndarray((args.batch_size, img_dim, img_dim, 3))

        count = 0
        for i in range(0, args.batch_size):
            if idx >= len(image_id_list):
                break

            image_file = ('Data/images/abstract_v002_%s2015_%.12d.jpg' %
                          (args.split, image_id_list[idx]))

            if args.model == 'resnet':
                image_array = sess.run(cnn_model['processed_image'],
                                       feed_dict={
                                           cnn_model['pre_image']:
                                           utils.load_image_array(image_file,
                                                                  img_dim=None)
                                       })
            else:
                image_array = utils.load_image_array(image_file,
                                                     img_dim=img_dim)

            image_batch[i, :, :, :] = image_array
            idx += 1
            count += 1

        feed_dict = {images: image_batch[0:count, :, :, :]}
        conv_features_batch = sess.run(image_feature_layer,
                                       feed_dict=feed_dict)
        #np.reshape not needed
        #conv_features_batch = np.reshape(conv_features_batch, ( conv_features_batch.shape[0], -1 ))
        hdf5_data[(idx - count):idx] = conv_features_batch[0:count]

        end = time.clock()
        print("Time for batch of photos", end - start)
        print("Hours Remaining", ((len(image_id_list) - idx) * 1.0) *
              (end - start) / 60.0 / 60.0 / args.batch_size)
        print("Images Processed", idx)

    hdf5_conv_file.close()
    print("Done!")
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--split', type=str, default='train',
                        help='train/val')
    parser.add_argument('--data_dir', type=str, default='Data',
                        help='Data directory')
    parser.add_argument('--batch_size', type=int, default=10,
                        help='Batch Size')

    args = parser.parse_args()
    data_loader.prepare_training_data(version=1)
    all_data = data_loader.load_questions_answers(version=1)
    if args.split == "train":
        qa_data = all_data['training']
    else:
        qa_data = all_data['validation']

    image_ids = {}
    for qa in qa_data:
        image_ids[qa['image_id']] = 1

    image_id_list = [img_id for img_id in image_ids]
    print "Total Images", len(image_id_list)

    # Build a VGG16 feature extractor up to the 4096-d fc2 ('fc7') layer;
    # assumes keras.models.Model is imported alongside VGG16.
    base_model = VGG16(weights='imagenet', include_top=True)
    model = Model(inputs=base_model.input,
                  outputs=base_model.get_layer('fc2').output)

    fc7 = np.ndarray((len(image_id_list), 4096))
    idx = 0

    while idx < len(image_id_list):
        start = time.time()
        image_batch = np.ndarray((args.batch_size, 224, 224, 3))
        fc7_batch = np.ndarray((args.batch_size, 4096))

        count = 0
        for i in range(0, args.batch_size):
            if idx >= len(image_id_list):
                break
            image_file = join(args.data_dir,
                              '%s2014/COCO_%s2014_%.12d.jpg' % (args.split, args.split, image_id_list[idx]))

            image_batch[i, :, :, :] = utils.load_image_array(image_file)

            x = np.expand_dims(image_batch[i, :, :, :], axis=0)
            x = preprocess_input(x)

            features = model.predict(x)
            fc7_batch[i, :] = features[0]


            idx += 1
            count += 1

        fc7[(idx - count):idx, :] = fc7_batch[0:count, :]
        end = time.time()
        print("Time for batch of 10 photos", end - start)
        print("Hours For Whole Dataset",
              (len(image_id_list) * 1.0) * (end - start) / 60.0 / 60.0 / 10.0)

        print("Images Processed", idx)

    print "Saving fc7 features"
    h5f_fc7 = h5py.File(join(args.data_dir, args.split + '_fc7.h5'), 'w')
    h5f_fc7.create_dataset('fc7_features', data=fc7)
    h5f_fc7.close()

    print "Saving image id list"
    h5f_image_id_list = h5py.File(join(args.data_dir, args.split + '_image_id_list.h5'), 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()
    print "Done!"
Example #6
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('--split', type=str, default='train',
                       help='train/val')
	parser.add_argument('--model_path', type=str, default='Data/vgg16.tfmodel',
                       help='Pretrained VGG16 Model')
	parser.add_argument('--data_dir', type=str, default='Data',
                       help='Data directory')
	parser.add_argument('--batch_size', type=int, default=10,
                       help='Batch Size')
	


	args = parser.parse_args()
	
	vgg_file = open(args.model_path, 'rb')
	vgg16raw = vgg_file.read()
	vgg_file.close()

	graph_def = tf.GraphDef()
	graph_def.ParseFromString(vgg16raw)

	images = tf.placeholder("float", [None, 224, 224, 3])
	tf.import_graph_def(graph_def, input_map={ "images": images })

	graph = tf.get_default_graph()

	for opn in graph.get_operations():
		print "Name", opn.name, opn.values()

	all_data = data_loader.load_questions_answers(args)
	if args.split == "train":
		qa_data = all_data['training']
	else:
		qa_data = all_data['validation']
	
	image_ids = {}
	for qa in qa_data:
		image_ids[qa['image_id']] = 1

	image_id_list = [img_id for img_id in image_ids]
	print "Total Images", len(image_id_list)
	
	
	sess = tf.Session()
	fc7 = np.ndarray( (len(image_id_list), 4096 ) )
	idx = 0

	while idx < len(image_id_list):
		start = time.time()
		image_batch = np.ndarray( (args.batch_size, 224, 224, 3 ) )

		count = 0
		for i in range(0, args.batch_size):
			if idx >= len(image_id_list):
				break
			image_file = join(args.data_dir, '%s2014/COCO_%s2014_%.12d.jpg'%(args.split, args.split, image_id_list[idx]) )
			image_batch[i,:,:,:] = utils.load_image_array(image_file)
			idx += 1
			count += 1
		
		
		feed_dict  = { images : image_batch[0:count,:,:,:] }
		fc7_tensor = graph.get_tensor_by_name("import/Relu_1:0")
		fc7_batch = sess.run(fc7_tensor, feed_dict = feed_dict)
		fc7[(idx - count):idx, :] = fc7_batch[0:count,:]
		end = time.time()
		print("Time for batch of 10 photos", end - start)
		print("Hours For Whole Dataset", (len(image_id_list) * 1.0) * (end - start) / 60.0 / 60.0 / 10.0)

		print("Images Processed", idx)

		

	print "Saving fc7 features"
	h5f_fc7 = h5py.File( join(args.data_dir, args.split + '_fc7.h5'), 'w')
	h5f_fc7.create_dataset('fc7_features', data=fc7)
	h5f_fc7.close()

	print "Saving image id list"
	h5f_image_id_list = h5py.File( join(args.data_dir, args.split + '_image_id_list.h5'), 'w')
	h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
	h5f_image_id_list.close()
	print "Done!"
def main():
    parser = argparse.ArgumentParser()
    
    parser.add_argument('--residual_channels', type=int, default=512,
                       help='residual_channels')  
    parser.add_argument('--data_dir', type=str, default='Data',
                       help='Data directory')
    parser.add_argument('--version', type=int, default=1,
                       help='VQA data version')
    parser.add_argument('--model_path', type=str, default=None,
                       help='Trained Model Path')
    parser.add_argument('--feature_layer', type=str, default="block4",
                       help='CONV FEATURE LAYER, fc7, pool5 or block4')
    parser.add_argument('--cnn_model', type=str, default="resnet",
                       help='CNN model')
    parser.add_argument('--text_model', type=str, default="bytenet",
                       help='bytenet/lstm')
    parser.add_argument('--question', type=str, default="What animal is shown in the picture",
                       help='question about the image')
    parser.add_argument('--image_file', type=str, default="Image File path for the question",
                       help='Image File path')

    
    args = parser.parse_args()
    conv_features_batch = get_conv_features(args.image_file, args.cnn_model, args.feature_layer)
    
    tf.reset_default_graph()

    meta_data = data_loader.load_meta_data(args.version, args.data_dir)
    ans_vocab_rev = meta_data['index_to_ans']
    ques_vocab_rev = meta_data['index_to_qw']
    qw_to_index = meta_data['qw_to_index']
    
    
    question_words = data_loader.tokenize_mcb(args.question)
    question_indices = [qw_to_index[qw] if qw in qw_to_index else qw_to_index['UNK']
                        for qw in question_words]
    
    question_indices += [0 for i in range(len(question_indices), meta_data['max_question_length'])]
    sentence_batch = np.ndarray( (1, meta_data['max_question_length']), dtype = 'int32')
    sentence_batch[0] = question_indices

    

    model_options = {
        'question_vocab_size' : len(meta_data['index_to_qw']),
        'residual_channels' : args.residual_channels,
        'ans_vocab_size' : len(meta_data['index_to_ans']),
        'filter_width' : 3,
        'img_dim' : 14,
        'img_channels' : 2048,
        'dilations' : [ 1, 2, 4, 8,
                        1, 2, 4, 8, 
                       ],
        'text_model' : args.text_model,
        'dropout_keep_prob' : 0.6,
        'max_question_length' : meta_data['max_question_length'],
        'num_answers' : 10
    }
    
    
    model = VQA_model_attention.VQA_model(model_options)
    model.build_generator()

    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()

    saver = tf.train.Saver()
    if args.model_path:
        saver.restore(sess, args.model_path)


    try:
        shutil.rmtree('Data/gen_samples')
    except OSError:
        pass
    
    os.makedirs('Data/gen_samples')

    pred_answer, prob1, prob2 = sess.run([model.g_predictions, model.g_prob1, model.g_prob2],
        feed_dict = {
            model.g_question : sentence_batch,
            model.g_image_features : conv_features_batch
        })

    pred_ans_text = utils.answer_indices_to_text(pred_answer, ans_vocab_rev)
    
    sample_data = []
    print "Actual vs Prediction"
    for sample_i in range(len(pred_ans_text)):
        print pred_ans_text[sample_i]
        image_array = utils.load_image_array(args.image_file, 224)
        blend1 = utils.get_blend_map(image_array, prob1[sample_i], overlap = True)
        blend2 = utils.get_blend_map(image_array, prob2[sample_i], overlap = True)
        sample_data.append({
            'question' : args.question,
            'predicted_answer' : pred_ans_text[sample_i],
            'batch_index' : sample_i
            })
        misc.imsave('Data/gen_samples/{}_actual_image.jpg'.format(sample_i), image_array)
        misc.imsave('Data/gen_samples/{}_blend1.jpg'.format(sample_i), blend1)
        misc.imsave('Data/gen_samples/{}_blend2.jpg'.format(sample_i), blend2)

        with open('Data/gen_samples/sample.json', 'w') as f:
            f.write(json.dumps(sample_data))
        shutil.make_archive('Data/gen_samples', 'zip', 'Data/gen_samples')
Example #8
File: app.py Project: ceynri/FIC
def demo_process():
    '''Serve the demo processing request.'''

    # Get the uploaded file object
    file = request.files['file']
    file = File(file)

    feature_model = request.form['feature_model']
    quality_level = request.form['quality_level']
    if model.quality_level != quality_level:
        model.switch_quality_level(quality_level)
    # Convert the binary data to a tensor
    input = file.load_tensor().cuda()

    # Feed the model and collect the results
    e_data = model.encode(input)
    d_data = model.decode(feat=e_data['feat'],
                          tex=e_data['tex'],
                          intervals=e_data['intervals'],
                          recon=e_data['recon'])
    data = {**e_data, **d_data}

    # Save the compressed data
    fic_path = get_path(f'{file.name}.fic')
    File.save_binary(
        {
            'feat': data['feat'],
            'tex': data['tex'],
            'intervals': data['intervals'],
            'ext': file.ext,
        }, fic_path)
    # fic-related metrics
    fic_size = path.getsize(fic_path)
    fic_bpp = get_bpp(fic_size)

    # Save the features alone to measure feature vs. texture sizes
    feat_path = get_path(f'{file.name}_feat.fic')
    File.save_binary({
        'feat': data['feat'],
    }, feat_path)
    # Feature-related metrics
    feat_size = path.getsize(feat_path)
    feat_bpp = get_bpp(feat_size)
    # Texture-related metrics
    tex_size = fic_size - feat_size
    tex_bpp = get_bpp(tex_size)

    # Images to be saved
    imgs = {
        'input': data['input'],
        'recon': data['recon'],
        'resi': data['resi'],
        'resi_decoded': data['resi_decoded'],
        'resi_norm': data['resi_norm'],
        'resi_decoded_norm': data['resi_decoded_norm'],
        'output': data['output'],
    }

    # Save imgs and collect the corresponding URLs
    img_urls = {}
    for key, value in imgs.items():
        # Save the image
        file_name = file.name_suffix(key, ext='.bmp')
        file_path = get_path(file_name)
        save_image(value, file_path)
        # Record the image's URL
        img_urls[key] = get_url(file_name)

    # Compute the compression ratio
    input_name = file.name_suffix('input', ext='.bmp')
    input_path = get_path(input_name)
    input_size = path.getsize(input_path)
    fic_compression_ratio = fic_size / input_size

    # JPEG baseline for comparison
    jpeg_name = file.name_suffix('jpeg', ext='.jpg')
    jpeg_path = get_path(jpeg_name)
    dichotomy_compress(input_path, jpeg_path, target_size=tex_size)
    img_urls['jpeg'] = get_url(jpeg_name)

    # JPEG-related metrics
    jpeg_size = path.getsize(jpeg_path)
    jpeg_compression_ratio = jpeg_size / input_size
    jpeg_bpp = get_bpp(jpeg_size)

    # Other data
    input_arr = tensor_to_array(data['input'])
    output_arr = tensor_to_array(data['output'])
    jpeg_arr = load_image_array(jpeg_path)

    # Response payload
    ret = {
        'image': img_urls,
        'data': get_url(f'{file.name}.fic'),
        'eval': {
            'fic_bpp': fic_bpp,
            'feat_bpp': feat_bpp,
            'tex_bpp': tex_bpp,
            'jpeg_bpp': jpeg_bpp,
            'fic_compression_ratio': fic_compression_ratio,
            'jpeg_compression_ratio': jpeg_compression_ratio,
            'fic_psnr': psnr(input_arr, output_arr),
            'fic_ssim': ssim(input_arr, output_arr),
            'jpeg_psnr': psnr(input_arr, jpeg_arr),
            'jpeg_ssim': ssim(input_arr, jpeg_arr),
        },
        'size': {
            'fic': fic_size,
            'input': input_size,
            # 'output': fic_size,
            'output': tex_size,
            'feat': feat_size,
            'tex': tex_size,
            'jpeg': jpeg_size,
        }
    }
    # Respond to the request
    response = jsonify(ret)
    return response
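get_bpp, File, get_path, get_url and dichotomy_compress all come from the surrounding FIC project and are not shown in this snippet. Judging from the test.py fragment in Example #10, which divides a byte count by conf.IMAGE_PIXEL_NUM, get_bpp plausibly converts a file size into bits per pixel; a sketch, with the 8x bytes-to-bits factor marked as an assumption:

# Hypothetical helper: file size in bytes -> bits per pixel.
# The *8 bytes-to-bits factor is an assumption; the test.py fragment
# below divides bytes directly by the pixel count.
def get_bpp(size_bytes):
    return size_bytes * 8 / conf.IMAGE_PIXEL_NUM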
Example #9
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--split',
                        type=str,
                        default='train',
                        help='train/val/test')
    parser.add_argument('--model_path',
                        type=str,
                        default='./Data/ResNet/resnet_v2_101.ckpt',
                        help='Pretrained RESNET Model')
    parser.add_argument('--data_dir',
                        type=str,
                        default='Data',
                        help='Data directory')
    parser.add_argument('--batch_size',
                        type=int,
                        default=10,
                        help='Batch Size')

    args = parser.parse_args()

    slim = tf.contrib.slim
    resnet = nets.resnet_v2

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.9
    sess = tf.InteractiveSession(config=config)
    sess.run(tf.global_variables_initializer())

    if args.split == 'test':
        all_data = data_loader.load_test_questions()
        qa_data = all_data['testing']
    else:
        all_data = data_loader.load_questions_answers(args)
        if args.split == "train":
            qa_data = all_data['training']
        else:
            qa_data = all_data['validation']

    image_ids = {}
    for qa in qa_data:
        image_ids[qa['image_id']] = 1

    image_id_list = [img_id for img_id in image_ids]
    print("Total Images", len(image_id_list))

    length = 100 if args.split == 'test' else len(image_id_list)
    res5c = np.ndarray((length, 2048))
    idx = 0
    SIZE = 299
    flag = 0

    while idx < length:
        if idx % 500 == 0:
            flag = 0
            tf.reset_default_graph()
        with tf.Graph().as_default():
            with tf.Session() as sess:
                while idx < length:
                    start = time.clock()
                    image_batch = np.ndarray((args.batch_size, SIZE, SIZE, 3),
                                             dtype=np.float32)

                    count = 0
                    for i in range(0, args.batch_size):
                        if idx >= len(image_id_list):
                            break
                        if args.split == 'test':
                            image_file = join(
                                args.data_dir, '%s2015/COCO_%s2015_%.12d.jpg' %
                                (args.split, args.split, image_id_list[idx]))
                        else:
                            image_file = join(
                                args.data_dir, '%s2014/COCO_%s2014_%.12d.jpg' %
                                (args.split, args.split, image_id_list[idx]))
                        image_batch[i, :, :, :] = utils.load_image_array(
                            image_file, size=SIZE)
                        idx += 1
                        count += 1

                    with slim.arg_scope(resnet.resnet_arg_scope()):
                        logits, end_points = resnet.resnet_v2_101(
                            image_batch[0:count, :, :, :],
                            num_classes=None,
                            is_training=False,
                            reuse=tf.AUTO_REUSE)
                        if not flag:
                            vals = slim.get_model_variables('resnet_v2_101')
                            init_fn = slim.assign_from_checkpoint_fn(
                                args.model_path, vals)
                            init_fn(sess)
                            flag = 1

                    res5c_batch = sess.run(logits)
                    # logits has shape (count, 1, 1, 2048); flatten to one
                    # 2048-d row per image
                    res5c_batch = res5c_batch.reshape((count, 2048))
                    res5c[(idx - count):idx, :] = res5c_batch[0:count, :]
                    end = time.clock()
                    print("Time for batch 10 photos", end - start)
                    print("Hours For Whole Dataset",
                          (len(image_id_list) * 1.0) * (end - start) / 60.0 /
                          60.0 / 10.0)

                    print("Images Processed", idx)
                    if idx % 500 == 0:
                        break

    print("Saving res5c features")
    h5f_res5c = h5py.File(join(args.data_dir, args.split + '_res5c.h5'), 'w')
    h5f_res5c.create_dataset('res5c_features', data=res5c)
    h5f_res5c.close()

    print("Saving image id list")
    h5f_image_id_list = h5py.File(
        join(args.data_dir, args.split + '_image_id_list.h5'), 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()
    print("Done!")
Example #10
File: test.py Project: ceynri/FIC
        fic_compression_ratio = fic_size / input_size

        # JPEG baseline for comparison
        jpeg_name = file.name_suffix('jpeg', ext='.jpg')
        jpeg_path = get_path(jpeg_name)
        dichotomy_compress(input_path, jpeg_path, target_size=tex_size)

        # JPEG-related metrics
        jpeg_size = path.getsize(jpeg_path)
        jpeg_compression_ratio = jpeg_size / input_size
        jpeg_bpp = jpeg_size / conf.IMAGE_PIXEL_NUM

        # Other data
        input_arr = tensor_to_array(input)
        output_arr = tensor_to_array(output)
        jpeg_arr = load_image_array(jpeg_path)

        print(json.dumps({
            'eval': {
                # 'fic_bpp': fic_bpp,
                # 'feat_bpp': feat_bpp,
                'tex_bpp': tex_bpp,
                'jpeg_bpp': jpeg_bpp,
                # 'fic_compression_ratio': fic_compression_ratio,
                # 'jpeg_compression_ratio': jpeg_compression_ratio,
                'fic_psnr': psnr(input_arr, output_arr),
                'fic_ssim': ssim(input_arr, output_arr),
                'jpeg_psnr': psnr(input_arr, jpeg_arr),
                'jpeg_ssim': ssim(input_arr, jpeg_arr),
            },
            'size': {
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--split', type=str, default='train', help='train/val')
    parser.add_argument(
        '--model_path',
        type=str,
        default='Data/train2014/Tri Training 2/vgg16-20160129.tfmodel',
        help='Pretrained VGG16 Model')
    parser.add_argument('--data_dir',
                        type=str,
                        default='Data/',
                        help='Data directory')
    parser.add_argument('--batch_size',
                        type=int,
                        default=10,
                        help='Batch Size')

    args = parser.parse_args()

    vgg_file = io.open(args.model_path, mode='rb')
    vgg16raw = vgg_file.read()
    vgg_file.close()

    graph_def = tf.GraphDef()
    graph_def.ParseFromString(vgg16raw)

    images = tf.placeholder("float", [None, 224, 224, 3])
    tf.import_graph_def(graph_def, input_map={"images": images})

    graph = tf.get_default_graph()

    for opn in graph.get_operations():
        print("Name", opn.name, opn.values())

    image_id_list = [img_id for img_id in range(20000)]
    print("Total Images", len(image_id_list))

    sess = tf.Session()
    fc7 = np.ndarray((len(image_id_list), 4096))
    idx = 0

    while idx < len(image_id_list):
        start = time.clock()
        image_batch = np.ndarray((args.batch_size, 224, 224, 3))

        count = 0
        for i in range(0, args.batch_size):
            if idx >= len(image_id_list):
                break
            image_file = join(
                args.data_dir, '%s_2014/VizWiz_%s_%.12d.jpg' %
                (args.split, args.split, image_id_list[idx]))
            image_batch[i, :, :, :] = utils.load_image_array(image_file)
            idx += 1
            count += 1

        feed_dict = {images: image_batch[0:count, :, :, :]}
        fc7_tensor = graph.get_tensor_by_name("import/Relu_1:0")
        fc7_batch = sess.run(fc7_tensor, feed_dict=feed_dict)
        fc7[(idx - count):idx, :] = fc7_batch[0:count, :]
        end = time.clock()
        print("Time for batch 10 photos", end - start)
        print("Hours For Whole Dataset",
              (len(image_id_list) * 1.0) * (end - start) / 60.0 / 60.0 / 10.0)

        print("Images Processed", idx)

    print("Saving fc7 features")
    h5f_fc7 = h5py.File(join(args.data_dir, args.split + '_fc7.h5'), 'w')
    h5f_fc7.create_dataset('fc7_features', data=fc7)
    h5f_fc7.close()

    print("Saving image id list")
    h5f_image_id_list = h5py.File(
        join(args.data_dir, args.split + '_image_id_list.h5'), 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()
    print("Done!")
Example #12
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_lstm_layers',
                        type=int,
                        default=2,
                        help='num_lstm_layers')
    parser.add_argument('--fc7_feature_length',
                        type=int,
                        default=4096,
                        help='fc7_feature_length')
    parser.add_argument('--rnn_size', type=int, default=512, help='rnn_size')
    parser.add_argument('--embedding_size',
                        type=int,
                        default=512,
                        help='embedding_size')
    parser.add_argument('--word_emb_dropout',
                        type=float,
                        default=0.5,
                        help='word_emb_dropout')
    parser.add_argument('--image_dropout',
                        type=float,
                        default=0.5,
                        help='image_dropout')
    parser.add_argument('--data_dir',
                        type=str,
                        default='Data',
                        help='Data directory')
    parser.add_argument('--batch_size',
                        type=int,
                        default=200,
                        help='Batch Size')
    parser.add_argument('--learning_rate',
                        type=float,
                        default=0.001,
                        help='Learning Rate')
    parser.add_argument('--epochs', type=int, default=100, help='Epochs')
    parser.add_argument('--debug', type=bool, default=False, help='Debug')
    parser.add_argument('--resume_model',
                        type=str,
                        default=None,
                        help='Trained Model Path')
    parser.add_argument('--version',
                        type=int,
                        default=2,
                        help='VQA data version')

    args = parser.parse_args()
    print("Creating QuestionAnswer data")
    prepare_training_data('trainquestions.json', 'trainannotations.json',
                          'valquestions.json', 'valannotations.json')
    print("Prepared given data")
    print("Reading QuestionAnswer data")

    qa_data = load_questions_answers('newqadata.pkl', 'Data')
    print(qa_data['answer_vocab'])

    print("Creating Image features")
    ################################################
    split = 'train'
    vgg_file = open('Data/vgg16.tfmodel', 'rb')
    vgg16raw = vgg_file.read()
    vgg_file.close()

    graph_def = tf.GraphDef()
    graph_def.ParseFromString(vgg16raw)

    images = tf.placeholder("float", [None, 224, 224, 3])
    tf.import_graph_def(graph_def, input_map={"images": images})

    graph = tf.get_default_graph()

    for opn in graph.get_operations():
        print("Name", opn.name, opn.values())

    all_data = load_questions_answers()
    if split == "train":
        qa_data = all_data['training']
    else:
        qa_data = all_data['validation']

    image_ids = {}
    for qa in qa_data:
        image_ids[qa['image_id']] = 1

    image_id_list = [img_id for img_id in image_ids]
    print("Total Images", len(image_id_list))

    sess = tf.Session()
    fc7 = np.ndarray((len(image_id_list), 4096))
    idx = 0

    while idx < len(image_id_list):
        start = time.clock()
        image_batch = np.ndarray((10, 224, 224, 3))

        count = 0
        for i in range(0, 10):  # the image_batch buffer above only holds 10 images
            if idx >= len(image_id_list):
                break
            image_file = join('Data',
                              '%snew/%.1d.jpg' % (split, image_id_list[idx]))
            image_batch[i, :, :, :] = utils.load_image_array(image_file)
            idx += 1
            count += 1

        feed_dict = {images: image_batch[0:count, :, :, :]}
        fc7_tensor = graph.get_tensor_by_name("import/Relu_1:0")
        fc7_batch = sess.run(fc7_tensor, feed_dict=feed_dict)
        fc7[(idx - count):idx, :] = fc7_batch[0:count, :]
        end = time.clock()
        print("Time for batch 10 photos", end - start)
        print("Hours For Whole Dataset",
              (len(image_id_list) * 1.0) * (end - start) / 60.0 / 60.0 / 10.0)

        print("Images Processed", idx)

    print("Saving fc7 features")
    h5f_fc7 = h5py.File(join('Data', 'fc7new.h5'), 'w')
    h5f_fc7.create_dataset('fc7_features', data=fc7)
    h5f_fc7.close()

    print("Saving image id list")
    h5f_image_id_list = h5py.File(join('Data', 'image_id_listnew.h5'), 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()
    print("Done!")

    ##################################################
    print("Reading image features")
    fc7_features, image_id_list = load_fc7_features('Data', 'train')
    print("FC7 features", fc7_features.shape)
    print("image_id_list", image_id_list.shape)
    qa_data = load_questions_answers('newqadata.pkl', 'Data')
    print(qa_data['answer_vocab'])
    image_id_map = {}
    for i in range(len(image_id_list)):
        image_id_map[image_id_list[i]] = i
    ans_map = {
        qa_data['answer_vocab'][ans]: ans
        for ans in qa_data['answer_vocab']
    }
    model_options = {
        'num_lstm_layers': 2,
        'rnn_size': 512,
        'embedding_size': 512,
        'word_emb_dropout': 0.5,
        'image_dropout': 0.5,
        'fc7_feature_length': 4096,
        'lstm_steps': qa_data['max_question_length'] + 1,
        'q_vocab_size': len(qa_data['question_vocab']),
        'ans_vocab_size': len(qa_data['answer_vocab'])
    }
    model = vis_lstm_model.Vis_lstm_model(model_options)
    input_tensors, t_loss, t_accuracy, t_p = model.build_model()
    train_op = tf.train.AdamOptimizer(0.001).minimize(t_loss)
    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()
    saver = tf.train.Saver()
    #model.summary()
    #plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)
    if args.resume_model:
        saver.restore(sess, args.resume_model)
    for i in range(100):
        batch_no = 0
        while (batch_no * 10) < len(qa_data['training']):
            sentence, answer, fc7 = get_training_batch(batch_no, 10,
                                                       fc7_features,
                                                       image_id_map, qa_data,
                                                       'train')
            _, loss_value, accuracy, pred = sess.run(
                [train_op, t_loss, t_accuracy, t_p],
                feed_dict={
                    input_tensors['fc7']: fc7,
                    input_tensors['sentence']: sentence,
                    input_tensors['answer']: answer
                })
            batch_no += 1
            if args.debug:
                for j, p in enumerate(pred):
                    print(ans_map[p], ans_map[np.argmax(answer[j])])
                print("Loss", loss_value, batch_no, i)
                print("Accuracy", accuracy)
                print("---------------")
                # skplt.metrics.plot_roc_curve expects class probabilities
                # rather than label strings, so the ROC plot is skipped here.
                #skplt.metrics.plot_roc_curve(answer[j], ans_map[p])
                #plt.show()
            else:
                print("Loss", loss_value, batch_no, i)
                print("Training Accuracy", accuracy)
                #skplt.metrics.plot_roc_curve(answer[0], pred[0])
                #plt.show()
        save_path = saver.save(sess, "Data/Models/modelnew{}.ckpt".format(i))
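get_training_batch is called in the training loop above but never defined in this example. A sketch of a compatible implementation, inferred from the shapes its outputs must have (padded question indices, one-hot answers, and fc7 rows matched through image_id_map); the per-example field names 'question', 'answer' and 'image_id' are assumptions:

# Hypothetical get_training_batch, matching how its outputs are consumed above.
def get_training_batch(batch_no, batch_size, fc7_features, image_id_map, qa_data, split):
    subset = qa_data['training'] if split == 'train' else qa_data['validation']
    examples = subset[batch_no * batch_size:(batch_no + 1) * batch_size]
    n = len(examples)
    sentence = np.zeros((n, qa_data['max_question_length']), dtype='int32')
    answer = np.zeros((n, len(qa_data['answer_vocab'])))
    fc7 = np.ndarray((n, 4096))
    for i, ex in enumerate(examples):
        sentence[i, :] = ex['question']  # pre-padded word indices
        answer[i, ex['answer']] = 1.0    # one-hot answer index
        fc7[i, :] = fc7_features[image_id_map[ex['image_id']]]
    return sentence, answer, fc7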
Example #13
def main():

    with tf.Graph().as_default():

        utils.prepare_training_data(FLAGS.data_dir)

        all_data = utils.load_questions_answers(FLAGS.data_dir)
        if FLAGS.split == "train":
            qa_data = all_data['training']
        else:
            qa_data = all_data['validation']

        image_ids = {}
        for qa in qa_data:
            image_ids[qa['image_id']] = 1

        image_id_list = [img_id for img_id in image_ids]
        print("Total Images", len(image_id_list))

        images = tf.placeholder("float", [None, 224, 224, 3])

        with slim.arg_scope(resnet.resnet_arg_scope()):
            net, _ = resnet.resnet_v2_152(images,
                                          FLAGS.output_size,
                                          is_training=False)

        restorer = tf.train.Saver()

        results = np.ndarray((len(image_id_list), FLAGS.output_size))
        idx = 0

        with tf.Session(config=tf.ConfigProto(
                log_device_placement=True)) as sess:
            # Restore the pretrained weights once, before the batch loop
            checkpoint = join(FLAGS.data_dir, FLAGS.checkpoint_path)
            restorer.restore(sess, checkpoint)
            print("Model Restored")
            while idx < len(image_id_list):
                start = time.clock()
                image_batch = np.ndarray((FLAGS.batch_size, 224, 224, 3))

                count = 0
                for i in range(0, FLAGS.batch_size):
                    if idx >= len(image_id_list):
                        break
                    image_file = join(
                        FLAGS.data_dir,
                        '%s2017/%.12d.jpg' % (FLAGS.split, image_id_list[idx]))
                    image_batch[i, :, :, :] = utils.load_image_array(
                        image_file)
                    idx += 1
                    count += 1

                feed_dict = {images: image_batch[0:count, :, :, :]}

                pred_batch = sess.run(net, feed_dict=feed_dict)
                # print(np.squeeze(pred_batch).shape)
                results[(idx -
                         count):idx, :] = np.squeeze(pred_batch)[0:count, :]
                end = time.clock()
                print("Time for batch 10 photos", end - start)
                print("Hours For Whole Dataset", (len(image_id_list) * 1.0) *
                      (end - start) / 60.0 / 60.0 / 10.0)

                print("Images Processed", idx)

        print("Saving image features")

        h5f_img_embed = h5py.File(
            join(FLAGS.data_dir, FLAGS.split + '_img_embed.h5'), 'w')
        h5f_img_embed.create_dataset('img_features', data=results)
        h5f_img_embed.close()

        print("Saving image id list")
        h5f_image_id_list = h5py.File(
            join(FLAGS.data_dir, FLAGS.split + '_image_id_list.h5'), 'w')
        h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
        h5f_image_id_list.close()
        print("Done!")
Example #14
def get_style_features():
    lim=10000
    model_path = './Data/vgg16.tfmodel'
    split = 'train'
    data_dir = './Data'
    batch_size = 1

    vgg_file = open(model_path,'rb')
    vgg16raw = vgg_file.read()
    vgg_file.close()

    graph_def = tf.GraphDef()
    graph_def.ParseFromString(vgg16raw)

    print ("VGG done successfully")

    images = tf.placeholder("float", [None, 224, 224, 3])
    tf.import_graph_def(graph_def, input_map={ "images": images })

    graph = tf.get_default_graph()

    for opn in graph.get_operations():
        print ("Name", opn.name, opn.values())

    image_names1 = os.listdir('/scratch/bam_subset/')
    image_names1.sort()
    image_names1=image_names1[:lim]
    print ("No of images", len(image_names1))

    image_names = []
    no_of_images = len(image_names1)
    for i in range(no_of_images):
        im = image_names1[i]
        image_names.append(im)

    print ("Images extracted", no_of_images)

    image_id_list = []

    for i in range(len(image_names)):
        image_id_list.append(i)

    print ("Total Images", len(image_id_list))


    sess = tf.Session()
    #fc7 = np.ndarray( (len(image_id_list), 4096 ) )
    conv4_3_n = np.ndarray( (len(image_id_list),  512*512) )
    conv4_3 = []

    idx = 0

    from_start = time.clock()

    image_name_list = []

    while idx < 1000:
        start = time.clock()
        image_batch = np.ndarray( (batch_size, 224, 224, 3 ) )

        count = 0
        for i in range(0, batch_size):
            if idx >= len(image_id_list):
                    break
            image_name_list.append(image_names[idx])
            #image_file = join(data_dir, '%s2014/COCO_%s2014_%.12d.jpg'%(split, split, image_id_list[idx]) )
            image_file = "/scratch/bam_subset/"+image_names[idx]
            print ("Image name", image_file)
            image_batch[i,:,:,:] = utils.load_image_array(image_file)
            idx += 1
            count += 1
            


        #print(image_batch.shape)
        #print(image_batch[0:count,:,:,:].shape)
        #print(image_batch[0:count,:,:,:])
        feed_dict2  = { images : image_batch[0:count,:,:,:] }
        #fc7_tensor = graph.get_tensor_by_name("import/Relu_1:0")
        conv4_3_tensor = graph.get_tensor_by_name("import/conv4_3/Relu:0")
        #fc7_batch = sess.run(fc7_tensor, feed_dict = feed_dict)
        conv4_3_batch = sess.run(conv4_3_tensor, feed_dict = feed_dict2)
        conv4_3_batch = conv4_3_batch.reshape((1,28*28,512))
        conv4_3_batch = np.matmul(conv4_3_batch[0,:,:].T, conv4_3_batch[0,:,:])
        temp = np.ndarray((1,512*512))
        temp[0,:] = conv4_3_batch.reshape(512*512)
        conv4_3_batch = temp
        #fc7[(idx - count):idx, :] = fc7_batch[0:count,:]
        conv4_3_n[(idx - count):idx, :] = conv4_3_batch[0:count,:]
        #conv4_3.append( conv4_3_batch[0:count,:])

        end = time.clock()
        #print ("Time for batch 1 photos", end - start)
        print("Hours For Whole Dataset", (len(image_id_list) * 1.0) * (end - start) / 60.0 / 60.0 / 10.0)
        print("Time Elapsed:", (end - from_start) / 60, "Minutes")

        print ("Images Processed", idx)
        np.save('/scratch/mohsin/final_features/temp'+image_names[idx-1]+'.npy',conv4_3_batch[0:count,:])

    #np.savetxt('/scratch/sid_imp/conv4_3_features_vgg16.txt', conv4_3)
    f = open('image_names_list_vgg_conv.txt', 'w')
    for name in image_name_list:
        f.write(name+'\n')
    f.close()

    return conv4_3_n
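The quantity stored above is a Gram matrix: with the conv4_3 activations flattened to a matrix F of shape (28*28, 512), the code saves G = F.T @ F, a 512x512 matrix of channel co-activations (the standard style representation from neural style transfer). A standalone sketch of the same computation:

import numpy as np

def gram_matrix(feature_map):
    """(H, W, C) activations -> (C, C) Gram matrix of channel inner products."""
    h, w, c = feature_map.shape
    F = feature_map.reshape(h * w, c)
    return F.T @ F

activations = np.random.rand(28, 28, 512).astype('float32')
G = gram_matrix(activations)  # flattens to the 512*512 vector saved above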
Example #15
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--split', type=str, default='train', help='train/val')
    parser.add_argument('--model_path', type=str, help='VGGNet')
    #VGGNet version
    parser.add_argument('--data_dir',
                        type=str,
                        default='Data',
                        help='Data directory')
    parser.add_argument('--batch_size', type=int, default=100)

    # read pretrained vgg16 network
    args = parser.parse_args()
    vgg_file = open(args.model_path, 'rb')
    vgg16raw = vgg_file.read()
    vgg_file.close()

    # load the pretrained network into a tf graph
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(vgg16raw)

    images = tf.placeholder("float", [None, 224, 224, 3])
    tf.import_graph_def(graph_def, input_map={"images": images})

    graph = tf.get_default_graph()

    for opn in graph.get_operations():
        print("[VGG16] Name", opn.name, list(opn.values()))

    #Loading data
    all_data = data_loader.load_questions_answers()
    print(args)
    if args.split == "train":
        qa_data = all_data['training']
    else:
        qa_data = all_data['validation']

    image_ids = {}
    for qa in qa_data:
        image_ids[qa['image_id']] = 1

    image_id_list = [img_id for img_id in image_ids]
    print("Total Images", len(image_id_list))
    print(image_id_list[0:10])

    # begin extracting
    sess = tf.Session()
    idx = 0

    cnn7 = np.ndarray((len(image_id_list), 512, 49))
    while idx < len(image_id_list):
        start = time.clock()
        image_batch = np.ndarray((args.batch_size, 224, 224, 3))

        # load images into a batch
        count = 0
        for i in range(0, args.batch_size):
            if idx >= len(image_id_list):
                break
            image_file = join(
                args.data_dir, '%s2015/abstract_v002_%s2015_%.12d.png' %
                (args.split, args.split, image_id_list[idx]))
            image_batch[i, :, :, :] = utils.load_image_array(
                image_file)[:, :, :3]
            idx += 1
            count += 1

        feed_dict = {images: image_batch[0:count, :, :, :]}
        cnn7_tensor = graph.get_tensor_by_name("import/pool5:0")
        cnn7_batch = sess.run(cnn7_tensor, feed_dict=feed_dict)
        cnn7_batch = np.transpose(cnn7_batch, [0, 3, 1, 2])
        cnn7_batch = cnn7_batch.reshape(count, 512, -1)
        for i in range(count):  # only count rows exist after the reshape above
            cnn7_batch[i, :, :] = cnn7_batch[i, :, :] / np.linalg.norm(
                cnn7_batch[i, :, :], axis=0, keepdims=True)

        cnn7[(idx - count):idx, ...] = cnn7_batch[0:count, ...]
        end = time.clock()
        print("Time for batch 10 photos", end - start)
        print("Hours For Whole Dataset",
              (len(image_id_list) * 1.0) * (end - start) / 60.0 / 60.0 / 10.0)
        print("Images Processed", idx)

    print("Saving cnn7 features")
    h5f_cnn7 = h5py.File(join(args.data_dir, args.split + '_cnn7.h5'), 'w')
    h5f_cnn7.create_dataset('cnn7_features', data=cnn7)
    h5f_cnn7.close()

    print("Saving image id list")
    h5f_image_id_list = h5py.File(
        join(args.data_dir, args.split + '_image_id_list.h5'), 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()
    print("Done!")
Example #16
def main(image_path="test.jpg", question="what is in the image?"):

    slim = tf.contrib.slim
    resnet = nets.resnet_v2
	
    """
    tf.app.flags.DEFINE_string("image_path", image_path, "directory of image")
	
    tf.app.flags.DEFINE_string("question", question, "question")

    tf.app.flags.DEFINE_string("img_checkpoint_path", "./data/pretrain/resnet152/resnet_v2_152.ckpt",
                               "directory of checkpoint files for image feature extraction")

    tf.app.flags.DEFINE_string("checkpoint_path", "./data/pretrain/model",
                               "directory of checkpoint files for overall model")

    tf.app.flags.DEFINE_integer("num_lstm_layers", 2, "number of lstm layers")

    tf.app.flags.DEFINE_integer(
        "img_feat_len", 1001, "length of image feature vector")

    tf.app.flags.DEFINE_integer("rnn_size", 300, "size of rnn")

    tf.app.flags.DEFINE_integer(
        "que_feat_len", 300, "length of question feature vector")

    tf.app.flags.DEFINE_float("word_dropout", 0.5, "dropout rate of word nodes")

    tf.app.flags.DEFINE_float("img_dropout", 0.5, "dropout rate of image nodes")

    tf.app.flags.DEFINE_string("data_dir", "./data", "directory of data")
	
    FLAGS = tf.app.flags.FLAGS
    print ("Image:", FLAGS.image_path)
    print ("Question:", FLAGS.question)
    """
	
    #FLAGS = object()
    flags_image_path = image_path
    flags_question = question
    flags_img_checkpoint_path = "./data/pretrain/resnet152/resnet_v2_152.ckpt"
    flags_checkpoint_path = "./data/pretrain/model"
    flags_num_lstm_layers = 2
    flags_img_feat_len = 1001
    flags_rnn_size = 300
    flags_que_feat_len = 300
    flags_word_dropout = 0.5
    flags_img_dropout = 0.5
    flags_data_dir = "./data"
	
    vocab_data = utils.get_question_answer_vocab(flags_data_dir)
    qvocab = vocab_data['question_vocab']
    q_map = {vocab_data['question_vocab'][qw]: qw
             for qw in vocab_data['question_vocab']}

    with tf.Graph().as_default():
        images = tf.placeholder("float32", [None, 224, 224, 3])
        with slim.arg_scope(resnet.resnet_arg_scope()):
            net, _ = resnet.resnet_v2_152(images, 1001, is_training=False)
        restorer = tf.train.Saver()

        with tf.Session() as sess:#config=tf.ConfigProto(log_device_placement=True)) as sess:
            start = time.clock()
            image_array = utils.load_image_array(flags_image_path)
            image_feed = np.ndarray((1, 224, 224, 3))
            image_feed[0, :, :, :] = image_array

            # checkpoint = tf.train.latest_checkpoint(flags_img_checkpoint_path)
            checkpoint = flags_img_checkpoint_path
            restorer.restore(sess, checkpoint)
            print("Image Model loaded")
            feed_dict = {images: image_feed}
            img_feature = sess.run(net, feed_dict=feed_dict)
            img_feature = np.squeeze(img_feature)
            end = time.clock()
            print("Time elapsed", end - start)
            print("Image processed")

    model_options = {
        'num_lstm_layers': flags_num_lstm_layers,
        'rnn_size': flags_rnn_size,
        'embedding_size': flags_que_feat_len,
        'word_emb_dropout': flags_word_dropout,
        'image_dropout': flags_img_dropout,
        'img_feature_length': flags_img_feat_len,
        'lstm_steps': vocab_data['max_question_length'] + 1,
        'q_vocab_size': len(vocab_data['question_vocab']),
        'ans_vocab_size': len(vocab_data['answer_vocab'])
    }

    question_vocab = vocab_data['question_vocab']
    word_regex = re.compile(r'\w+')
    question_ids = np.zeros(
        (1, vocab_data['max_question_length']), dtype='int32')
    question_words = re.findall(word_regex, flags_question)
    base = vocab_data['max_question_length'] - len(question_words)
    for i in range(0, len(question_words)):
        if question_words[i] in question_vocab:
            question_ids[0][base + i] = question_vocab[question_words[i]]
        else:
            question_ids[0][base + i] = question_vocab['UNK']

    ans_map = {vocab_data['answer_vocab'][ans]: ans
               for ans in vocab_data['answer_vocab']}

    with tf.Graph().as_default():
        model = vis_lstm_model.Vis_lstm_model(model_options)
        input_tensors, t_prediction, t_ans_probab = model.build_generator()
        restorer = tf.train.Saver()
        with tf.Session() as sess:#config=tf.ConfigProto(log_device_placement=True)) as sess:
            checkpoint = tf.train.latest_checkpoint(flags_checkpoint_path)
            restorer.restore(sess, checkpoint)
            pred, answer_probab = sess.run([t_prediction, t_ans_probab], feed_dict={
                input_tensors['img']: np.reshape(img_feature, [1,1001]),
                input_tensors['sentence']: question_ids,
            })

    print("Ans:", ans_map[pred[0]])
    answer_probab_tuples = [(-answer_probab[0][idx], idx)
                            for idx in range(len(answer_probab[0]))]
    answer_probab_tuples.sort()
    print("Top Answers")
    for i in range(5):
        print(ans_map[answer_probab_tuples[i][1]])
    
    return (ans_map, answer_probab_tuples)
Example #17
def main():
    config = json.load(open('config.json'))

    parser = argparse.ArgumentParser()
    parser.add_argument('--split',
                        type=str,
                        default=config['split'],
                        help='train/val')
    parser.add_argument('--model_path',
                        type=str,
                        default=config['model_path'],
                        help='Pretrained VGG16 Model')
    parser.add_argument('--qa_dir',
                        type=str,
                        default=config['qa_dir'],
                        help='QA Data directory')
    parser.add_argument('--data_dir',
                        type=str,
                        default=config['data_dir'],
                        help='Common Data directory')
    parser.add_argument('--batch_size',
                        type=int,
                        default=10,
                        help='Batch Size')

    args = parser.parse_args()

    vgg_file = open(args.model_path, 'rb')
    vgg16raw = vgg_file.read()
    vgg_file.close()

    graph_def = tf.GraphDef()
    graph_def.ParseFromString(vgg16raw)

    images = tf.placeholder("float", [None, 224, 224, 3])
    tf.import_graph_def(graph_def, input_map={"images": images})

    graph = tf.get_default_graph()

    for opn in graph.get_operations():
        print("Name", opn.name, list(opn.values()))

    all_data = data_loader.load_questions_answers(args.qa_dir)
    if args.split == "train":
        qa_data = all_data['training']
    else:
        qa_data = all_data['validation']

    image_ids = {}
    for qa in qa_data:
        image_ids[qa['image_id']] = 1

    image_id_list = [img_id for img_id in image_ids]
    print("Total Images", len(image_id_list))

    sess = tf.Session()
    fc7 = np.ndarray((len(image_id_list), 4096))
    idx = 0

    err_file = open('err.txt', 'w', encoding='utf-8')

    while idx < len(image_id_list):
        start = time.clock()
        image_batch = np.ndarray((args.batch_size, 224, 224, 3))

        count = 0
        for i in range(0, args.batch_size):
            if idx >= len(image_id_list):
                break
            # print(image_id_list[idx])
            filename = 'COCO_%s2014_%.12d.jpg' % (args.split,
                                                  image_id_list[idx])
            image_file = join(args.data_dir, '%s2014' % args.split, filename)
            try:
                image_batch[i, :, :, :] = utils.load_image_array(image_file)
            except (ValueError, FileNotFoundError, OSError) as e:
                print("http://images.cocodataset.org/%s2014/%s" %
                      (args.split, filename))
                err_file.write(str(image_id_list[idx]) + '\n')
            idx += 1
            count += 1
        err_file.flush()
        feed_dict = {images: image_batch[0:count, :, :, :]}
        fc7_tensor = graph.get_tensor_by_name("import/Relu_1:0")
        fc7_batch = sess.run(fc7_tensor, feed_dict=feed_dict)
        fc7[(idx - count):idx, :] = fc7_batch[0:count, :]
        end = time.clock()
        print("Time for batch 10 photos", end - start)
        print("Hours For Whole Dataset",
              (len(image_id_list) * 1.0) * (end - start) / 60.0 / 60.0 / 10.0)

        print("Images Processed", idx)

    print("Saving fc7 features")
    h5f_fc7 = h5py.File(join(args.data_dir, args.split + '_fc7.h5'), 'w')
    h5f_fc7.create_dataset('fc7_features', data=fc7)
    h5f_fc7.close()

    print("Saving image id list")
    h5f_image_id_list = h5py.File(
        join(args.data_dir, args.split + '_image_id_list.h5'), 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()
    print("Done!")
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--split', type=str, default='train', help='train/val')
    parser.add_argument('--model_path',
                        type=str,
                        default='Data/vgg16.tfmodel',
                        help='Pretrained VGG16 Model')
    parser.add_argument('--data_dir',
                        type=str,
                        default='Data',
                        help='Data directory')
    parser.add_argument('--batch_size', type=int, default=1, help='Batch Size')

    args = parser.parse_args()

    vgg_file = open(args.model_path, 'rb')
    vgg16raw = vgg_file.read()
    vgg_file.close()

    graph_def = tf.GraphDef()
    graph_def.ParseFromString(vgg16raw)

    print("VGG done successfully")

    images = tf.placeholder("float", [None, 224, 224, 3])
    tf.import_graph_def(graph_def, input_map={"images": images})

    graph = tf.get_default_graph()

    for opn in graph.get_operations():
        print "Name", opn.name, opn.values()

    #image_id_list = [img_id for img_id in image_ids]
    image_names = os.listdir('./Images/')
    image_names.sort()
    image_id_list = []

    for i in range(len(image_names)):
        image_id_list.append(i)

    print "Total Images", len(image_id_list)

    sess = tf.Session()
    fc7 = np.ndarray((len(image_id_list), 4096))
    idx = 0

    from_start = time.time()

    while idx < len(image_id_list):
        start = time.time()
        image_batch = np.ndarray((args.batch_size, 224, 224, 3))

        count = 0
        for i in range(0, args.batch_size):
            if idx >= len(image_id_list):
                break
            #image_file = join(args.data_dir, '%s2014/COCO_%s2014_%.12d.jpg'%(args.split, args.split, image_id_list[idx]) )
            image_file = "Images/" + image_names[idx]
            image_batch[i, :, :, :] = utils.load_image_array(image_file)
            idx += 1
            count += 1

        feed_dict = {images: image_batch[0:count, :, :, :]}
        fc7_tensor = graph.get_tensor_by_name("import/Relu_1:0")
        fc7_batch = sess.run(fc7_tensor, feed_dict=feed_dict)
        fc7[(idx - count):idx, :] = fc7_batch[0:count, :]
        end = time.time()
        print("Time for batch of 1 photo", end - start)
        # print("Hours For Whole Dataset", (len(image_id_list) * 1.0) * (end - start) / 60.0 / 60.0 / 10.0)
        print("Time Elapsed:", (end - from_start) / 60, "Minutes")

        print("Images Processed", idx)

    np.savetxt('FC7_Features_Animation', fc7)
Example #19
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--split', type=str, default='train', help='train/val')
    parser.add_argument('--model_path',
                        type=str,
                        default='Data/vgg16.tfmodel',
                        help='Pretrained VGG16 Model')
    parser.add_argument('--data_dir',
                        type=str,
                        default='Data',
                        help='Data directory')
    parser.add_argument('--batch_size',
                        type=int,
                        default=10,
                        help='Batch Size')

    args = parser.parse_args()
    #print(args.model_path)
    vgg_file = open(args.model_path, 'rb')
    vgg16raw = vgg_file.read()
    vgg_file.close()

    graph_def = tf.GraphDef()
    graph_def.ParseFromString(vgg16raw)

    images = tf.placeholder("float", [None, 224, 224, 3])
    tf.import_graph_def(graph_def, input_map={"images": images})

    graph = tf.get_default_graph()

    for opn in graph.get_operations():
        print("Name", opn.name, list(opn.values()))

    # Loading data
    # data_loader.prepare_training_data(version=2, data_dir='Data')
    all_data = data_loader.load_questions_answers()
    print(args)
    if args.split == "train":
        qa_data = all_data['training']
    else:
        qa_data = all_data['validation']

    image_ids = {}
    for qa in qa_data:
        image_ids[qa['image_id']] = 1

    image_id_list = [img_id for img_id in image_ids]
    print("Total Images", len(image_id_list))

    sess = tf.Session()
    fc7 = np.ndarray((len(image_id_list), 4096))
    idx = 0

    while idx < len(image_id_list):
        start = time.clock()
        image_batch = np.ndarray((args.batch_size, 224, 224, 3))

        count = 0
        for i in range(0, args.batch_size):
            if idx >= len(image_id_list):
                break
            image_file = join(
                args.data_dir, '%s2015/abstract_v002_%s2015_%.12d.png' %
                (args.split, args.split, image_id_list[idx]))
            image_batch[i, :, :, :] = utils.load_image_array(
                image_file)[:, :, :3]
            idx += 1
            count += 1

        feed_dict = {images: image_batch[0:count, :, :, :]}
        fc7_tensor = graph.get_tensor_by_name("import/Relu_1:0")
        fc7_batch = sess.run(fc7_tensor, feed_dict=feed_dict)
        fc7[(idx - count):idx, :] = fc7_batch[0:count, :]
        end = time.clock()
        print("Time for batch 10 photos", end - start)
        print("Hours For Whole Dataset",
              (len(image_id_list) * 1.0) * (end - start) / 60.0 / 60.0 / 10.0)

        print("Images Processed", idx)

    print("Saving fc7 features")
    h5f_fc7 = h5py.File(join(args.data_dir, args.split + '_fc7.h5'), 'w')
    h5f_fc7.create_dataset('fc7_features', data=fc7)
    h5f_fc7.close()

    print("Saving image id list")
    h5f_image_id_list = h5py.File(
        join(args.data_dir, args.split + '_image_id_list.h5'), 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()
    print("Done!")
Example #20
        return (r_ssim + g_ssim + b_ssim) / 3
    # Means
    mu1 = img1.mean()
    mu2 = img2.mean()
    # Standard deviations
    sigma1 = np.sqrt(((img1 - mu1)**2).mean())
    sigma2 = np.sqrt(((img2 - mu2)**2).mean())
    # Covariance
    sigma12 = ((img1 - mu1) * (img2 - mu2)).mean()
    # Constants
    k1, k2, L = 0.01, 0.03, 255
    c1 = (k1 * L)**2
    c2 = (k2 * L)**2
    c3 = c2 / 2
    # Combine the terms per the SSIM formula
    l12 = (2 * mu1 * mu2 + c1) / (mu1**2 + mu2**2 + c1)
    c12 = (2 * sigma1 * sigma2 + c2) / (sigma1**2 + sigma2**2 + c2)
    s12 = (sigma12 + c3) / (sigma1 * sigma2 + c3)
    ssim_val = l12 * c12 * s12
    return ssim_val


if __name__ == "__main__":
    sys.path.append(path.dirname(path.dirname(path.realpath(__file__))))
    from utils import load_image_array

    input = load_image_array(sys.argv[1])
    output = load_image_array(sys.argv[2])
    print(psnr(input, output))
    print(ssim(input, output))
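psnr is called in the __main__ block but only ssim appears in this fragment. A standard definition consistent with the 8-bit peak value L = 255 used above (a sketch, not necessarily the project's own implementation):

import numpy as np

def psnr(img1, img2, peak=255.0):
    """Peak signal-to-noise ratio in dB for 8-bit images."""
    mse = ((img1.astype('float64') - img2.astype('float64')) ** 2).mean()
    if mse == 0:
        return float('inf')
    return 10 * np.log10(peak ** 2 / mse)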