def main(params):
    net = getattr(resnet, 'resnet152')()
    num_ftrs = net.fc.in_features
    #    net.fc = torch.nn.Linear(num_ftrs, 1)
    net.load_state_dict(torch.load('./resnet152.pth'), strict=False)
    my_resnet = myResnet(net)
    my_resnet.cuda()
    my_resnet.eval()
    if not os.path.exists('resnet152'):
        subprocess.call('mkdir resnet152', shell=True)

    train_set = hdf5storage.read(path='/img_path', filename='./val_set.h5')
    print(len(train_set))
    seed(123)  # make reproducible
    for i in range(8000 + 780, len(train_set)):
        outputs = []

        for j in range(8):
            I = skimage.io.imread('./' + train_set[i][j].replace('\\', '/'),
                                  as_gray=1)
            if len(I.shape) == 2:
                I = I[:, :, np.newaxis]
                I = np.concatenate((I, I, I), axis=2)
            I = I.astype('float32') / 255.0
            I = torch.from_numpy(I.transpose([2, 0, 1])).cuda()
            I = preprocess(I)
            with torch.no_grad():
                tmp_fc, tmp_att = my_resnet(I, params['att_size'])
            outputs.append(tmp_fc.data.cpu().float().numpy())
        file_nm = os.path.join('./resnet152', 'v_video' + str(i + 1) + '.npy')
        np.save(file_nm, outputs)
        print(file_nm)
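Every snippet in this listing wraps a torchvision-style ResNet in a myResnet helper that returns a pooled fc vector plus an att_size x att_size spatial grid. A minimal sketch of that wrapper, modeled on the misc/resnet_utils.py used by ImageCaptioning.pytorch-style repos (the class in your checkout may differ in detail):

import torch.nn as nn
import torch.nn.functional as F

class myResnet(nn.Module):
    def __init__(self, resnet):
        super(myResnet, self).__init__()
        self.resnet = resnet

    def forward(self, img, att_size=14):
        # img: a normalized 3 x H x W tensor
        x = img.unsqueeze(0)
        x = self.resnet.conv1(x)
        x = self.resnet.bn1(x)
        x = self.resnet.relu(x)
        x = self.resnet.maxpool(x)
        x = self.resnet.layer1(x)
        x = self.resnet.layer2(x)
        x = self.resnet.layer3(x)
        x = self.resnet.layer4(x)
        # fc: global average pool of the last feature map -> (2048,)
        fc = x.mean(3).mean(2).squeeze()
        # att: att_size x att_size x 2048 spatial features
        att = F.adaptive_avg_pool2d(x, [att_size, att_size]).squeeze().permute(1, 2, 0)
        return fc, att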
def main(params):
    net = getattr(resnet, params['model'])()
    net.load_state_dict(
        torch.load('/datadrive/resnet_pretrianed_t7/' + params['model'] +
                   '.pth'))
    my_resnet = myResnet(net)
    my_resnet.cuda()
    my_resnet.eval()

    imgs = json.load(open(params['input_json'], 'r'))
    imgs = imgs['images']
    global N
    N = len(imgs)

    seed(123)  # make reproducible

    dir_fc = params['output_dir'] + '_fc'
    dir_att = params['output_dir'] + '_att'
    if not os.path.isdir(dir_fc):
        os.mkdir(dir_fc)
    if not os.path.isdir(dir_att):
        os.mkdir(dir_att)

    # for i,img in enumerate(imgs):
    p = Pool(4)
    p.map(image_preprocessing, imgs)
    p.close()
    p.join()

    print('wrote ', params['output_dir'])
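The Pool.map call above relies on an image_preprocessing worker that is not shown in this snippet. A hypothetical sketch of such a worker, reusing the per-image steps from the neighbouring examples (my_resnet, preprocess, params, dir_fc and dir_att are assumed to be visible to the worker; the real function may differ):

def image_preprocessing(img):
    # hypothetical worker: load one image, extract features, save fc/att to disk
    I = skimage.io.imread(os.path.join(params['images_root'], img['filepath'], img['filename']))
    if len(I.shape) == 2:  # grayscale: replicate to 3 channels
        I = I[:, :, np.newaxis]
        I = np.concatenate((I, I, I), axis=2)
    I = I.astype('float32') / 255.0
    I = torch.from_numpy(I.transpose([2, 0, 1])).cuda()
    with torch.no_grad():
        tmp_fc, tmp_att = my_resnet(preprocess(I), params['att_size'])
    np.save(os.path.join(dir_fc, str(img['cocoid'])), tmp_fc.data.cpu().float().numpy())
    np.savez_compressed(os.path.join(dir_att, str(img['cocoid'])),
                        feat=tmp_att.data.cpu().float().numpy())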
Example #3
    def __init__(self, opt):
        self.opt = opt
        self.coco_json = opt.get('coco_json', '')
        self.folder_path = opt.get('folder_path', '')

        self.batch_size = opt.get('batch_size', 1)
        self.seq_per_img = 1

        # Load resnet
        self.cnn_model = opt.get('cnn_model', 'resnet101')
        self.my_resnet = getattr(misc.resnet, self.cnn_model)()
        self.my_resnet.load_state_dict(
            torch.load('./data/imagenet_weights/' + self.cnn_model + '.pth'))
        self.my_resnet = myResnet(self.my_resnet)
        self.my_resnet.cuda()
        self.my_resnet.eval()

        # load the json file which contains additional information about the dataset
        print('DataLoaderRaw loading images from folder: ', self.folder_path)

        self.files = []
        self.ids = []

        print(len(self.coco_json))
        if len(self.coco_json) > 0:
            print('reading from ' + self.coco_json)
            # read in filenames from the coco-style json file
            self.coco_annotation = json.load(open(self.coco_json))
            for k, v in enumerate(self.coco_annotation['images']):
                fullpath = os.path.join(self.folder_path, v['file_name'])
                self.files.append(fullpath)
                self.ids.append(v['id'])
        else:
            # read in all the filenames from the folder
            print('listing all images in directory ' + self.folder_path)

            def isImage(f):
                supportedExt = [
                    '.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG', '.ppm',
                    '.PPM'
                ]
                for ext in supportedExt:
                    start_idx = f.rfind(ext)
                    if start_idx >= 0 and start_idx + len(ext) == len(f):
                        return True
                return False

            n = 1
            for root, dirs, files in os.walk(self.folder_path, topdown=False):
                for file in files:
                    fullpath = os.path.join(self.folder_path, file)
                    if isImage(fullpath):
                        self.files.append(fullpath)
                        self.ids.append(str(n))  # just order them sequentially
                        n = n + 1

        self.N = len(self.files)
        print('DataLoaderRaw found ', self.N, ' images')

        self.iterator = 0
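For reference, downstream evaluation code (see Example #20 below) constructs this loader from a plain dict of options, e.g.:

loader = DataLoaderRaw({
    'folder_path': 'path/to/your/images',  # hypothetical folder
    'coco_json': '',                       # empty string: just list the folder
    'batch_size': 1
})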
Example #4
def main(params):
    net = getattr(resnet,
                  params['model'])()  # look up the requested model in resnet and instantiate it
    net.load_state_dict(
        torch.load(os.path.join(params['model_root'],
                                params['model'] + '.pth')))  # load the pretrained weights
    my_resnet = myResnet(net)
    my_resnet.cuda()  # move to GPU
    my_resnet.eval()

    imgs = json.load(open(params['input_json'], 'r'))
    imgs = imgs['images']
    N = len(imgs)

    seed(123)  # make reproducible

    # set up the output directories
    dir_fc = params['output_dir'] + '_fc'
    dir_att = params['output_dir'] + '_att'
    if not os.path.isdir(dir_fc):
        os.mkdir(dir_fc)
    if not os.path.isdir(dir_att):
        os.mkdir(dir_att)

    with h5py.File(os.path.join(dir_fc, 'feats_fc.h5')) as file_fc,\
         h5py.File(os.path.join(dir_att, 'feats_att.h5')) as file_att:
        for i, img in enumerate(imgs):
            # load the image
            I = skimage.io.imread(
                os.path.join(params['images_root'], img['filepath'],
                             img['filename']))
            # handle grayscale input images
            if len(I.shape) == 2:  # grayscale image: add a channel axis
                I = I[:, :, np.newaxis]
                I = np.concatenate((I, I, I), axis=2)  # replicate to 3 channels

            # scale to [0, 1] and apply the normalization transform
            I = I.astype('float32') / 255.0
            I = torch.from_numpy(I.transpose([2, 0, 1])).cuda()
            I = Variable(preprocess(I), volatile=True)
            # run the image through the network to get fc and att features
            tmp_fc, tmp_att = my_resnet(I, params['att_size'])

            # write to hdf5
            # one dataset per image, keyed by cocoid and shaped to each feature
            d_set_fc = file_fc.create_dataset(str(img['cocoid']), (2048, ),
                                              dtype="float")
            d_set_att = file_att.create_dataset(str(
                img['cocoid']), (params['att_size'], params['att_size'], 2048),
                                                dtype="float")
            # write this image's features into the h5 files
            d_set_fc[...] = tmp_fc.data.cpu().float().numpy()
            d_set_att[...] = tmp_att.data.cpu().float().numpy()

            if i % 1000 == 0:  # report progress
                print('processing %d/%d (%.2f%% done)' % (i, N, i * 100.0 / N))
        file_fc.close()
        file_att.close()
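Features written this way can be read back by keying the h5 files on the same cocoid string; a minimal sketch (dir_fc, dir_att and cocoid are placeholders):

with h5py.File(os.path.join(dir_fc, 'feats_fc.h5'), 'r') as f_fc, \
     h5py.File(os.path.join(dir_att, 'feats_att.h5'), 'r') as f_att:
    fc = f_fc[str(cocoid)][...]   # shape (2048,)
    att = f_att[str(cocoid)][...] # shape (att_size, att_size, 2048)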
def main(params):
    net = getattr(resnet, params['model'])()
    net.load_state_dict(
        torch.load(os.path.join(params['model_root'],
                                params['model'] + '.pth')))
    my_resnet = myResnet(net)
    my_resnet.cuda()
    my_resnet.eval()

    imgs = json.load(open(params['input_json'], 'r'))
    imgs = imgs['images']
    N = len(imgs)

    seed(123)  # make reproducible

    dir_fc = params['output_dir'] + '_fc'
    dir_att = params['output_dir'] + '_att'
    if not os.path.isdir(dir_fc):
        os.mkdir(dir_fc)
    if not os.path.isdir(dir_att):
        os.mkdir(dir_att)

    for i, img in enumerate(imgs):
        # load the image
        I = skimage.io.imread(
            os.path.join(params['images_root'], img['filepath'],
                         img['filename']))
        # handle grayscale input images
        if len(I.shape) == 2:
            I = I[:, :, np.newaxis]
            I = np.concatenate((I, I, I), axis=2)

        if I.shape[0] > MAX_SIZE or I.shape[1] > MAX_SIZE:
            print('I shape', I.shape, img['filename'],
                  'resize to %d' % MAX_SIZE)
            if I.shape[0] > I.shape[1]:
                w = MAX_SIZE
                h = int(I.shape[1] * MAX_SIZE / I.shape[0])
            else:
                h = MAX_SIZE
                w = int(I.shape[0] * MAX_SIZE / I.shape[1])
            I = skimage.transform.resize(I, (w, h))
            print('after resize I.shape', I.shape)

        I = I.astype('float32') / 255.0
        I = torch.from_numpy(I.transpose([2, 0, 1])).cuda()
        I = Variable(preprocess(I), volatile=True)
        tmp_fc, tmp_att = my_resnet(I, params['att_size'])
        # write to pkl
        np.save(os.path.join(dir_fc, str(img['cocoid'])),
                tmp_fc.data.cpu().float().numpy())
        np.savez_compressed(os.path.join(dir_att, str(img['cocoid'])),
                            feat=tmp_att.data.cpu().float().numpy())

        if i % 1000 == 0:
            print('processing %d/%d (%.2f%% done)' % (i, N, i * 100.0 / N))
    print('wrote ', params['output_dir'])
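The per-image files saved above can be loaded back with numpy; a minimal sketch (the cocoid 123 is a placeholder):

fc = np.load(os.path.join(dir_fc, '123.npy'))             # shape (2048,)
att = np.load(os.path.join(dir_att, '123.npz'))['feat']   # shape (att_size, att_size, 2048)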
    def __init__(self, opt):
        self.opt = opt
        self.coco_json = opt.get('coco_json', '')
        self.folder_path = opt.get('folder_path', '')

        self.batch_size = opt.get('batch_size', 1)
        self.seq_per_img = 1

        # Load resnet
        self.cnn_model = opt.get('cnn_model', 'resnet101')
        self.my_resnet = getattr(misc.resnet, self.cnn_model)()
        self.my_resnet.load_state_dict(torch.load('./data/imagenet_weights/'+self.cnn_model+'.pth'))
        self.my_resnet = myResnet(self.my_resnet)
        self.my_resnet.cuda()
        self.my_resnet.eval()



        # load the json file which contains additional information about the dataset
        print('DataLoaderRaw loading images from folder: ', self.folder_path)

        self.files = []
        self.ids = []

        print(len(self.coco_json))
        if len(self.coco_json) > 0:
            print('reading from ' + self.coco_json)
            # read in filenames from the coco-style json file
            self.coco_annotation = json.load(open(self.coco_json))
            for k,v in enumerate(self.coco_annotation['images']):
                fullpath = os.path.join(self.folder_path, v['file_name'])
                self.files.append(fullpath)
                self.ids.append(v['id'])
        else:
            # read in all the filenames from the folder
            print('listing all images in directory ' + self.folder_path)
            def isImage(f):
                supportedExt = ['.jpg','.JPG','.jpeg','.JPEG','.png','.PNG','.ppm','.PPM']
                for ext in supportedExt:
                    start_idx = f.rfind(ext)
                    if start_idx >= 0 and start_idx + len(ext) == len(f):
                        return True
                return False

            n = 1
            for root, dirs, files in os.walk(self.folder_path, topdown=False):
                for file in files:
                    fullpath = os.path.join(self.folder_path, file)
                    if isImage(fullpath):
                        self.files.append(fullpath)
                        self.ids.append(str(n)) # just order them sequentially
                        n = n + 1

        self.N = len(self.files)
        print('DataLoaderRaw found ', self.N, ' images')

        self.iterator = 0
Example #7
    def run(self):
        if self.cnn_model_path != 'no_cnn_get':
            self.my_resnet = getattr(
                misc.resnet, self.cnn_model)(num_classes=self.num_classes)
            cnn_model = torch.load(self.cnn_model_path)
            #self.my_resnet.load_state_dict(torch.load(self.cnn_model_path).state_dict())
            self.my_resnet.load_state_dict(torch.load(self.cnn_model_path))
            self.my_resnet = myResnet(self.my_resnet)
            self.my_resnet.cuda()
            self.my_resnet.eval()
            self.cnnSignal.emit(self.my_resnet)
Example #8
def main(params):
    net = getattr(resnet, params['model'])()
    net.load_state_dict(
        torch.load(os.path.join(params['model_root'],
                                params['model'] + '.pth')))
    my_resnet = myResnet(net)
    my_resnet.cuda()
    my_resnet.eval()

    imgs = json.load(open(params['input_json'], 'r'))
    imgs = imgs['images']
    N = len(imgs)

    seed(123)  # make reproducible

    dir_fc = params['output_dir'] + '_fc'
    dir_att = params['output_dir'] + '_att'
    if not os.path.isdir(dir_fc):
        os.mkdir(dir_fc)
    if not os.path.isdir(dir_att):
        os.mkdir(dir_att)

    with h5py.File(os.path.join(dir_fc, 'feats_fc.h5')) as file_fc,\
         h5py.File(os.path.join(dir_att, 'feats_att.h5')) as file_att:
        for i, img in enumerate(imgs):
            # load the image
            I = skimage.io.imread(
                os.path.join(params['images_root'], img['filepath'],
                             img['filename']))
            # handle grayscale input images
            if len(I.shape) == 2:
                I = I[:, :, np.newaxis]
                I = np.concatenate((I, I, I), axis=2)

            I = I.astype('float32') / 255.0
            I = torch.from_numpy(I.transpose([2, 0, 1])).cuda()
            with torch.no_grad():
                I = Variable(preprocess(I))
                tmp_fc, tmp_att = my_resnet(I, params['att_size'])

            # write to hdf5
            d_set_fc = file_fc.create_dataset(str(img['cocoid']), (2048, ),
                                              dtype="float")
            d_set_att = file_att.create_dataset(str(
                img['cocoid']), (params['att_size'], params['att_size'], 2048),
                                                dtype="float")

            d_set_fc[...] = tmp_fc.cpu().float().numpy()
            d_set_att[...] = tmp_att.cpu().float().numpy()
            if i % 1000 == 0:
                print('processing %d/%d (%.2f%% done)' % (i, N, i * 100.0 / N))
        file_fc.close()
        file_att.close()
Example #9
def main(opt):
    jpg_path = opt.image_path
    out_dir = opt.out_dir
    model_path = opt.model

    net = getattr(resnet, 'resnet101')()
    net.load_state_dict(torch.load(model_path))
    my_resnet = myResnet(net)
    my_resnet.cuda()
    my_resnet.eval()

    imgs = []
    for subdir, dirs, files in os.walk(jpg_path):
        for f in files:
            f = f.strip()
            imgs.append(f)

    N = len(imgs)

    seed(123)  # make reproducible

    dir_fc = os.path.join(out_dir, opt.fc_dir)
    dir_att = os.path.join(out_dir, opt.att_dir)

    if not os.path.isdir(dir_fc):
        os.mkdir(dir_fc)
    if not os.path.isdir(dir_att):
        os.mkdir(dir_att)

    for i, img in enumerate(imgs):
        print(img)
        image_id = get_image_id(img)
        # load the image
        I = skimage.io.imread(os.path.join(jpg_path, img))

        # handle grayscale input images
        if len(I.shape) == 2:
            I = I[:, :, np.newaxis]
            I = np.concatenate((I, I, I), axis=2)

        I = I.astype('float32') / 255.0
        I = torch.from_numpy(I.transpose([2, 0, 1])).cuda()
        I = Variable(preprocess(I), volatile=True)
        tmp_fc, tmp_att = my_resnet(I, 14)
        # write to pkl
        np.save(os.path.join(dir_fc, str(image_id)),
                tmp_fc.data.cpu().float().numpy())
        np.savez_compressed(os.path.join(dir_att, str(image_id)),
                            feat=tmp_att.data.cpu().float().numpy())

        if i % 1000 == 0:
            print('processing %d/%d (%.2f%% done)' % (i, N, i * 100.0 / N))
Example #10
def main(params):
    # net = torch.hub.load('pytorch/vision:v0.5.0', 'resnet101', pretrained=True)
    net = models.resnet101(pretrained=True)
    #net = getattr(resnet, params['model'])()
    #net.load_state_dict(torch.load(os.path.join(params['model_root'],params['model']+'.pth')))
    my_resnet = myResnet(net)
    my_resnet.cuda()
    my_resnet.eval()

    imgs = json.load(open(params['input_json'], 'r'))
    imgs = imgs['images']
    N = len(imgs)

    seed(123)  # make reproducible

    dir_fc = params['output_dir'] + '_fc'
    dir_att = params['output_dir'] + '_att'
    if not os.path.isdir(dir_fc):
        os.mkdir(dir_fc)

    if not args.no_att:
        if not os.path.isdir(dir_att):
            os.mkdir(dir_att)

    for i, img in tqdm(enumerate(imgs), total=len(imgs)):
        # load the image
        I = skimage.io.imread(
            os.path.join(params['images_root'], img['filepath'],
                         img['filename']))
        # handle grayscale input images
        if len(I.shape) == 2:
            I = I[:, :, np.newaxis]
            I = np.concatenate((I, I, I), axis=2)

        I = I.astype('float32') / 255.0
        I = torch.from_numpy(I.transpose([2, 0, 1])).cuda()
        I = preprocess(I)
        with torch.no_grad():
            tmp_fc, tmp_att = my_resnet(I, params['att_size'])
        # write to pkl
        np.save(os.path.join(dir_fc, str(img['cocoid'])),
                tmp_fc.data.cpu().float().numpy())
        if not args.no_att:
            np.savez_compressed(os.path.join(dir_att, str(img['cocoid'])),
                                feat=tmp_att.data.cpu().float().numpy())

        # if i % 1000 == 0:
        #   print('processing %d/%d (%.2f%% done)' % (i, N, i*100.0/N))
    print('wrote ', params['output_dir'])
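These main(params) entry points are normally driven by an argparse front end that converts the parsed arguments into a dict. A minimal, hypothetical wrapper whose argument names follow the keys used above (your script's actual flags and defaults may differ):

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_json', required=True, help='json file with an "images" list')
    parser.add_argument('--output_dir', default='data/cocotalk', help='prefix for the _fc/_att output dirs')
    parser.add_argument('--images_root', default='', help='root folder of the raw images')
    parser.add_argument('--att_size', default=14, type=int, help='spatial size of the att features')
    parser.add_argument('--model', default='resnet101', help='resnet variant to load')
    parser.add_argument('--model_root', default='./data/imagenet_weights', help='folder holding the .pth weights')
    args = parser.parse_args()
    main(vars(args))  # main() indexes params like a plain dict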
Example #11
def prepo_feats_init(params_p):
    global net, my_resnet, imgs, N, params
    params = params_p
    #terence edit end
    net = getattr(resnet, params['model'])()
    net.load_state_dict(
        torch.load(os.path.join(params['model_root'],
                                params['model'] + '.pth')))
    my_resnet = myResnet(net)
    my_resnet.cuda()
    my_resnet.eval()
    imgs = json.load(open(params['input_json_original'], 'r'))
    imgs = imgs['images']
    N = len(imgs)
    seed(123)  # make reproducible
Example #12
def main(params):
    net = getattr(resnet, params['model'])()
    net.load_state_dict(
        torch.load(os.path.join(params['model_root'],
                                params['model'] + '.pth')))
    my_resnet = myResnet(net)
    my_resnet.cuda()
    my_resnet.eval()

    imgs = json.load(open(params['input_json'], 'r'))
    imgs = imgs['images']
    N = len(imgs)

    seed(123)  # make reproducible

    dir_fc = params['output_dir'] + '_fc'
    dir_att = params['output_dir'] + '_att'
    if not os.path.isdir(dir_fc):
        os.mkdir(dir_fc)
    if not os.path.isdir(dir_att):
        os.mkdir(dir_att)

    with h5py.File(os.path.join(dir_fc, 'feats_fc.h5')) as file_fc,\
         h5py.File(os.path.join(dir_att, 'feats_att.h5')) as file_att:
        for i, img in enumerate(tqdm(imgs)):
            I = skimage.io.imread(
                os.path.join(params['images_root'], img['filepath'],
                             img['filename']))

            if len(I.shape) == 2:
                I = skimage.color.gray2rgb(I)

            I = torch.from_numpy(I.transpose([2, 0, 1])).cuda()
            I = I.float() / 255.0

            I = normalize(I)
            I = Variable(I, volatile=True)

            tmp_fc, tmp_att = my_resnet(I, params['att_size'])

            # write to hdf5
            file_fc.create_dataset(str(img['cocoid']),
                                   data=tmp_fc.data.cpu().numpy(),
                                   dtype=np.float32)
            file_att.create_dataset(str(img['cocoid']),
                                    data=tmp_att.data.cpu().numpy(),
                                    dtype=np.float32)
Example #13
def main(params):
  net = getattr(resnet, params['model'])() # from pytorch, insert model
  net.load_state_dict(torch.load(os.path.join(params['model_root'],params['model']+'.pth'))) # insert pretrained model
  my_resnet = myResnet(net) # to my understanding, this is used to output attribute layer
  my_resnet.cuda()
  my_resnet.eval()

  imgs = json.load(open(params['input_json'], 'r'))
  imgs = imgs['images']
  N = len(imgs)

  seed(123) # make reproducible

  dir_fc = params['output_dir']+'_fc'
  dir_att = params['output_dir']+'_att'
  if not os.path.isdir(dir_fc):
    os.mkdir(dir_fc)
  if not os.path.isdir(dir_att):
    os.mkdir(dir_att)

  with h5py.File(os.path.join(dir_fc, 'feats_fc.h5')) as file_fc,\
       h5py.File(os.path.join(dir_att, 'feats_att.h5')) as file_att:
    for i, img in enumerate(imgs):
      # load the image
      I = skimage.io.imread(os.path.join(params['images_root'], img['filepath'], img['filename']))
      # handle grayscale input images
      if len(I.shape) == 2:
        I = I[:,:,np.newaxis]
        I = np.concatenate((I,I,I), axis=2) # duplicate the grayscale image for three times

      I = I.astype('float32')/255.0
      I = torch.from_numpy(I.transpose([2,0,1])).cuda()
      I = Variable(preprocess(I), volatile=True)
      tmp_fc, tmp_att = my_resnet(I, params['att_size']) # get fc layer and att layer

      # write to hdf5
      d_set_fc = file_fc.create_dataset(str(img['cocoid']),  # cocoid is the file name of coco, or the index of image in pascal
        (2048,), dtype="float") # 2048 is the fc layer output
      d_set_att = file_att.create_dataset(str(img['cocoid']), 
        (params['att_size'], params['att_size'], 2048), dtype="float") # 14x14x2048 for attention

      d_set_fc[...] = tmp_fc.data.cpu().float().numpy()
      d_set_att[...] = tmp_att.data.cpu().float().numpy()
      if i % 1000 == 0:
        print('processing %d/%d (%.2f%% done)' % (i, N, i*100.0 / N))
    file_fc.close()
    file_att.close()
def main(params):
  net = getattr(resnet, params['model'])()
  import IPython
  IPython.embed()
  net.load_state_dict(torch.load('/datadrive/resnet_pretrianed_t7/'+params['model']+'.pth'))
  my_resnet = myResnet(net)
  my_resnet.cuda()
  my_resnet.eval()

  imgs = json.load(open(params['input_json'], 'r'))
  imgs = imgs['images']
  N = len(imgs)

  seed(123) # make reproducible

  dir_fc = params['output_dir']+'_fc'
  dir_att = params['output_dir']+'_att'
  if not os.path.isdir(dir_fc):
    os.mkdir(dir_fc)
  if not os.path.isdir(dir_att):
    os.mkdir(dir_att)

  for i,img in enumerate(imgs):
    # load the image

    if int(str(img['filename'])[-5]) % 5 != params['endding']:
        continue

    I = skimage.io.imread(os.path.join(params['images_root'], img['filepath'], img['filename']))
    # handle grayscale input images
    if len(I.shape) == 2:
      I = I[:,:,np.newaxis]
      I = np.concatenate((I,I,I), axis=2)

    I = I.astype('float32')/255.0
    I = torch.from_numpy(I.transpose([2,0,1])).cuda()
    I = Variable(preprocess(I), volatile=True)
    tmp_fc, tmp_att = my_resnet(I, params['att_size'])
    # write to pkl
    np.save(os.path.join(dir_fc, str(img['cocoid'])), tmp_fc.data.cpu().float().numpy())
    np.savez_compressed(os.path.join(dir_att, str(img['cocoid'])), feat=tmp_att.data.cpu().float().numpy())

    if i % 1000 == 0:
      print('processing %d/%d (%.2f%% done)' % (i, N, i*100.0/N))
  print('wrote ', params['output_dir'])
Example #15
def main(params):
    net = getattr(resnet, params['model'])()
    net.load_state_dict(torch.load(os.path.join(
        params['model_root'], params['model'] + '.pth')))
    my_resnet = myResnet(net)
    my_resnet.cuda()
    my_resnet.eval()

    imgs = json.load(open(params['input_json'], 'r'))
    imgs = imgs['images']
    N = len(imgs)

    seed(123)  # make reproducible

    dir_fc = params['output_dir'] + '_fc'
    dir_att = params['output_dir'] + '_att'
    if not os.path.isdir(dir_fc):
        os.mkdir(dir_fc)
    if not os.path.isdir(dir_att):
        os.mkdir(dir_att)

    for i, img in enumerate(imgs):
        # load the image
        I = skimage.io.imread(os.path.join(
            params['images_root'], img['filepath'], img['filename']))
        # handle grayscale input images
        if len(I.shape) == 2:
            I = I[:, :, np.newaxis]
            I = np.concatenate((I, I, I), axis=2)

        I = I.astype('float32') / 255.0
        I = torch.from_numpy(I.transpose([2, 0, 1])).cuda()
        I = Variable(preprocess(I), volatile=True)
        tmp_fc, tmp_att = my_resnet(I, params['att_size'])
        # write to pkl
        np.save(os.path.join(dir_fc, str(
            img['cocoid'])), tmp_fc.data.cpu().float().numpy())
        np.savez_compressed(os.path.join(dir_att, str(
            img['cocoid'])), feat=tmp_att.data.cpu().float().numpy())

        if i % 1000 == 0:
            print('processing %d/%d (%.2f%% done)' % (i, N, i * 100.0 / N))
    print('wrote ', params['output_dir'])
Example #16
    def __init__(self):

        # Load resnet
        self.size = 100
        self.cnn_model =  'resnet101'
        self.my_resnet = getattr(misc.resnet, self.cnn_model)()
        self.my_resnet.load_state_dict(torch.load('./data/imagenet_weights/' + self.cnn_model + '.pth'))
        self.my_resnet = myResnet(self.my_resnet)
        self.my_resnet.cuda()
        self.my_resnet.eval()
        self.path = os.listdir('/mnt/poplin/tmp/nakamura_M1/MMexercise/dataset/tmp')
        self.path.sort()
        self.imgs = np.zeros((len(self.path),3,self.size,self.size),dtype = np.float32)
        self.tmp_atts = np.zeros((len(self.path),14,14,2048))
        self.tmp_fcs = np.zeros((len(self.path),2048))
        self.batch = 10
        i = 0
        for i in range(len(self.path)):
            name = '/mnt/poplin/tmp/nakamura_M1/MMexercise/dataset/tmp/' + self.path[i]
            img = cv2.imread(name)
            img = cv2.resize(img,(self.size,self.size))
            img = img.astype('float32') / 255.0
            img = img.transpose([2, 0, 1])
            self.imgs[i] += img
def main(params):
    data = json.load(open(params['input_json'], 'r'))
    #imgs = imgs['images']

    seed(123)  # make reproducible
    #shuffle(imgs)  # shuffle the order
    imgs = data["images"]
    prepro_captions(imgs)

    # create the vocab
    vocab = build_vocab(imgs, params)
    itow = {i + 1: w
            for i, w in enumerate(vocab)
            }  # a 1-indexed vocab translation table
    wtoi = {w: i + 1 for i, w in enumerate(vocab)}  # inverse table

    # done

    # assign the splits
    assign_splits(imgs, params)

    # encode captions in large arrays, ready to ship to hdf5 file
    L, label_start_ix, label_end_ix, label_length = encode_captions(
        imgs, params, wtoi)

    import misc.resnet as resnet
    resnet_type = 'resnet152'
    if resnet_type == 'resnet101':
        #resnet = resnet.resnet101()
        #resnet.load_state_dict(torch.load('resnet/resnet101.pth'))
        resnet = models.resnet101()

    else:
        resnet = resnet.resnet152()
        resnet.load_state_dict(torch.load('resnet/resnet152.pth'))
    my_resnet = myResnet(resnet)
    my_resnet.cuda()
    my_resnet.eval()

    # create output h5 file
    N = len(imgs)
    f_lb = h5py.File(params['output_h5'] + '_' + resnet_type + '_label.h5',
                     "w")
    f_fc = h5py.File(params['output_h5'] + '_' + resnet_type + '_fc.h5', "w")
    f_att = h5py.File(params['output_h5'] + '_' + resnet_type + '_att.h5', "w")
    f_lb.create_dataset("labels", dtype='uint32', data=L)
    f_lb.create_dataset("label_start_ix", dtype='uint32', data=label_start_ix)
    f_lb.create_dataset("label_end_ix", dtype='uint32', data=label_end_ix)
    f_lb.create_dataset("label_length", dtype='uint32', data=label_length)
    f_lb.close()

    exit()
    ### extract features
    dset_fc = f_fc.create_dataset("fc", (N, 2048), dtype='float32')
    dset_att = f_att.create_dataset("att", (N, 14, 14, 2048), dtype='float32')
    for i, img in enumerate(imgs):
        # load the image
        real_path = img['filepath'] + "/" + img['filename']
        I = skimage.io.imread(
            os.path.join(params['images_root'] + "/",
                         real_path))  # note the path
        # handle grayscale input images
        if len(I.shape) == 2:
            I = I[:, :, np.newaxis]
            I = np.concatenate((I, I, I), axis=2)

        I = I.astype('float32') / 255.0
        I = torch.from_numpy(I.transpose([2, 0, 1])).cuda()
        I = Variable(preprocess(I), volatile=True)
        tmp_fc, tmp_att = my_resnet(I)
        # write to h5
        dset_fc[i] = tmp_fc.data.cpu().float().numpy()
        dset_att[i] = tmp_att.data.cpu().float().numpy()
        if i % 1000 == 0:
            print('processing %d/%d (%.2f%% done)' % (i, N, i * 100.0 / N))
    f_fc.close()
    f_att.close()
    print('wrote ', params['output_h5'])

    # create output json file
    out = {}
    out['ix_to_word'] = itow  # encode the (1-indexed) vocab
    out['images'] = []
    for i, img in enumerate(imgs):

        jimg = {}
        jimg['split'] = img['split']
        if 'filepath' in img:
            jimg['filepath'] = img['filepath']  # copy it over, might need
        if 'id' in img:
            jimg['id'] = img[
                'id']  # copy over & maintain an id, if present (e.g. coco ids, useful)

        out['images'].append(jimg)

    json.dump(out, open(params['output_json'], 'w'))
    print('wrote ', params['output_json'])
Example #18
def main(params):
    assert params['feature_type'] in ['fc', 'conv', 'both']
    compute_fc = params['feature_type'] == 'fc' or params[
        'feature_type'] == 'both'
    compute_conv = params['feature_type'] == 'conv' or params[
        'feature_type'] == 'both'

    net = getattr(resnet, params['model'])()
    net.load_state_dict(
        torch.load(os.path.join(params['model_root'],
                                params['model'] + '.pth')))
    my_resnet = myResnet(net)
    my_resnet.cuda()
    my_resnet.eval()

    if compute_fc:
        dir_fc = os.path.join(params['out_dir'], 'fc')
        if not os.path.exists(dir_fc):
            os.makedirs(dir_fc)
    if compute_conv:
        dir_conv = os.path.join(params['out_dir'], 'conv')
        if not os.path.exists(dir_conv):
            os.makedirs(dir_conv)

    for split in ['train', 'val', 'test']:
        count = 0
        if compute_fc and not os.path.exists(os.path.join(dir_fc, split)):
            os.makedirs(os.path.join(dir_fc, split))
        if compute_conv and not os.path.exists(os.path.join(dir_conv, split)):
            os.makedirs(os.path.join(dir_conv, split))

        files = glob.glob("{}/{}/*.jpg".format(params['img_dir'], split))
        start = time.time()
        for file in files:
            count += 1
            basename = os.path.basename(file)
            img_id = splitext(basename)[0]
            try:
                I = imread(file)
            except:
                I = np.zeros((224, 224, 3), 'float32')

            # handle grayscale input frames
            if len(I.shape) == 2:
                I = I[:, :, np.newaxis]
                I = np.concatenate((I, I, I), axis=2)

            I = I.astype('float32') / 255.0
            I = torch.from_numpy(I.transpose([2, 0, 1])).cuda()
            I = Variable(preprocess(I), volatile=True)
            tmp_fc, tmp_conv = my_resnet(I, params['att_size'])

            # write to pkl
            if compute_fc:
                np.save(os.path.join(dir_fc, split, img_id),
                        tmp_fc.data.cpu().float().numpy())
            if compute_conv:
                np.savez_compressed(os.path.join(dir_conv, split, img_id),
                                    tmp_conv.data.cpu().float().numpy())

            if count % 100 == 0:
                print(
                    'processing {} set -- {}/{} {:.3}%, time used: {}s'.format(
                        split, count, len(files), count * 100.0 / len(files),
                        time.time() - start))
                start = time.time()
def main(params):
  
  net = getattr(resnet, params['model'])()
  net.load_state_dict(torch.load(os.path.join(params['model_root'],params['model']+'.pth')))
  my_resnet = myResnet(net)
  my_resnet.cuda()
  my_resnet.eval()
  

  # You should set which inception net you want to use.
  net_name = "inceptionv4"
  
  inception_net = None
  
  if(net_name == "inceptionv4"):
    inception_net = inceptionv4()
  elif (net_name == "inceptionresnetv2"):
    inception_net = inceptionresnetv2()
  
  else:
    print("Invalid inception net name")
    return(-1)  
  
  inception_net.cuda()
  inception_net.eval()

  print("Done loading inception")
  imgs = json.load(open(params['input_json'], 'r'))
  imgs = imgs['images']
  N = len(imgs)

  seed(123) # make reproducible

  # This program will not use fc
  #dir_fc = params['output_dir']+'_fc'
  dir_att = params['output_dir']+'_att'
  
  #if not os.path.isdir(dir_fc):
  #  os.mkdir(dir_fc)
  
  if not os.path.isdir(dir_att):
    os.mkdir(dir_att)

  for i,img in enumerate(imgs):
    if True:
      #print(i)
      #print(img['cocoid'])
      # load the image
      I = skimage.io.imread(os.path.join(params['images_root'], img['filepath'], img['filename']))
      
      # images that should be resized
      # 99907, 100398 (187714)
      if((i==99907) or (i == 100398)):
        I = cv2.resize(I, None,fx=2, fy=2)
      #print(I.shape)
      # handle grayscale input images
      if len(I.shape) == 2:
        I = I[:,:,np.newaxis]
        I = np.concatenate((I,I,I), axis=2)

      I = I.astype('float32')/255.0
      I = torch.from_numpy(I.transpose([2,0,1])).cuda()
      I = preprocess(I)
      #print(I.shape)
      with torch.no_grad():
        #tmp_fc, tmp_att = my_resnet(I, params['att_size'])
        try:
          tmp_fc, tmp_att = inception_net(I)
        except:
          print("error with this image")
          print(i)
          print(I.shape)
      # write to pkl
      #np.save(os.path.join(dir_fc, str(img['cocoid'])), tmp_fc.data.cpu().float().numpy())
      np.savez_compressed(os.path.join(dir_att, str(img['cocoid'])), feat=tmp_att.data.cpu().float().numpy())

    if i % 1000 == 0:
      print('processing %d/%d (%.2f%% done)' % (i, N, i*100.0/N))
  print('wrote ', params['output_dir'])
Example #20
    loader = DataLoader(opt)
else:
    loader = DataLoaderRaw({
        'folder_path': opt.image_folder,
        'coco_json': opt.coco_json,
        'batch_size': opt.batch_size
    })
    loader.ix_to_word = infos['vocab']

# fc_feats = np.random.rand(1, 2048)
# att_feats = np.random.rand(1, 14, 14, 2048)
# compute features given a new image
net = getattr(resnet, 'resnet101')()
net.load_state_dict(
    torch.load('/datadrive/resnet_pretrianed_t7/resnet101.pth'))
my_resnet = myResnet(net)
my_resnet.cuda()
my_resnet.eval()

# filename = 'coco_test.jpg'
import wget
url = opt.demo_image
filename = wget.download(url)
# filename = opt.demo_image

I = skimage.io.imread(filename)
if len(I.shape) == 2:
    I = I[:, :, np.newaxis]
    I = np.concatenate((I, I, I), axis=2)
if I.shape[2] >= 3:
    I = I[:, :, :3]
Example #21
def main(opt):
    params = vars(opt)
    sys.path.append('/home/nakamura/project/selfsequential')
    sys.path.append('/home/nakamura/project/python3_selfsequential')
    net = getattr(resnet, params['model'])()
    net.load_state_dict(torch.load(os.path.join(params['model_root'], params['model'] + '.pth')))
    my_resnet = myResnet(net)
    my_resnet.cuda()
    my_resnet.eval()

    imgs = json.load(open(params['input_json'], 'r'))
    imgs = imgs['images']

    N = len(imgs)

    seed(123)  # make reproducible

    dir_fc = params['output_dir'] + '_fc'
    dir_att = params['output_dir'] + '_att'

    bu_infos = json.load(open(opt.bu_info))

    for num, bu_info in enumerate(bu_infos):
        id = bu_info['id']

        if os.path.exists(opt.image_root + '/' + str(id) + '.jpg'):
            I = skimage.io.imread(opt.image_root + '/' + str(id) + '.jpg')
        else:
            I = skimage.io.imread(opt.image_root + '_2/' + str(id) + '.jpg')

        if len(I.shape) == 2:
            I = I[:, :, np.newaxis]
            I = np.concatenate((I, I, I), axis=2)

        rate = I.shape[1] / bu_info['shape'][1]

        I = I.astype('float32') / 255.0

        boxes = bu_info['box']
        dets = np.array(boxes).astype(np.int32)  # boxes is a plain list loaded from JSON

        att_feats = np.zeros((36, 2048))

        for k, region in enumerate(dets):
            I_ = torch.from_numpy(I.transpose([2, 0, 1])).cuda()
            I_ = preprocess(I_)

            if np.ceil((region[3]) * rate) - I.shape[0] > 0 and np.ceil((region[2]) * rate) - I.shape[1] > 0:
                I__ = I_ * 0.0
                print('zero')
            else:
                y_0 = int(np.floor(region[1] * rate))
                y_1 = int(np.ceil((region[3]) * rate))
                x_0 = int(np.floor(region[0] * rate))
                x_1 = int(np.ceil((region[2]) * rate))
                I__ = I_[:, y_0:y_1, x_0:x_1]

            try:
                with torch.no_grad():
                    tmp_fc, tmp_att = my_resnet(I__, params['att_size'])
            except RuntimeError:
                I__ = I_ * 0.0
                with torch.no_grad():
                    tmp_fc, tmp_att = my_resnet(I__, params['att_size'])
            att_feats[k] += tmp_fc.data.cpu().float().numpy()

        np.savez_compressed(os.path.join(dir_att, str(id)), feat=att_feats)

        if num % 1000 == 0:
            print('processing %d/%d (%.2f%% done)' % (num, len(bu_infos), num * 100.0 / len(bu_infos)))
Example #22
def main(params):
    net = getattr(resnet, params['model'])()
    net.load_state_dict(
        torch.load(os.path.join(params['model_root'],
                                params['model'] + '.pth')))
    my_resnet = myResnet(net)
    # print(my_resnet.state_dict())
    # pdb.set_trace()
    my_resnet.cuda()
    # my_resnet.half()
    my_resnet.eval()

    # imgs = json.load(open(params['input_json'], 'r'))
    # imgs = imgs['images']
    # N = len(imgs)

    seed(123)  # make reproducible

    dir_fc = params['output_dir'] + '_fc'
    dir_att = params['output_dir'] + '_att'
    if not os.path.isdir(dir_fc):
        os.mkdir(dir_fc)
    if not os.path.isdir(dir_att):
        os.mkdir(dir_att)

    img_dir = params['input_img_dir']
    filenames = []
    for (dirpath, dirnames, filenames) in os.walk(img_dir):
        # f.extend(filenames)
        break

    N = len(filenames)

    for i, img in enumerate(tqdm.tqdm(filenames)):
        # load the image
        try:
            I = skimage.io.imread(os.path.join(img_dir, img))
        except Exception as e:
            print('Removing {} due to {}'.format(os.path.join(img_dir, img), e))
            # os.remove(os.path.join(img_dir, img))
            continue
        # handle grayscale input images
        if len(I.shape) == 2:
            I = I[:, :, np.newaxis]
            I = np.concatenate((I, I, I), axis=2)

        I = I.astype('float32') / 255.0
        I = torch.from_numpy(I.transpose([2, 0, 1])).cuda()
        # I = torch.from_numpy(I.transpose([2,0,1]))
        I = Variable(preprocess(I), volatile=True)
        try:
            # for obj in gc.get_objects():
            # if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)):
            # print(type(obj), obj.size())

            tmp_fc, tmp_att = my_resnet(I, params['att_size'])
        except Exception as e:
            print('Removing {} due to {}'.format(os.path.join(img_dir, img), e))
            # os.remove(os.path.join(img_dir, img))
            continue
        # write to pkl
        np.save(os.path.join(dir_fc, str(img)),
                tmp_fc.data.cpu().float().numpy())
        np.savez_compressed(os.path.join(dir_att, str(img)),
                            feat=tmp_att.data.cpu().float().numpy())

        if i % 1000 == 0:
            print('processing %d/%d (%.2f%% done)' % (i, N, i * 100.0 / N))
    print('wrote ', params['output_dir'])
Example #23
    def __init__(self,
                 is_relative=True,
                 model_path=None,
                 image_feat_params=None):
        opt = type('Opt', (), {})()  # simple attribute container; a bare object() cannot take attributes
        # inputs specific to shoe dataset
        infos_path = os.path.join(model_path, 'infos_best.pkl')
        model_name = os.path.join(model_path, 'model_best.pth')

        opt.infos_path = infos_path
        opt.model = model_name
        opt.beam_size = 1
        opt.load_resnet = False

        with open(opt.infos_path, 'rb') as f:
            infos = cPickle.load(f)
            # KS: infos is a dictionary; among others, it has a field split_ix,
            # which has the form {'train': [], 'val': [], 'test': []}
            # train_ix = infos["split_ix"]["train"]
            # val_ix = infos["split_ix"]["val"]
            # test_ix = infos["split_ix"]["test"]
            # print(len(train_ix), len(val_ix), len(test_ix))

        # override and collect parameters
        if len(opt.input_fc_dir) == 0:
            opt.input_fc_dir = infos['opt'].input_fc_dir
            opt.input_att_dir = infos['opt'].input_att_dir
            opt.input_label_h5 = infos['opt'].input_label_h5
        if len(opt.input_json) == 0:
            opt.input_json = infos['opt'].input_json
        if opt.batch_size == 0:
            opt.batch_size = infos['opt'].batch_size
        if len(opt.id) == 0:
            opt.id = infos['opt'].id
        ignore = [
            "id", "batch_size", "beam_size", "start_from", "language_eval",
            "model"
        ]
        for k in vars(infos['opt']).keys():
            if k not in ignore:
                if k in vars(opt):
                    assert vars(opt)[k] == vars(
                        infos['opt'])[k], k + ' option not consistent'
                else:
                    vars(opt).update({k: vars(infos['opt'])[k]
                                      })  # copy over options from model

        vocab = infos['vocab']  # ix -> word mapping

        # Setup the model
        model = models.setup(opt)

        # remap to CPU, even if checkpoint was saved from GPU
        state_dict = torch.load(opt.model,
                                map_location=lambda storage, location: storage)
        model.load_state_dict(state_dict)
        model.to(DEVICE)

        model.eval()

        self.is_relative = is_relative
        self.model = model
        self.vocab = vocab
        self.opt = vars(opt)

        if opt.load_resnet:
            net = getattr(resnet, image_feat_params['model'])()
            net.load_state_dict(
                torch.load(
                    os.path.join(image_feat_params['model_root'],
                                 image_feat_params['model'] + '.pth')))
            my_resnet = myResnet(net)
            my_resnet.to(DEVICE)
            my_resnet.eval()

            my_resnet_batch = ResNetBatch(net)
            my_resnet_batch.to(DEVICE)

            self.my_resnet_batch = my_resnet_batch
            self.my_resnet = my_resnet
        self.att_size = image_feat_params['att_size']
Example #24
def main(params):
    if params['vocab_size']:
        net = getattr(resnet, params['model'])(pretrained=False,
                                               vocab_size=params['vocab_size'])
    else:
        net = getattr(resnet, params['model'])()

    net.load_state_dict(
        torch.load(os.path.join(params['model_root'],
                                params['model'] + '.pth')))
    # Or load using network
    # if params['model'] == 'resnet101':
    #   net.load_state_dict(models.resnet101(pretrained=True))
    # elif params['model'] == 'resnet152':
    #   net.load_state_dict(models.resnet152(pretrained=True))

    my_resnet = myResnet(net)
    my_resnet.cuda()
    my_resnet.eval()

    imgs = json.load(open(params['input_json'], 'r'))
    imgs = imgs['images']
    N = len(imgs)

    seed(123)  # make reproducible

    dir_fc = params['output_dir'] + '_fc'
    dir_att = params['output_dir'] + '_att'
    if not os.path.isdir(dir_fc):
        os.mkdir(dir_fc)
    if not os.path.isdir(dir_att):
        os.mkdir(dir_att)

    for i, img in enumerate(imgs):
        # load the image
        ''' skimage
    I = skimage.io.imread(
            os.path.join(
                params['images_root'], 
                img['filepath'], 
                img['filename']))
    # handle grayscale input images
    if len(I.shape) == 2:
      I = I[:,:,np.newaxis]
      I = np.concatenate((I,I,I), axis=2)

    I = resize(I, (224, 224, 3), anti_aliasing=True)
    I = I.astype('float32')/255.0
    I = torch.from_numpy(I.transpose([2,0,1])).cuda()
    '''
        I = Image.open(
            os.path.join(params['images_root'], params['images_path'],
                         img['file_name'])).convert('RGB')
        I = preprocess(I).cuda()
        with torch.no_grad():
            tmp_fc, tmp_att = my_resnet(I, params['att_size'],
                                        params['visual_concepts'])
        # write to pkl
        np.save(os.path.join(dir_fc, str(img['id'])),
                tmp_fc.data.cpu().float().numpy())
        np.savez_compressed(os.path.join(dir_att, str(img['id'])),
                            feat=tmp_att.data.cpu().float().numpy())

        if i % 1000 == 0:
            print('processing %d/%d (%.2f%% done)' % (i, N, i * 100.0 / N))

        # break

    print('wrote ', params['output_dir'])
Example #25
def main(args):
    # Load vocabulary wrapper
    with open(cfg.vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    cfg.vocab_size = len(vocab)

    if args.raw_image == "":
        img_path = load_from_tiny(args.idx)
    else:
        img_path = args.raw_image

    print(img_path)
    img = skimage.io.imread(img_path)
    img = img.astype('float32') / 255.0
    img = torch.from_numpy(img.transpose([2, 0, 1]))
    img = preprocess(img)
    img = img.cuda()

    # Build models
    my_resnet = getattr(misc.resnet, 'resnet101')()
    my_resnet.load_state_dict(torch.load(
        './data/imagenet_weights/resnet101.pth'))
    my_resnet = myResnet(my_resnet)
    my_resnet.eval()
    my_resnet.cuda()

    cfg.use_cuda = True
    cfg.id = args.id
    cfg.start_from = args.start_from
    cfg.max_seq_length = args.max_seq_length
    cfg.caption_model = args.caption_model
    decoder = models.setup(cfg)
    decoder.cuda()
    decoder.eval()

    with torch.no_grad():
        feature, att_feats = my_resnet(img)
    feature = feature.unsqueeze(0)
    att_feats = att_feats.view(-1, att_feats.size(-1))
    att_feats = att_feats.unsqueeze(0)
    print(att_feats.size())

    # tree = decoder.greedy_search(feature, vocab, args.max_seq_length)
    # init_state, init_logprobs = decoder.beam_from_scratch(feature)
    # candidates = decoder.beam_search(init_state, init_logprobs, 5, 5)
    # candidates = decoder.beam_test(feature, vocab, 5, 5)
    print("greedy search:\n")
    # word_idx, father_idx, mask = decoder._greedy_search(feature, att_feats, vocab, args.max_seq_length)
    word_idx, father_idx, mask, seqLogprobs = decoder._sample(feature, att_feats, args.max_seq_length)
    print(word_idx)
    print(father_idx)
    ratio = utils.seq2ratio(father_idx, mask)
    print(mask)
    mask = torch.cat([mask.new(mask.size(0), 1).fill_(1), mask[:, :-1]], 1)
    print(mask)
    print(ratio)
    exit(0)

    words = [vocab.idx2word[word_idx[0][i].item()] for i in range(word_idx.size(1))]
    for i in range(word_idx.size(1)):
        print(i, word_idx[0][i].item(), words[i], father_idx[0][i].item(), mask[0][i].item())
    print(words)
    sents = utils.decode_sequence(vocab, word_idx, father_idx, mask)
    print(mask)
    print(sents)
    exit(0)
    logprob = [_.logprob for _ in tree.nodes.values()]
    logprob = reduce(lambda x, y: x + y, logprob)
    print("logprob: {}".format(logprob / len(tree.nodes) ** 0.8))
    print(tree.root.lex)
    print(tree.__str__())
    # with open('gs.dot', 'w') as f:
    #     f.write(tree.graphviz())
    # with open('gs_logprob.dot', 'w') as f:
    #     f.write(tree.graphviz_info())

    return

    print("\nbeam search:\n")
    candidates, completed_sentences = decoder.beam_search(feature, vocab, args.max_seq_length, args.global_beam_size,
                                                          args.local_beam_size)

    for cs in completed_sentences:
        cs.logprob /= len(cs.nodes) ** 2.0
    completed_sentences = sorted(completed_sentences, key=lambda x: -x.logprob)
    candidates = sorted(candidates, key=lambda x: -x.logprob)

    with open('bs_0.dot', 'w') as f:
        f.write(completed_sentences[0].graphviz())

    with open('bs_1.dot', 'w') as f:
        f.write(completed_sentences[1].graphviz())

    print("\ncompleted sentences:")
    for candidate in completed_sentences:
        print("logprob: {}, sentence: {}".format(candidate.logprob, candidate.__str__()))
        # comment here for checking
        # logprob = [_.logprob for _ in candidate.nodes.values()]
        # print("logprob from every node: {}".format(reduce(lambda x, y: x + y, logprob)))
    print("\npartial sentences:")
    for candidate in candidates:
        print("logprob: {}, sentence: {}".format(candidate.logprob, candidate.__str__()))
Example #26
from torchvision import transforms as trn

preprocess = trn.Compose([
    # trn.ToPILImage(),
    # trn.Scale(256),
    # trn.ToTensor(),
    trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
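As used throughout these snippets, preprocess is applied to a CHW float tensor already scaled to [0, 1] (the commented-out ToPILImage/Scale/ToTensor steps are skipped because the tensor is built by hand), e.g.:

I = torch.from_numpy(I.transpose([2, 0, 1])).float() / 255.0  # HWC -> CHW, scale to [0, 1]
I = preprocess(I)  # ImageNet mean/std normalization only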

from misc.resnet_utils import myResnet

resnet = models.resnet101()
resnet.load_state_dict(
    torch.load('/home-nfs/rluo/rluo/model/resnet101-5d3b4d8f.pth'))
my_resnet = myResnet(resnet)
my_resnet.cuda()
my_resnet.eval()


def build_vocab(imgs, params):
    count_thr = params['word_count_threshold']

    # count up the number of words
    counts = {}
    for img in imgs:
        for sent in img['sentences']:
            for w in sent['tokens']:
                counts[w] = counts.get(w, 0) + 1
    cw = sorted([(count, w) for w, count in counts.items()], reverse=True)
    print('top words and their counts:')
def main(params):
    data = json.load(open(params['input_json'], 'r'))
    #imgs = imgs['images']

    seed(123)  # make reproducible
    #shuffle(imgs)  # shuffle the order
    imgs = data["images"]
    prepro_captions(imgs)

    # create the vocab
    vocab = build_vocab(imgs, params)
    itow = {i + 1: w for i, w in enumerate(vocab)}  # a 1-indexed vocab translation table
    wtoi = {w: i + 1 for i, w in enumerate(vocab)}  # inverse table

    # done 

    # assign the splits
    assign_splits(imgs, params)

    # encode captions in large arrays, ready to ship to hdf5 file
    L, label_start_ix, label_end_ix, label_length = encode_captions(imgs, params, wtoi)

    
    import misc.resnet as resnet
    resnet_type = 'resnet152'
    if resnet_type == 'resnet101':
        resnet = resnet.resnet101()
        resnet.load_state_dict(torch.load('resnet/resnet101.pth'))
    else:
        resnet = resnet.resnet152()
        resnet.load_state_dict(torch.load('resnet/resnet152.pth'))
    my_resnet = myResnet(resnet)
    my_resnet.cuda()
    my_resnet.eval()

    # create output h5 file
    N = len(imgs)
    f_lb = h5py.File(params['output_h5'] + '_'+ resnet_type +'_label.h5', "w")
    f_fc = h5py.File(params['output_h5'] + '_'+ resnet_type +'_fc.h5', "w")
    f_att = h5py.File(params['output_h5'] + '_'+ resnet_type +'_att.h5', "w")
    f_lb.create_dataset("labels", dtype='uint32', data=L)
    f_lb.create_dataset("label_start_ix", dtype='uint32', data=label_start_ix)
    f_lb.create_dataset("label_end_ix", dtype='uint32', data=label_end_ix)
    f_lb.create_dataset("label_length", dtype='uint32', data=label_length)
    f_lb.close()

    #exit()
    ### extract features
    dset_fc = f_fc.create_dataset("fc", (N, 2048), dtype='float32')
    dset_att = f_att.create_dataset("att", (N, 14, 14, 2048), dtype='float32')
    for i, img in enumerate(imgs):
        # load the image
        real_path = img['filepath'] + "/" + img['filename']
        I = skimage.io.imread(os.path.join(params['images_root'],real_path))  # note the path 
        # handle grayscale input images
        if len(I.shape) == 2:
            I = I[:, :, np.newaxis]
            I = np.concatenate((I, I, I), axis=2)

        I = I.astype('float32') / 255.0
        I = torch.from_numpy(I.transpose([2, 0, 1])).cuda()
        I = Variable(preprocess(I), volatile=True)
        tmp_fc, tmp_att = my_resnet(I)
        # write to h5
        dset_fc[i] = tmp_fc.data.cpu().float().numpy()
        dset_att[i] = tmp_att.data.cpu().float().numpy()
        if i % 1000 == 0:
            print('processing %d/%d (%.2f%% done)' % (i, N, i * 100.0 / N))
    f_fc.close()
    f_att.close()
    print('wrote ', params['output_h5'])

    # create output json file
    out = {}
    out['ix_to_word'] = itow  # encode the (1-indexed) vocab
    out['images'] = []
    for i, img in enumerate(imgs):

        jimg = {}
        jimg['split'] = img['split']
        if 'filepath' in img: jimg['filepath'] = img['filepath']  # copy it over, might need
        if 'id' in img: jimg['id'] = img['id']  # copy over & maintain an id, if present (e.g. coco ids, useful)

        out['images'].append(jimg)

    json.dump(out, open(params['output_json'], 'w'))
    print('wrote ', params['output_json'])
Example #28
    def __init__(self,
                 is_relative=True,
                 model_path=None,
                 image_feat_params=None):
        opt = type('Opt', (), {})()  # simple attribute container; a bare object() cannot take attributes
        # inputs specific to shoe dataset
        infos_path = os.path.join(model_path, 'infos_best.pkl')
        model_name = os.path.join(model_path, 'model_best.pth')

        opt.infos_path = infos_path
        opt.model = model_name
        opt.beam_size = 1
        opt.load_resnet = False

        with open(opt.infos_path, 'rb') as f:
            infos = cPickle.load(f)

        # override and collect parameters
        if len(opt.input_fc_dir) == 0:
            opt.input_fc_dir = infos['opt'].input_fc_dir
            opt.input_att_dir = infos['opt'].input_att_dir
            opt.input_label_h5 = infos['opt'].input_label_h5
        if len(opt.input_json) == 0:
            opt.input_json = infos['opt'].input_json
        if opt.batch_size == 0:
            opt.batch_size = infos['opt'].batch_size
        if len(opt.id) == 0:
            opt.id = infos['opt'].id
        ignore = [
            "id", "batch_size", "beam_size", "start_from", "language_eval",
            "model"
        ]
        for k in vars(infos['opt']).keys():
            if k not in ignore:
                if k in vars(opt):
                    assert vars(opt)[k] == vars(
                        infos['opt'])[k], k + ' option not consistent'
                else:
                    vars(opt).update({k: vars(infos['opt'])[k]
                                      })  # copy over options from model

        vocab = infos['vocab']  # ix -> word mapping

        # Setup the model
        model = models.setup(opt)
        if torch.cuda.is_available():
            model.load_state_dict(torch.load(opt.model))
            model.cuda()
        else:
            model.load_state_dict(
                torch.load(opt.model, map_location={'cuda:0': 'cpu'}))

        model.eval()

        self.is_relative = is_relative
        self.model = model
        self.vocab = vocab
        self.opt = vars(opt)

        if opt.load_resnet:
            net = getattr(resnet, image_feat_params['model'])()
            net.load_state_dict(
                torch.load(
                    os.path.join(image_feat_params['model_root'],
                                 image_feat_params['model'] + '.pth')))
            my_resnet = myResnet(net)
            if torch.cuda.is_available():
                my_resnet.cuda()
            my_resnet.eval()

            my_resnet_batch = ResNetBatch(net)
            if torch.cuda.is_available():
                my_resnet_batch.cuda()

            self.my_resnet_batch = my_resnet_batch
            self.my_resnet = my_resnet
        self.att_size = image_feat_params['att_size']
Example #29
                              })  # copy over options from model

vocab = infos['vocab']  # ix -> word mapping

model = models.setup(opt)
if use_cuda == False:
    model.load_state_dict(torch.load(opt.model, map_location='cpu'))
else:
    model.load_state_dict(torch.load(opt.model))
    model.cuda()
model.eval()

my_resnet = getattr(misc.resnet, 'resnet101')()
my_resnet.load_state_dict(
    torch.load('./data/imagenet_weights/' + 'resnet101' + '.pth'))
my_resnet = myResnet(my_resnet)
if use_cuda:
    my_resnet.cuda()
my_resnet.eval()
batch_size = 1
info_struct = {}
info_struct['id'] = 0
info_struct['file_path'] = ''
infos = []
infos.append(info_struct)
data = {}
data['bounds'] = {'it_pos_now': 0, 'it_max': 1, 'wrapped': True}
data['infos'] = infos

img = skimage.io.imread(opt.image)
#img = skimage.io.imread('silicon_test_images/cellphone.jpg')