def prepare_for_rec(out_dir):

    meta = {}

    meanImg = readAndResize('/database/test.jpg',SIZE, CHANNELS)
    meanImgstd = readAndResize('/database/test.jpg', SIZE_STD, CHANNELS)

    meta['data_mean'] = meanImg
    meta['data_mean_std'] = meanImgstd
    util.pickle(os.path.join(out_dir,"batches.meta"), meta)
    

    trainingMeta = util.unpickle(os.path.join(out_dir, "trainingMeta.meta"))

    testMeta = util.unpickle(os.path.join(out_dir, "testMeta.meta"))

    print "prepare for training"
    random.shuffle(trainingMeta)
    make_list_batches(trainingMeta,out_dir,NUM_PER_PATCH)



    #for test
    print "prepare for test"
    random.shuffle(testMeta)
    make_list_batches(testMeta,out_dir,NUM_PER_PATCH,8000)
def collectImgByClass(inFolder, outFolder):
    subFolderList = os.listdir(inFolder)
    list.sort(subFolderList)
    for index, foldername in enumerate(subFolderList):
        print foldername
        dataCol = collectOneClass(os.path.join(inFolder, foldername))
        util.pickle(outFolder +"/"+ foldername, dataCol)
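Every example on this page relies on util.pickle / util.unpickle; a minimal sketch of those helpers, assuming simple cPickle file wrappers (Example No. 24 below calls util.pickle with a single argument, so the real module may offer more forms than this):

import cPickle

def pickle(filename, obj):
    # write obj to filename with the highest cPickle protocol
    fo = open(filename, 'wb')
    cPickle.dump(obj, fo, cPickle.HIGHEST_PROTOCOL)
    fo.close()

def unpickle(filename):
    # read back an object written by pickle() above
    fo = open(filename, 'rb')
    obj = cPickle.load(fo)
    fo.close()
    return obj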
Example No. 3
def processTest(test_list, out_dir, startIdx):
    global NUM_PER_PATCH
    meta = util.unpickle(os.path.join(out_dir, "batches.meta"))
    allLabels = meta['label_names']


    fileList = open(test_list,'rb').readlines()
    random.shuffle(fileList)

    print "####### Got %d classes ######" % len(allLabels)
    print "####### Got %d images ######" % len(fileList)

    numImg = len(fileList)
    numBatches = numImg / NUM_PER_PATCH # the last batch keeps the remainder
    if numImg % NUM_PER_PATCH != 0:
        numBatches += 1

    print 'Going to make %d batches' % numBatches
    for idx_batch in range(numBatches):
        #        if idx_batch < numBatches - 2:
        #            continue
        print "### Making the %dth batch ###" % idx_batch
        b_start = NUM_PER_PATCH * idx_batch
        b_end = NUM_PER_PATCH * (idx_batch + 1)
        if idx_batch == numBatches - 1:
            b_start = numImg - NUM_PER_PATCH
            b_end = numImg
        batchMeta = fileList[b_start:b_end]

        data, labels, imgnames = getBatch(batchMeta,allLabels)
        out_fname = os.path.join(out_dir, "data_batch_%04d" % (idx_batch+startIdx))
        print "saving to %s" % out_fname
        util.pickle(out_fname, {'data':data, 'labels':labels, 'images':imgnames})
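The slicing above keeps every batch exactly NUM_PER_PATCH long: the final window is slid back so it ends at numImg, which means its leading images can overlap the previous batch. A tiny illustration with assumed sizes:

numImg, NUM_PER_PATCH = 10, 4
numBatches = numImg / NUM_PER_PATCH
if numImg % NUM_PER_PATCH != 0:
    numBatches += 1
for idx_batch in range(numBatches):
    b_start = NUM_PER_PATCH * idx_batch
    b_end = NUM_PER_PATCH * (idx_batch + 1)
    if idx_batch == numBatches - 1:
        b_start, b_end = numImg - NUM_PER_PATCH, numImg
    print (b_start, b_end)   # -> (0, 4), (4, 8), (6, 10); images 6 and 7 repeat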
def makeBatches(allImgMeta, out_dir, batchSize, startIdx = 0):
    numImg = len(allImgMeta)
    numBatches = numImg / batchSize # the last batch keeps the remainder
    if numImg % batchSize != 0:
        numBatches += 1

    print 'Going to make %d batches' % numBatches
    for idx_batch in range(numBatches):
        #        if idx_batch < numBatches - 2:
        #            continue
        print "### Making the %dth batch ###" % idx_batch
        b_start = batchSize * idx_batch
        b_end = batchSize * (idx_batch + 1)
        if idx_batch == numBatches - 1:
            b_start = numImg - batchSize
            b_end = numImg
        batchMeta = allImgMeta[b_start:b_end]
        data, labels = getBatch(batchMeta)
        labels1 = labels//2
        #labels2 = labels//3
        #labels3 = labels//4

        out_fname = os.path.join(out_dir, "data_batch_%04d" % (idx_batch+startIdx))
        print "saving to %s" % out_fname
        util.pickle(out_fname, {'data':data, 'labels':labels, 'data1':data, 'labels1':labels1})
Example No. 5
def prepareTest(Test_dir, stdImgfolder, out_dir, meanImg_dir, startIdx):
    global NUM_PER_BATCH
  
    allImgMeta, allLabels = collectAndShuffle(Test_dir, stdImgfolder)
  
    makeBatches(allImgMeta, out_dir, NUM_PER_BATCH, startIdx)
    out_file = out_dir + "/imglist"
    util.pickle(out_file, allImgMeta)
Example No. 6
def collectImgByName(InputFolder, outFolder):
    flist = os.listdir(InputFolder)
    flist = [os.path.join(InputFolder, fname) for fname in flist]
    for  foldername in flist:
        print foldername
        dataCol = collectOneClass(foldername,SIZE,CHANNELS)
        ind = foldername.rfind("/")
        folder = foldername[ind+1:]
        util.pickle(outFolder +"/"+ folder, dataCol)
Example No. 7
    def create_model(self, model_class=LinearSVC, save=True):
        self.model = model_class()
        start = time()
        self.model.fit(self.x_train, self.y_train)
        log('Training time : {:.0f}s'.format(time() - start))
        log('Test accuracy : {:.2f}%'.format(100 * self.model.score(self.x_test, self.y_test)))

        if save:
            log('Pickling model')
            pickle(self.model, self.parameters["pickle_model"])
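For context, a self-contained sketch of the same fit/score/pickle flow on toy data; the dataset, split, and output file name here are illustrative assumptions, not from the source:

from time import time
import cPickle
from sklearn.svm import LinearSVC
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

x, y = make_classification(n_samples=200, n_features=20)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
model = LinearSVC()
start = time()
model.fit(x_train, y_train)
print 'Training time : {:.0f}s'.format(time() - start)
print 'Test accuracy : {:.2f}%'.format(100 * model.score(x_test, y_test))
cPickle.dump(model, open('model.p', 'wb'), cPickle.HIGHEST_PROTOCOL)  # 'model.p' is a made-up path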
Example No. 8
def doGetMeanImgStd(stdImgfolder, meanImgFolder):
    global SIZE_STD, CHANNELS , SIZE
    #print SIZE_STD
    dataCol = collectOneClass(stdImgfolder,SIZE_STD,CHANNELS)
    dataSum = np.sum(dataCol, axis=0, dtype = np.float64)
    globalSum = dataSum
    globalCount = dataCol.shape[0]
    meanImg = globalSum / globalCount 
    #print globalSum
    #meanImg, info=getMeanImg(stdImgfolder)    
    util.pickle(meanImgFolder+"/meanImgStd", meanImg)
def prepareTrain(folderCls, imgStdCls, meanImg_dir, out_dir):
    global NUM_PER_BATCH
    
    meanImg = util.unpickle(meanImg_dir + '/meanImg')
    meanImgStd = util.unpickle(meanImg_dir + '/meanImgStd')
    allImgMeta = collectAndShuffle(folderCls, imgStdCls)
    meta = {}
    meta['data_mean'] = meanImg
    meta['data_mean_std'] = meanImgStd
    util.pickle( os.path.join(out_dir, "batches.meta"), meta)

    makeBatches(allImgMeta, out_dir, NUM_PER_BATCH)
    out_file = out_dir + "/imglist"
    util.pickle(out_file, allImgMeta)
Example No. 10
def prepareTrain(train_dir, out_dir, meanImg_dir):
    global NUM_PER_PATCH
    #train_dir = "/data1/LSVRC2010/train"
    #out_dir = "/data2/ILSVRC2010/train_batches"
    
    meanImg = util.unpickle(meanImg_dir + '/meanImg')
    allImgMeta, allLabels = collectAndShuffle(train_dir)
    meta = {}
    meta['data_mean'] = meanImg
    meta['label_names'] = allLabels
    util.pickle( os.path.join(out_dir, "batches.meta"), meta)

    makeBatches(allImgMeta, out_dir, NUM_PER_PATCH)
    out_file = out_dir + "/imglist"
    util.pickle(out_file, [allImgMeta, allLabels])
Example No. 11
def prepareTrain(train_dir, stdImgfolder, out_dir, meanImg_dir, startIdx):
    global NUM_PER_BATCH


    meanImg = util.unpickle(meanImg_dir + '/meanImg')
    meanImgStd = util.unpickle(meanImg_dir + '/meanImgStd')
    allImgMeta, allLabels = collectAndShuffle(train_dir, stdImgfolder)
    meta = {}
    meta['data_mean'] = meanImg
    meta['data_mean_std'] = meanImgStd
    util.pickle( os.path.join(out_dir, "batches.meta"), meta)

    makeBatches(allImgMeta, out_dir, NUM_PER_BATCH, startIdx)
    out_file = out_dir + "/imglist"
    util.pickle(out_file, allImgMeta)
def getMeanImgStd(imgStdCls, meanImgFolder):
    global SIZE_STD, STD_CHANNELS
    globalSum = np.zeros(SIZE_STD*SIZE_STD*STD_CHANNELS, dtype=np.float64)
    globalCount = 0
    data = np.zeros((1, SIZE_STD*SIZE_STD*STD_CHANNELS), dtype = np.uint8)
    for ind in range(len(imgStdCls)):
        fullname = imgStdCls[ind]
        print "Reading", fullname
        data[0,:] = readAndResize(fullname, SIZE_STD, STD_CHANNELS)
        dataSum = np.sum(data, axis=0, dtype = np.float64)
        globalSum += dataSum
        globalCount += data.shape[0]  
        
    meanImg = globalSum / globalCount
    util.pickle(meanImgFolder+"/meanImgStd", meanImg)
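The per-image accumulation above can be collapsed into one vectorized pass; a sketch assuming all resized images fit in memory, with read_fn standing in for readAndResize:

import numpy as np

def mean_img_vectorized(img_files, size, channels, read_fn):
    # one row per image, averaged once in float64
    data = np.vstack([read_fn(f, size, channels) for f in img_files])
    return data.mean(axis=0, dtype=np.float64)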
Example No. 13
    def create_features(self, save=True):
        param = self.parameters
        vehicles = param['vehicles']
        non_vehicles = param['non_vehicles']
        log('Vehicles     : {}'.format(len(vehicles)))
        log('Non-vehicles : {}'.format(len(non_vehicles)))

        orient, pix_per_cell, cell_per_block = param['orient'], param['pix_per_cell'], param['cell_per_block']
        extract_features_partial = partial(extract_features,
                                           color_space=param['color_space'],
                                           spatial_size=param['spatial_size'],
                                           hist_bins=param['hist_bins'],
                                           orient=param['orient'],
                                           pix_per_cell=param['pix_per_cell'],
                                           cell_per_block=param['cell_per_block'],
                                           hog_channel=param['hog_channel'],
                                           spatial_feat=param['spatial_feat'],
                                           hist_feat=param['hist_feat'],
                                           hog_feat=param['hog_feat'])

        vehicles_features = extract_features_partial(vehicles)
        non_vehicles_features = extract_features_partial(non_vehicles)

        x = np.vstack((vehicles_features, non_vehicles_features)).astype(np.float64)
        self.x_scaler = StandardScaler().fit(x)
        x_scaled = self.x_scaler.transform(x)

        # Define the labels vector
        y = np.hstack((np.ones(len(vehicles)), np.zeros(len(non_vehicles))))

        # Split up data into randomized training and test sets
        self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(x_scaled, y,
                                                                                test_size=param['test_size'])

        log(
            'Using {} orientations {} pixels per cell {} cells per block'.format(orient, pix_per_cell, cell_per_block))
        log('Feature vector length: {}'.format(self.x_train.shape[0]))

        if save:
            log('Pickling features')
            pickle(
                {'x_train': self.x_train, 'x_test': self.x_test, 'y_train': self.y_train, 'y_test': self.y_test,
                 'x_scaler': self.x_scaler, 'parameters': param},
                param["pickle_features"]
            )
Example No. 14
def make_list_batches(allImgMeta, out_dir, batchSize, startIdx = 0):
    numImg = len(allImgMeta)
    numBatches = numImg / batchSize # the last batch keeps the remainder
    if numImg % batchSize != 0:
        numBatches += 1

    print 'Going to make %d batches' % numBatches
    for idx_batch in range(numBatches):
        #        if idx_batch < numBatches - 2:
        #            continue
        print "### Making the %dth batch ###" % idx_batch
        b_start = batchSize * idx_batch
        b_end = batchSize * (idx_batch + 1)
        if idx_batch == numBatches - 1:
            b_start = numImg - batchSize
            b_end = numImg
            b_end = numImg
        batchMeta = allImgMeta[b_start:b_end]
        data, dataStd, imgnames = get_list_batch(batchMeta)

        out_fname = os.path.join(out_dir, "data_batch_%04d" % (idx_batch+startIdx))
        print "saving to %s" % out_fname
        util.pickle(out_fname, {'data':data, 'data_std':dataStd, 'name':imgnames})
Example No. 15
def main():
   num_args = len(sys.argv)
   # load result from file
   num_nets = num_args - 1

   assert( num_nets > 0 )
   errors = []

   # 0th net
   # result['labels']
   # result['preds']
   result = unpickle( sys.argv[1] ) 
   errors.append( evaluate_result( result, sys.argv[1] ) )
   num_batches = len( result['labels'] )

   #import pdb; pdb.set_trace()
   # collect all results
   for ii in range( num_nets - 1 ):
      result_ii = unpickle( sys.argv[ii+2] )
      # evaluate result_ii
      errors.append( evaluate_result( result_ii, sys.argv[ii+2] ) )
      # check the number of batches is consistent
      num_batches_ii = len( result_ii['labels'] )
      assert( num_batches_ii == num_batches )
      for jj in range( num_batches ):
         # check labels are consistent
         assert( np.array_equal( 
            result_ii['labels'][jj], result['labels'][jj] ) )
         # accumulate result_ii's predictions into result['preds'][jj]
         result['preds'][jj] += result_ii['preds'][jj]
         
   pickle( 'combine_result', result )

   # classifier mean/std accuracy
   errors = np.array( errors )
   #import pdb; pdb.set_trace()
   print "mean: " , str(100*np.mean( errors )) , " std: " , str(100*(np.std( errors )))
   # evaluate result
   evaluate_result( result, "After combine" )
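evaluate_result is not shown on this page; a hedged sketch of what it plausibly computes, assuming result['preds'][j] holds per-image class scores and result['labels'][j] the true class indices (the real helper may differ):

import numpy as np

def evaluate_result(result, name):
    # top-1 error rate over all batches
    wrong, total = 0, 0
    for preds, labels in zip(result['preds'], result['labels']):
        wrong += np.sum(np.argmax(preds, axis=1) != labels)
        total += len(labels)
    err = float(wrong) / total
    print "%s: %.2f%% error" % (name, 100 * err)
    return err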
Example No. 16
def preprocessing():
    folder = r'D:\work\sunxiuyu\SVHN\large-lcn'
    #outfolder = r'D:\work\sunxiuyu\SVHN\svhn-valid-large-1'
    datasize = 32 * 32 * 3
    #meta = util.unpickle(metafile)
    #mean = np.zeros(datasize,np.double)
    num = 0
    begin = 25
    for i in range(begin, begin + 16):
        batch_file = os.path.join(folder, 'data_batch_' + str(i))
        print batch_file
        buffer = util.unpickle(batch_file)
        data = buffer['data']
        dim2 = len(data)
        data = np.transpose(data)
        dim1 = len(data)
        print dim1
        newbuffer = np.zeros((dim1, dim2), np.single)
        for j in range(0, len(data)):  # j, not i: avoid clobbering the batch index
            img1 = data[j].reshape(3, 32, 32)
            img = np.zeros((32, 32, 3), np.single)
            result = np.zeros((3, 32, 32), np.single)
            img[:, :, 0] = img1[0, :, :]
            img[:, :, 1] = img1[1, :, :]
            img[:, :, 2] = img1[2, :, :]
            #cv2.imshow( "img1",img )
            #cv2.waitKey(0)
            result[0, :, :] = img[:, :, 0]
            result[1, :, :] = img[:, :, 1]
            result[2, :, :] = img[:, :, 2]
            # as written, result equals img1; the round trip only converts
            # the dtype to single before flattening
            #print result[0,:,:]
            newbuffer[j] = result.reshape(3 * 32 * 32)
        newbuffer = np.transpose(newbuffer)
        buffer['data'] = newbuffer
        util.pickle(batch_file, buffer)
    return
def makeBatches(allImgMeta, out_dir, batchSize, startIdx = 0):
    numImg = len(allImgMeta)
    numBatches = numImg / batchSize # the last batch keeps the remainder
    if numImg % batchSize != 0:
        numBatches += 1

    print 'Going to make %d batches' % numBatches
    for idx_batch in range(numBatches):
        #        if idx_batch < numBatches - 2:
        #            continue
        print "### Making the %dth batch ###" % idx_batch
        b_start = batchSize * idx_batch
        b_end = batchSize * (idx_batch + 1)
        if idx_batch == numBatches - 1:
            b_start = numImg - batchSize
            b_end = numImg
        batchMeta = allImgMeta[b_start:b_end]
        # modified by shao, 20140410
        data, dataStd, imgNames = getBatch(batchMeta)

        out_fname = os.path.join(out_dir, "data_batch_%04d" % (idx_batch+startIdx))
        print "saving to %s" % out_fname
        # modified by shao, 20140410
        util.pickle(out_fname, {'data':data, 'data_std':dataStd, 'img_name': imgNames})
Example No. 18
def prepareTrain(train_list, outFolder,startIdx):
    global NUM_PER_PATCH
    fileList = open(train_list,'rb').readlines()
    random.shuffle(fileList)

    if len(fileList) < Mean_Img_Num:
         num_mean_img = len(fileList)
    else:
         num_mean_img = Mean_Img_Num

    data = np.zeros((SIZE*SIZE*CHANNELS, num_mean_img), dtype=np.uint8)

    for i in range(0,num_mean_img):
        str1 = fileList[i].strip()
        tmp = str1.split(':')
        data[:,i] = load_img_data(tmp[0])


    

    tmp = data.transpose()  # (num_mean_img, SIZE*SIZE*CHANNELS); reshape would scramble pixels
    dataSum = np.sum(tmp, axis=0, dtype = np.float64)

    globalCount = tmp.shape[0]
    meanImg = dataSum / globalCount
    util.pickle(outFolder+"/meanImg", meanImg)
    
    allLabels = []
    allImgMeta = []
    for line in fileList:
        str1 = line.strip()
        tmp = str1.split(':')
        length = len(tmp)
        label = tmp[1]
        #print label
        if label not in allLabels:
            allLabels.append(label)            

    print "####### Got %d classes ######" % len(allLabels)
    meta = {}
    meta['data_mean'] = meanImg
    meta['label_names'] = allLabels
    util.pickle( os.path.join(outFolder, "batches.meta"), meta)




    numImg = len(fileList)
    numBatches = numImg / NUM_PER_PATCH # the last batch keeps the remainder
    if numImg % NUM_PER_PATCH != 0:
        numBatches += 1

    print 'Going to make %d batches' % numBatches
    for idx_batch in range(numBatches):
        #        if idx_batch < numBatches - 2:
        #            continue
        print "### Making the %dth batch ###" % idx_batch
        b_start = NUM_PER_PATCH * idx_batch
        b_end = NUM_PER_PATCH * (idx_batch + 1)
        if idx_batch == numBatches - 1:
            b_start = numImg - NUM_PER_PATCH
            b_end = numImg
        batchMeta = fileList[b_start:b_end]

        data, labels, imgnames = getBatch(batchMeta,allLabels)
        out_fname = os.path.join(outFolder, "data_batch_%04d" % (startIdx+idx_batch))
        print "saving to %s" % out_fname
        util.pickle(out_fname, {'data':data, 'labels':labels, 'images':imgnames})
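prepareTrain reads train_list as colon-separated path:label lines (see the split(':') above); a hypothetical two-line sample, with invented paths:

/data/images/n01440764_001.jpg:n01440764
/data/images/n01443537_002.jpg:n01443537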
Example No. 19
        image_path = out['image_path'][i]
        im = Image.open(image_path)
        #if cls_index == 6 and index == 0:
        #    im.show()
        #    import pdb; pdb.set_trace()
        assert (im.size == (IMAGE_SIZE, IMAGE_SIZE))
        im_value = PIL2array(im).astype(n.float32)
        m = m * (1.0 * num / (num + 1))
        m = m + im_value / (num + 1)
        num += 1
        if VERIFY_RESULT:
            sum_m += im_value
        #print "\r" + str(i) + "/" + str(len(provider.data_list))
        stdout.write("%4d/%%4d " % (cls_index + 1) % num_class)
        stdout.write("%8d/%%8d\r" % (index + 1) %
                     len(out['index_map_train'][cls_index]))
        stdout.flush()
print "\n"

if VERIFY_RESULT:
    sum_m /= num
    diff = sum_m - m
    plot_array_image(diff, (IMAGE_SIZE, IMAGE_SIZE, 3))
    print n.mean(abs(diff))
    plot_array_image(m, (IMAGE_SIZE, IMAGE_SIZE, 3))

mean_info = {}
mean_info['data'] = m
mean_info['file'] = INPUT_FILE
pickle(OUTPUT_FILE, mean_info)
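The update m = m * num/(num + 1) + x/(num + 1) used above is the standard incremental mean; a quick numeric check on made-up values:

import numpy as np

xs = np.array([2.0, 4.0, 9.0])
m, num = 0.0, 0
for x in xs:
    m = m * (1.0 * num / (num + 1)) + x / (num + 1)
    num += 1
assert abs(m - xs.mean()) < 1e-12   # m == 5.0, the plain arithmetic mean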
Example No. 20
def cifar100toimages():
    file = r'D:\work\sunxiuyu\cifar-100-python\test'
    outfolder = r'D:\work\sunxiuyu\cifar-10-py-colmajor\tmp'
    dict = util.unpickle(file)
    numclass = np.array(dict['fine_labels'], np.int).max()
    fine_classes = {}
    mean = np.zeros(32 * 32 * 3, dtype=np.double)
    for i in range(0, len(dict['data'])):
        mean += dict['data'][i]
    mean = mean / len(dict['data'])

    metafile = r'D:\work\sunxiuyu\cifar-10-py-colmajor\cifar10\batches.meta'
    outmetafile = r'D:\work\sunxiuyu\cifar-10-py-colmajor\cifar-100\batches.meta'
    meta = util.unpickle(metafile)
    for key in meta:
        print key
    meta['label_names'] = [str(i) for i in range(0, 100)]
    meta['data_mean'] = meta['data_mean'].reshape(3072)
    meta['data_mean'][0:1024] = mean[2048:3072]  #b
    meta['data_mean'][1024:2048] = mean[1024:2048]  #g
    meta['data_mean'][2048:3072] = mean[0:1024]  #r
    util.pickle(outmetafile, meta)
    return  # early return: the image-export code below is unreachable as written

    for i in range(0, len(dict['data'])):
        fine_classes_idx = dict['fine_labels'][i]
        #m_data = dict['data'][i] - mean
        m_data = dict['data'][i]
        if fine_classes_idx in fine_classes:
            fine_classes[fine_classes_idx].append({
                'data':
                m_data,
                'label':
                dict['fine_labels'][i],
                'filename':
                dict['filenames'][i]
            })
        else:
            fine_classes[fine_classes_idx] = [{
                'data': m_data,
                'label': dict['fine_labels'][i],
                'filename': dict['filenames'][i]
            }]
        pass

    # random shuffle
    for i in range(0, len(fine_classes)):
        indexs = range(0, len(fine_classes[i]))
        np.random.shuffle(indexs)
        fine_classes[i] = [fine_classes[i][x] for x in indexs]

    #save image patches
    if not os.path.exists(outfolder):
        os.makedirs(outfolder)
    for i in range(0, len(fine_classes)):
        class_folder = os.path.join(outfolder, str(i))
        if not os.path.exists(class_folder):
            os.makedirs(class_folder)
        for image in fine_classes[i]:
            #save image patch
            m_data = image['data']
            r = m_data[0:1024]
            g = m_data[1024:2048]
            b = m_data[2048:3072]

            r = r.reshape(32, 32)
            g = g.reshape(32, 32)
            b = b.reshape(32, 32)

            color_img = np.zeros((32, 32, 3), dtype=np.uint8)
            color_img[:, :, 0] = b[:, :]
            color_img[:, :, 1] = g[:, :]
            color_img[:, :, 2] = r[:, :]

            imagefile = os.path.join(class_folder, image['filename'])
            cv2.imwrite(imagefile, color_img)
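The channel swaps above follow from the layouts in play: each CIFAR row stores 32x32 R, G and B planes back to back, while cv2.imwrite expects an HxWx3 BGR array. A compact helper expressing the same conversion (a sketch, not from the source):

import numpy as np

def cifar_row_to_bgr(row):
    # row: length-3072 vector laid out as [R plane | G plane | B plane]
    planes = row.reshape(3, 32, 32)
    # reverse the plane order (R,G,B -> B,G,R), then put channels last
    return np.transpose(planes[::-1], (1, 2, 0)).astype(np.uint8)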
s = ''
for i,v in enumerate(val): s = s + str(i) + ': ' + str(len(v)) + ', '
print s
print 'Loaded ' + str(nval) + ' validation images in ' + str(nclasses) + ' classes'
print 'Validation blacklist: ' + str(n_blacklist)
print 'Validation total: ' + str(nval + n_blacklist)

# save data ####################################################################
out = {}
out['train'] = train
out['val'] = val
out['classes_map'] = classes_map
out['classes'] = classes
out['num_data'] = ntrain + nval
out['num_data_train'] = ntrain
out['num_data_val'] = nval
pickle(data_file, out)
call(["chmod", "777", data_file]) # change mode so that can be read by others
print 'Saved data to ' + data_file

# save smaller versions ########################################################
for n in [10, 100]:
   print 'Creating smaller dataset with ' + str(n) + ' classes'
   train_small = []
   val_small = []
   classes_small = []
   classes_map_small = []
   ntrain = 0
   nval = 0
   for i in range(0,1000,1000/n):
      train_small.append(train[i])
      val_small.append(val[i])
Example No. 22
    idx = 1
    word_vectors.append(np.zeros([1, word_vec_dim], np.float32))
    for word in vocab:
        try:
            word_vectors.append(model[word].reshape([1, -1]))
            word_to_idx.append(idx)
            idx += 1
        except Exception, e:
            word_to_idx.append(0)
    word_vectors = np.concatenate(word_vectors, axis=0)
    num = word_vectors.shape[0]
    assert (num == idx)
    assert (len(word_to_idx) == len(vocab))
    print('%d words in corpus' % idx)
    pickle('data/vqa_word2vec_model.pkl', {'vocab': vocab,
                                           'word2idx': word_to_idx,
                                           'word_vectors': word_vectors})
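Under this construction, row 0 of word_vectors is the all-zero embedding reserved for words missing from the model, and word2idx is aligned with vocab order; a lookup sketch based on those assumptions:

def lookup(word, vocab, word_to_idx, word_vectors):
    # words absent from the word2vec model were mapped to index 0 (zeros)
    if word in vocab:
        return word_vectors[word_to_idx[vocab.index(word)]]
    return word_vectors[0]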


if __name__ == '__main__':
    # vocab = get_combined_vocabulary()
    # slice_word2vec_model(vocab)
    encoder = Word2VecEncoder('data/vqa_word2vec_model.pkl')


    def debug_encoder_once(s):
        print s
        print encoder.encode(s)


    debug_encoder_once(['<S>', '12', '</S>'])
Example No. 23
def test():
    # Build the inference graph.
    config = QuestionGeneratorConfig()
    reader = TFRecordDataFetcher(FLAGS.input_files, config.image_feature_key)

    # Create model creator
    model_creator = create_model_fn(FLAGS.model_type)

    # create multiple choice question manger
    mc_manager = MultiChoiceQuestionManger(
        subset='trainval', answer_coding=model_creator.ans_coding)

    # Create reader post-processing function
    reader_post_proc_fn = build_mc_reader_proc_fn(model_creator.ans_coding)

    g = tf.Graph()
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)
    with g.as_default():
        model = model_creator(config, phase='evaluate')
        model.build()
    # g.finalize()

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)

    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    result, rescore_data, state_rescore_data = [], [], []
    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        saver = tf.train.Saver(var_list=tf.all_variables())
        saver.restore(sess, checkpoint_path)

        itr = 0
        while not reader.eof():
            if itr > 50000:  # cache at most 50k questions
                break
            outputs = reader.pop_batch()
            im_ids, quest_id, im_feat, ans_w2v, quest_ids, ans_ids = outputs
            mc_ans, mc_coding = mc_manager.get_candidate_answer_and_word_coding(
                quest_id)
            inputs = reader_post_proc_fn(outputs, mc_coding)
            perplexity, state = sess.run(
                [model.likelihood, model.final_decoder_state],
                feed_dict=model.fill_feed_dict(inputs))
            perplexity = perplexity.reshape(inputs[-1].shape)
            loss = perplexity[:, :-1].mean(axis=1)

            # generated = [generated[0]]  # sample 3
            question = to_sentence.index_to_question(quest_ids)
            answer = to_sentence.index_to_answer(ans_ids)
            top1_mc_ans = mc_ans[loss.argmin()]
            result.append({u'answer': top1_mc_ans, u'question_id': quest_id})

            # add hidden state saver
            label = mc_manager.get_binary_label(quest_id)
            state_sv = {'quest_id': quest_id, 'states': state, 'label': label}
            state_rescore_data.append(state_sv)

            if itr % 100 == 0:
                print('============== %d ============' % itr)
                print('image id: %d, question id: %d' % (im_ids, quest_id))
                print('question\t: %s' % question)
                print('answer\t: %s' % answer)
                top_k_ids = loss.argsort()[:3].tolist()
                for i, idx in enumerate(top_k_ids):
                    t_mc_ans = mc_ans[idx]
                    print('VAQ answer <%d>\t: %s (%0.2f)' %
                          (i, t_mc_ans, loss[idx]))

            itr += 1
            # save information for train classifier
            mc_label = np.array([a == answer for a in mc_ans],
                                dtype=np.float32)
            quest_target = inputs[-2]
            datum = {
                'quest_seq': quest_target,
                'perplex': perplexity,
                'label': mc_label,
                'quest_id': quest_id
            }
            rescore_data.append(datum)

        quest_ids = [res[u'question_id'] for res in result]
        # save results
        tf.logging.info('Saving results')
        res_file = FLAGS.result_file % get_model_iteration(checkpoint_path)
        json.dump(result, open(res_file, 'w'))
        tf.logging.info('Saving rescore data...')
        from util import pickle
        # pickle('data/rescore_dev.pkl', rescore_data)
        pickle('data/rescore_state_dev.pkl', state_rescore_data)
        tf.logging.info('Done!')
        return res_file, quest_ids
Example No. 24
def send(players, recipient='all'):
    pub_socket.send(recipient + ':' + util.pickle(players))
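util.pickle is used here with a single argument, so in this example it evidently returns the serialized bytes rather than writing a file. Assuming pub_socket is a ZeroMQ PUB socket, a hedged sketch of the matching subscriber (the endpoint and recv_players helper are invented):

import cPickle
import zmq

ctx = zmq.Context()
sub_socket = ctx.socket(zmq.SUB)
sub_socket.connect('tcp://localhost:5556')   # endpoint is an assumption
sub_socket.setsockopt(zmq.SUBSCRIBE, '')

def recv_players(me='all'):
    # wire format from send(): "<recipient>:<pickled players>"
    msg = sub_socket.recv()
    recipient, _, payload = msg.partition(':')
    if recipient in (me, 'all'):
        return cPickle.loads(payload)
    return None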
Example No. 25
def process_for_reconstruction(img_list, std_file, out_dir):
    #subFolderList = os.listdir(std_file)

    meta = {}

    meanImg = readAndResize('/database/test.jpg',SIZE, CHANNELS)
    meanImgstd = readAndResize('/database/test.jpg', SIZE_STD, CHANNELS)

    meta['data_mean'] = meanImg
    meta['data_mean_std'] = meanImgstd
    util.pickle(os.path.join(out_dir,"batches.meta"), meta)
    std_img = [fname for fname in os.listdir(std_file) if fname.endswith('.png')]
    #print len(subFolderList)
    #std_img.sort()


    fileList = open(img_list,'rb').readlines()

    random.shuffle(std_img)
    trainingMeta = []
    testMeta = []
    train_test_ratio = 0.8*len(std_img)  # index threshold: the first 80% of std images go to training
    #print train_test_ratio
    for i , std_name in enumerate(std_img):
        tmp = std_name.split('.')
        #length = len(std_name)
        label = tmp[0]
        num = 0
        meta = []
        for line in fileList:
            #nPos = line.index(label)
            # str = line.strip()
            # tmp = str.split('/')
            # length = len(tmp)
            # line_label = tmp[length - 2]
            if label in line:
                meta.append(line.strip())
                num = num + 1

        if i < train_test_ratio:
            trainingMeta += zip(meta,[os.path.join(std_file,std_name)] * len(meta))
        else:
            testMeta += zip(meta,[os.path.join(std_file,std_name)] * len(meta))
        
        str = "i = %d, name = %s, trainmeta = %d, testmeta = %d" %(i,std_name,len(trainingMeta), len(testMeta))
        print str
        if num == 0:
            # caution: deleting while iterating shifts later indices and
            # skips the element that follows position i
            del std_img[i]
            # str = "i = %d, name = %s, trainmeta = %d" %(i,std_name,len(trainingMeta))
            # print str

    util.pickle(os.path.join(out_dir,"trainingMeta.meta"), trainingMeta)
    util.pickle(os.path.join(out_dir,"testMeta.meta"), testMeta)
    #for training
    print "prepare for training"
    random.shuffle(trainingMeta)
    make_list_batches(trainingMeta,out_dir,NUM_PER_PATCH)



    #for test
    print "prepare for test"
    random.shuffle(testMeta)
    make_list_batches(testMeta,out_dir,NUM_PER_PATCH,8000)
Example No. 26
        image_path = out['image_path'][i]
        im = Image.open( image_path )
        #if cls_index == 6 and index == 0:
        #    im.show()
        #    import pdb; pdb.set_trace()
        assert( im.size == ( IMAGE_SIZE, IMAGE_SIZE ) )
        im_value = PIL2array( im ).astype(n.float32)
        m = m * (1.0 * num/(num+1))
        m = m + im_value/(num+1)
        num += 1
        if VERIFY_RESULT:
            sum_m += im_value
        #print "\r" + str(i) + "/" + str(len(provider.data_list))
        stdout.write( "%4d/%%4d " % (cls_index+1) % num_class ) 
        stdout.write( "%8d/%%8d\r" % (index+1) % len(out['index_map_train'][cls_index]) )
        stdout.flush()
print "\n"

if VERIFY_RESULT:
    sum_m /= num
    diff = sum_m - m
    plot_array_image( diff, (IMAGE_SIZE, IMAGE_SIZE, 3 ) )
    print n.mean( abs(diff) )
    plot_array_image( m, (IMAGE_SIZE, IMAGE_SIZE, 3) )

mean_info = {}
mean_info['data'] = m
mean_info['file'] = INPUT_FILE
pickle( OUTPUT_FILE, mean_info )

def collectImgByClass(folderCls, outFolder):
    for index, foldername in enumerate(folderCls):
        dataCol = collectOneClass(foldername)
        ind = foldername.rfind("/")
        folder = foldername[ind+1:]
        util.pickle(outFolder +"/"+ folder, dataCol)
Example No. 28
def make_batch(imagefolder,numclass,outfolder,ext='.bmp',sep=1, prev=None):
    batchname = 'data_batch_'
    #filenames -- [][(filename,basename,class)]
    filenames = getfiles(imagefolder,numclass,outfolder,ext)
    width,height = 32,32
    # for each batches
    for i in range(0,sep):
        filenames_by_batches =[]
        # seperate the data into batches
        import math
        for j in range(0, int(numclass)):
            step = int(math.ceil(len(filenames[j]) * 1.0 / sep))
            #print step
            fbegin = step * i
            fend = step * (i + 1)
            if fend > len(filenames[j]) :
                fend = len(filenames[j])
            #print fbegin,fend
            fs = filenames[j][fbegin:fend]
            filenames_by_batches += fs
        #random_shuffle
        random.shuffle(filenames_by_batches)
        batch_data = []
        batch_filenames= []
        batch_labels = []
        bname = batchname + str(i + 1)
        #read img
        for f in filenames_by_batches:
            img = cv2.imread(f[0],1)
            width,height = img.shape[0],img.shape[1]
            #img = cv2.resize(img,(width,height))
            #bgr -> gray
            #gray = cv2.cvtColor(img,cv.CV_BGR2GRAY)
            #gray = cv2.GaussianBlur(gray,(3,3),0)
            #dx = cv2.Sobel(gray,cv2.CV_32F,1,0,ksize = 3)
            #dy = cv2.Sobel(gray,cv2.CV_32F,0,1,ksize = 3)
            #dm = cv2.convertScaleAbs(cv2.cartToPolar(dx,dy)[0])
            #cv2.imshow('a',img)
            if prev is not None:
                img = prev(img)
            #cv2.imshow('b',img)
            #print img
            img = np.array(img,np.single)
            b = img[:,:,0].reshape(width * height,order='C')
            g = img[:,:,1].reshape(width * height,order='C')
            r = img[:,:,2].reshape(width * height,order='C')
            #print b
            #print g
            #print r
            #raise
            #cv2.waitKey(0)
            #dm = dm.reshape(width * height,order='C')
            #im = np.array([b,g,r,dm]).reshape(width * height * 4,order='C')
            im = np.array([b,g,r]).reshape(width * height * 3,order='C')
            batch_data.append(im)
            batch_filenames.append(f[1])
            batch_labels.append(f[2])
        savefile = os.path.join(outfolder,bname)
        batch_data = np.transpose(np.array(batch_data,order='C'))
        print(len(batch_labels))
        print(len(batch_data))
        dict = {'batch_label':bname,'labels':batch_labels,'data':batch_data,'filenames':batch_filenames}
        util.pickle(savefile,dict)

    pass
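A hypothetical invocation, with folder paths and class count invented for illustration; prev takes an optional per-image preprocessing callable:

# split 10 classes of .bmp images into 5 shuffled batches
make_batch(r'D:\data\train_images', numclass=10,
           outfolder=r'D:\data\batches', ext='.bmp', sep=5,
           prev=lambda img: cv2.GaussianBlur(img, (3, 3), 0))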
Example No. 29
def doGetMeanImg(reszFolder, meanImgFolder):
    meanImg, info = getMeanImg(reszFolder)
    util.pickle(meanImgFolder+"/meanImg", meanImg)
print s
print 'Loaded ' + str(nval) + ' validation images in ' + str(
    nclasses) + ' classes'
print 'Validation blacklist: ' + str(n_blacklist)
print 'Validation total: ' + str(nval + n_blacklist)

# save data ####################################################################
out = {}
out['train'] = train
out['val'] = val
out['classes_map'] = classes_map
out['classes'] = classes
out['num_data'] = ntrain + nval
out['num_data_train'] = ntrain
out['num_data_val'] = nval
pickle(data_file, out)
call(["chmod", "777", data_file])  # change mode so that can be read by others
print 'Saved data to ' + data_file

# save smaller versions ########################################################
for n in [10, 100]:
    print 'Creating smaller dataset with ' + str(n) + ' classes'
    train_small = []
    val_small = []
    classes_small = []
    classes_map_small = []
    ntrain = 0
    nval = 0
    for i in range(0, 1000, 1000 / n):
        train_small.append(train[i])
        val_small.append(val[i])