def main(_):
    """Compare siamese descriptors taken from three shifted boxes of one image.

    Loads ``car91.jpg``, swaps its first and third channels, restores the
    siamese network from ``opts['modelName']`` and asks ``Generation`` for the
    descriptor of three boxes that are shifted diagonally by 0, 50 and 100
    pixels. The difference and the similarity between the first and the last
    descriptor are printed.

    Args:
        _: unused (the signature matches a ``tf.app.run``-style entry point).

    Raises:
        IOError: if the image cannot be read.
    """
    # Reference boxes used in earlier experiments:
    # [320,180,280,260], trump1
    # [320,200,230,260], trump2
    imagePath = 'car91.jpg'
    img = cv2.imread(imagePath, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than on ``img.shape`` below.
        raise IOError('could not read image: ' + imagePath)
    print('img shape: ', img.shape)
    print('tipo: ', type(img))

    # Swap channels 0 and 2 in place. The fancy-indexed right-hand side is a
    # copy, so the assignment cannot read values it has already overwritten.
    img[:, :, [0, 2]] = img[:, :, [2, 0]]

    opts = configParams()
    opts = getOpts(opts)

    minimumSiameseNetPlaceHolder = tf.placeholder(
        tf.float32, [1, opts['minimumSize'], opts['minimumSize'], 3])
    exemplarOp = tf.placeholder(
        tf.float32, [1, opts['exemplarSize'], opts['exemplarSize'], 3])
    instanceOp = tf.placeholder(
        tf.float32,
        [opts['numScale'], opts['instanceSize'], opts['instanceSize'], 3])
    exemplarOpBak = tf.placeholder(tf.float32, [
        opts['trainBatchSize'], opts['exemplarSize'], opts['exemplarSize'], 3
    ])
    instanceOpBak = tf.placeholder(tf.float32, [
        opts['trainBatchSize'], opts['instanceSize'], opts['instanceSize'], 3
    ])
    isTrainingOp = tf.convert_to_tensor(False,
                                        dtype='bool',
                                        name='is_training')

    sn = SiameseNet()
    # The training graph is built before the Saver is created so that the
    # Saver sees the network variables it has to restore.
    scoreOpBak = sn.buildTrainNetwork(exemplarOpBak,
                                      instanceOpBak,
                                      opts,
                                      isTraining=False)
    saver = tf.train.Saver()
    writer = tf.summary.FileWriter(opts['summaryFile'])
    sess = tf.Session()
    saver.restore(sess, opts['modelName'])

    zFeatOp = sn.buildExemplarSubNetwork(exemplarOp, opts, isTrainingOp)
    zMinimumPreTrained = sn.buildExemplarSubNetwork(
        minimumSiameseNetPlaceHolder, opts, isTrainingOp)

    generated = Generation(opts, sn)
    descritores = []
    # Three boxes of fixed size 160x100, each shifted 50 px further than the
    # previous one along both axes.
    for offset in (0, 50, 100):
        descritores.append(
            generated.getDescriptor([340 + offset, 280 + offset, 160, 100],
                                    img))

    diff = np.array(descritores[0]) - np.array(descritores[2])
    print('diferenca de descritores: ', diff)
    print('similaridade entre descritores: ',
          detSimilarity(descritores[0], descritores[2]))
    print(len(descritores))
def main():
    """Run the ILSVRC2015 curation steps in sequence.

    Reads the configuration from ``p.configParams()``, calls
    ``utils.vidSetupData``, then ``utils.loadImdbFromPkl`` and finally
    ``utils.loadImageStatsFromMat``. Nothing is returned; the loaded values
    are only bound to locals.
    """
    print('ilsvrc2015 curation...')

    cfg = p.configParams()
    curationDir = cfg['curation_path']
    imdbPath = curationDir + "imdb_video.mat"

    cropsDir = cfg['crops_train']
    # The result of vidSetupData is immediately replaced by the pickle load
    # below; the call is kept because it runs before that load.
    imdb = utils.vidSetupData(curationDir, cfg['ilsvrc2015'], cropsDir)
    imdb = utils.loadImdbFromPkl(curationDir, cropsDir)

    imageStats = utils.loadImageStatsFromMat(curationDir)
def buildTrainNetwork(self, exemplar, instance, opts, isTraining=True, branchType="original"):
    """Build the training graph: two shared branches, cross-correlation, adjust.

    Args:
        exemplar: input tensor for the exemplar branch.
        instance: input tensor for the instance branch.
        opts: options dict; ``opts['trainWeightDecay']`` is read here and the
            dict is forwarded to ``buildBranch``.
        isTraining: Python bool converted to the ``is_training`` tensor.
        branchType: forwarded unchanged to ``buildBranch``.

    Returns:
        The score tensor after the 1x1 ``adjust`` convolution and bias.

    Raises:
        AssertionError: if the batch size of either branch output differs
            from ``configParams()['trainBatchSize']``.
    """
    params = configParams()
    isTrainingOp = tf.convert_to_tensor(isTraining, dtype='bool', name='is_training')

    # Both branches live in the same variable scope; the second call reuses
    # the variables created by the first one.
    with tf.variable_scope('siamese') as siameseScope:
        aFeat = self.buildBranch(exemplar, opts, isTrainingOp, branchType=branchType)
        siameseScope.reuse_variables()
        score = self.buildBranch(instance, opts, isTrainingOp, branchType=branchType)

    # tf.nn.conv2d is used to implement the cross-correlation directly (the
    # op computes a correlation), but the exemplar features must first be
    # transposed so that they can act as the filter tensor.
    # Every sample of the batch is handled by slicing: one conv2d per
    # (instance slice, exemplar slice) pair.
    with tf.variable_scope('score'):
        if PRINT_SIAMESE_LOG:
            print("Building xcorr...")

        aFeat = tf.transpose(aFeat, perm=[1, 2, 3, 0])
        batchAFeat = int(aFeat.get_shape()[-1])
        batchScore = int(score.get_shape()[0])

        assert batchAFeat == params['trainBatchSize']
        assert batchScore == params['trainBatchSize']

        kernels = tf.split(axis=3, num_or_size_splits=batchAFeat, value=aFeat)
        searches = tf.split(axis=0, num_or_size_splits=batchScore, value=score)

        responses = []
        for search, kernel in zip(searches, kernels):
            responses.append(
                tf.nn.conv2d(search, kernel, strides=[1, 1, 1, 1], padding='VALID'))

        score = tf.concat(axis=3, values=responses)
        score = tf.transpose(score, perm=[3, 1, 2, 0])

    with tf.variable_scope('adjust'):
        if PRINT_SIAMESE_LOG:
            print("Building adjust...")

        adjustDecay = 1.0 * opts['trainWeightDecay']

        weights = self.getVariable(
            'weights', [1, 1, 1, 1],
            initializer=tf.constant_initializer(value=0.001, dtype=tf.float32),
            weightDecay=adjustDecay,
            dType=tf.float32,
            trainable=True)
        # Learning-rate multiplier recorded for the adjust weights.
        self.learningRates[weights.name] = 0.0

        biases = self.getVariable(
            'biases', [1, ],
            initializer=tf.constant_initializer(value=0, dtype=tf.float32),
            weightDecay=adjustDecay,
            dType=tf.float32,
            trainable=True)
        # Learning-rate multiplier recorded for the adjust bias.
        self.learningRates[biases.name] = 1.0

        score = tf.nn.conv2d(score, weights, strides=[1, 1, 1, 1], padding='VALID')
        score = tf.add(score, biases)

    return score
# Scratch script: loads the configuration and evaluates a boolean
# ``is_training`` placeholder in a fresh session.
import numpy as np
import tensorflow as tf

import parameters as p
import utils

params = p.configParams()

# imdbPath = params['curation_path']+"imdb_video.mat"
# imdb = utils.vidSetupData(params['curation_path'], params['ilsvrc2015'], params['crops_train'])
# imdb = utils.loadImdbFromPkl(params['curation_path'], params['crops_train'])
# for i in range(0, 4404):
#     print (imdb['n_valid_objects'][i][0])

test = tf.placeholder('bool', [], name='is_training')
sess = tf.Session()
# A placeholder has no value of its own: evaluating it without a feed raises
# InvalidArgumentError, so a concrete value is supplied here.
sess.run(test, feed_dict={test: False})

# Function-call form works on both Python 2 and Python 3.
print('aaa')
def main(_):
    """Track the target through a video with a fixed first-frame exemplar.

    Restores the siamese network, computes the exemplar features from the
    first frame, then for every later frame evaluates a scale pyramid around
    the current position, updates position/size from ``trackerEval`` and
    shows the frame with the tracked rectangle. The total elapsed time is
    printed at the end.

    Args:
        _: unused.
    """

    def _asThreeChannels(frame):
        # Frames whose last axis has length 1 are expanded to three
        # identical float32 channels; everything else is returned untouched.
        if frame.shape[-1] != 1:
            return frame
        expanded = np.zeros([frame.shape[0], frame.shape[1], 3],
                            dtype=np.float32)
        gray = np.squeeze(frame)
        for channel in range(3):
            expanded[:, :, channel] = gray
        return expanded

    print('run tracker...')

    opts = getOpts(configParams())

    # ---- graph construction and weight restore -------------------------
    exemplarHWC = [opts['exemplarSize'], opts['exemplarSize'], 3]
    instanceHWC = [opts['instanceSize'], opts['instanceSize'], 3]

    exemplarOp = tf.placeholder(tf.float32, [1] + exemplarHWC)
    instanceOp = tf.placeholder(tf.float32, [opts['numScale']] + instanceHWC)
    exemplarOpBak = tf.placeholder(tf.float32,
                                   [opts['trainBatchSize']] + exemplarHWC)
    instanceOpBak = tf.placeholder(tf.float32,
                                   [opts['trainBatchSize']] + instanceHWC)
    isTrainingOp = tf.convert_to_tensor(False,
                                        dtype='bool',
                                        name='is_training')

    sn = SiameseNet()
    # Built before the Saver so that the Saver sees the network variables.
    scoreOpBak = sn.buildTrainNetwork(exemplarOpBak,
                                      instanceOpBak,
                                      opts,
                                      isTraining=False)
    saver = tf.train.Saver()
    writer = tf.summary.FileWriter(opts['summaryFile'])
    sess = tf.Session()
    saver.restore(sess, opts['modelName'])
    zFeatOp = sn.buildExemplarSubNetwork(exemplarOp, opts, isTrainingOp)

    # ---- first frame: exemplar crop and search-region geometry ---------
    imgs, targetPosition, targetSize = loadVideoInfo(opts['seq_base_path'],
                                                     opts['video'])
    nImgs = len(imgs)
    startFrame = 0

    im = _asThreeChannels(imgs[startFrame])
    avgChans = np.mean(im, axis=(0, 1))

    context = opts['contextAmount'] * np.sum(targetSize)
    wcz = targetSize[1] + context
    hcz = targetSize[0] + context
    sz = np.sqrt(wcz * hcz)
    scalez = opts['exemplarSize'] / sz

    exemplarShape = (opts['exemplarSize'], opts['exemplarSize'])
    cropShape = (np.around(sz), np.around(sz))
    zCrop, _ = getSubWinTracking(im, targetPosition, exemplarShape, cropShape,
                                 avgChans)

    if opts['subMean']:
        pass

    dSearch = (opts['instanceSize'] - opts['exemplarSize']) / 2
    pad = dSearch / scalez
    sx = sz + 2 * pad
    minSx = 0.2 * sx
    maxSx = 5.0 * sx

    # ---- response window ------------------------------------------------
    winSz = opts['scoreSize'] * opts['responseUp']
    windowing = opts['windowing']
    if windowing == 'cosine':
        hann = np.hanning(winSz).reshape(winSz, 1)
        window = hann.dot(hann.T)
    elif windowing == 'uniform':
        window = np.ones((winSz, winSz), dtype=np.float32)
    window = window / np.sum(window)

    # ---- scale factors: scaleStep ** e for a symmetric exponent range ---
    numScale = opts['numScale']
    lowestExp = int(np.ceil(numScale / 2.0) - numScale)
    highestExp = int(np.floor(numScale / 2.0) + 1)
    scales = np.array(
        [opts['scaleStep']**e for e in range(lowestExp, highestExp)])

    # ---- exemplar features are frozen into the graph as a constant ------
    zCrop = np.expand_dims(zCrop, axis=0)
    zFeat = sess.run(zFeatOp, feed_dict={exemplarOp: zCrop})
    zFeat = np.transpose(zFeat, [1, 2, 3, 0])
    zFeatConstantOp = tf.constant(zFeat, dtype=tf.float32)
    scoreOp = sn.buildInferenceNetwork(instanceOp, zFeatConstantOp, opts,
                                       isTrainingOp)
    writer.add_graph(sess.graph)

    resPath = os.path.join(opts['seq_base_path'], opts['video'], 'res')
    bBoxes = np.zeros([nImgs, 4])

    # ---- tracking loop --------------------------------------------------
    tic = time.time()
    for frameIdx in range(startFrame, nImgs):
        # The first frame only gets drawn; tracking starts with the second.
        if frameIdx > startFrame:
            im = _asThreeChannels(imgs[frameIdx])

            scaledInstance = sx * scales
            scaledTarget = np.array([targetSize * scale for scale in scales])
            xCrops = makeScalePyramid(im, targetPosition, scaledInstance,
                                      opts['instanceSize'], avgChans, None,
                                      opts)

            score = sess.run(scoreOp, feed_dict={instanceOp: xCrops})
            sio.savemat('score.mat', {'score': score})

            newTargetPosition, newScale = trackerEval(score, round(sx),
                                                      targetPosition, window,
                                                      opts)
            targetPosition = newTargetPosition

            # Blend old and new sizes with the scale learning rate, then
            # clamp the search size to [minSx, maxSx].
            blendedSx = ((1 - opts['scaleLr']) * sx +
                         opts['scaleLr'] * scaledInstance[newScale])
            sx = max(minSx, min(maxSx, blendedSx))
            targetSize = ((1 - opts['scaleLr']) * targetSize +
                          opts['scaleLr'] * scaledTarget[newScale])

        rectPosition = targetPosition - targetSize / 2.
        # Coordinates are reversed before being handed to OpenCV.
        tl = tuple(np.round(rectPosition).astype(int)[::-1])
        br = tuple(np.round(rectPosition + targetSize).astype(int)[::-1])

        imDraw = im.astype(np.uint8)
        cv2.rectangle(imDraw, tl, br, (0, 255, 255), thickness=3)
        cv2.imshow("tracking", imDraw)
        cv2.waitKey(1)

    print(time.time() - tic)
    return
def _main(nome_do_video, nome_do_arquivo_de_saida, modo_selecionado, parametro):
    """Track a target through a video with an accumulated template and log the boxes.

    For every frame after the first, the exemplar features of the current
    position are added to a ``SuperTemplate``; the template returned by its
    ``mediaMovelGaussiana`` is then used to score a scale pyramid. The box of
    every frame is collected and written as one comma-separated line per
    frame to ``<datasetPath>/<video>/__log__/<output file>``.

    Args:
        nome_do_video: video folder name, joined onto the dataset path.
        nome_do_arquivo_de_saida: name of the log file to write.
        modo_selecionado: converted with ``int`` and passed as ``mode`` to
            ``mediaMovelGaussiana``.
        parametro: converted with ``int``; frames with an index below this
            value add the first-frame features instead of the current ones.
    """
    show = lp.getInJson('tracker', 'show')
    opts = configParams()
    opts = getOpts(opts)  #add
    mode = int(modo_selecionado)
    caminhoDataset = lp.getInJson('tracker', 'datasetPath')
    caminhoVideo = os.path.join(caminhoDataset, nome_do_video)
    caminhoLog = os.path.join(caminhoVideo, '__log__')
    nome_log = nome_do_arquivo_de_saida
    FRAMES_TO_ACUMULATE_BEFORE_FEEDBACK = int(parametro)

    # NETWORK 1
    exemplarOp = tf.placeholder(
        tf.float32, [1, opts['exemplarSize'], opts['exemplarSize'], 3])
    instanceOp = tf.placeholder(
        tf.float32,
        [opts['numScale'], opts['instanceSize'], opts['instanceSize'], 3])
    exemplarOpBak = tf.placeholder(tf.float32, [
        opts['trainBatchSize'], opts['exemplarSize'], opts['exemplarSize'], 3
    ])
    instanceOpBak = tf.placeholder(tf.float32, [
        opts['trainBatchSize'], opts['instanceSize'], opts['instanceSize'], 3
    ])
    isTrainingOp = tf.convert_to_tensor(False,
                                        dtype='bool',
                                        name='is_training')
    sn = SiameseNet()
    # The training graph is built before the Saver is created.
    scoreOpBak = sn.buildTrainNetwork(exemplarOpBak,
                                      instanceOpBak,
                                      opts,
                                      isTraining=False)
    saver = tf.train.Saver()
    #writer = tf.summary.FileWriter(opts['summaryFile'])
    sess = tf.Session()
    # NOTE(review): this first sess2 is replaced by a new session further
    # down without being closed -- looks like a leaked session; confirm.
    sess2 = tf.Session()
    saver.restore(sess, opts['modelName'])
    saver.restore(sess2, opts['modelName'])
    zFeatOp = sn.buildExemplarSubNetwork(exemplarOp, opts, isTrainingOp)

    # NETWORK 2
    exemplarOp2 = tf.placeholder(
        tf.float32, [1, opts['exemplarSize'], opts['exemplarSize'], 3])
    instanceOp2 = tf.placeholder(
        tf.float32,
        [opts['numScale'], opts['instanceSize'], opts['instanceSize'], 3])
    exemplarOpBak2 = tf.placeholder(tf.float32, [
        opts['trainBatchSize'], opts['exemplarSize'], opts['exemplarSize'], 3
    ])
    instanceOpBak2 = tf.placeholder(tf.float32, [
        opts['trainBatchSize'], opts['instanceSize'], opts['instanceSize'], 3
    ])
    isTrainingOp2 = tf.convert_to_tensor(False,
                                         dtype='bool',
                                         name='is_training')
    sn2 = SiameseNet()
    # scoreOpBak2 = sn2.buildTrainNetwork(exemplarOpBak2, instanceOpBak2, opts, isTraining=False)
    saver2 = tf.train.Saver()
    #writer2 = tf.summary.FileWriter(opts['summaryFile'])
    sess2 = tf.Session()
    saver2.restore(sess2, opts['modelName'])
    zFeatOp2 = sn2.buildExemplarSubNetwork(exemplarOp2, opts, isTrainingOp2)

    #imgs, targetPosition, targetSize = loadVideoInfo(caminhoDataset, nome_do_video)
    imgFiles, targetPosition, targetSize = loadVideoInfo(
        caminhoDataset, nome_do_video)
    nImgs = len(imgFiles)
    #imgs_pil = [Image.fromarray(np.uint8(img)) for img in imgs]

    im = get_next_frame(imgFiles, POSICAO_PRIMEIRO_FRAME)
    # Frames whose last axis has length 1 are expanded to three identical
    # float32 channels.
    if (im.shape[-1] == 1):
        tmp = np.zeros([im.shape[0], im.shape[1], 3], dtype=np.float32)
        tmp[:, :, 0] = tmp[:, :, 1] = tmp[:, :, 2] = np.squeeze(im)
        im = tmp

    # Per-channel mean of the first frame, passed to the cropping helpers.
    avgChans = np.mean(
        im, axis=(0, 1)
    )  # [np.mean(np.mean(img[:, :, 0])), np.mean(np.mean(img[:, :, 1])), np.mean(np.mean(img[:, :, 2]))]
    # Target extent plus context; sz is the side of the square exemplar crop.
    wcz = targetSize[1] + opts['contextAmount'] * np.sum(targetSize)
    hcz = targetSize[0] + opts['contextAmount'] * np.sum(targetSize)
    sz = np.sqrt(wcz * hcz)
    scalez = opts['exemplarSize'] / sz

    zCrop, _ = getSubWinTracking(im, targetPosition,
                                 (opts['exemplarSize'], opts['exemplarSize']),
                                 (np.around(sz), np.around(sz)), avgChans)
    # NOTE(review): zCrop2 is computed with the same arguments as zCrop and
    # is not read again in this function.
    zCrop2, _ = getSubWinTracking(im, targetPosition,
                                  (opts['exemplarSize'], opts['exemplarSize']),
                                  (np.around(sz), np.around(sz)), avgChans)

    if opts['subMean']:
        pass

    # Search-region size sx and the bounds it is clamped to while tracking.
    dSearch = (opts['instanceSize'] - opts['exemplarSize']) / 2
    pad = dSearch / scalez
    sx = sz + 2 * pad
    minSx = 0.2 * sx
    maxSx = 5.0 * sx

    winSz = opts['scoreSize'] * opts['responseUp']
    # NOTE(review): `window` stays undefined when opts['windowing'] is
    # neither 'cosine' nor 'uniform'; the normalisation below would then fail.
    if opts['windowing'] == 'cosine':
        hann = np.hanning(winSz).reshape(winSz, 1)
        window = hann.dot(hann.T)
    elif opts['windowing'] == 'uniform':
        window = np.ones((winSz, winSz), dtype=np.float32)
    window = window / np.sum(window)

    # Scale factors scaleStep ** i for a range of exponents centred on 0.
    scales = np.array([
        opts['scaleStep']**i
        for i in range(int(np.ceil(opts['numScale'] / 2.0) - opts['numScale']),
                       int(np.floor(opts['numScale'] / 2.0) + 1))
    ])

    # NETWORK 1
    zCrop = np.expand_dims(zCrop, axis=0)
    zFeat = sess.run(zFeatOp, feed_dict={exemplarOp: zCrop})
    zFeat = np.transpose(zFeat, [1, 2, 3, 0])
    template = tf.constant(zFeat, dtype=tf.float32)
    oldTemplate = tf.constant(zFeat, dtype=tf.float32)
    scoreOp = sn.buildInferenceNetwork(instanceOp, template, opts,
                                       isTrainingOp)
    #writer.add_graph(sess.graph)

    # NETWORK 2
    zCrop_original = np.array(zCrop)
    zFeat_original = sess2.run(zFeatOp2,
                               feed_dict={exemplarOp2: zCrop_original})
    zFeat_original = np.transpose(zFeat_original, [1, 2, 3, 0])
    template_original = tf.constant(zFeat_original, dtype=tf.float32)
    #template = np.array(template_original)
    template = tf.identity(template_original)
    oldTemplate = tf.identity(template_original)
    # NOTE(review): `template` is a TF tensor here, so np.array() wraps the
    # tensor object rather than its values -- confirm this is intended.
    template_acumulado = np.array(template)
    scoreOp_original = sn.buildInferenceNetwork(instanceOp, template_original,
                                                opts, isTrainingOp)
    #writer2.add_graph(sess2.graph)

    teste1 = tf.constant(zFeat, dtype=tf.float32)
    # NOTE(review): this anonymous session is never closed.
    teste2 = tf.Session().run(teste1)
    teste3 = tf.constant(teste2, dtype=tf.float32)
    #assert 2 == 1

    tic = time.time()
    ltrb = []  # one [tl..., br...] list per processed frame

    superDescritor = SuperTemplate()
    superDescritor.addInstance(np.array(zFeat))
    print('zfeat:', zFeat[0, 0, -10, 0])

    for frame in range(POSICAO_PRIMEIRO_FRAME, nImgs):
        im = get_next_frame(imgFiles, frame)
        print(('frame ' + str(frame + 1) + ' / ' + str(nImgs)).center(80, '*'))

        # The first frame is only drawn and logged; tracking starts after it.
        if frame > POSICAO_PRIMEIRO_FRAME:
            # Exemplar features at the position estimated so far.
            zCrop, _ = getSubWinTracking(
                im, targetPosition,
                (opts['exemplarSize'], opts['exemplarSize']),
                (np.around(sz), np.around(sz)), avgChans)
            zCrop = np.expand_dims(zCrop, axis=0)
            zFeat = sess.run(zFeatOp, feed_dict={exemplarOp: zCrop})
            zFeat = np.transpose(zFeat, [1, 2, 3, 0])
            # NOTE(review): the reshaped array is discarded (ndarray.reshape
            # is not in-place), so this line only checks the element count.
            zFeat.reshape(1, NUMBER_OF_EXEMPLAR_DESCRIPTOR,
                          NUMBER_OF_EXEMPLAR_DESCRIPTOR,
                          SIAMESE_DESCRIPTOR_DIMENSION)

            # Early frames re-add the first-frame features; later frames add
            # the features of the current frame.
            if frame < FRAMES_TO_ACUMULATE_BEFORE_FEEDBACK:
                superDescritor.addInstance(np.array(zFeat_original))
            else:
                superDescritor.addInstance(np.array(zFeat))

            if (im.shape[-1] == 1):  # if the image is grayscale
                tmp = np.zeros([im.shape[0], im.shape[1], 3], dtype=np.float32)
                tmp[:, :, 0] = tmp[:, :, 1] = tmp[:, :, 2] = np.squeeze(im)
                im = tmp

            scaledInstance = sx * scales
            scaledTarget = np.array([targetSize * scale for scale in scales])
            xCrops = makeScalePyramid(im, targetPosition, scaledInstance,
                                      opts['instanceSize'], avgChans, None,
                                      opts)

            template = superDescritor.mediaMovelGaussiana(size=frame,
                                                          mode=mode)
            # The template is evaluated in a throwaway session and re-wrapped
            # as a graph constant (presumably mediaMovelGaussiana returns a
            # TF tensor -- TODO confirm).
            with tf.Session() as sess1:
                template = sess1.run(template)
            template = tf.constant(template, dtype=tf.float32)

            #template_espacial = spatialTemplate (targetPosition,im, opts, sz, avgChans,sess,zFeatOp,exemplarOp,FRAMES_COM_MEDIA_ESPACIAL,amplitude = 0, cumulative = False, adaptative = False )
            #template = superDescritor.cummulativeTemplate()
            #template = superDescritor.progressiveTemplate()
            #template = superDescritor.nShotTemplate(3)
            #
            #template = template_original

            # adaptive filter right below:
            #template = filtroAdaptativo(template,zFeat,parametro)
            # ~adaptive filter

            # NOTE(review): a new inference network is added to the default
            # graph on every frame, so the graph grows with the video length.
            scoreOp = sn.buildInferenceNetwork(instanceOp, template, opts,
                                               isTrainingOp)
            score = sess.run(scoreOp, feed_dict={instanceOp: xCrops})
            #sio.savemat('score.mat', {'score': score})

            newTargetPosition, newScale = trackerEval(score, round(sx),
                                                      targetPosition, window,
                                                      opts)
            targetPosition = newTargetPosition
            # Blend old and new search size with scaleLr, clamped to
            # [minSx, maxSx]; the target size is blended the same way.
            sx = max(
                minSx,
                min(maxSx, (1 - opts['scaleLr']) * sx +
                    opts['scaleLr'] * scaledInstance[newScale]))
            targetSize = (1 - opts['scaleLr']) * targetSize + opts[
                'scaleLr'] * scaledTarget[newScale]
        else:
            pass

        rectPosition = targetPosition - targetSize / 2.
        # Coordinates are reversed before use (presumably (row, col) ->
        # (x, y) for OpenCV -- TODO confirm).
        tl = tuple(np.round(rectPosition).astype(int)[::-1])
        br = tuple(np.round(rectPosition + targetSize).astype(int)[::-1])

        if show:  # plot only if it is in a desktop that allows you to watch the video
            imDraw = im.astype(np.uint8)
            cv2.putText(imDraw,
                        str(frame + 1) + '/' + str(nImgs), (0, 25),
                        cv2.FONT_HERSHEY_DUPLEX, 1, (0, 255, 255), 2)
            cv2.rectangle(imDraw, tl, br, (0, 255, 255), thickness=3)
            cv2.imshow("tracking - siamese", imDraw)
            cv2.waitKey(1)

        ltrb.append(list(tl) + list(br))

    # One line per frame: the four coordinates separated by commas, with the
    # newline placed after the element at index 3.
    with open(os.path.join(caminhoLog, nome_log), 'w') as file:
        linhas = []
        for i in ltrb:
            linha = ''
            for cont, coord in enumerate(i):
                if cont == 3:
                    linha = linha + str(coord) + '\n'
                else:
                    linha = linha + str(coord) + ','
            linhas.append(linha)
        for i in linhas:
            file.write(i)

    print(time.time() - tic)
    return
def main(_):
    """Train the siamese network, saving a checkpoint after every epoch.

    Each epoch iterates over a random permutation of the training pairs,
    runs one optimisation step per batch and writes summaries; it then saves
    ``model_epoch<i>.ckpt`` under ``opts['ckptPath']`` and evaluates the
    validation pairs (summaries only, no weight update).

    Args:
        _: unused.
    """
    opts = configParams()
    opts = getOpts(opts)

    # curation.py should be executed once before
    imdb = utils.loadImdbFromPkl(opts['curation_path'], opts['crops_train'])
    rgbMeanZ, rgbVarZ, rgbMeanX, rgbVarX = loadStats(opts['curation_path'])
    imdb, imdbInd = chooseValSet(imdb, opts)

    # random seed should be fixed here
    np.random.seed(opts['randomSeed'])

    exemplarOp = tf.placeholder(tf.float32, [
        opts['trainBatchSize'], opts['exemplarSize'], opts['exemplarSize'], 3
    ])
    instanceOp = tf.placeholder(tf.float32, [
        opts['trainBatchSize'], opts['instanceSize'], opts['instanceSize'], 3
    ])
    lr = tf.placeholder(tf.float32, shape=())

    sn = SiameseNet()
    scoreOp = sn.buildTrainNetwork(exemplarOp, instanceOp, opts)

    # NOTE(review): the length 8 is hard-coded; it presumably has to match
    # opts['trainBatchSize'] -- confirm against centerThrErr/maxScoreErr.
    labels = np.ones([8], dtype=np.float32)
    respSz = int(scoreOp.get_shape()[1])
    respSz = [respSz, respSz]
    respStride = 8  # calculated from stride of convolutional layers and pooling layers
    fixedLabel, instanceWeight = createLabels(respSz,
                                              opts['lossRPos'] / respStride,
                                              opts['lossRNeg'] / respStride,
                                              opts['trainBatchSize'])
    # sio.savemat('labels.mat', {'fixedLabel': fixedLabel, 'instanceWeight': instanceWeight})

    opts['rgbMeanZ'] = rgbMeanZ
    opts['rgbVarZ'] = rgbVarZ
    opts['rgbMeanX'] = rgbMeanX
    opts['rgbVarX'] = rgbVarX

    instanceWeightOp = tf.constant(instanceWeight, dtype=tf.float32)
    yOp = tf.placeholder(tf.float32, fixedLabel.shape)
    with tf.name_scope("logistic_loss"):
        lossOp = sn.loss(scoreOp, yOp, instanceWeightOp)
        tf.summary.scalar('loss', lossOp)

    # The error metrics are computed in Python; they reach the summaries by
    # being assigned into these variables through placeholders.
    # NOTE(review): the second positional argument of tf.Variable is
    # `trainable`, not `name`, so these strings do not name the variables --
    # left unchanged because renaming would alter the checkpoint contents.
    errDispVar = tf.Variable(0, 'tbVarErrDisp', dtype=tf.float32)
    errDispPH = tf.placeholder(tf.float32, shape=())
    errDispSummary = errDispVar.assign(errDispPH)
    tf.summary.scalar("errDisp", errDispSummary)
    errMaxVar = tf.Variable(0, 'tbVarErrMax', dtype=tf.float32)
    errMaxPH = tf.placeholder(tf.float32, shape=())
    errMaxSummary = errMaxVar.assign(errMaxPH)
    tf.summary.scalar("errMax", errMaxSummary)

    optimizer = tf.train.MomentumOptimizer(learning_rate=lr,
                                           momentum=opts['momentum'])
    grads = optimizer.compute_gradients(lossOp)

    # Apply the per-variable learning-rate multipliers recorded by the
    # network. The scaled pairs must be collected into a new list: rebinding
    # the loop variable alone leaves the original gradients untouched.
    scaledGrads = []
    for grad, var in grads:
        if grad is not None and var.name in sn.learningRates:
            grad = grad * sn.learningRates[var.name]
        scaledGrads.append((grad, var))
    gradsOp = optimizer.apply_gradients(grads_and_vars=scaledGrads)

    batchNormUpdates = tf.get_collection(UPDATE_OPS_COLLECTION)
    batchNormUpdatesOp = tf.group(*batchNormUpdates)
    trainOp = tf.group(gradsOp, batchNormUpdatesOp)

    summaryOp = tf.summary.merge_all()
    writer = tf.summary.FileWriter(opts['summaryFile'])
    saver = tf.train.Saver(max_to_keep=40)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    writer.add_graph(sess.graph)

    step = 0
    epochStep = opts['numPairs'] / opts['trainBatchSize']

    for i in range(opts['start'], opts['trainNumEpochs']):
        # ---- training pass ------------------------------------------------
        trainSamples = opts['numPairs'] * (1 - opts['validation'])
        sampleNum = 0
        errDisp = 0
        errMax = 0
        sampleIdx = np.random.permutation(int(trainSamples))
        while sampleNum < trainSamples:
            # time.clock() was removed in Python 3.8; wall-clock time is
            # sufficient for this progress message.
            t0 = time.time()
            batch = sampleIdx[sampleNum:sampleNum + opts['trainBatchSize']]
            imoutZ, imoutX = vidGetRandBatch(imdbInd, imdb, batch, opts)

            score = sess.run(scoreOp,
                             feed_dict={
                                 exemplarOp: imoutZ,
                                 instanceOp: imoutX
                             })
            errDisp = centerThrErr(score, labels, errDisp, sampleNum)
            errMax = maxScoreErr(score, labels, errMax, sampleNum)

            sess.run(trainOp,
                     feed_dict={
                         exemplarOp: imoutZ,
                         instanceOp: imoutX,
                         yOp: fixedLabel,
                         lr: opts['trainLr'][i]
                     })
            _, _, s = sess.run(
                [errDispSummary, errMaxSummary, summaryOp],
                feed_dict={
                    errDispPH: errDisp,
                    errMaxPH: errMax,
                    exemplarOp: imoutZ,
                    instanceOp: imoutX,
                    yOp: fixedLabel,
                    lr: opts['trainLr'][i]
                })
            writer.add_summary(s, step)

            sampleNum = sampleNum + opts['trainBatchSize']
            step = step + 1
            print('the %d epoch %d round training is finished in %f' %
                  (i, np.mod(step, epochStep), time.time() - t0))

        # ---- checkpoint ---------------------------------------------------
        if not os.path.exists(opts['ckptPath']):
            os.mkdir(opts['ckptPath'])
        ckptName = os.path.join(opts['ckptPath'],
                                'model_epoch' + str(i) + '.ckpt')
        saveRes = saver.save(sess, ckptName)

        # ---- validation pass (indices follow the training range) ---------
        valSamples = opts['numPairs'] * opts['validation']
        sampleNum = 0
        errDisp = 0
        errMax = 0
        sampleIdx = np.random.permutation(int(valSamples)) + int(trainSamples)
        while sampleNum < valSamples:
            t0 = time.time()
            batch = sampleIdx[sampleNum:sampleNum + opts['trainBatchSize']]
            imoutZ, imoutX = vidGetRandBatch(imdbInd, imdb, batch, opts)

            score = sess.run(scoreOp,
                             feed_dict={
                                 exemplarOp: imoutZ,
                                 instanceOp: imoutX
                             })
            errDisp = centerThrErr(score, labels, errDisp, sampleNum)
            errMax = maxScoreErr(score, labels, errMax, sampleNum)

            _, _, s = sess.run(
                [errDispSummary, errMaxSummary, summaryOp],
                feed_dict={
                    errDispPH: errDisp,
                    errMaxPH: errMax,
                    exemplarOp: imoutZ,
                    instanceOp: imoutX,
                    yOp: fixedLabel,
                    lr: opts['trainLr'][i]
                })
            writer.add_summary(s, step)

            sampleNum = sampleNum + opts['trainBatchSize']
            step = step + 1
            print('the %d epoch %d round validation is finished in %f' %
                  (i, np.mod(step, epochStep), time.time() - t0))

    return
def main(_):
    """Track a target with a running-average exemplar template.

    The first-frame exemplar features are used as the template for frames
    with an index below 50; from then on the running average of all exemplar
    features seen so far is used. Every frame is shown with the tracked
    rectangle and the total elapsed time is printed at the end.

    Args:
        _: unused.
    """
    print('run tracker...')
    opts = configParams()
    opts = getOpts(opts)

    # NETWORK 1
    exemplarOp = tf.placeholder(
        tf.float32, [1, opts['exemplarSize'], opts['exemplarSize'], 3])
    instanceOp = tf.placeholder(
        tf.float32,
        [opts['numScale'], opts['instanceSize'], opts['instanceSize'], 3])
    exemplarOpBak = tf.placeholder(tf.float32, [
        opts['trainBatchSize'], opts['exemplarSize'], opts['exemplarSize'], 3
    ])
    instanceOpBak = tf.placeholder(tf.float32, [
        opts['trainBatchSize'], opts['instanceSize'], opts['instanceSize'], 3
    ])
    isTrainingOp = tf.convert_to_tensor(False,
                                        dtype='bool',
                                        name='is_training')
    sn = SiameseNet()
    # Built before the Saver so that the Saver sees the network variables.
    scoreOpBak = sn.buildTrainNetwork(exemplarOpBak,
                                      instanceOpBak,
                                      opts,
                                      isTraining=False)
    saver = tf.train.Saver()
    writer = tf.summary.FileWriter(opts['summaryFile'])
    sess = tf.Session()
    saver.restore(sess, opts['modelName'])
    zFeatOp = sn.buildExemplarSubNetwork(exemplarOp, opts, isTrainingOp)

    # NETWORK 2
    exemplarOp2 = tf.placeholder(
        tf.float32, [1, opts['exemplarSize'], opts['exemplarSize'], 3])
    instanceOp2 = tf.placeholder(
        tf.float32,
        [opts['numScale'], opts['instanceSize'], opts['instanceSize'], 3])
    exemplarOpBak2 = tf.placeholder(tf.float32, [
        opts['trainBatchSize'], opts['exemplarSize'], opts['exemplarSize'], 3
    ])
    instanceOpBak2 = tf.placeholder(tf.float32, [
        opts['trainBatchSize'], opts['instanceSize'], opts['instanceSize'], 3
    ])
    isTrainingOp2 = tf.convert_to_tensor(False,
                                         dtype='bool',
                                         name='is_training')
    sn2 = SiameseNet()
    # scoreOpBak2 = sn2.buildTrainNetwork(exemplarOpBak2, instanceOpBak2, opts, isTraining=False)
    saver2 = tf.train.Saver()
    writer2 = tf.summary.FileWriter(opts['summaryFile'])
    # Only one second session is created; an earlier session bound to the
    # same name used to be restored and then dropped without being closed.
    sess2 = tf.Session()
    saver2.restore(sess2, opts['modelName'])
    zFeatOp2 = sn2.buildExemplarSubNetwork(exemplarOp2, opts, isTrainingOp2)

    imgs, targetPosition, targetSize = loadVideoInfo(opts['seq_base_path'],
                                                     opts['video'])
    nImgs = len(imgs)

    im = imgs[POSICAO_PRIMEIRO_FRAME]
    # Frames whose last axis has length 1 are expanded to three identical
    # float32 channels.
    if im.shape[-1] == 1:
        tmp = np.zeros([im.shape[0], im.shape[1], 3], dtype=np.float32)
        tmp[:, :, 0] = tmp[:, :, 1] = tmp[:, :, 2] = np.squeeze(im)
        im = tmp

    # Per-channel mean of the first frame, passed to the cropping helpers.
    avgChans = np.mean(im, axis=(0, 1))
    # Target extent plus context; sz is the side of the square exemplar crop.
    wcz = targetSize[1] + opts['contextAmount'] * np.sum(targetSize)
    hcz = targetSize[0] + opts['contextAmount'] * np.sum(targetSize)
    sz = np.sqrt(wcz * hcz)
    scalez = opts['exemplarSize'] / sz

    zCrop, _ = getSubWinTracking(im, targetPosition,
                                 (opts['exemplarSize'], opts['exemplarSize']),
                                 (np.around(sz), np.around(sz)), avgChans)

    if opts['subMean']:
        pass

    # Search-region size sx and the bounds it is clamped to while tracking.
    dSearch = (opts['instanceSize'] - opts['exemplarSize']) / 2
    pad = dSearch / scalez
    sx = sz + 2 * pad
    minSx = 0.2 * sx
    maxSx = 5.0 * sx

    winSz = opts['scoreSize'] * opts['responseUp']
    if opts['windowing'] == 'cosine':
        hann = np.hanning(winSz).reshape(winSz, 1)
        window = hann.dot(hann.T)
    elif opts['windowing'] == 'uniform':
        window = np.ones((winSz, winSz), dtype=np.float32)
    window = window / np.sum(window)

    # Scale factors scaleStep ** i for a range of exponents centred on 0.
    scales = np.array([
        opts['scaleStep']**i
        for i in range(int(np.ceil(opts['numScale'] / 2.0) - opts['numScale']),
                       int(np.floor(opts['numScale'] / 2.0) + 1))
    ])

    # NETWORK 1: first-frame exemplar features, the seed of the average.
    zCrop = np.expand_dims(zCrop, axis=0)
    zFeat = sess.run(zFeatOp, feed_dict={exemplarOp: zCrop})
    zFeat = np.transpose(zFeat, [1, 2, 3, 0])
    zFeatConstantOp = tf.constant(zFeat, dtype=tf.float32)
    scoreOp = sn.buildInferenceNetwork(instanceOp, zFeatConstantOp, opts,
                                       isTrainingOp)
    writer.add_graph(sess.graph)

    # NETWORK 2: the same crop evaluated through the second session.
    zCrop_original = np.array(zCrop)
    zFeat_original = sess2.run(zFeatOp2,
                               feed_dict={exemplarOp2: zCrop_original})
    zFeat_original = np.transpose(zFeat_original, [1, 2, 3, 0])
    zFeat_original_const = tf.constant(zFeat_original, dtype=tf.float32)
    zFeatConstantOp_original = tf.constant(zFeat_original, dtype=tf.float32)
    zFeatConstantOp_10_first = tf.constant(zFeat_original, dtype=tf.float32)
    scoreOp_original = sn.buildInferenceNetwork(instanceOp,
                                                zFeatConstantOp_original, opts,
                                                isTrainingOp)
    writer2.add_graph(sess2.graph)

    resPath = os.path.join(opts['seq_base_path'], opts['video'], 'res')

    tic = time.time()
    ltrb = []  # one [tl..., br...] list per processed frame
    # History of the averaged templates. This has to be a list: a NumPy
    # array has no ``append`` method.
    zFeatConstantOp_Norm = []

    for frame in range(POSICAO_PRIMEIRO_FRAME, nImgs):
        print(('Estamos no frame ' + str(frame + 1)).center(80, '*'))

        # The first frame is only drawn; tracking starts with the next one.
        if frame > POSICAO_PRIMEIRO_FRAME:
            im = imgs[frame]
            if im.shape[-1] == 1:
                tmp = np.zeros([im.shape[0], im.shape[1], 3], dtype=np.float32)
                tmp[:, :, 0] = tmp[:, :, 1] = tmp[:, :, 2] = np.squeeze(im)
                im = tmp

            scaledInstance = sx * scales
            scaledTarget = np.array([targetSize * scale for scale in scales])
            xCrops = makeScalePyramid(im, targetPosition, scaledInstance,
                                      opts['instanceSize'], avgChans, None,
                                      opts)
            cv2.imshow('xcrops', xCrops[1, :, :, :] / 255)

            # Exemplar features at the position estimated so far.
            zCrop, _ = getSubWinTracking(
                im, targetPosition,
                (opts['exemplarSize'], opts['exemplarSize']),
                (np.around(sz), np.around(sz)), avgChans)
            zCrop = np.expand_dims(zCrop, axis=0)
            zFeat = sess.run(zFeatOp, feed_dict={exemplarOp: zCrop})
            zFeat = np.transpose(zFeat, [1, 2, 3, 0])
            # The result is discarded (reshape is not in-place); the call is
            # kept because it still fails loudly on an unexpected size.
            zFeat.reshape(1, 6, 6, 256)

            # Running average of the templates. Float literals keep the
            # weights fractional even under integer division semantics.
            newWeight = 1.0 / (frame + 1)
            oldWeight = float(frame) / (frame + 1)
            zFeatConstantOp = zFeat * newWeight + zFeatConstantOp * oldWeight
            print('zFeatConstantOp shape: ', zFeatConstantOp.shape)
            print('media zFeatConstantOp: ', zFeatConstantOp)
            zFeatConstantOp_Norm.append(zFeatConstantOp)

            # NOTE(review): a new inference network is added to the default
            # graph on every frame, so the graph grows with the video length.
            if frame < 50:
                scoreOp = sn.buildInferenceNetwork(instanceOp,
                                                   zFeat_original_const, opts,
                                                   isTrainingOp)
            else:
                scoreOp = sn.buildInferenceNetwork(instanceOp, zFeatConstantOp,
                                                   opts, isTrainingOp)

            score = sess.run(scoreOp, feed_dict={instanceOp: xCrops})
            sio.savemat('score.mat', {'score': score})

            newTargetPosition, newScale = trackerEval(score, round(sx),
                                                      targetPosition, window,
                                                      opts)
            targetPosition = newTargetPosition
            # Blend old and new search size with scaleLr, clamped to
            # [minSx, maxSx]; the target size is blended the same way.
            sx = max(
                minSx,
                min(maxSx, (1 - opts['scaleLr']) * sx +
                    opts['scaleLr'] * scaledInstance[newScale]))
            targetSize = (1 - opts['scaleLr']) * targetSize + opts[
                'scaleLr'] * scaledTarget[newScale]

        rectPosition = targetPosition - targetSize / 2.
        # Coordinates are reversed before being handed to OpenCV.
        tl = tuple(np.round(rectPosition).astype(int)[::-1])
        br = tuple(np.round(rectPosition + targetSize).astype(int)[::-1])

        imDraw = im.astype(np.uint8)
        cv2.rectangle(imDraw, tl, br, (0, 255, 255), thickness=3)
        cv2.imshow("tracking - siamese", imDraw)
        cv2.waitKey(1)

        ltrb.append(list(tl) + list(br))

    print(time.time() - tic)
    return
def main(_):
    """Train the '_z' variables of the Siamese network, checkpointing per epoch.

    Builds a two-stage graph: a first forward pass (``buildTrainNetwork``)
    yields ``scoreOp1``/``l2Op``; the gradient of the first loss w.r.t.
    ``l2Op`` is fed into ``buildTrainNetwork1`` to produce ``scoreOp2``, whose
    loss (``lossOp2``) is the one being minimised.  Only trainable variables
    whose name contains ``'_z'`` are updated; the ``siamese/scala1..5``
    variables are restored from a fixed base checkpoint.

    For every epoch in ``range(opts['start'], opts['trainNumEpochs'])`` the
    function runs the training batches, saves a checkpoint under
    ``opts['ckptPath']`` and then runs the validation batches, writing
    summaries to ``opts['summaryFile']`` throughout.

    Args:
        _: Unused (presumably the argv list handed over by ``tf.app.run``).

    Returns:
        None.
    """
    opts = configParams()
    opts = getOpts(opts)

    # curation.py should be executed once before
    imdb = utils.loadImdbFromPkl(opts['curation_path'], opts['crops_train'])
    rgbMeanZ, rgbVarZ, rgbMeanX, rgbVarX = loadStats(opts['curation_path'])
    imdb, imdbInd = chooseValSet(imdb, opts)

    # random seed should be fixed here
    np.random.seed(opts['randomSeed'])

    batchSize = opts['trainBatchSize']
    exemplarShape = [batchSize, opts['exemplarSize'], opts['exemplarSize'], 3]
    instanceShape = [batchSize, opts['instanceSize'], opts['instanceSize'], 3]
    exemplarOp1 = tf.placeholder(tf.float32, exemplarShape)
    instanceOpt = tf.placeholder(tf.float32, instanceShape)
    instanceOp = tf.placeholder(tf.float32, instanceShape)
    lr = tf.placeholder(tf.float32, shape=())

    sn = SiameseNet()
    scoreOp1, l2Op = sn.buildTrainNetwork(exemplarOp1, instanceOp, opts)

    # NOTE(review): the label vector length is hard-coded to 8; presumably it
    # is meant to match opts['trainBatchSize'] - confirm before changing.
    labels = np.ones([8], dtype=np.float32)
    respSz = int(scoreOp1.get_shape()[1])
    respSz = [respSz, respSz]
    # calculated from stride of convolutional layers and pooling layers
    respStride = 8
    fixedLabel, instanceWeight = createLabels(respSz,
                                              opts['lossRPos'] / respStride,
                                              opts['lossRNeg'] / respStride,
                                              batchSize)

    opts['rgbMeanZ'] = rgbMeanZ
    opts['rgbVarZ'] = rgbVarZ
    opts['rgbMeanX'] = rgbMeanX
    opts['rgbVarX'] = rgbVarX

    instanceWeightOp = tf.constant(instanceWeight, dtype=tf.float32)
    yOp = tf.placeholder(tf.float32, fixedLabel.shape)

    with tf.name_scope("logistic_loss"):
        lossOp1 = sn.loss(scoreOp1, yOp, instanceWeightOp)
    # Second stage: the gradient of the first loss w.r.t. l2Op is an input of
    # the second network.
    grad1 = tf.gradients(lossOp1, l2Op)
    scoreOp2 = sn.buildTrainNetwork1(l2Op, grad1[0], instanceOpt, opts)
    lossOp2 = sn.loss(scoreOp2, yOp, instanceWeightOp)

    tf.summary.scalar('loss', lossOp2)
    tf.summary.image('res1', scoreOp1)
    tf.summary.image('res2', scoreOp2)

    # Python-side error metrics are pushed into the graph through
    # placeholder -> assign so they show up in the merged summary.  The
    # variable name must be passed by keyword: the second positional
    # parameter of tf.Variable is `trainable`, not `name`.
    errDispVar = tf.Variable(0, name='tbVarErrDisp', dtype=tf.float32)
    errDispPH = tf.placeholder(tf.float32, shape=())
    errDispSummary = errDispVar.assign(errDispPH)
    tf.summary.scalar("errDisp", errDispSummary)

    errMaxVar = tf.Variable(0, name='tbVarErrMax', dtype=tf.float32)
    errMaxPH = tf.placeholder(tf.float32, shape=())
    errMaxSummary = errMaxVar.assign(errMaxPH)
    tf.summary.scalar("errMax", errMaxSummary)

    # Train and save part of the network: the scala1..5 variables are only
    # restored from the base checkpoint.
    siamese_original_list = []
    for scalaIdx in range(1, 6):
        siamese_original_list += tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES,
            scope='siamese/scala%d/' % scalaIdx)
    saver_restore = tf.train.Saver(var_list=siamese_original_list)

    # Optimise only the variables whose name contains '_z'.
    optimizer4 = tf.train.MomentumOptimizer(learning_rate=lr,
                                            momentum=opts['momentum'])
    var_list4 = [var for var in tf.trainable_variables() if '_z' in var.name]
    grads4 = optimizer4.compute_gradients(lossOp2, var_list=var_list4)
    gradsOp4 = optimizer4.apply_gradients(grads_and_vars=grads4)

    # Run only the batch-norm update ops that belong to the '_z' branch.
    batchNormUpdates_res = [
        op for op in tf.get_collection(UPDATE_OPS_COLLECTION)
        if '_z' in op.name
    ]
    batchNormUpdatesOp_res = tf.group(*batchNormUpdates_res)
    trainOp = tf.group(gradsOp4, batchNormUpdatesOp_res)

    summaryOp = tf.summary.merge_all()
    writer = tf.summary.FileWriter(opts['summaryFile'])
    saver = tf.train.Saver(max_to_keep=40)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    saver_restore.restore(sess, './ckpt/base_256/model_epoch45.ckpt')
    writer.add_graph(sess.graph)

    step = 0
    epochStep = opts['numPairs'] / batchSize

    for i in range(opts['start'], opts['trainNumEpochs']):
        # ------------------------------ training ------------------------------
        trainSamples = opts['numPairs'] * (1 - opts['validation'])
        sampleNum = 0
        errDisp = 0
        errMax = 0
        sampleIdx = np.random.permutation(int(trainSamples))

        # NOTE(review): the "- 9" margin is hard-coded in the original.
        while sampleNum < trainSamples - 9:
            # time.clock() was removed in Python 3.8; use wall-clock time.
            t0 = time.time()
            batch = sampleIdx[sampleNum:sampleNum + batchSize]
            imoutZ1, imoutX, _ = vidGetRandBatch(imdbInd, imdb, batch, opts)

            # NOTE(review): instanceOpt is fed the same crops as instanceOp;
            # the third array returned by vidGetRandBatch is not used.
            feed = {
                exemplarOp1: imoutZ1,
                instanceOp: imoutX,
                instanceOpt: imoutX,
                yOp: fixedLabel,
                lr: opts['trainLr1'][i]
            }

            # Score is evaluated before the parameter update.
            score = sess.run(scoreOp2, feed_dict=feed)
            errDisp = centerThrErr(score, labels, errDisp, sampleNum)
            errMax = maxScoreErr(score, labels, errMax, sampleNum)

            sess.run(trainOp, feed_dict=feed)

            summaryFeed = dict(feed)
            summaryFeed[errDispPH] = errDisp
            summaryFeed[errMaxPH] = errMax
            _, _, s, l2, l1 = sess.run(
                [errDispSummary, errMaxSummary, summaryOp, lossOp2, lossOp1],
                feed_dict=summaryFeed)
            writer.add_summary(s, step)

            sampleNum = sampleNum + batchSize
            step = step + 1
            print(
                'the %d epoch %d round training is finished in %f, errDisp: %f, loss1: %f, loss2: %f'
                % (i, np.mod(step, epochStep), time.time() - t0, errDisp, l1,
                   l2))

        # ----------------------------- checkpoint -----------------------------
        if not os.path.exists(opts['ckptPath']):
            # makedirs also creates missing parent directories.
            os.makedirs(opts['ckptPath'])
        ckptName = os.path.join(opts['ckptPath'],
                                'model_epoch' + str(i) + '.ckpt')
        saver.save(sess, ckptName)

        # ----------------------------- validation -----------------------------
        valSamples = opts['numPairs'] * opts['validation']
        sampleNum = 0
        errDisp = 0
        errMax = 0
        # Validation indices are offset past the training indices.
        sampleIdx = np.random.permutation(int(valSamples)) + int(trainSamples)

        while sampleNum < valSamples - 9:
            t0 = time.time()
            batch = sampleIdx[sampleNum:sampleNum + batchSize]
            imoutZ1, imoutX, _ = vidGetRandBatch(imdbInd, imdb, batch, opts)

            feed = {
                exemplarOp1: imoutZ1,
                instanceOp: imoutX,
                instanceOpt: imoutX,
                yOp: fixedLabel,
                lr: opts['trainLr1'][i]
            }

            score = sess.run(scoreOp2, feed_dict=feed)
            errDisp = centerThrErr(score, labels, errDisp, sampleNum)
            errMax = maxScoreErr(score, labels, errMax, sampleNum)

            # No trainOp here: validation only evaluates and logs.
            summaryFeed = dict(feed)
            summaryFeed[errDispPH] = errDisp
            summaryFeed[errMaxPH] = errMax
            _, _, s, l2, l1 = sess.run(
                [errDispSummary, errMaxSummary, summaryOp, lossOp2, lossOp1],
                feed_dict=summaryFeed)
            writer.add_summary(s, step)

            sampleNum = sampleNum + batchSize
            step = step + 1
            # Message text is intentionally identical to the training loop's.
            print(
                'the %d epoch %d round training is finished in %f, errDisp: %f, loss1: %f, loss2: %f'
                % (i, np.mod(step, epochStep), time.time() - t0, errDisp, l1,
                   l2))

    return
rect.set_xy(results[i, :2]) rect.set_width(results[i, 2]) rect.set_height(results[i, 3]) if display: plt.pause(.01) plt.draw() plt.close() return results, gt, nImgs / (time.time() - tic) if __name__ == '__main__': os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' os.environ['C_CPP_MIN_LOG_LEVEL'] = '3' opts = configParams() opts = getOpts(opts) '''define input tensors and network''' exemplarOp_init = tf.placeholder( tf.float32, [1, opts['exemplarSize'], opts['exemplarSize'], 3]) instanceOp_init = tf.placeholder( tf.float32, [1, opts['instanceSize'], opts['instanceSize'], 3]) instanceOp = tf.placeholder( tf.float32, [3, opts['instanceSize'], opts['instanceSize'], 3]) template_Op = tf.placeholder(tf.float32, [1, 6, 6, 256]) search_tr_Op = tf.placeholder(tf.float32, [3, 22, 22, 32]) isTrainingOp = tf.convert_to_tensor(False, dtype='bool', name='is_training') lr = tf.constant(0.0001, dtype='float32') sn = SiameseNet()