def get_data_queue(args, coord, is_training=True): h, w = map(int, args.input_size.split(',')) input_size_img = (h, w) input_size_label = (h / FEATSTRIDE, w / FEATSTRIDE) # Load reader. if is_training: with tf.name_scope("create_train_inputs"): reader_train = ImageReader( args.data_dir, args.data_train_list, input_size_img, input_size_label, RANDOM_SCALE, IMG_MEAN, coord) image_batch_train, label_batch_train = reader_train.dequeue(args.batch_size) return image_batch_train, label_batch_train else: with tf.name_scope("create_val_inputs"): reader_val = ImageReader( args.data_dir, args.data_val_list, input_size_img, input_size_label, False, IMG_MEAN, coord) image_batch_val, label_batch_val = reader_val.dequeue(args.batch_size, is_training=False) return image_batch_val, label_batch_val
def eval(net, data_dict, ensemble_num, recalls): net.eval() data_set = ImageReader(data_dict, get_transform(DATA_NAME, 'test')) data_loader = DataLoader(data_set, BATCH_SIZE, shuffle=False, num_workers=8) features = [] with torch.no_grad(): for inputs, labels in data_loader: out = net(inputs.to(DEVICE)) out = F.normalize(out) features.append(out.cpu()) features = torch.cat(features, 0) torch.save( features, 'results/{}_test_features_{:03}.pth'.format(DATA_NAME, ensemble_num)) # load feature vectors features = [ torch.load('results/{}_test_features_{:03}.pth'.format(DATA_NAME, d)) for d in range(1, ensemble_num + 1) ] features = torch.cat(features, 1) acc_list = recall(features, data_set.labels, rank=recalls) desc = '' for index, recall_id in enumerate(recalls): desc += 'R@{}:{:.2f}% '.format(recall_id, acc_list[index] * 100) print(desc)
def predict_setup(self): # Create queue coordinator. self.coord = tf.train.Coordinator() # Load reader with tf.name_scope("create_inputs"): reader = ImageReader( self.conf.data_dir, self.conf.test_data_list, None, # the images have different sizes False, # no data-aug False, # no data-aug self.conf.ignore_label, IMG_MEAN, self.coord) image, label = reader.image, reader.label # [h, w, 3 or 1] # Add one batch dimension [1, h, w, 3 or 1] image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0) # Create network if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']: print('encoder_name ERROR!') print("Please input: res101, res50, or deeplab") sys.exit(-1) elif self.conf.encoder_name == 'deeplab': net = Deeplab_v2(image_batch, self.conf.num_classes, False) else: net = ResNet_segmentation(image_batch, self.conf.num_classes, False, self.conf.encoder_name) # Predictions. global raw_output_ raw_output = tf.nn.softmax(net.outputs, axis=-1) prior1 = self.conf.prior prior = 1 - prior1 class0, class1 = tf.split(raw_output, 2, -1) class0 = (class0 * prior) / (class0 * prior + class1 * prior1) class1 = (class1 * prior1) / (class0 * prior + class1 * prior1) raw_output = tf.concat([class0, class1], -1) raw_output_ = raw_output raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) raw_output = tf.argmax(raw_output, axis=3) self.pred = tf.cast(tf.expand_dims(raw_output, dim=3), tf.uint8) # Create directory if not os.path.exists(self.conf.out_dir + '/prediction' + '/' + str(prior1)): #os.makedirs(self.conf.out_dir) os.makedirs(self.conf.out_dir + '/prediction' + '/' + str(prior1)) if self.conf.visual: os.makedirs(self.conf.out_dir + '/visual_prediction' + '/' + str(prior1)) # Loader for loading the checkpoint self.loader = tf.train.Saver(var_list=tf.global_variables())
def test_setup(self): # Create queue coordinator. num_layers = 50 self.coord = tf.train.Coordinator() # Load reader with tf.name_scope("create_inputs"): reader = ImageReader( self.conf.data_dir, self.conf.valid_data_list, None, # the images have different sizes False, # no data-aug False, # no data-aug self.conf.ignore_label, IMG_MEAN, self.coord) image, label = reader.image, reader.label # [h, w, 3 or 1] # Add one batch dimension [1, h, w, 3 or 1] self.image_batch, self.label_batch = tf.expand_dims( image, dim=0), tf.expand_dims(label, dim=0) # Create network net, end_points = deeplabv3(self.image_batch, num_classes=self.conf.num_classes, depth=num_layers, is_training=True) raw_output = end_points['resnet{}/logits'.format(num_layers)] # predictions #raw_output = net.o # [batch_size, 41, 41, 21] raw_output = tf.image.resize_bilinear( raw_output, tf.shape(self.image_batch)[1:3, ]) raw_output = tf.argmax(raw_output, axis=3) pred = tf.expand_dims(raw_output, dim=3) self.pred = tf.reshape(pred, [ -1, ]) # labels gt = tf.reshape(self.label_batch, [ -1, ]) # Ignoring all labels greater than or equal to n_classes. temp = tf.less_equal(gt, self.conf.num_classes - 1) weights = tf.cast(temp, tf.int32) # fix for tf 1.3.0 gt = tf.where(temp, gt, tf.cast(temp, tf.uint8)) # Pixel accuracy self.accu, self.accu_update_op = tf.contrib.metrics.streaming_accuracy( self.pred, gt, weights=weights) # mIoU self.mIoU, self.mIou_update_op = tf.contrib.metrics.streaming_mean_iou( self.pred, gt, num_classes=self.conf.num_classes, weights=weights) # Loader for loading the checkpoint self.loader = tf.train.Saver(var_list=tf.global_variables())
def test_setup(self): # Load reader with tf.name_scope("create_inputs"): reader = ImageReader(self.conf.data_dir, self.conf.valid_data_list, None, False, False, self.conf.ignore_label, IMG_MEAN, self.coord) image, label = reader.image, reader.label # [h, w, 3 or 1] # Add one batch dimension [1, h, w, 3 or 1] self.image_batch, self.label_batch = tf.expand_dims( image, dim=0), tf.expand_dims(label, dim=0) # Create network if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']: print('encoder_name ERROR!') print("Please input: res101, res50, or deeplab") sys.exit(-1) elif self.conf.encoder_name == 'deeplab': net = Deeplab_v2(self.image_batch, self.conf.num_classes, False) else: net = ResNet_segmentation(self.image_batch, self.conf.num_classes, False, self.conf.encoder_name) pass # predictions raw_output = net.outputs raw_output = tf.image.resize_bilinear( raw_output, tf.shape(self.image_batch)[1:3, ]) raw_output = tf.argmax(raw_output, axis=3) pred = tf.expand_dims(raw_output, dim=3) self.pred = tf.reshape(pred, [ -1, ]) # labels gt = tf.reshape(self.label_batch, [ -1, ]) # Ignoring all labels greater than or equal to n_classes. temp = tf.less_equal(gt, self.conf.num_classes - 1) weights = tf.cast(temp, tf.int32) # fix for tf 1.3.0 gt = tf.where(temp, gt, tf.cast(temp, tf.uint8)) # Pixel accuracy self.accu, self.accu_update_op = tcm.streaming_accuracy( self.pred, gt, weights=weights) # mIoU self.mIoU, self.mIou_update_op = tcm.streaming_mean_iou( self.pred, gt, self.conf.num_classes, weights) # confusion matrix self.confusion_matrix = tcm.confusion_matrix( self.pred, gt, num_classes=self.conf.num_classes, weights=weights) # Loader for loading the checkpoint self.loader = tf.train.Saver(var_list=tf.global_variables()) pass
def predict_setup(self): # Create queue coordinator. self.coord = tf.train.Coordinator() # Load reader with tf.name_scope("create_inputs"): reader = ImageReader( self.conf.data_dir, self.conf.test_data_list, None, # the images have different sizes False, # no data-aug False, # no data-aug self.conf.ignore_label, IMG_MEAN, self.coord) image, label = reader.image, reader.label # [h, w, 3 or 1] # Add one batch dimension [1, h, w, 3 or 1] image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0) h_orig, w_orig = tf.to_float(tf.shape(image_batch)[1]), tf.to_float(tf.shape(image_batch)[2]) image_batch_075 = tf.image.resize_images(image_batch, tf.stack([tf.to_int32(tf.multiply(h_orig, 0.75)), tf.to_int32(tf.multiply(w_orig, 0.75))])) image_batch_05 = tf.image.resize_images(image_batch, tf.stack([tf.to_int32(tf.multiply(h_orig, 0.5)), tf.to_int32(tf.multiply(w_orig, 0.5))])) # Create network if self.conf.encoder_name not in ['res101', 'res50']: print('encoder_name ERROR!') print("Please input: res101, res50") sys.exit(-1) else: with tf.variable_scope('', reuse=False): net = ResNet_segmentation(image_batch, self.conf.num_classes, False, self.conf.encoder_name) with tf.variable_scope('', reuse=True): net075 = ResNet_segmentation(image_batch_075, self.conf.num_classes, False, self.conf.encoder_name) with tf.variable_scope('', reuse=True): net05 = ResNet_segmentation(image_batch_05, self.conf.num_classes, False, self.conf.encoder_name) # predictions # Network raw output raw_output100 = net.outputs raw_output075 = net075.outputs raw_output05 = net05.outputs raw_output = tf.reduce_max(tf.stack([raw_output100, tf.image.resize_images(raw_output075, tf.shape(raw_output100)[1:3,]), tf.image.resize_images(raw_output05, tf.shape(raw_output100)[1:3,])]), axis=0) raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,]) raw_output = tf.argmax(raw_output, axis=3) self.pred = tf.cast(tf.expand_dims(raw_output, dim=3), tf.uint8) # Create directory if not os.path.exists(self.conf.out_dir): os.makedirs(self.conf.out_dir) os.makedirs(self.conf.out_dir + '/prediction') if self.conf.visual: os.makedirs(self.conf.out_dir + '/visual_prediction') # Loader for loading the checkpoint self.loader = tf.train.Saver(var_list=tf.global_variables())
def predict_setup(self): # Create queue coordinator. self.coord = tf.train.Coordinator() # Load reader with tf.name_scope("create_inputs"): reader = ImageReader( self.conf.data_dir, self.conf.test_data_list, None, # the images have different sizes False, # no data-aug False, # no data-aug self.conf.ignore_label, IMG_MEAN, self.coord) image, label = reader.image, reader.label # [h, w, 3 or 1] # Add one batch dimension [1, h, w, 3 or 1] image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0) # Create network if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']: print('encoder_name ERROR!') print("Please input: res101, res50, or deeplab") sys.exit(-1) elif self.conf.encoder_name == 'deeplab': net = Deeplab_v2(image_batch, self.conf.num_classes, False) else: net = ResNet_segmentation(image_batch, self.conf.num_classes, False, self.conf.encoder_name) # Predictions. raw_output = net.outputs raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) print("Vor dem argmax: ", type(raw_output)) # array = raw_output.eval(session=self.sess) # print("Array ist:",array) # raw_output = tf.argmax(raw_output, axis=3) print("nach dem argmax", raw_output) raw_output_sm = tf.nn.softmax(raw_output) self.pred = tf.cast(tf.expand_dims(raw_output_sm, dim=3), tf.float32) print("Prediction is: ", self.pred) # Create directory if not os.path.exists(self.conf.out_dir): os.makedirs(self.conf.out_dir) os.makedirs(self.conf.out_dir + '/prediction') if self.conf.visual: os.makedirs(self.conf.out_dir + '/visual_prediction') # Loader for loading the checkpoint self.loader = tf.train.Saver(var_list=tf.global_variables())
def detect_image_folder(image_folder, output_path, model_config_path, model_checkpoint_path): """ Runs object detection on a folder of images. Saves the results to a csv """ img_reader = ImageReader(image_folder) model = Detector(model_config_path, model_checkpoint_path) results = model.detect_images(img_reader.load_images(), img_reader.filenames) results.to_csv(output_path, index=None)
def predict_setup(self): # Load reader with tf.name_scope("create_inputs"): reader = ImageReader(self.conf.data_dir, self.conf.test_data_list, None, False, False, self.conf.ignore_label, IMG_MEAN, self.coord) image, label = reader.image, reader.label # [h, w, 3 or 1] # Add one batch dimension [1, h, w, 3 or 1] image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0) # Create network if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']: print('encoder_name ERROR!') print("Please input: res101, res50, or deeplab") sys.exit(-1) elif self.conf.encoder_name == 'deeplab': net = Deeplab_v2(image_batch, self.conf.num_classes, False) else: net = ResNet_segmentation(image_batch, self.conf.num_classes, False, self.conf.encoder_name) pass # Predictions. raw_output = net.outputs raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) raw_output = tf.argmax(raw_output, axis=3) self.pred = tf.cast(tf.expand_dims(raw_output, dim=3), tf.uint8) # Create directory if not os.path.exists(self.conf.out_dir): os.makedirs(self.conf.out_dir) os.makedirs(self.conf.out_dir + '/prediction') if self.conf.visual: os.makedirs(self.conf.out_dir + '/visual_prediction') # Loader for loading the checkpoint self.loader = tf.train.Saver(var_list=tf.global_variables()) pass
def __init__(self, images, config, norm=None, shuffle=True): self.images = images # self.true_box_buffer = config['true_box_buffer'] # Maximun objects per box!! self.batch_size = config['batch_size'] self.anchors = config['anchors'] self.nb_anchors = len(config['anchors']) self.img_w, self.img_h = config['image_shape'] self.grid = config['grid'] self.img_encoder = ImageReader(img_width=self.img_w, img_height=self.img_h, norm=norm, grid=self.grid) self.labels = np.array(config['labels']) self.shuffle = shuffle if self.shuffle: np.random.shuffle(self.images)
def train(net, data_dict, optim): net.train() data_set = ImageReader(data_dict, get_transform(DATA_NAME, 'train')) data_loader = DataLoader(data_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=8) l_data, t_data, n_data = 0.0, 0, 0 for inputs, labels in data_loader: optim.zero_grad() out = net(inputs.to(DEVICE)) loss = criterion(out, labels.to(DEVICE)) print('loss:{:.4f}'.format(loss.item()), end='\r') loss.backward() optim.step() _, pred = torch.max(out, 1) l_data += loss.item() t_data += torch.sum(pred.cpu() == labels).item() n_data += len(labels) return l_data / n_data, t_data / n_data
def test_setup(self): # Create queue coordinator. self.coord = tf.train.Coordinator() # Load reader with tf.name_scope("create_inputs"): reader = ImageReader( self.conf.data_dir, self.conf.valid_data_list, None, # the images have different sizes False, # no data-aug False, # no data-aug self.conf.ignore_label, IMG_MEAN, self.coord) image, label = reader.image, reader.label # [h, w, 3 or 1] # Add one batch dimension [1, h, w, 3 or 1] self.image_batch, self.label_batch = tf.expand_dims( image, dim=0), tf.expand_dims(label, dim=0) h_orig, w_orig = tf.to_float(tf.shape( self.image_batch)[1]), tf.to_float(tf.shape(self.image_batch)[2]) image_batch_075 = tf.image.resize_images( self.image_batch, tf.stack([ tf.to_int32(tf.multiply(h_orig, 0.75)), tf.to_int32(tf.multiply(w_orig, 0.75)) ])) image_batch_05 = tf.image.resize_images( self.image_batch, tf.stack([ tf.to_int32(tf.multiply(h_orig, 0.5)), tf.to_int32(tf.multiply(w_orig, 0.5)) ])) # Create network if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']: print('encoder_name ERROR!') print("Please input: res101, res50, or deeplab") sys.exit(-1) elif self.conf.encoder_name == 'deeplab': with tf.variable_scope('', reuse=False): net = Deeplab_v2(self.image_batch, self.conf.num_classes, False) with tf.variable_scope('', reuse=True): net075 = Deeplab_v2(image_batch_075, self.conf.num_classes, False) with tf.variable_scope('', reuse=True): net05 = Deeplab_v2(image_batch_05, self.conf.num_classes, False) else: with tf.variable_scope('', reuse=False): net = ResNet_segmentation(self.image_batch, self.conf.num_classes, False, self.conf.encoder_name) with tf.variable_scope('', reuse=True): net075 = ResNet_segmentation(image_batch_075, self.conf.num_classes, False, self.conf.encoder_name) with tf.variable_scope('', reuse=True): net05 = ResNet_segmentation(image_batch_05, self.conf.num_classes, False, self.conf.encoder_name) # predictions # Network raw output raw_output100 = net.outputs raw_output075 = net075.outputs raw_output05 = net05.outputs raw_output = tf.reduce_max(tf.stack([ raw_output100, tf.image.resize_images(raw_output075, tf.shape(raw_output100)[1:3, ]), tf.image.resize_images(raw_output05, tf.shape(raw_output100)[1:3, ]) ]), axis=0) raw_output = tf.image.resize_bilinear( raw_output, tf.shape(self.image_batch)[1:3, ]) raw_output = tf.argmax(raw_output, axis=3) pred = tf.expand_dims(raw_output, dim=3) self.pred = tf.reshape(pred, [ -1, ]) # labels gt = tf.reshape(self.label_batch, [ -1, ]) # Ignoring all labels greater than or equal to n_classes. temp = tf.less_equal(gt, self.conf.num_classes - 1) weights = tf.cast(temp, tf.int32) # fix for tf 1.3.0 gt = tf.where(temp, gt, tf.cast(temp, tf.uint8)) # Pixel accuracy self.accu, self.accu_update_op = tf.contrib.metrics.streaming_accuracy( self.pred, gt, weights=weights) # mIoU self.mIoU, self.mIou_update_op = tf.contrib.metrics.streaming_mean_iou( self.pred, gt, num_classes=self.conf.num_classes, weights=weights) # confusion matrix self.confusion_matrix = tf.contrib.metrics.confusion_matrix( self.pred, gt, num_classes=self.conf.num_classes, weights=weights) # Loader for loading the checkpoint self.loader = tf.train.Saver(var_list=tf.global_variables())
def train_setup(self): tf.set_random_seed(self.conf.random_seed) # Create queue coordinator. self.coord = tf.train.Coordinator() # Input size h, w = (self.conf.input_height, self.conf.input_width) input_size = (h, w) # Load reader with tf.name_scope("create_inputs"): reader = ImageReader(self.conf.data_dir, self.conf.data_list, input_size, self.conf.random_scale, self.conf.random_mirror, self.conf.ignore_label, IMG_MEAN, self.coord) self.image_batch, self.label_batch = reader.dequeue( self.conf.batch_size) image_batch_075 = tf.image.resize_images( self.image_batch, [int(h * 0.75), int(w * 0.75)]) image_batch_05 = tf.image.resize_images( self.image_batch, [int(h * 0.5), int(w * 0.5)]) # #testWWang # image = self.image_batch[0] # label = self.label_batch[0] # utils.save_image(image, "/home/py36tf14/wangyichao/Deeplab-v2--ResNet-101--Tensorflow-master/images", # name = image ,mean = IMG_MEAN) # utils.save_image(label, "/home/py36tf14/wangyichao/Deeplab-v2--ResNet-101--Tensorflow-master/images", # name=label, mean=IMG_MEAN) # # #end # Create network if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']: print('encoder_name ERROR!') print("Please input: res101, res50, or deeplab") sys.exit(-1) elif self.conf.encoder_name == 'deeplab': with tf.variable_scope('', reuse=False): net = Deeplab_v2(self.image_batch, self.conf.num_classes, True) with tf.variable_scope('', reuse=True): net075 = Deeplab_v2(image_batch_075, self.conf.num_classes, True) with tf.variable_scope('', reuse=True): net05 = Deeplab_v2(image_batch_05, self.conf.num_classes, True) # Variables that load from pre-trained model. restore_var = [ v for v in tf.global_variables() if 'fc' not in v.name ] # Trainable Variables all_trainable = tf.trainable_variables() # Fine-tune part encoder_trainable = [ v for v in all_trainable if 'fc' not in v.name ] # lr * 1.0 # Decoder part decoder_trainable = [v for v in all_trainable if 'fc' in v.name] else: with tf.variable_scope('', reuse=False): net = ResNet_segmentation(self.image_batch, self.conf.num_classes, True, self.conf.encoder_name) with tf.variable_scope('', reuse=True): net075 = ResNet_segmentation(image_batch_075, self.conf.num_classes, True, self.conf.encoder_name) with tf.variable_scope('', reuse=True): net05 = ResNet_segmentation(image_batch_05, self.conf.num_classes, True, self.conf.encoder_name) # Variables that load from pre-trained model. 
restore_var = [ v for v in tf.global_variables() if 'resnet_v1' in v.name ] # Trainable Variables all_trainable = tf.trainable_variables() # Fine-tune part encoder_trainable = [ v for v in all_trainable if 'resnet_v1' in v.name ] # lr * 1.0 # Decoder part decoder_trainable = [ v for v in all_trainable if 'decoder' in v.name ] decoder_w_trainable = [ v for v in decoder_trainable if 'weights' in v.name or 'gamma' in v.name ] # lr * 10.0 decoder_b_trainable = [ v for v in decoder_trainable if 'biases' in v.name or 'beta' in v.name ] # lr * 20.0 # Check assert (len(all_trainable) == len(decoder_trainable) + len(encoder_trainable)) assert (len(decoder_trainable) == len(decoder_w_trainable) + len(decoder_b_trainable)) # Network raw output raw_output100 = net.outputs raw_output075 = net075.outputs raw_output05 = net05.outputs raw_output = tf.reduce_max(tf.stack([ raw_output100, tf.image.resize_images(raw_output075, tf.shape(raw_output100)[1:3, ]), tf.image.resize_images(raw_output05, tf.shape(raw_output100)[1:3, ]) ]), axis=0) # Groud Truth: ignoring all labels greater or equal than n_classes label_proc = prepare_label(self.label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=self.conf.num_classes, one_hot=False) # [batch_size, h, w] label_proc075 = prepare_label(self.label_batch, tf.stack(raw_output075.get_shape()[1:3]), num_classes=self.conf.num_classes, one_hot=False) label_proc05 = prepare_label(self.label_batch, tf.stack(raw_output05.get_shape()[1:3]), num_classes=self.conf.num_classes, one_hot=False) raw_gt = tf.reshape(label_proc, [ -1, ]) raw_gt075 = tf.reshape(label_proc075, [ -1, ]) raw_gt05 = tf.reshape(label_proc05, [ -1, ]) indices = tf.squeeze( tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1) indices075 = tf.squeeze( tf.where(tf.less_equal(raw_gt075, self.conf.num_classes - 1)), 1) indices05 = tf.squeeze( tf.where(tf.less_equal(raw_gt05, self.conf.num_classes - 1)), 1) gt = tf.cast(tf.gather(raw_gt, indices), tf.int32) gt075 = tf.cast(tf.gather(raw_gt075, indices075), tf.int32) gt05 = tf.cast(tf.gather(raw_gt05, indices05), tf.int32) raw_prediction = tf.reshape(raw_output, [-1, self.conf.num_classes]) raw_prediction100 = tf.reshape(raw_output100, [-1, self.conf.num_classes]) raw_prediction075 = tf.reshape(raw_output075, [-1, self.conf.num_classes]) raw_prediction05 = tf.reshape(raw_output05, [-1, self.conf.num_classes]) prediction = tf.gather(raw_prediction, indices) prediction100 = tf.gather(raw_prediction100, indices) prediction075 = tf.gather(raw_prediction075, indices075) prediction05 = tf.gather(raw_prediction05, indices05) # Pixel-wise softmax_cross_entropy loss loss = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=prediction, labels=gt) loss100 = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=prediction100, labels=gt) loss075 = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=prediction075, labels=gt075) loss05 = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=prediction05, labels=gt05) # L2 regularization l2_losses = [ self.conf.weight_decay * tf.nn.l2_loss(v) for v in all_trainable if 'weights' in v.name ] # Loss function self.reduced_loss = tf.reduce_mean(loss) + tf.reduce_mean( loss100) + tf.reduce_mean(loss075) + tf.reduce_mean( loss05) + tf.add_n(l2_losses) # Define optimizers # 'poly' learning rate base_lr = tf.constant(self.conf.learning_rate) self.curr_step = tf.placeholder(dtype=tf.float32, shape=()) learning_rate = tf.scalar_mul( base_lr, tf.pow((1 - self.curr_step / self.conf.num_steps), self.conf.power)) # We 
have several optimizers here in order to handle the different lr_mult # which is a kind of parameters in Caffe. This controls the actual lr for each # layer. opt_encoder = tf.train.MomentumOptimizer(learning_rate, self.conf.momentum) opt_decoder_w = tf.train.MomentumOptimizer(learning_rate * 10.0, self.conf.momentum) opt_decoder_b = tf.train.MomentumOptimizer(learning_rate * 20.0, self.conf.momentum) # Gradient accumulation # Define a variable to accumulate gradients. accum_grads = [ tf.Variable(tf.zeros_like(v.initialized_value()), trainable=False) for v in encoder_trainable + decoder_w_trainable + decoder_b_trainable ] # Define an operation to clear the accumulated gradients for next batch. self.zero_op = [v.assign(tf.zeros_like(v)) for v in accum_grads] # To make sure each layer gets updated by different lr's, we do not use 'minimize' here. # Instead, we separate the steps compute_grads+update_params. # Compute grads grads = tf.gradients( self.reduced_loss, encoder_trainable + decoder_w_trainable + decoder_b_trainable) # Accumulate and normalise the gradients. self.accum_grads_op = [ accum_grads[i].assign_add(grad / self.conf.grad_update_every) for i, grad in enumerate(grads) ] grads = tf.gradients( self.reduced_loss, encoder_trainable + decoder_w_trainable + decoder_b_trainable) grads_encoder = accum_grads[:len(encoder_trainable)] grads_decoder_w = accum_grads[len(encoder_trainable):( len(encoder_trainable) + len(decoder_w_trainable))] grads_decoder_b = accum_grads[(len(encoder_trainable) + len(decoder_w_trainable)):] # Update params train_op_conv = opt_encoder.apply_gradients( zip(grads_encoder, encoder_trainable)) train_op_fc_w = opt_decoder_w.apply_gradients( zip(grads_decoder_w, decoder_w_trainable)) train_op_fc_b = opt_decoder_b.apply_gradients( zip(grads_decoder_b, decoder_b_trainable)) # Finally, get the train_op! update_ops = tf.get_collection( tf.GraphKeys.UPDATE_OPS ) # for collecting moving_mean and moving_variance with tf.control_dependencies(update_ops): self.train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b) # Saver for storing checkpoints of the model self.saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=0) # Loader for loading the pre-trained model self.loader = tf.train.Saver(var_list=restore_var) # Training summary # Processed predictions: for visualisation. raw_output_up = tf.image.resize_bilinear(raw_output, input_size) raw_output_up = tf.argmax(raw_output_up, axis=3) self.pred = tf.expand_dims(raw_output_up, dim=3) # Image summary. images_summary = tf.py_func(inv_preprocess, [self.image_batch, 1, IMG_MEAN], tf.uint8) labels_summary = tf.py_func( decode_labels, [self.label_batch, 1, self.conf.num_classes], tf.uint8) preds_summary = tf.py_func(decode_labels, [self.pred, 1, self.conf.num_classes], tf.uint8) self.total_summary = tf.summary.image( 'images', tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]), max_outputs=20) # Concatenate row-wise. if not os.path.exists(self.conf.logdir): os.makedirs(self.conf.logdir) self.summary_writer = tf.summary.FileWriter( self.conf.logdir, graph=tf.get_default_graph())
def main(): args = get_arguments() print('SETUP TrainConfig...') train_cfg = TrainConfig(args) train_cfg.display() # print('SETUP EvalConfig...') eval_cfg = EvalConfig(args) # eval_cfg.display() train_reader = ImageReader(train_cfg) eval_reader = ImageReader(eval_cfg) train_net = ICNet(train_cfg, train_reader, eval_reader) _train_op, _losses, _summaries, _Preds, _IoUs, _Images = train_net.optimizer( ) vis = Visualizer(eval_cfg) global_step = train_net.start_step epoch_step = int( len(train_reader.attribute_list) / train_cfg.BATCH_SIZE + 0.5) start_epoch = int(global_step / epoch_step) save_step = int(epoch_step * train_cfg.SAVE_PERIOD) all_steps = int(len(eval_reader.attribute_list) / (eval_cfg.BATCH_SIZE)) g_eval_step = 0 train_fd = {train_net.handle: train_net.train_handle} eval_fd = {train_net.handle: train_net.eval_handle} for epochs in range(start_epoch, train_cfg.TRAIN_EPOCHS): epoch_loss = None start_batch = global_step % epoch_step print(f'Start batch - {start_batch}') print(f'Epoch step - {epoch_step}') for steps in range(start_batch, epoch_step): start_time = time.time() _, losses = train_net.sess.run([_train_op, _losses], feed_dict=train_fd) if epoch_loss is None: epoch_loss = np.array(losses) else: epoch_loss += np.array(losses) if global_step % save_step == 0: train_net.save(global_step) global_step += 1 duration = time.time() - start_time msg = ( f'''step {global_step} \t total loss = {losses[3]:.3f}, sub4 = {losses[0]:.3f}, ''' f'''sub24 = {losses[1]:.3f}, sub124 = {losses[2]:.3f}, val_loss: {losses[4]:.3f}''' f'''({duration:.3f} sec/step)''') print(msg) epoch_loss /= (epoch_step - start_batch) accuracy = None for steps in range(all_steps - 1): start_time = time.time() IoUs = train_net.sess.run(_IoUs, feed_dict=eval_fd) if accuracy is None: accuracy = np.array(IoUs) else: accuracy += np.array(IoUs) g_eval_step += 1 duration = time.time() - start_time msg = ( f'''step {steps} \t mean_IoU = {IoUs[0]:.3f}, Person_IoU = {IoUs[1]:.3f}, ''' f'''Rider_IoU = {IoUs[2]:.3f}, ({duration:.3f} sec/step)''') print(msg) IoUs, Preds, Images = train_net.sess.run([_IoUs, _Preds, _Images], feed_dict=eval_fd) accuracy += np.array(IoUs) accuracy /= all_steps g_eval_step += 1 vis.save_and_show(Images, Preds, g_eval_step) feed_dict = { train_net.sum_loss: epoch_loss, train_net.sum_acc: accuracy } summaries = train_net.sess.run(_summaries, feed_dict=feed_dict) train_net.writer.add_summary(summaries, epochs)
def train_setup(self, reuse=False): tf.set_random_seed(self.conf.random_seed) num_layers = 50 #----------------------------------------------------------------------------------------- # Create queue coordinator. self.coord = tf.train.Coordinator() self.n_gpu = self.conf.n_gpu # Input size self.input_size = (self.conf.input_height, self.conf.input_width) j_step = 0 with tf.name_scope("create_inputs"): reader = ImageReader(self.conf.data_dir, self.conf.data_list, self.input_size, self.conf.random_scale, self.conf.random_mirror, self.conf.ignore_label, IMG_MEAN, self.coord) # print "1"*22 # print reader image_data, image_label = reader.dequeue(self.conf.batch_size) self.image_data = image_data if tf.__version__.startswith('1.'): split_train_data_node = tf.split(image_data, self.n_gpu) split_train_labels_node = tf.split(image_label, self.n_gpu) else: split_train_data_node = tf.split(0, self.n_gpu, image_data) split_train_labels_node = tf.split(0, self.n_gpu, image_label) with tf.variable_scope(tf.get_variable_scope()): all_loss = [] for device_index, (i, self.image_batch, self.label_batch) in enumerate( zip([1], split_train_data_node, split_train_labels_node)): with tf.device('/gpu:%d' % i): #print i with tf.name_scope('%s_%d' % ("gpu", i)) as scope: if j_step == 0: j_step = 1 pass else: reuse = True # net = DeepLab_v2_Network(self.image_batch, num_classes=self.conf.num_classes, # is_training=self.conf.is_training ,reuse=reuse) net, end_points = deeplabv3( self.image_batch, num_classes=self.conf.num_classes, depth=num_layers, is_training=True, reuse=reuse) self.raw_output = end_points[ 'gpu_{}/resnet{}/logits'.format(i, num_layers)] # Network raw output # [batch_size, 41, 41, 21] output_size = (self.raw_output.shape[1].value, self.raw_output.shape[2].value) label_proc = prepare_label( self.label_batch, output_size, num_classes=self.conf.num_classes, one_hot=False) # [batch_size, 41, 41] raw_gt = tf.reshape(label_proc, [ -1, ]) indices = tf.squeeze( tf.where( tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1) gt = tf.cast(tf.gather(raw_gt, indices), tf.int32) raw_prediction = tf.reshape( self.raw_output, [-1, self.conf.num_classes]) # print raw_prediction # print gt prediction = raw_prediction # prediction = tf.expand_dims(raw_prediction, 3) # prediction = tl.act.pixel_wise_softmax(prediction) # print prediction # print label_proc # loss = 1 - tl.cost.dice_coe(prediction, label_proc, axis=[1, 2, 3, 4]) loss = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=prediction, labels=gt) l2_losses = [ self.conf.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name ] # Loss function all_loss.append( tf.reduce_mean(loss) + tf.add_n(l2_losses)) tf.get_variable_scope().reuse_variables() # Output size #output_size = (self.raw_output.shape[1].value, self.raw_output.shape[2].value) # Variables that load from pre-trained model. # For training, last few layers should not be loaded. if self.conf.pretrain_file is not None: restore_var = [ v for v in tf.global_variables() if 'fc' not in v.name ] original_step = int(self.conf.pretrain_file.split("-")[-1]) else: original_step = 0 num_steps = self.conf.num_steps + original_step # Trainable Variables # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset) # if they are presented in var_list of the optimiser definition. # So we remove them from the list. 
all_trainable = [ v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name ] # Fine-tune part conv_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0 # ASPP part fc_trainable = [v for v in all_trainable if 'fc' in v.name] # fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name] # lr * 10.0 # fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name] # lr * 20.0 # check #assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable)) #assert (len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable)) # Groud Truth: ignoring all labels greater or equal than n_classes #label_proc = prepare_label(self.label_batch, output_size, num_classes=self.conf.num_classes, #one_hot=False) # [batch_size, 41, 41] #raw_gt = tf.reshape(label_proc, [-1, ]) #indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1) #gt = tf.cast(tf.gather(raw_gt, indices), tf.int32) #raw_prediction = tf.reshape(self.raw_output, [-1, self.conf.num_classes]) #prediction = tf.gather(raw_prediction, indices) # Pixel-wise softmax_cross_entropy loss #loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt) # L2 regularization #l2_losses = [self.conf.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name] # Loss function self.reduced_loss = tf.add_n(all_loss) / self.n_gpu #self.reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses) # Define optimizers # 'poly' learning rate base_lr = tf.constant(self.conf.learning_rate) self.curr_step = tf.placeholder(dtype=tf.float32, shape=()) self.loss_trans = tf.placeholder(dtype=tf.float32, shape=()) self.final_loss = (self.reduced_loss + self.loss_trans) / 2 learning_rate = tf.scalar_mul( base_lr, tf.pow((1 - self.curr_step / num_steps), self.conf.power)) #print self.conf.power self.learning_rate = learning_rate #print learning_rate # We have several optimizers here in order to handle the different lr_mult # which is a kind of parameters in Caffe. This controls the actual lr for each # layer. opt = tf.train.AdamOptimizer(learning_rate, self.conf.momentum, 0.98) #opt= tf.train.MomentumOptimizer(learning_rate, self.conf.momentum) #opt_fc_w = tf.train.AdamOptimizer(learning_rate , self.conf.momentum,0.98) #opt_fc_b = tf.train.AdamOptimizer(learning_rate , self.conf.momentum,0.98) #opt_conv = tf.train.MomentumOptimizer(learning_rate, self.conf.momentum) #opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, self.conf.momentum) #opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, self.conf.momentum) # To make sure each layer gets updated by different lr's, we do not use 'minimize' here. # Instead, we separate the steps compute_grads+update_params. # Compute grads grads_conv = tf.gradients(self.final_loss, conv_trainable) # train_op = opt.apply_gradients(zip(grads_conv, conv_trainable)) #grads = tf.gradients(self.reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable) grads_conv = grads_conv[:len(conv_trainable)] # grads_fc_w = grads[len(conv_trainable): (len(conv_trainable) + len(fc_w_trainable))] # grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):] # Update params train_op_conv = opt.apply_gradients(zip(grads_conv, conv_trainable)) # train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable)) # train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable)) # train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable)) # Finally, get the train_op! 
self.train_op = train_op_conv # Saver for storing checkpoints of the model self.saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=0) # Loader for loading the pre-trained model if self.conf.pretrain_file is not None: self.loader = tf.train.Saver(var_list=restore_var)
def train_setup(self): tf.set_random_seed(self.conf.random_seed) # Create queue coordinator. self.coord = tf.train.Coordinator() # Input size self.input_size = (self.conf.input_height, self.conf.input_width) # Load reader with tf.name_scope("create_inputs"): reader = ImageReader( self.conf.data_dir, self.conf.data_list, self.input_size, self.conf.random_scale, self.conf.random_mirror, self.conf.ignore_label, IMG_MEAN, self.coord) self.image_batch, self.label_batch = reader.dequeue(self.conf.batch_size) # Create network net = DeepLab_v2_Network(self.image_batch, num_classes=self.conf.num_classes, is_training=self.conf.is_training) #net = DeepLabVGGModel(self.image_batch, num_classes=self.conf.num_classes, # is_training=self.conf.is_training) # Network raw output self.raw_output = net.o # [batch_size, 41, 41, 21] self.raw_output=tf.image.resize_bilinear(self.raw_output, [350,350]) print(tf.shape(self.image_batch)) # Output size output_size = (self.raw_output.shape[1].value, self.raw_output.shape[2].value) # Variables that load from pre-trained model. # For training, last few layers should not be loaded. #restore_var = [v for v in tf.global_variables() if 'fc' not in v.name] #这个是对INIT初始化模型用的 restore_var = [v for v in tf.global_variables() ] #恢复所有的参数。 # Trainable Variables # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset) # if they are presented in var_list of the optimiser definition. # So we remove them from the list. all_trainable = [v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name] # Fine-tune part conv_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0 # ASPP part fc_trainable = [v for v in all_trainable if 'fc' in v.name] fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name] # lr * 10.0 fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name] # lr * 20.0 # check print(len(fc_trainable)) print(len(fc_w_trainable) + len(fc_b_trainable)) assert(len(all_trainable) == len(fc_trainable) + len(conv_trainable)) assert(len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable)) # Groud Truth: ignoring all labels greater or equal than n_classes label_proc = prepare_label(self.label_batch, output_size, num_classes=self.conf.num_classes, one_hot=False) # [batch_size, 41, 41] raw_gt = tf.reshape(label_proc, [-1,]) indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1) gt = tf.cast(tf.gather(raw_gt, indices), tf.int32) raw_prediction = tf.reshape(self.raw_output, [-1, self.conf.num_classes]) prediction = tf.gather(raw_prediction, indices) # Pixel-wise softmax_cross_entropy loss loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt) # L2 regularization l2_losses = [self.conf.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name] # Loss function self.reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses) # Define optimizers # 'poly' learning rate base_lr = tf.constant(self.conf.learning_rate) self.curr_step = tf.placeholder(dtype=tf.float32, shape=()) #learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - (15000+self.curr_step) /(15000+ self.conf.num_steps)), self.conf.power)) learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - (self.curr_step) /(self.conf.num_steps)), self.conf.power)) # We have several optimizers here in order to handle the different lr_mult # which is a kind of parameters in Caffe. This controls the actual lr for each # layer. 
opt_conv = tf.train.MomentumOptimizer(learning_rate, self.conf.momentum) opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, self.conf.momentum) opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, self.conf.momentum) # To make sure each layer gets updated by different lr's, we do not use 'minimize' here. # Instead, we separate the steps compute_grads+update_params. # Compute grads grads = tf.gradients(self.reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable) grads_conv = grads[:len(conv_trainable)] grads_fc_w = grads[len(conv_trainable) : (len(conv_trainable) + len(fc_w_trainable))] grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):] # Update params train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable)) train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable)) train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable)) # Finally, get the train_op! self.train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b) #self.train_op = tf.group(train_op_fc_w, train_op_fc_b) #只优化全连接部分 # Saver for storing checkpoints of the model self.saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=5) # Loader for loading the pre-trained model self.loader = tf.train.Saver(var_list=restore_var)
parser.add_argument('--warm_up', default=2, type=int, help='warm up number') parser.add_argument('--recalls', default='1,2,4,8', type=str, help='selected recall') opt = parser.parse_args() # args parse data_path, data_name, backbone_type = opt.data_path, opt.data_name, opt.backbone_type feature_dim, batch_size, num_epochs = opt.feature_dim, opt.batch_size, opt.num_epochs warm_up, recalls = opt.warm_up, [int(k) for k in opt.recalls.split(',')] save_name_pre = '{}_{}_{}'.format(data_name, backbone_type, feature_dim) results = {'train_loss': [], 'train_accuracy': []} for recall_id in recalls: results['test_recall@{}'.format(recall_id)] = [] # dataset loader train_data_set = ImageReader(data_path, data_name, 'train', backbone_type) train_data_loader = DataLoader(train_data_set, batch_size, shuffle=True, num_workers=8) test_data_set = ImageReader(data_path, data_name, 'test', backbone_type) test_data_loader = DataLoader(test_data_set, batch_size, shuffle=False, num_workers=8) # model setup, optimizer config and loss definition model = Model(backbone_type, feature_dim, len(train_data_set.class_to_idx)).cuda() optimizer = AdamW([{'params': model.backbone.parameters()}, {'params': model.refactor.parameters()}, {'params': model.fc.parameters(), 'lr': 1e-2}], lr=1e-4, weight_decay=1e-4) lr_scheduler = StepLR(optimizer, step_size=5, gamma=0.5) loss_criterion = ProxyAnchorLoss() data_base = {'test_images': test_data_set.images, 'test_labels': test_data_set.labels} best_recall = 0.0 for epoch in range(1, num_epochs + 1):
def main(): """Create the model and start the evaluation process.""" args = get_arguments() # Create queue coordinator. coord = tf.train.Coordinator() # Load reader. with tf.name_scope("create_inputs"): reader = ImageReader( args.data_dir, args.data_list, None, # No defined input size. False, # No random scale. False, # No random mirror. args.ignore_label, IMG_MEAN, coord) image, label = reader.image, reader.label image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims( label, dim=0) # Add one batch dimension. # Create network. if args.encoder_name not in ['res101', 'res50']: print('encoder_name ERROR!') print("Please input: res101, res50") sys.exit(-1) else: net = ResNet_segmentation(image_batch, args.num_classes, False, args.encoder_name) # predictions raw_output = net.outputs raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) raw_output = tf.argmax(raw_output, axis=3) pred = tf.expand_dims(raw_output, dim=3) pred = tf.reshape(pred, [ -1, ]) # labels gt = tf.reshape(label_batch, [ -1, ]) # Ignoring all labels greater than or equal to n_classes. temp = tf.less_equal(gt, args.num_classes - 1) weights = tf.cast(temp, tf.int32) # fix for tf 1.3.0 gt = tf.where(temp, gt, tf.cast(temp, tf.uint8)) # Which variables to load. restore_var = tf.global_variables() # Predictions. raw_output = net.outputs raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ]) raw_output = tf.argmax(raw_output, dimension=3) pred = tf.expand_dims(raw_output, dim=3) # Create 4-d tensor. pred = tf.reshape(pred, [ -1, ]) #groud truth gt = tf.reshape(label_batch, [ -1, ]) indexes = tf.less_equal(gt, args.num_classes - 1) gt = tf.where(indexes, gt, tf.cast(temp, tf.uint8)) weights = tf.cast( indexes, tf.int32) # Ignoring all labels greater than or equal to n_classes. # mIoU mIoU, update_op = tf.contrib.metrics.streaming_mean_iou( pred, gt, num_classes=args.num_classes, weights=weights) # Pixel accuracy accu, accu_update_op = tf.contrib.metrics.streaming_accuracy( pred, gt, weights=weights) # Set up tf session and initialize variables. config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) init = tf.global_variables_initializer() sess.run(init) sess.run(tf.local_variables_initializer()) # Load weights. loader = tf.train.Saver(var_list=restore_var) if args.restore_from is not None: load(loader, sess, args.restore_from) # Start queue threads. threads = tf.train.start_queue_runners(coord=coord, sess=sess) # Iterate over training steps. for step in range(args.num_steps): preds, _, _ = sess.run([pred, update_op, accu_update_op]) if step % 100 == 0: print('step {:d}'.format(step)) print('Mean IoU: {:.3f}'.format(mIoU.eval(session=sess))) print('Pixel Accuracy: {:.3f}'.format(accu.eval(session=sess))) coord.request_stop() coord.join(threads)
def test_setup(self): # Create queue coordinator. self.coord = tf.train.Coordinator() # Load reader with tf.name_scope("create_inputs"): reader = ImageReader( self.conf.data_dir, self.conf.valid_data_list, None, # the images have different sizes False, # no data-aug False, # no data-aug self.conf.ignore_label, self.coord) image, label = reader.image, reader.label # [h, w, 3 or 1] # Add one batch dimension [1, h, w, 3 or 1] self.image_batch, self.label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0) self.image_batch = tf.identity( self.image_batch, name='image_batch') self.image_batch -= IMG_MEAN # Create network if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']: print('encoder_name ERROR!') print("Please input: res101, res50, or deeplab") sys.exit(-1) elif self.conf.encoder_name == 'deeplab': net = Deeplab_v2(self.image_batch, self.conf.num_classes, False) else: net = ResNet_segmentation(self.image_batch, self.conf.num_classes, False, self.conf.encoder_name) # predictions raw_output = net.outputs raw_output = tf.image.resize_bilinear(raw_output, tf.shape(self.image_batch)[1:3,]) raw_output = tf.argmax(raw_output, axis=3) pred = tf.expand_dims(raw_output, dim=3) self.pred = tf.reshape(pred, [-1,], name="predictions") # labels gt = tf.reshape(self.label_batch, [-1,]) # Ignoring all labels greater than or equal to n_classes. temp = tf.less_equal(gt, self.conf.num_classes - 1) weights = tf.cast(temp, tf.int32) # fix for tf 1.3.0 gt = tf.where(temp, gt, tf.cast(temp, tf.uint8)) # Pixel accuracy self.accu, self.accu_update_op = tf.contrib.metrics.streaming_accuracy( self.pred, gt, weights=weights) # mIoU self.mIoU, self.mIou_update_op = tf.contrib.metrics.streaming_mean_iou( self.pred, gt, num_classes=self.conf.num_classes, weights=weights) # f1 score pred = tf.cast(self.pred, tf.int32) gt = tf.cast(gt, tf.int32) self.areaOverlap = tf.count_nonzero(pred * gt) self.areaGTObj = tf.count_nonzero(gt) self.areaPredicted = tf.count_nonzero(pred) # Loader for loading the checkpoint self.loader = tf.train.Saver(var_list=tf.global_variables())
data_path, data_name, crop_type, backbone_type = opt.data_path, opt.data_name, opt.crop_type, opt.backbone_type gd_config, feature_dim, smoothing, temperature = opt.gd_config, opt.feature_dim, opt.smoothing, opt.temperature margin, recalls, batch_size = opt.margin, [ int(k) for k in opt.recalls.split(',') ], opt.batch_size num_epochs = opt.num_epochs save_name_pre = '{}_{}_{}_{}_{}_{}_{}_{}_{}'.format( data_name, crop_type, backbone_type, gd_config, feature_dim, smoothing, temperature, margin, batch_size) results = {'train_loss': [], 'train_accuracy': []} for recall_id in recalls: results['test_recall@{}'.format(recall_id)] = [] # dataset loader train_data_set = ImageReader(data_path, data_name, 'train', crop_type) train_sample = MPerClassSampler(train_data_set.labels, batch_size) train_data_loader = DataLoader(train_data_set, batch_sampler=train_sample, num_workers=8) test_data_set = ImageReader(data_path, data_name, 'query' if data_name == 'isc' else 'test', crop_type) test_data_loader = DataLoader(test_data_set, batch_size, shuffle=False, num_workers=8) eval_dict = {'test': {'data_loader': test_data_loader}} if data_name == 'isc': gallery_data_set = ImageReader(data_path, data_name, 'gallery', crop_type)
def train_setup(self): tf.set_random_seed(self.conf.random_seed) # Create queue coordinator. self.coord = tf.train.Coordinator() # Input size h, w = (self.conf.input_height, self.conf.input_width) input_size = (h, w) # Devices gpu_list = get_available_gpus() zip_encoder, zip_decoder_b, zip_decoder_w, zip_crf = [], [], [], [] previous_crf_names = [] restore_vars = [] self.loaders = [] self.im_list = [] for i in range(len(gpu_list)): with tf.device(gpu_list[i]): # Load reader with tf.name_scope("create_inputs"): reader = ImageReader(self.conf.data_dir, self.conf.data_list, input_size, self.conf.random_scale, self.conf.random_mirror, self.conf.ignore_label, IMG_MEAN, self.coord) self.image_batch, self.label_batch = reader.dequeue( self.conf.batch_size) self.im_list.append(self.image_batch) image_batch_075 = tf.image.resize_images( self.image_batch, [int(h * 0.75), int(w * 0.75)]) image_batch_05 = tf.image.resize_images( self.image_batch, [int(h * 0.5), int(w * 0.5)]) # Create network with tf.variable_scope('', reuse=False): net = Deeplab_v2(self.image_batch, self.conf.num_classes, True, rescale075=False, rescale05=False, crf_type=self.conf.crf_type) with tf.variable_scope('', reuse=True): net075 = Deeplab_v2(image_batch_075, self.conf.num_classes, True, rescale075=True, rescale05=False, crf_type=self.conf.crf_type) with tf.variable_scope('', reuse=True): net05 = Deeplab_v2(image_batch_05, self.conf.num_classes, True, rescale075=False, rescale05=True, crf_type=self.conf.crf_type) # Variables that load from pre-trained model. restore_var = [ v for v in tf.global_variables() if ('fc' not in v.name and 'crfrnn' not in v.name) ] restore_vars.append(restore_var) # Trainable Variables all_trainable = tf.trainable_variables() # Fine-tune part for name in previous_crf_names: for v in all_trainable: if v.name == name: all_trainable.remove(v) crf_trainable = [ v for v in all_trainable if ('crfrnn' in v.name and v.name not in previous_crf_names ) ] previous_crf_names.extend(v.name for v in crf_trainable) encoder_trainable = [ v for v in all_trainable if 'fc' not in v.name and 'crfrnn' not in v.name ] # lr * 1.0 # Remove encoder_trainable from all_trainable #all_trainable = [v for v in all_trainable if v not in encoder_trainable] # Decoder part decoder_trainable = [ v for v in all_trainable if 'fc' in v.name and 'crfrnn' not in v.name ] decoder_w_trainable = [ v for v in decoder_trainable if ('weights' in v.name or 'gamma' in v.name) and 'crfrnn' not in v.name ] # lr * 10.0 decoder_b_trainable = [ v for v in decoder_trainable if ('biases' in v.name or 'beta' in v.name) and 'crfrnn' not in v.name ] # lr * 20.0 # Check assert (len(all_trainable) == len(decoder_trainable) + len(crf_trainable)) + len(encoder_trainable) assert (len(decoder_trainable) == len(decoder_w_trainable) + len(decoder_b_trainable)) # Network raw output raw_output100 = net.outputs raw_output075 = net075.outputs raw_output05 = net05.outputs raw_output = tf.reduce_max(tf.stack([ raw_output100, tf.image.resize_images(raw_output075, tf.shape(raw_output100)[1:3, ]), tf.image.resize_images(raw_output05, tf.shape(raw_output100)[1:3, ]) ]), axis=0) # Ground Truth: ignoring all labels greater or equal than n_classes label_proc = prepare_label(self.label_batch, tf.stack( raw_output.get_shape()[1:3]), num_classes=self.conf.num_classes, one_hot=True) # [batch_size, h, w] label_proc075 = prepare_label( self.label_batch, tf.stack(raw_output075.get_shape()[1:3]), num_classes=self.conf.num_classes, one_hot=True) label_proc05 = prepare_label( 
self.label_batch, tf.stack(raw_output05.get_shape()[1:3]), num_classes=self.conf.num_classes, one_hot=True) raw_gt = tf.reshape(label_proc, [ -1, ]) raw_gt075 = tf.reshape(label_proc075, [ -1, ]) raw_gt05 = tf.reshape(label_proc05, [ -1, ]) indices = tf.squeeze( tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1) indices075 = tf.squeeze( tf.where( tf.less_equal(raw_gt075, self.conf.num_classes - 1)), 1) indices05 = tf.squeeze( tf.where(tf.less_equal(raw_gt05, self.conf.num_classes - 1)), 1) gt = tf.cast(tf.gather(raw_gt, indices), tf.int32) gt075 = tf.cast(tf.gather(raw_gt075, indices075), tf.int32) gt05 = tf.cast(tf.gather(raw_gt05, indices05), tf.int32) raw_prediction = tf.reshape(raw_output, [-1, self.conf.num_classes]) raw_prediction100 = tf.reshape(raw_output100, [-1, self.conf.num_classes]) raw_prediction075 = tf.reshape(raw_output075, [-1, self.conf.num_classes]) raw_prediction05 = tf.reshape(raw_output05, [-1, self.conf.num_classes]) prediction = tf.gather(raw_prediction, indices) prediction100 = tf.gather(raw_prediction100, indices) prediction075 = tf.gather(raw_prediction075, indices075) prediction05 = tf.gather(raw_prediction05, indices05) # Pixel-wise softmax_cross_entropy loss #loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt) loss = tf.nn.softmax_cross_entropy_with_logits_v2( logits=raw_prediction, labels=tf.reshape(label_proc[0], (h * w, self.conf.num_classes))) ''' coefficients = [0.01460247, 1.25147725, 2.88479363, 1.20348121, 1.65261654, 1.67514772, 0.62338799, 0.7729363, 0.42038501, 0.98557268, 1.31867536, 0.85313332, 0.67227604, 1.21317965, 1. , 0.24263748, 1.80877607, 1.3082213, 0.79664027, 0.72543945, 1.27823374] ''' #loss = weighted_loss(self.conf.num_classes, coefficients, labels=tf.reshape(label_proc[0], (h*w, self.conf.num_classes)), logits=raw_prediction) #loss100 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction100, labels=gt) loss100 = tf.nn.softmax_cross_entropy_with_logits_v2( logits=raw_prediction100, labels=tf.reshape(label_proc[0], (h * w, self.conf.num_classes))) #loss100 = weighted_loss(self.conf.num_classes, coefficients, labels=tf.reshape(label_proc[0], (h*w, self.conf.num_classes)), logits=raw_prediction100) #loss075 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction075, labels=gt075) loss075 = tf.nn.softmax_cross_entropy_with_logits_v2( logits=raw_prediction075, labels=tf.reshape(label_proc075[0], (int(h * 0.75) * int(w * 0.75), self.conf.num_classes))) #loss075 = weighted_loss(self.conf.num_classes, coefficients, labels=tf.reshape(label_proc075[0], (int(h * 0.75) * int(w * 0.75), self.conf.num_classes)), logits=raw_prediction075) #loss05 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction05, labels=gt05) loss05 = tf.nn.softmax_cross_entropy_with_logits_v2( logits=raw_prediction05, labels=tf.reshape( label_proc05[0], (int(h * 0.5) * int(w * 0.5), self.conf.num_classes))) #loss05 = weighted_loss(self.conf.num_classes, coefficients, labels=tf.reshape(label_proc05[0], (int(h * 0.5) * int(w * 0.5), self.conf.num_classes)), logits=raw_prediction05) # L2 regularization l2_losses = [ self.conf.weight_decay * tf.nn.l2_loss(v) for v in all_trainable if 'weights' in v.name ] # Loss function self.reduced_loss = tf.reduce_mean(loss) + tf.reduce_mean( loss100) + tf.reduce_mean(loss075) + tf.reduce_mean( loss05) + tf.add_n(l2_losses) # Define optimizers # 'poly' learning rate base_lr = tf.constant(self.conf.learning_rate) self.curr_step = tf.placeholder(dtype=tf.float32, 
shape=()) learning_rate = tf.scalar_mul( base_lr, tf.pow((1 - self.curr_step / self.conf.num_steps), self.conf.power)) # We have several optimizers here in order to handle the different lr_mult # which is a kind of parameters in Caffe. This controls the actual lr for each # layer. opt_encoder = tf.train.MomentumOptimizer( learning_rate, self.conf.momentum) opt_decoder_w = tf.train.MomentumOptimizer( learning_rate * 10.0, self.conf.momentum) opt_decoder_b = tf.train.MomentumOptimizer( learning_rate * 20.0, self.conf.momentum) opt_crf = tf.train.MomentumOptimizer(learning_rate, self.conf.momentum) # Gradient accumulation # Define a variable to accumulate gradients. accum_grads = [ tf.Variable(tf.zeros_like(v.initialized_value()), trainable=False) for v in encoder_trainable + decoder_w_trainable + decoder_b_trainable + crf_trainable ] # Define an operation to clear the accumulated gradients for next batch. self.zero_op = [ v.assign(tf.zeros_like(v)) for v in accum_grads ] # To make sure each layer gets updated by different lr's, we do not use 'minimize' here. # Instead, we separate the steps compute_grads+update_params. # Compute grads grads = tf.gradients( self.reduced_loss, encoder_trainable + decoder_w_trainable + decoder_b_trainable + crf_trainable) # Accumulate and normalise the gradients. self.accum_grads_op = [ accum_grads[i].assign_add(grad / self.conf.grad_update_every) for i, grad in enumerate(grads) ] grads_encoder = accum_grads[:len(encoder_trainable)] grads_decoder_w = accum_grads[len(encoder_trainable ):len(encoder_trainable) + len(decoder_w_trainable)] grads_decoder_b = accum_grads[( len(encoder_trainable) + len(decoder_w_trainable)):(len(encoder_trainable) + len(decoder_w_trainable) + len(decoder_b_trainable))] grads_crf = accum_grads[ len(encoder_trainable) + len(decoder_w_trainable) + len(decoder_b_trainable ):] # assuming crf gradients are appended to the end zip_encoder.append(list(zip(grads_encoder, encoder_trainable))) zip_decoder_b.append( list(zip(grads_decoder_b, decoder_b_trainable))) zip_decoder_w.append( list(zip(grads_decoder_w, decoder_w_trainable))) zip_crf.append(list(zip(grads_crf, crf_trainable))) avg_grads_encoder = average_gradients(zip_encoder) avg_grads_decoder_w = average_gradients(zip_decoder_w) avg_grads_decoder_b = average_gradients(zip_decoder_b) avg_grads_crf = average_gradients(zip_crf) for i in range(len(gpu_list)): with tf.device(gpu_list[i]): # Update params train_op_conv = opt_encoder.apply_gradients(avg_grads_encoder) train_op_fc_w = opt_decoder_w.apply_gradients( avg_grads_decoder_w) train_op_fc_b = opt_decoder_b.apply_gradients( avg_grads_decoder_b) train_op_crf = opt_crf.apply_gradients(avg_grads_crf) # Finally, get the train_op! update_ops = tf.get_collection( tf.GraphKeys.UPDATE_OPS ) # for collecting moving_mean and moving_variance with tf.control_dependencies(update_ops): self.train_op = tf.group(train_op_fc_w, train_op_fc_b, train_op_crf) # train_op_conv # Saver for storing checkpoints of the model self.saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=0) # Loader for loading the pre-trained model for i in range(len(gpu_list)): with tf.device(gpu_list[i]): self.loaders.append(tf.train.Saver(var_list=restore_vars[i])) #self.loaders.append(tf.train.Saver(var_list=tf.global_variables())) # Training summary # Processed predictions: for visualisation. 
raw_output_up = tf.image.resize_bilinear(raw_output, input_size)
raw_output_up = tf.argmax(raw_output_up, axis=3)
self.pred = tf.expand_dims(raw_output_up, axis=3)
# Image summary.
images_summary = tf.py_func(inv_preprocess,
                            [self.image_batch, 1, IMG_MEAN], tf.uint8)
labels_summary = tf.py_func(decode_labels,
                            [self.label_batch, 1, self.conf.num_classes],
                            tf.uint8)
preds_summary = tf.py_func(decode_labels,
                           [self.pred, 1, self.conf.num_classes], tf.uint8)
self.total_summary = tf.summary.image(
    'images',
    tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]),
    max_outputs=1)  # Concatenate row-wise.
if not os.path.exists(self.conf.logdir):
    os.makedirs(self.conf.logdir)
self.summary_writer = tf.summary.FileWriter(self.conf.logdir,
                                            graph=tf.get_default_graph())
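The three accumulation ops built above (zero_op, accum_grads_op, train_op) only make sense when run in a particular order at session time. The following is a minimal driver-loop sketch, not the project's actual training script; it assumes a `model` object exposing the attributes defined in this setup, a `conf` carrying `num_steps` and `grad_update_every`, and an already-initialised `sess` whose input queues are running.

def run_training(sess, model, conf):
    # Hypothetical driver: one parameter update per conf.grad_update_every
    # accumulation sub-steps.
    for step in range(conf.num_steps):
        # Clear the gradient accumulators for this batch.
        sess.run(model.zero_op)
        # Each run adds grad / grad_update_every to the accumulators, so
        # after the loop they hold the sub-batch-averaged gradients.
        for _ in range(conf.grad_update_every):
            sess.run(model.accum_grads_op)
        # Apply the averaged gradients; feeding curr_step drives the poly
        # decay lr = base_lr * (1 - step / num_steps) ** power.
        sess.run(model.train_op, feed_dict={model.curr_step: step})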
    int(k) for k in opt.recalls.split(',')], opt.batch_size
num_epochs = opt.num_epochs
save_name_pre = '{}_{}_{}_{}_{}_{}_{}_{}'.format(data_name, backbone_type,
                                                 gd_config, feature_dim,
                                                 smoothing, temperature,
                                                 margin, batch_size)
results = {'train_loss': [], 'train_accuracy': []}
for recall_id in recalls:
    results['test_recall@{}'.format(recall_id)] = []
process_sop_data(opt.data_dir, opt.df_path)

# dataset loaders
train_data_set = ImageReader(data_path, data_name, 'train')
train_sample = MPerClassSampler(train_data_set.labels, batch_size)
train_data_loader = DataLoader(train_data_set, batch_sampler=train_sample,
                               num_workers=opt.workers, pin_memory=True)
test_data_set = ImageReader(data_path, data_name, 'test')
test_data_loader = DataLoader(test_data_set, batch_size, shuffle=False,
                              num_workers=opt.workers, pin_memory=True)
eval_dict = {'test': {'data_loader': test_data_loader}}

# model setup, model profile, optimizer config and loss definition
model = Model(backbone_type,
def train_setup(self):
    tf.set_random_seed(self.conf.random_seed)

    # Create queue coordinator.
    self.coord = tf.train.Coordinator()

    # Input size
    input_size = (self.conf.input_height, self.conf.input_width)

    # Load reader
    with tf.name_scope("create_inputs"):
        reader = ImageReader(self.conf.data_dir, self.conf.data_list,
                             input_size, self.conf.random_scale,
                             self.conf.random_mirror, self.conf.ignore_label,
                             IMG_MEAN, self.coord)
        self.image_batch, self.label_batch = reader.dequeue(self.conf.batch_size)

    # Create network
    if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
        print('encoder_name ERROR!')
        print("Please input: res101, res50, or deeplab")
        sys.exit(-1)
    elif self.conf.encoder_name == 'deeplab':
        net = Deeplab_v2(self.image_batch, self.conf.num_classes, True)
        # Variables loaded from the pre-trained model.
        restore_var = [v for v in tf.global_variables() if 'fc' not in v.name]
        # Trainable variables
        all_trainable = tf.trainable_variables()
        # Fine-tuned (encoder) part
        encoder_trainable = [v for v in all_trainable
                             if 'fc' not in v.name]  # lr * 1.0
        # Decoder part
        decoder_trainable = [v for v in all_trainable if 'fc' in v.name]
    else:
        net = ResNet_segmentation(self.image_batch, self.conf.num_classes,
                                  True, self.conf.encoder_name)
        # Variables loaded from the pre-trained model.
        restore_var = [v for v in tf.global_variables()
                       if 'resnet_v1' in v.name]
        # Trainable variables
        all_trainable = tf.trainable_variables()
        # Fine-tuned (encoder) part
        encoder_trainable = [v for v in all_trainable
                             if 'resnet_v1' in v.name]  # lr * 1.0
        # Decoder part
        decoder_trainable = [v for v in all_trainable if 'decoder' in v.name]

    decoder_w_trainable = [
        v for v in decoder_trainable
        if 'weights' in v.name or 'gamma' in v.name
    ]  # lr * 10.0
    decoder_b_trainable = [
        v for v in decoder_trainable
        if 'biases' in v.name or 'beta' in v.name
    ]  # lr * 20.0
    # Sanity checks
    assert len(all_trainable) == (len(decoder_trainable) +
                                  len(encoder_trainable))
    assert len(decoder_trainable) == (len(decoder_w_trainable) +
                                      len(decoder_b_trainable))

    # Network raw output
    raw_output = net.outputs  # [batch_size, h, w, 21]
    # Output size
    output_shape = tf.shape(raw_output)
    output_size = (output_shape[1], output_shape[2])

    # Ground truth: ignore all labels greater than or equal to num_classes.
    label_proc = prepare_label(self.label_batch, output_size,
                               num_classes=self.conf.num_classes,
                               one_hot=False)
    raw_gt = tf.reshape(label_proc, [-1, ])
    indices = tf.squeeze(
        tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    raw_prediction = tf.reshape(raw_output, [-1, self.conf.num_classes])
    prediction = tf.gather(raw_prediction, indices)

    # Pixel-wise softmax cross-entropy loss
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction,
                                                          labels=gt)
    # L2 regularization
    l2_losses = [
        self.conf.weight_decay * tf.nn.l2_loss(v) for v in all_trainable
        if 'weights' in v.name
    ]
    # Loss function
    self.reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

    # Define optimizers
    # 'poly' learning rate
    base_lr = tf.constant(self.conf.learning_rate)
    self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(
        base_lr,
        tf.pow((1 - self.curr_step / self.conf.num_steps), self.conf.power))
    # We use several optimizers here to handle the different lr_mult values
    # (a per-layer parameter in Caffe that scales the actual learning rate
    # of each layer).
    opt_encoder = tf.train.MomentumOptimizer(learning_rate,
                                             self.conf.momentum)
    opt_decoder_w = tf.train.MomentumOptimizer(learning_rate * 10.0,
                                               self.conf.momentum)
    opt_decoder_b = tf.train.MomentumOptimizer(learning_rate * 20.0,
                                               self.conf.momentum)
    # To make sure each layer is updated with its own learning rate, we do
    # not use 'minimize' here; instead we separate compute_grads and
    # update_params.
    # Compute grads
    grads = tf.gradients(
        self.reduced_loss,
        encoder_trainable + decoder_w_trainable + decoder_b_trainable)
    grads_encoder = grads[:len(encoder_trainable)]
    grads_decoder_w = grads[len(encoder_trainable):(
        len(encoder_trainable) + len(decoder_w_trainable))]
    grads_decoder_b = grads[(len(encoder_trainable) +
                             len(decoder_w_trainable)):]
    # Update params
    train_op_conv = opt_encoder.apply_gradients(
        zip(grads_encoder, encoder_trainable))
    train_op_fc_w = opt_decoder_w.apply_gradients(
        zip(grads_decoder_w, decoder_w_trainable))
    train_op_fc_b = opt_decoder_b.apply_gradients(
        zip(grads_decoder_b, decoder_b_trainable))
    # Finally, get the train_op! Collect update_ops first (the moving_mean
    # and moving_variance of batch normalisation).
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        self.train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)

    # Saver for storing checkpoints of the model
    self.saver = tf.train.Saver(var_list=tf.global_variables(),
                                max_to_keep=0)

    # Loader for loading the pre-trained model
    self.loader = tf.train.Saver(var_list=restore_var)

    # Training summary
    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output, input_size)
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    self.pred = tf.expand_dims(raw_output_up, dim=3)
    # Image summary.
    images_summary = tf.py_func(inv_preprocess,
                                [self.image_batch, 2, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(decode_labels,
                                [self.label_batch, 2, self.conf.num_classes],
                                tf.uint8)
    preds_summary = tf.py_func(decode_labels,
                               [self.pred, 2, self.conf.num_classes],
                               tf.uint8)
    self.total_summary = tf.summary.image(
        'images',
        tf.concat(axis=2,
                  values=[images_summary, labels_summary, preds_summary]),
        max_outputs=2)  # Concatenate row-wise.
    if not os.path.exists(self.conf.logdir):
        os.makedirs(self.conf.logdir)
    self.summary_writer = tf.summary.FileWriter(self.conf.logdir,
                                                graph=tf.get_default_graph())
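Both training setups use the same 'poly' decay for the base learning rate, with decoder weights and biases running at 10x and 20x of it. The schedule is easy to sanity-check in plain Python; the sketch below uses illustrative values (base_lr, num_steps, power are assumptions, not this project's configured defaults).

def poly_lr(step, base_lr=2.5e-4, num_steps=20000, power=0.9):
    # lr = base_lr * (1 - step / num_steps) ** power, the same formula
    # built with tf.scalar_mul/tf.pow above.
    return base_lr * (1 - step / num_steps) ** power

print(poly_lr(0))      # 2.5e-04 at step 0
print(poly_lr(10000))  # ~1.34e-04 halfway through
print(poly_lr(19999))  # ~3.4e-08, close to zero at the end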
def full_run_single(video_id, video_dir, static_dir, frame_by_frame_results_dir,
                    static_results_dir, crop_boxes_dir, ignore_mask_dir,
                    detector_config_path, detector_model_path, reid_model_path,
                    reid_model_backbone, crop_results_dir, anomaly_results_dir,
                    bg_interval=4, bg_alpha=0.05, bg_start_frame=1,
                    bg_threshold=5, raw_detect_interval=30, crop_min_obj_size=8,
                    crop_row_capacity=3, crop_box_aspect_ratio=2,
                    ignore_count_thresh=0.08, ignore_area_thresh=2000,
                    ignore_score_thresh=0.1, ignore_gau_sigma=3,
                    abnormal_duration_thresh=60, detect_duration_thresh=6,
                    undetect_duration_thresh=8, bbox_score_thresh=0.3,
                    light_thresh=0.8, anomaly_thresh=0.8,
                    similarity_thresh=0.95, suspicious_duration_thresh=18,
                    detector_verbose_interval=20, verbose=True):
    """
    Runs the full anomaly detection pipeline on a video.

    video_id: video id/name
    video_dir: folder the video is in
    static_dir: folder to put the background images in
    frame_by_frame_results_dir: folder to put the raw video detection results in
    static_results_dir: folder to put the background image detection results in
    crop_boxes_dir: folder to put the crop boxes in
    ignore_mask_dir: folder to put the ignore region mask in
    detector_config_path: path to the detector configuration file
    detector_model_path: path to the detector model checkpoint
    reid_model_path: path to the re-ID model checkpoint
    reid_model_backbone: re-ID model backbone, e.g. "resnet50"
    bg_interval, bg_alpha, bg_start_frame, bg_threshold: see the
        calc_bg_full_video function
    raw_detect_interval: number of frames between detections on the raw video
    crop_min_obj_size, crop_row_capacity, crop_box_aspect_ratio: see the
        create_crop_boxes function
    ignore_count_thresh, ignore_area_thresh, ignore_score_thresh,
        ignore_gau_sigma: see the create_ignore_mask function
    abnormal_duration_thresh, detect_duration_thresh, undetect_duration_thresh,
        bbox_score_thresh, light_thresh, anomaly_thresh, similarity_thresh,
        suspicious_duration_thresh: see the get_anomalies function
    detector_verbose_interval: detector progress printing interval
    verbose: verbose printing
    """
    # Set up file paths
    video_path = os.path.join(video_dir, f"{video_id}.mp4")
    static_images_folder = os.path.join(static_dir, f"{video_id}")
    fbf_results_path = os.path.join(frame_by_frame_results_dir,
                                    f"{video_id}.csv")
    static_results_path = os.path.join(static_results_dir, f"{video_id}.csv")
    crop_boxes_path = os.path.join(crop_boxes_dir, f"{video_id}.csv")
    crop_results_path = os.path.join(crop_results_dir, f"{video_id}.csv")
    ignore_mask_path = os.path.join(ignore_mask_dir, f"{video_id}.npy")
    anomaly_results_path = os.path.join(anomaly_results_dir,
                                        f"{video_id}.csv")

    # Create folders
    os.makedirs(static_images_folder, exist_ok=True)
    os.makedirs(frame_by_frame_results_dir, exist_ok=True)
    os.makedirs(static_results_dir, exist_ok=True)
    os.makedirs(crop_boxes_dir, exist_ok=True)
    os.makedirs(crop_results_dir, exist_ok=True)
    os.makedirs(ignore_mask_dir, exist_ok=True)
    os.makedirs(anomaly_results_dir, exist_ok=True)

    # Read video
    raw_video = VideoReader(video_path)

    # Background modeling
    print("Creating background...")
    calc_bg_full_video(video_path, static_images_folder, bg_interval,
                       bg_alpha, bg_start_frame, bg_threshold, verbose)

    # Detection
    detector = Detector(detector_config_path, detector_model_path,
                        detector_verbose_interval, class_restrictions=None)
    # class_names = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
    #                'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
    #                'horse', 'motorbike', 'person', 'pottedplant', 'sheep',
    #                'sofa', 'train', 'tvmonitor')
    # detector.model.CLASSES = class_names
    # detector.class_labels = class_names

    ## Raw video
    print("Detecting raw video...")
    raw_images, raw_frame_nums = raw_video.load_video(raw_detect_interval)
    fbf_results = detector.detect_images(raw_images, raw_frame_nums)
    fbf_results.to_csv(fbf_results_path, index=False)

    ## Static images
    static_reader = ImageReader(static_images_folder)
    static_frame_names = list(
        map(lambda f: int(f[:-4]), static_reader.filenames))  # "123.jpg" -> 123
    print("Detecting background...")
    static_results = detector.detect_images(static_reader.load_images(),
                                            static_frame_names)
    static_results.to_csv(static_results_path, index=False)

    # Perspective cropping
    print("Creating crop boxes...")
    # Either the static or the frame-by-frame results should work here.
    create_crop_boxes(fbf_results_path, crop_boxes_path, raw_video.img_shape,
                      crop_min_obj_size, crop_row_capacity,
                      crop_box_aspect_ratio)
    # The cropped detections should be usable in place of the normal static
    # images, but that does not look feasible at the moment: the detection
    # time is far too long.
    crop_boxes = pd.read_csv(crop_boxes_path).values
    print("Detecting cropped background...")
    crop_detect_results = detector.detect_images(static_reader.load_images(),
                                                 static_frame_names,
                                                 crop_boxes=crop_boxes)
    crop_detect_results.to_csv(crop_results_path)

    # Ignore region
    print("Creating ignore mask...")
    create_ignore_mask(fbf_results_path, ignore_mask_path,
                       raw_video.img_shape, ignore_count_thresh,
                       ignore_area_thresh, ignore_score_thresh,
                       ignore_gau_sigma)

    # Detect anomalies
    print("Detecting anomalies...")
    anomalies = get_anomalies_preprocessed(
        video_path, reid_model_path, fbf_results_path, static_results_path,
        ignore_mask_path, reid_model_backbone, bg_start_frame, bg_interval,
        abnormal_duration_thresh, detect_duration_thresh,
        undetect_duration_thresh, bbox_score_thresh, light_thresh,
        anomaly_thresh, similarity_thresh, suspicious_duration_thresh,
        verbose)

    if anomalies is not None:
        anomaly_event_times = get_overlapping_time(anomalies)
        # Save results
        print("Saving results...")
        anomalies.to_csv(anomaly_results_path, index=False)
        return anomalies, anomaly_event_times
    else:
        return [], []
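For reference, a hypothetical end-to-end invocation might look like the sketch below. Every path is a placeholder rather than one the project defines, and the structure of the returned event times is whatever get_overlapping_time produces.

# Placeholder paths; adjust to the actual dataset and checkpoint layout.
anomalies, anomaly_event_times = full_run_single(
    video_id="1",
    video_dir="data/videos",
    static_dir="work/static",
    frame_by_frame_results_dir="work/fbf_results",
    static_results_dir="work/static_results",
    crop_boxes_dir="work/crop_boxes",
    ignore_mask_dir="work/ignore_masks",
    detector_config_path="configs/detector_config.py",
    detector_model_path="checkpoints/detector.pth",
    reid_model_path="checkpoints/reid.pth",
    reid_model_backbone="resnet50",
    crop_results_dir="work/crop_results",
    anomaly_results_dir="work/anomalies",
    verbose=True)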