def __init__(self, opt):
    """Build the detector: model, normalization stats, tracker and debugger.

    Args:
        opt: parsed options object; must provide ``gpus``, ``arch``,
            ``heads``, ``head_conv``, ``load_model``, ``dataset``,
            ``no_pause`` and ``test_focal_length``.
    """
    # Select the inference device from the first configured GPU id
    # (-1 means CPU-only).
    if opt.gpus[0] >= 0:
        opt.device = torch.device('cuda')
    else:
        opt.device = torch.device('cpu')

    print('Creating model...')
    self.model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    self.model = load_model(self.model, opt.load_model, opt)
    self.model = self.model.to(opt.device)
    self.model.eval()  # inference only: freeze dropout / batch-norm behavior
    # (Removed: dead commented-out pytorch_to_caffe export scaffolding.)

    self.opt = opt
    self.trained_dataset = get_dataset(opt.dataset)
    # Per-channel normalization constants, shaped (1, 1, 3) for HWC images.
    self.mean = np.array(self.trained_dataset.mean,
                         dtype=np.float32).reshape(1, 1, 3)
    self.std = np.array(self.trained_dataset.std,
                        dtype=np.float32).reshape(1, 1, 3)
    self.pause = not opt.no_pause
    # A negative test_focal_length means "use the dataset default".
    self.rest_focal_length = self.trained_dataset.rest_focal_length \
        if self.opt.test_focal_length < 0 else self.opt.test_focal_length
    self.flip_idx = self.trained_dataset.flip_idx
    self.cnt = 0
    self.pre_images = None
    self.pre_image_ori = None
    self.tracker = Tracker(opt)
    self.debugger = Debugger(opt=opt, dataset=self.trained_dataset)
def input_fn():
    """Assemble the single-stage training input pipeline.

    Returns a ``(features, labels)`` pair in the tf.estimator convention:
    the image batch plus a dict carrying the encoded targets, an anchor
    decoder and the per-layer anchor counts.
    """
    all_anchors, num_anchors_list = anchor_creator.get_all_anchors()

    # Encoder/decoder that matches ground-truth boxes against the anchors.
    encoder_decoder = anchor_manipulator.AnchorEncoder(
        all_anchors,
        num_classes=FLAGS.num_classes,
        allowed_borders=[0.05],
        positive_threshold=FLAGS.match_threshold,
        ignore_threshold=FLAGS.neg_threshold,
        prior_scaling=[0.1, 0.1, 0.2, 0.2])

    batch_list, _ = dataset_factory.get_dataset(
        FLAGS.dataset_name,
        FLAGS.dataset_split_name,
        FLAGS.data_dir,
        image_preprocessing_fn,
        file_pattern=None,
        reader=None,
        batch_size=FLAGS.batch_size,
        num_readers=FLAGS.num_readers,
        num_preprocessing_threads=FLAGS.num_preprocessing_threads,
        num_epochs=FLAGS.train_epochs,
        anchor_encoder=encoder_decoder.encode_all_anchors)

    # The last tensor of the batch is the image; everything before it is a
    # training target.
    features = batch_list[-1]
    labels = {
        'targets': batch_list[:-1],
        'decode_fn':
            lambda pred: encoder_decoder.decode_all_anchors([pred])[0],
        'num_anchors_list': num_anchors_list,
    }
    return features, labels
def input_fn():
    """Input pipeline for a two-stage (RPN + detection head) detector.

    Returns (features, labels): the image batch plus a dict of targets and
    encode/decode callables used by the model function.
    """
    all_anchors, num_anchors_list = anchor_creator.get_all_anchors()
    # Identity prior_scaling for the RPN; the SSD-style [0.1, 0.1, 0.2, 0.2]
    # variant is kept commented as the alternative.
    anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(all_anchors,
        num_classes = FLAGS.num_classes,
        allowed_borders = [0.],
        positive_threshold = FLAGS.rpn_match_threshold,
        ignore_threshold = FLAGS.rpn_neg_threshold,
        prior_scaling=[1., 1., 1., 1.],#[0.1, 0.1, 0.2, 0.2],
        rpn_fg_thres = FLAGS.match_threshold,
        rpn_bg_high_thres = FLAGS.neg_threshold_high,
        rpn_bg_low_thres = FLAGS.neg_threshold_low)
    list_from_batch, _ = dataset_factory.get_dataset(FLAGS.dataset_name,
        FLAGS.dataset_split_name,
        FLAGS.data_dir,
        image_preprocessing_fn,
        file_pattern = None,
        reader = None,
        batch_size = FLAGS.batch_size,
        num_readers = FLAGS.num_readers,
        num_preprocessing_threads = FLAGS.num_preprocessing_threads,
        num_epochs = FLAGS.train_epochs,
        anchor_encoder = anchor_encoder_decoder.encode_all_anchors)
    #print(list_from_batch[-4], list_from_batch[-3])
    # The last tensor of the batch is the image (features); the rest are
    # training targets.
    # NOTE(review): list_from_batch[-4] / [-3] are presumably the ground-truth
    # labels and boxes consumed by ext_encode_rois -- confirm against the
    # dataset factory's output ordering.
    return list_from_batch[-1], {'targets': list_from_batch[:-1],
        'rpn_decode_fn': lambda pred : anchor_encoder_decoder.decode_all_anchors([pred], squeeze_inner=True)[0],
        'head_decode_fn': lambda rois, pred : anchor_encoder_decoder.ext_decode_rois(rois, pred, head_prior_scaling=[1., 1., 1., 1.]),
        'rpn_encode_fn': lambda rois : anchor_encoder_decoder.ext_encode_rois(rois, list_from_batch[-4], list_from_batch[-3], FLAGS.roi_one_image, FLAGS.fg_ratio, 0.1, head_prior_scaling=[1., 1., 1., 1.]),
        'num_anchors_list': num_anchors_list}
def test(model_name: str,
         dataset_folder: str,
         save_folder: str,
         hypers: HyperParameters,
         batch_size: Optional[int],
         max_num_batches: Optional[int],
         series: DataSeries = DataSeries.TEST):
    """Evaluate a restored model on one data series and save the results.

    Args:
        model_name: Name of the saved model to restore.
        dataset_folder: Folder containing the dataset to evaluate on.
        save_folder: Folder holding the saved model and receiving the log.
        hypers: Hyperparameters describing the model and dataset type.
        batch_size: Optional evaluation batch size.
        max_num_batches: Optional cap on the number of evaluated batches.
        series: Which data series (TRAIN / VALID / TEST) to evaluate.
    """
    # Create the dataset
    dataset = get_dataset(hypers.dataset_type, dataset_folder)

    # Build model and restore trainable parameters
    model = get_model(hypers, save_folder=save_folder, is_train=False)
    model.restore(name=model_name, is_train=False, is_frozen=False)

    # Test the model
    print('Starting evaluation on {0} set...'.format(series.name.capitalize()))
    try:
        test_results = model.predict(dataset=dataset,
                                     test_batch_size=batch_size,
                                     max_num_batches=max_num_batches,
                                     series=series)
    finally:
        # BUG FIX: always release the dataset files, even when prediction
        # raises (the original leaked the dataset on error).
        dataset.close()

    # Pick the log file corresponding to the evaluated series.
    if series == DataSeries.TRAIN:
        log_template = FINAL_TRAIN_LOG_PATH
    elif series == DataSeries.VALID:
        log_template = FINAL_VALID_LOG_PATH
    else:
        log_template = TEST_LOG_PATH
    result_file = os.path.join(save_folder, log_template.format(model_name))

    save_by_file_suffix([test_results], result_file)
    print('Completed evaluation.')
def __init__(self, opt):
    """Set up the detection model, dataset statistics and tracking state."""
    # Run on GPU when the first configured GPU id is non-negative.
    opt.device = torch.device('cuda') if opt.gpus[0] >= 0 else torch.device('cpu')

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    model = load_model(model, opt.load_model, opt)
    self.model = model.to(opt.device)
    self.model.eval()

    self.opt = opt
    self.trained_dataset = get_dataset(opt.dataset)
    dataset = self.trained_dataset
    # Per-channel normalization constants, shaped (1, 1, 3) for HWC images.
    self.mean = np.array(dataset.mean, dtype=np.float32).reshape(1, 1, 3)
    self.std = np.array(dataset.std, dtype=np.float32).reshape(1, 1, 3)
    self.pause = not opt.no_pause
    # A negative test_focal_length selects the dataset's default.
    if self.opt.test_focal_length < 0:
        self.rest_focal_length = dataset.rest_focal_length
    else:
        self.rest_focal_length = self.opt.test_focal_length
    self.flip_idx = dataset.flip_idx
    self.cnt = 0
    self.pre_images = None
    self.pre_image_ori = None
    self.tracker = Tracker(opt)
    self.debugger = Debugger(opt=opt, dataset=dataset)
def __init__(self, opt):
    """Initialize the detector and the configured motion model.

    Supported ``opt.motion`` values: 'transformer' (loads a checkpointed
    transformer from ``opt.transformer_load_path``), 'zero' (no motion
    model) and 'cttrack'. Any other value raises ValueError.
    """
    if opt.gpus[0] >= 0:
        opt.device = torch.device('cuda')
    else:
        opt.device = torch.device('cpu')
    print('Creating model...')
    self.model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    self.model = load_model(self.model, opt.load_model, opt)
    self.model = self.model.to(opt.device)
    self.model.eval()
    self.opt = opt
    self.trained_dataset = get_dataset(opt.dataset)
    # Per-channel normalization constants, shaped (1, 1, 3) for HWC images.
    self.mean = np.array(self.trained_dataset.mean,
                         dtype=np.float32).reshape(1, 1, 3)
    self.std = np.array(self.trained_dataset.std,
                        dtype=np.float32).reshape(1, 1, 3)
    self.pause = not opt.no_pause
    # A negative test_focal_length means "use the dataset default".
    self.rest_focal_length = self.trained_dataset.rest_focal_length \
        if self.opt.test_focal_length < 0 else self.opt.test_focal_length
    self.flip_idx = self.trained_dataset.flip_idx
    self.cnt = 0
    self.pre_images = None
    self.pre_image_ori = None
    self.tracker = Tracker(opt)
    self.debugger = Debugger(opt=opt, dataset=self.trained_dataset)

    self.motion = opt.motion
    if self.motion == 'transformer':
        # Make the external motion3d package importable before loading the
        # checkpoint (unpickling resolves classes from that package).
        # Removed: unused `DPTransformer` import that only served the
        # commented-out construction below.
        import sys
        M3_PATH = '/u/jozhang/code/motion3d/'
        sys.path.insert(0, M3_PATH)
        # motion = DPTransformer(2, 64, {'depth': 3, 'heads': 8, 'dim_head': 8, 'mlp_dim': 64, 'dropout': 0.})
        ckpt = torch.load(opt.transformer_load_path)
        self.transformer = ckpt['model'].cuda()
        print(
            f'Using transformer motion loaded from {opt.transformer_load_path}'
        )
    elif self.motion == 'zero':
        print('Using no motion model')
    elif self.motion == 'cttrack':
        print('Using cttrack motion model')
    else:
        # BUG FIX: was `assert False, ...` -- asserts are stripped under
        # `python -O`, so unknown configs would fall through silently.
        raise ValueError(f'Do not recognize such motion model {self.motion}')
    self.negate_motion = opt.negate_motion
    if self.negate_motion:
        logging.warning('Motion is being negated! Are you sure?')
    self.all_pre_images = []
def make_dataset(model_name: str, save_folder: str, dataset_type: str,
                 dataset_folder: Optional[str]) -> Dataset:
    """Build the Dataset for a saved model, inferring its folder if needed.

    When ``dataset_folder`` is None the folder recorded in the model's
    training metadata is used instead; the folder must exist.
    """
    meta_path = os.path.join(save_folder, METADATA_PATH.format(model_name))
    meta = read_by_file_suffix(meta_path)

    folder = dataset_folder
    if folder is None:
        # Fall back to the folder recorded at training time.
        folder = os.path.dirname(meta['data_folders'][TRAIN.upper()])

    # The folder must exist before we hand it to the dataset factory.
    assert os.path.exists(folder), 'The dataset folder {0} does not exist!'.format(folder)

    return get_dataset(dataset_type=dataset_type, data_folder=folder)
def main(opt):
    """Prepare dataset-aware options, the model and its optimizer."""
    print('Creating model...')
    # cudnn autotuning only when benchmarking is allowed and not testing.
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test

    dataset_cls = get_dataset(opt.dataset)
    opt = opts().update_dataset_info_and_set_heads(opt, dataset_cls)
    print(opt)

    if not opt.not_set_cuda_env:
        os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    use_gpu = opt.gpus[0] >= 0
    opt.device = torch.device('cuda' if use_gpu else 'cpu')

    model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    optimizer = get_optimizer(opt, model)
    if opt.load_model != '':
        # Resume model weights, optimizer state and starting epoch.
        model, optimizer, start_epoch = load_model(
            model, opt.load_model, opt, optimizer)
def init_dataset(self):
    """Build the input pipeline: dataset reader, preprocessing, batching
    and a prefetch queue feeding the model clones."""
    ########################################################
    ##### Create a dataset container to read the dataset
    #######################################################
    # select the dataset
    print("load dataset", self.dataset_path)
    self.dataset = dataset_factory.get_dataset(
        self.dataset_name,
        self.dataset_path,
    )
    ############################
    #### Select the image preprocessing / augmentation method
    ############################
    image_processing_fn = preprocessing_factory.get_preprocessing(
        self.model_name,
        is_training=True,
    )
    with tf.device(self.deploy_config.inputs_device()):
        # Parallel readers feed a shared queue sized relative to the batch.
        provider = slim.dataset_data_provider.DatasetDataProvider(
            self.dataset,
            num_readers=4,
            common_queue_capacity=20 * self.batch_size,
            common_queue_min=10 * self.batch_size)
        [image, label] = provider.get(['image', 'label'])
        # Explicit image size wins; otherwise use the network's default.
        train_image_size = self.image_size or self.network_fn.default_image_size
        image = image_processing_fn(image, train_image_size, train_image_size)
        images, labels = tf.train.batch([image, label],
                                        batch_size=self.batch_size,
                                        num_threads=4,
                                        capacity=5 * self.batch_size)
        labels = slim.one_hot_encoding(labels, self.dataset.num_classes)
        # Prefetch batches so each model clone never starves on input.
        self.batch_queue = slim.prefetch_queue.prefetch_queue(
            [images, labels], capacity=2 * self.deploy_config.num_clones)
    print("Load dataset")
def train(data_folder: str, save_folder: str, hypers: HyperParameters,
          should_print: bool, max_epochs: Optional[int] = None) -> str:
    """Train a model described by ``hypers`` and return its training label.

    Args:
        data_folder: Folder containing the training dataset.
        save_folder: Folder where the model is saved.
        hypers: Hyperparameters (also carries the dataset type and epochs).
        should_print: Whether training should log progress.
        max_epochs: Optional cap that overrides ``hypers.epochs``.

    Returns:
        The label string produced by ``model.train``.
    """
    model = get_model(hypers, save_folder=save_folder, is_train=True)

    # Create dataset
    dataset = get_dataset(hypers.dataset_type, data_folder)

    if max_epochs is not None:
        hypers.epochs = max_epochs

    try:
        # Train the model
        train_label = model.train(dataset=dataset, should_print=should_print)
    finally:
        # BUG FIX: close the dataset files even when training raises
        # (the original leaked them on error).
        dataset.close()

    return train_label
def __init__(self, opt):
    """Create the model and tracker state; nuScenes gets one tracker per
    tracked class, other datasets a single tracker."""
    if opt.gpus[0] >= 0:
        opt.device = torch.device("cuda")
    else:
        opt.device = torch.device("cpu")
    print("Creating model...")
    self.model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    self.model = load_model(self.model, opt.load_model, opt)
    self.model = self.model.to(opt.device)
    self.model.eval()
    self.opt = opt
    self.trained_dataset = get_dataset(opt.dataset)
    # Per-channel normalization constants, shaped (1, 1, 3) for HWC images.
    self.mean = np.array(self.trained_dataset.mean, dtype=np.float32).reshape(1, 1, 3)
    self.std = np.array(self.trained_dataset.std, dtype=np.float32).reshape(1, 1, 3)
    # self.pause = not opt.no_pause
    # A negative test_focal_length selects the dataset default.
    self.rest_focal_length = (self.trained_dataset.rest_focal_length
                              if self.opt.test_focal_length < 0
                              else self.opt.test_focal_length)
    self.flip_idx = self.trained_dataset.flip_idx
    self.cnt = 0
    self.pre_images = None
    self.pre_image_ori = None
    self.dataset = opt.dataset
    if self.dataset == "nuscenes":
        # One independent tracker per nuScenes tracking class.
        self.tracker = {}
        for class_name in NUSCENES_TRACKING_NAMES:
            self.tracker[class_name] = Tracker(opt, self.model)
    else:
        self.tracker = Tracker(opt, self.model)
    self.debugger = Debugger(opt=opt, dataset=self.trained_dataset)
    # NOTE(review): placeholder frame dimensions; presumably overwritten
    # once a real image is processed -- confirm with the callers.
    self.img_height = 100
    self.img_width = 100
def main(opt):
    """Train, test or profile the model described by ``opt``.

    Modes (mutually exclusive, checked in order):
      * ``opt.test``        -- run validation once and evaluate predictions;
      * ``opt.export_onnx`` -- report FLOPs/params for the model;
      * otherwise           -- full training loop with periodic validation,
                               checkpointing and LR-step decay.
    """
    torch.manual_seed(opt.seed)
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test
    Dataset = get_dataset(opt.dataset, opt.task)
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    print(opt)

    logger = Logger(opt)
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')

    print('Creating model...')
    # Model creation -> pick backbone, heads, and head convolution
    model = create_model(opt.arch, opt.heads, opt.head_conv)
    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    start_epoch = 0
    if opt.load_model != '':
        model, optimizer, start_epoch = load_model(
            model, opt.load_model, optimizer, opt.resume, opt.lr, opt.lr_step)

    Trainer = train_factory[opt.task]
    trainer = Trainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    print('Setting up data...')
    val_loader = torch.utils.data.DataLoader(
        Dataset(opt, 'val'),
        batch_size=1,
        shuffle=False,
        num_workers=1,
        pin_memory=True
    )

    if opt.test:
        _, preds = trainer.val(0, val_loader)
        val_loader.dataset.run_eval(preds, opt.save_dir)
        return

    if opt.export_onnx:
        print('Exporting onnx model')
        # TODO: adapt the input size to the onnx export
        width = opt.input_res
        height = opt.input_res
        # torch.onnx.export with a dummy input does not support variable
        # input sizes, so only the FLOPs/params profile is reported here.
        flops, params = profile(model, input_size=(1, 3, width, height),
                                device='cuda')
        print(width, height, flops, params)
        # BUG FIX: the original source had a literal newline inside this
        # string ('Model exported. \n Done!'), a syntax error.
        print('Model exported. Done!')
        return

    train_loader = torch.utils.data.DataLoader(
        Dataset(opt, 'train'),
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers,
        pin_memory=True,
        drop_last=True
    )

    print('Starting training...')
    best = 1e10
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        mark = epoch if opt.save_all else 'last'
        log_dict_train, _ = trainer.train(epoch, train_loader)
        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary('train_{}'.format(k), v, epoch)
            logger.write('{} {:8f} | '.format(k, v))
        if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)),
                       epoch, model, optimizer)
            with torch.no_grad():
                log_dict_val, preds = trainer.val(epoch, val_loader)
            for k, v in log_dict_val.items():
                logger.scalar_summary('val_{}'.format(k), v, epoch)
                logger.write('{} {:8f} | '.format(k, v))
            if log_dict_val[opt.metric] < best:
                best = log_dict_val[opt.metric]
                # The "best" checkpoint intentionally omits optimizer state.
                save_model(os.path.join(opt.save_dir, 'model_best.pth'),
                           epoch, model)
        else:
            save_model(os.path.join(opt.save_dir, 'model_last.pth'),
                       epoch, model, optimizer)
        logger.write('\n')
        if epoch in opt.lr_step:
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                       epoch, model, optimizer)
            lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
            print('Drop LR to', lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
    logger.close()
def main(_):
    """Train an image classifier with TF-Slim on the selected dataset."""
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')
    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # Create global_step
        global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ######################
        # Select the network #
        ######################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            num_readers=FLAGS.num_readers,
            common_queue_capacity=20 * FLAGS.batch_size,
            common_queue_min=10 * FLAGS.batch_size)
        # NOTE(review): `filename` is fetched but never used below.
        [image, label, filename] = provider.get(['image', 'label', 'filename'])
        # Shift labels down by the background offset.
        label -= FLAGS.labels_offset

        train_image_size = FLAGS.train_image_size

        image = image_preprocessing_fn(image, train_image_size, train_image_size)

        images, labels = tf.train.batch(
            [image, label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)
        labels = slim.one_hot_encoding(
            labels, dataset.num_classes - FLAGS.labels_offset)

        ####################
        # Define the model #
        ####################
        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        #################################
        # Configure the moving averages #
        #################################
        # NOTE(review): moving_average_variables / variable_averages are
        # computed but never applied in the visible code.
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        #########################################
        # Configure the optimization procedure. #
        #########################################
        learning_rate = _configure_learning_rate(dataset.num_samples,
                                                 global_step)
        optimizer = _configure_optimizer(learning_rate)
        summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        # Variables to train.
        # NOTE(review): variables_to_train is not passed to create_train_op
        # below -- so all trainable variables are trained; confirm intent.
        variables_to_train = _get_variables_to_train()

        predictions, _ = network_fn(images)
        labels = tf.squeeze(labels)
        total_loss = slim.losses.softmax_cross_entropy(predictions, labels)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        train_op = slim.learning.create_train_op(total_loss, optimizer)

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')
        saver = tf.train.Saver(max_to_keep=20)

        ###########################
        # Kicks off the training. #
        ###########################
        slim.learning.train(
            train_op,
            saver=saver,
            logdir=FLAGS.train_dir,
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            sync_optimizer=optimizer if FLAGS.sync_replicas else None)
def main(_):
    """Evaluate an image-classifier checkpoint once with TF-Slim."""
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        # shuffle=False: evaluation makes a deterministic pass.
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * FLAGS.batch_size,
            common_queue_min=FLAGS.batch_size,
        )
        [image, label] = provider.get(['image', 'label'])
        label -= FLAGS.labels_offset

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

        image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

        images, labels = tf.train.batch(
            [image, label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size,
        )

        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images)

        # Restore the EMA shadow variables when moving averages were used
        # during training; otherwise restore the plain variables.
        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
            # 'Recall_5': slim.metrics.streaming_recall_at_k(
            #     logits, labels, 5),
        })

        # Print the summaries to screen.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = math.ceil(dataset.num_samples / float(FLAGS.batch_size))

        # A directory means "use the most recent checkpoint inside it".
        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path

        # # checkpoint_path_list = ['/home/cp/git/tensorflow/models/research/slim/logs/train/vgg16/model.ckpt-330','/home/cp/git/tensorflow/models/research/slim/logs/train/vgg16/model.ckpt-382']
        # # for checkpoint_path in checkpoint_path_list:
        tf.logging.info('Evaluating %s' % checkpoint_path)

        slim.evaluation.evaluate_once(
            master=FLAGS.master,
            checkpoint_path=checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            variables_to_restore=variables_to_restore)
def __init__(self, camera_stream, obstacle_tracking_stream, flags,
             camera_setup):
    """ERDOS operator that runs a CenterTrack model on camera frames.

    Builds CenterTrack options from the operator flags and the camera
    intrinsics, loads the model and creates tracker state.
    """
    from dataset.dataset_factory import get_dataset
    from model.model import create_model, load_model
    from opts import opts
    from utils.tracker import Tracker
    camera_stream.add_callback(self.on_frame_msg, [obstacle_tracking_stream])
    self._flags = flags
    self._logger = erdos.utils.setup_logging(self.config.name,
                                             self.config.log_file_name)
    self._csv_logger = erdos.utils.setup_csv_logging(
        self.config.name + '-csv', self.config.csv_log_file_name)
    self._camera_setup = camera_setup
    # TODO(ionel): Might have to filter labels when running with a coco
    # and a nuscenes model.
    # Per-dataset number of object classes the model predicts.
    num_classes = {
        'kitti_tracking': 3,
        'coco': 90,
        'mot': 1,
        'nuscenes': 10
    }
    # Other flags:
    # 1) --K ; max number of output objects.
    # 2) --fix_short ; resizes the height of the image to fix short, and
    #    the width such the aspect ratio is maintained.
    # 3) --pre_hm ; pre heat map.
    # 4) --input_w; str(camera_setup.width)
    # 5) --input_h; str(camera_setup.height)
    args = [
        'tracking', '--load_model', flags.center_track_model_path,
        '--dataset', flags.center_track_model, '--test_focal_length',
        str(int(camera_setup.get_focal_length())), '--out_thresh',
        str(flags.obstacle_detection_min_score_threshold), '--pre_thresh',
        str(flags.obstacle_detection_min_score_threshold), '--new_thresh',
        str(flags.obstacle_detection_min_score_threshold), '--track_thresh',
        str(flags.obstacle_detection_min_score_threshold), '--max_age',
        str(flags.obstacle_track_max_age), '--num_classes',
        str(num_classes[flags.center_track_model]), '--tracking',
        '--hungarian'
    ]
    opt = opts().init(args)
    # NOTE(review): gpu is hard-coded to True, so the CPU branch below is
    # dead code in the current form.
    gpu = True
    if gpu:
        opt.device = torch.device('cuda')
    else:
        opt.device = torch.device('cpu')
    self.opt = opt
    self.model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    self.model = load_model(self.model, opt.load_model, opt)
    self.model = self.model.to(self.opt.device)
    self.model.eval()

    self.trained_dataset = get_dataset(opt.dataset)
    # Per-channel normalization constants, shaped (1, 1, 3) for HWC frames.
    self.mean = np.array(self.trained_dataset.mean,
                         dtype=np.float32).reshape(1, 1, 3)
    self.std = np.array(self.trained_dataset.std,
                        dtype=np.float32).reshape(1, 1, 3)
    # A negative test_focal_length selects the dataset default.
    self.rest_focal_length = self.trained_dataset.rest_focal_length \
        if self.opt.test_focal_length < 0 else self.opt.test_focal_length
    self.flip_idx = self.trained_dataset.flip_idx
    self.cnt = 0
    self.pre_images = None
    self.pre_image_ori = None
    self.tracker = Tracker(opt)
def main(opt):
    """Fine-tune: freeze the whole network, unfreeze only the bdd heads,
    then run the training loop with periodic validation and checkpoints."""
    torch.manual_seed(opt.seed)
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test
    Dataset = get_dataset(opt.dataset)
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    print(opt)
    if not opt.not_set_cuda_env:
        os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')
    logger = Logger(opt)

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    optimizer = get_optimizer(opt, model)
    start_epoch = 0
    if opt.load_model != '':
        model, optimizer, start_epoch = load_model(model, opt.load_model, opt,
                                                   optimizer)

    # Freeze the backbone and every head, then re-enable gradients only for
    # the bdd-specific heads.
    # BUG FIX: the original used eval() on "model.<head>" strings; getattr
    # has the same effect without dynamic code execution.
    for param in model.parameters():
        param.requires_grad = False
    for head_name in ('hm_bdd', 'wh_bdd', 'reg_bdd'):
        for hd in getattr(model, head_name):
            for wt in hd.parameters():
                wt.requires_grad = True

    trainer = Trainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    if opt.val_intervals < opt.num_epochs or opt.test:
        print('Setting up validation data...')
        val_loader = torch.utils.data.DataLoader(Dataset(opt, 'val'),
                                                 batch_size=1,
                                                 shuffle=False,
                                                 num_workers=1,
                                                 pin_memory=True)
        if opt.test:
            _, preds = trainer.val(0, val_loader)
            val_loader.dataset.run_eval(preds, opt.save_dir)
            return

    print('Setting up train data...')
    train_loader = torch.utils.data.DataLoader(Dataset(opt, 'train'),
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=True,
                                               drop_last=True)

    print('Starting training...')
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        mark = epoch if opt.save_all else 'last'
        log_dict_train, _ = trainer.train(epoch, train_loader)
        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary('train_{}'.format(k), v, epoch)
            logger.write('{} {:8f} | '.format(k, v))
        if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)),
                       epoch, model, optimizer)
            with torch.no_grad():
                log_dict_val, preds = trainer.val(epoch, val_loader)
                if opt.eval_val:
                    val_loader.dataset.run_eval(preds, opt.save_dir)
            for k, v in log_dict_val.items():
                logger.scalar_summary('val_{}'.format(k), v, epoch)
                logger.write('{} {:8f} | '.format(k, v))
        else:
            save_model(os.path.join(opt.save_dir, 'model_last.pth'),
                       epoch, model, optimizer)
        logger.write('\n')
        # Periodic numbered checkpoint every save_point[0] epochs.
        if epoch % opt.save_point[0] == 0:
            save_model(
                os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                epoch, model, optimizer)
        if epoch in opt.lr_step:
            lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
            print('Drop LR to', lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
    logger.close()
def main(opt):
    """Train the trajectory-prediction RNN (DecoderRNN) one sample at a time."""
    torch.manual_seed(opt.seed)
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test
    Dataset = get_dataset(opt.dataset, prediction_model=True)
    if not opt.not_set_cuda_env:
        os.environ["CUDA_VISIBLE_DEVICES"] = opt.gpus_str
    opt.device = torch.device("cuda" if opt.gpus[0] >= 0 else "cpu")
    device = opt.device
    logger = Logger(opt)

    print("Creating model...")
    model = DecoderRNN(128, opt)
    optimizer = get_optimizer(opt, model)
    start_epoch = 0
    # NOTE(review): the guard checks opt.load_model_traj but then loads
    # opt.load_model -- likely a bug; confirm which flag should be used.
    if opt.load_model_traj != "":
        model, optimizer, start_epoch = load_model(model, opt.load_model, opt,
                                                   optimizer)
    loss_function = torch.nn.SmoothL1Loss()
    # Make every parameter trainable (undo any freezing from load_model).
    for i, param in enumerate(model.parameters()):
        param.requires_grad = True
    train_loader = torch.utils.data.DataLoader(
        Dataset(opt, "train"),
        batch_size=1,
        shuffle=True,
        num_workers=16,
        pin_memory=True,
        drop_last=True,
    )
    # Move any restored optimizer state tensors onto the training device.
    for state in optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.to(device=device, non_blocking=True)
    model = model.to(device)
    loss_function = loss_function.to(device)
    print("Starting training...")
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        # NOTE(review): mark is computed but never used below.
        mark = epoch if opt.save_all else "last"
        for iter_id, (inputs, targets) in enumerate(train_loader):
            inputs = inputs.to(device=device).float()
            targets = targets.to(device=device).view(1, -1).float()
            outputs = model(inputs)
            loss = loss_function(outputs, targets)
            # Rescale the loss: small losses get a 100x multiplier,
            # larger ones 10x.
            if 100 * loss.item() < 20:
                loss = 100 * loss
            else:
                loss = 10 * loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            del outputs, loss
        # Checkpoint after every epoch: rolling "last" plus a numbered copy.
        save_model(os.path.join(opt.save_dir, "model_last.pth"), epoch, model,
                   optimizer)
        logger.write("\n")
        save_model(
            os.path.join(opt.save_dir, "model_{}.pth".format(epoch)),
            epoch,
            model,
            optimizer,
        )
        if epoch in opt.lr_step:
            lr = opt.lr * (0.1**(opt.lr_step.index(epoch) + 1))
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr
    logger.close()
def main(_):
    """Train an image classifier with TF-slim's multi-clone deployment.

    Builds the full TF1 graph (input pipeline, network clones, losses,
    summaries, optimizer) and hands it to ``slim.learning.train``.
    Relies on module-level FLAGS and helpers (_configure_learning_rate,
    _configure_optimizer, _get_variables_to_train, _get_init_fn,
    GetModelStep) defined elsewhere in this file.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')
    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        #######################
        # Config model_deploy #
        #######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # Create global_step on the parameter-server/variables device.
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_dir)

        ######################
        # Select the network #
        ######################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image, label] = provider.get(['image', 'label'])
            # Shift labels so class ids start at 0 after the offset.
            label -= FLAGS.labels_offset

            train_image_size = FLAGS.train_image_size or network_fn.default_image_size
            image = image_preprocessing_fn(image, train_image_size,
                                           train_image_size)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            labels = slim.one_hot_encoding(
                labels, dataset.num_classes - FLAGS.labels_offset)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * deploy_config.num_clones)

        ####################
        # Define the model #
        ####################
        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple clones of network_fn."""
            images, labels = batch_queue.dequeue()
            #"""Build PNASNet Large model for the ImageNet Dataset."""
            logits, end_points = network_fn(images)

            #############################
            # Specify the loss function #
            #############################
            # Auxiliary head (e.g. Inception/NASNet-style) gets a 0.4-weight loss.
            if 'AuxLogits' in end_points:
                slim.losses.softmax_cross_entropy(
                    end_points['AuxLogits'],
                    labels,
                    label_smoothing=FLAGS.label_smoothing,
                    weights=0.4,
                    scope='aux_loss')
            slim.losses.softmax_cross_entropy(
                logits,
                labels,
                label_smoothing=FLAGS.label_smoothing,
                weights=1.0)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn,
                                            [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))
            summaries.add(
                tf.summary.scalar('sparsity/' + end_point,
                                  tf.nn.zero_fraction(x)))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        #################################
        # Configure the moving averages #
        #################################
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        # Optional quantization-aware training rewrite of the graph.
        if FLAGS.quantize_delay >= 0:
            tf.contrib.quantize.create_training_graph(
                quant_delay=FLAGS.quantize_delay)

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset.num_samples,
                                                     global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.sync_replicas:
            # If sync_replicas is enabled, the averaging will be done in the chief
            # queue runner.
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                total_num_replicas=FLAGS.worker_replicas,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables)
        elif FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = _get_variables_to_train()

        # and returns a train_tensor and summary_op
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        update_op = tf.group(*update_ops)
        # train_tensor only yields total_loss after all update_ops have run.
        with tf.control_dependencies([update_op]):
            train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        print(optimizer)
        ###########################
        # Kicks off the training. #
        ###########################
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            # NOTE(review): startup_delay_steps is fed from GetModelStep —
            # presumably to resume step counting; confirm intent.
            startup_delay_steps=GetModelStep(FLAGS.train_dir),
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            sync_optimizer=optimizer if FLAGS.sync_replicas else None)
save_by_file_suffix([comparison_log], comparison_log_path) if __name__ == '__main__': parser = ArgumentParser() parser.add_argument('--adaptive-model-paths', type=str, required=True, nargs='+', help='Paths to the Budget RNNs. Can be a directory containing the models.') parser.add_argument('--adaptive-log', type=str, required=True, help='Path to the merged Budget RNN simulation log.') parser.add_argument('--baseline-logs', type=str, required=True, nargs='+', help='Paths to the baseline logs to compare against.') parser.add_argument('--dataset-folder', type=str, required=True, help='Path to the dataset folder.') parser.add_argument('--sensor-type', type=str, choices=['bluetooth', 'temp'], required=True, help='The sensor type. Should align with the simulation results.') parser.add_argument('--should-print', action='store_true', help='Whether to print to stdout during execution.') args = parser.parse_args() # Load the target data-set dataset = get_dataset(dataset_type='standard', data_folder=args.dataset_folder) # Unpack the power system type power_type = PowerType[args.sensor_type.upper()] # Load the adaptive model results and controllers adaptive_result_dict: Dict[str, ModelResults] = dict() adaptive_system_dict: Dict[str, RuntimeSystem] = dict() power_system_dict: Dict[str, PowerSystem] = dict() # Expand the model paths by unpacking directories model_paths: List[str] = [] for model_path in args.adaptive_model_paths: if os.path.isdir(model_path): model_paths.extend(iterate_files(model_path, pattern=r'.*model-SAMPLE_RNN-.*')) model_paths.extend(iterate_files(model_path, pattern=r'.*model-BUDGET_RNN-.*'))
import os
import torch
import torch.utils.data
from opts import opts
from model.model import create_model, load_model, save_model
from model.data_parallel import DataParallel
from logger import Logger
from dataset.dataset_factory import get_dataset
from trainer import Trainer
from main import get_optimizer

if __name__ == '__main__':
    # Debug utility: load two checkpoints of the same architecture and report
    # whether any corresponding parameter tensors differ.
    opt = opts().parse()
    torch.manual_seed(opt.seed)
    Dataset = get_dataset(opt.dataset)
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    # Hard-coded checkpoint paths of the two runs being compared.
    path_1 = '/mnt/3dvision-cpfs/zhuoyu/CenterTrack/exp/ddd/nu_3d_det_uni/model_last.pth'
    path_2 = '/mnt/3dvision-cpfs/zhuoyu/CenterTrack/exp/ddd/nu_3d_det_fix_param/model_last.pth'
    model_1 = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    model_2 = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    # A single optimizer instance is reused for both load_model calls; its
    # restored state is not used afterwards.
    optimizer = get_optimizer(opt, model_1)
    model_1, _, _ = load_model(model_1, path_1, opt, optimizer)
    model_2, _, _ = load_model(model_2, path_2, opt, optimizer)
    # Print False once for every parameter tensor that differs; prints
    # nothing when the checkpoints are parameter-identical.
    for p1, p2 in zip(model_1.parameters(), model_2.parameters()):
        if p1.data.ne(p2.data).sum() > 0:
            print(False)
def main(opt):
    """Train a CenterTrack-style model with mlflow logging and early stopping.

    Side effects: logs all options/metrics to mlflow, writes checkpoints into
    ``opt.save_dir`` (including ``model_best.pth`` tracked by ``opt.metric``),
    and stops early when no improvement for ``opt.early_stopping`` epochs.
    """
    torch.manual_seed(opt.seed)
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test
    Dataset = get_dataset(opt.dataset)
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    print(opt)
    # Log our parameters into mlflow
    for key, value in vars(opt).items():
        mlflow.log_param(key, value)
    if not opt.not_set_cuda_env:
        os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')
    logger = Logger(opt)

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    optimizer = get_optimizer(opt, model)
    start_epoch = 0
    if opt.load_model != '':
        model, optimizer, start_epoch = load_model(model, opt.load_model, opt,
                                                   optimizer)
    trainer = Trainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    # Validation loader is only needed when we will actually validate or test.
    if opt.val_intervals < opt.num_epochs or opt.test:
        print('Setting up validation data...')
        val_loader = torch.utils.data.DataLoader(Dataset(
            opt, 'val', opt.data_name),
                                                 batch_size=1,
                                                 shuffle=False,
                                                 num_workers=1,
                                                 pin_memory=True)
        if opt.test:
            # Test-only mode: evaluate once and exit.
            _, preds = trainer.val(0, val_loader)
            val_loader.dataset.run_eval(preds, opt.save_dir)
            return

    print('Setting up train data...')
    train_loader = torch.utils.data.DataLoader(Dataset(opt, 'train',
                                                       opt.data_name),
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=True,
                                               drop_last=True)

    print('Starting training...')
    best = 1e10          # best (lowest) value of opt.metric seen so far
    best_epoch = 1e10    # epoch at which `best` was achieved
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        mark = epoch if opt.save_all else 'last'
        log_dict_train, _ = trainer.train(epoch, train_loader)
        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary('train_{}'.format(k), v, epoch)
            logger.write('{} {:8f} | '.format(k, v))
            mlflow.log_metric('train_{}'.format(k), v, step=epoch)
        if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
            save_model(os.path.join(opt.save_dir,
                                    'model_{}.pth'.format(mark)), epoch,
                       model, optimizer)
            with torch.no_grad():
                log_dict_val, preds = trainer.val(epoch, val_loader)
                if opt.eval_val:
                    val_loader.dataset.run_eval(preds, opt.save_dir)
            for k, v in log_dict_val.items():
                logger.scalar_summary('val_{}'.format(k), v, epoch)
                logger.write('{} {:8f} | '.format(k, v))
                mlflow.log_metric('val_{}'.format(k), v, step=epoch)
            # Track the best validation metric; the best checkpoint is saved
            # without the optimizer state.
            if log_dict_val[opt.metric] < best:
                best = log_dict_val[opt.metric]
                best_epoch = epoch
                save_model(os.path.join(opt.save_dir, 'model_best.pth'),
                           epoch, model)
        else:
            save_model(os.path.join(opt.save_dir, 'model_last.pth'), epoch,
                       model, optimizer)
        logger.write('\n')
        if epoch in opt.save_point:
            save_model(
                os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                epoch, model, optimizer)
        # early stopping
        # NOTE(review): isinstance(..., int) is True for bools too; assumes
        # opt.early_stopping is an int patience or a non-int sentinel.
        if isinstance(opt.early_stopping, int):
            if epoch - best_epoch > opt.early_stopping:
                msg = 'Stopped {} epoch. Best epoch is {}, score is {}.'.format(
                    epoch, best_epoch, best)
                print(msg)
                logger.write(msg)
                break
        if epoch in opt.lr_step:
            lr = opt.lr * (0.1**(opt.lr_step.index(epoch) + 1))
            print('Drop LR to', lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
    logger.close()
def main(opt):
    """Train a tracking model, validating via MOTA on the '17halfval' split.

    Instead of a loss-based validation pass, every ``opt.val_intervals``
    epochs this runs ``prefetch_test`` and tracks the best MOTA/epoch.
    """
    torch.manual_seed(opt.seed)
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test
    Dataset = get_dataset(opt.dataset)
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    print(opt)
    if not opt.not_set_cuda_env:
        os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')
    logger = Logger(opt)

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    optimizer = get_optimizer(opt, model)
    start_epoch = 0
    if opt.load_model != '':
        model, optimizer, start_epoch = load_model(model, opt.load_model, opt,
                                                   optimizer)
    trainer = Trainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    print('Setting up train data...')
    train_loader = torch.utils.data.DataLoader(Dataset(opt, 'train'),
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=True,
                                               drop_last=True)

    print('Starting training...')
    # for each epoch, record scale
    bestmota = 0   # best MOTA achieved so far
    bestepoch = 0  # epoch of the best MOTA
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        mark = epoch if opt.save_all else 'last'
        log_dict_train, _ = trainer.train(epoch, train_loader)
        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary('train_{}'.format(k), v, epoch)
            logger.write('{} {:8f} | '.format(k, v))
        # Always refresh the rolling "last" checkpoint after each epoch.
        save_model(os.path.join(opt.save_dir, 'model_last.pth'), epoch, model,
                   optimizer)
        if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
            save_model(os.path.join(opt.save_dir,
                                    'model_{}.pth'.format(mark)), epoch,
                       model, optimizer)
            # Loss-based validation replaced by tracker evaluation below.
            # with torch.no_grad():
            #     log_dict_val, preds = trainer.val(epoch, val_loader)
            #     if opt.eval_val:
            #         val_loader.dataset.run_eval(preds, opt.save_dir)
            # for k, v in log_dict_val.items():
            #     logger.scalar_summary('val_{}'.format(k), v, epoch)
            #     logger.write('{} {:8f} | '.format(k, v))
            valset = '17halfval'
            mota, motp = prefetch_test(opt, valset)
            if mota > bestmota:
                bestmota = mota
                bestepoch = epoch
            print('mota = {}, motp = {}, bestmota = {}, bestepoch = {}'.format(
                mota, motp, bestmota, bestepoch))
        logger.write('\n')
        if epoch in opt.save_point:
            save_model(
                os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                epoch, model, optimizer)
        if epoch in opt.lr_step:
            lr = opt.lr * (0.1**(opt.lr_step.index(epoch) + 1))
            print('Drop LR to', lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
    logger.close()
def input_fn():
    """Build the SSD-style training input pipeline.

    Creates anchors for three feature-map scales, wires preprocessing and
    anchor encoding into the dataset reader, and returns
    ``(image, targets_dict)`` for an Estimator-style training loop.

    Side effect: populates the module-level ``global_anchor_info`` with the
    decode function and per-layer anchor counts used by the model function.
    """
    out_shape = [FLAGS.train_image_size] * 2

    # Three detection layers with fixed shapes/scales/ratios.
    # NOTE(review): layer shapes (24/12/6) imply a 384-ish input when steps
    # are 16/32/64 — assumes FLAGS.train_image_size matches; confirm.
    anchor_creator = anchor_manipulator_v2.AnchorCreator(
        out_shape,
        layers_shapes=[(24, 24), (12, 12), (6, 6)],
        anchor_scales=[(0.1, ), (0.2, 0.375, 0.55), (0.725, 0.9)],
        extra_anchor_scales=[(0.1414, ), (0.2739, 0.4541, 0.6315),
                             (0.8078, 0.9836)],
        anchor_ratios=[(2., .5), (2., 3., .5, 0.3333), (2., .5)],
        layer_steps=[16, 32, 64])
    all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors(
    )

    # anchors per layer = depth (scales*ratios) x spatial positions.
    num_anchors_per_layer = []
    for ind in range(len(all_anchors)):
        num_anchors_per_layer.append(all_num_anchors_depth[ind] *
                                     all_num_anchors_spatial[ind])

    anchor_encoder_decoder = anchor_manipulator_v2.AnchorEncoder(
        allowed_borders=[1.0] * 6,
        positive_threshold=FLAGS.match_threshold,
        ignore_threshold=FLAGS.neg_threshold,
        prior_scaling=[0.1, 0.1, 0.2, 0.2])

    # Adapter: dataset passes (image, shape, labels, boxes); the 'xdet_resnet'
    # preprocessor takes (image, labels, boxes) plus shape/data-format kwargs.
    image_preprocessing_fn = lambda image_, shape_, glabels_, gbboxes_: preprocessing_factory.get_preprocessing(
        'xdet_resnet', is_training=True
    )(image_, glabels_, gbboxes_, out_shape=out_shape, data_format=(
        'NCHW' if FLAGS.data_format == 'channels_first' else 'NHWC'))
    anchor_encoder_fn = lambda glabels_, gbboxes_: anchor_encoder_decoder.encode_all_anchors(
        glabels_, gbboxes_, all_anchors, all_num_anchors_depth,
        all_num_anchors_spatial)

    image, shape, loc_targets, cls_targets, match_scores = dataset_factory.get_dataset(
        FLAGS.dataset_name,
        FLAGS.dataset_split_name,
        FLAGS.data_dir,
        image_preprocessing_fn,
        file_pattern=None,
        reader=None,
        batch_size=FLAGS.batch_size,
        num_readers=FLAGS.num_readers,
        num_preprocessing_threads=FLAGS.num_preprocessing_threads,
        num_epochs=FLAGS.train_epochs,
        anchor_encoder=anchor_encoder_fn)

    # Share decode info with the model function via module-level state.
    global global_anchor_info
    global_anchor_info = {
        'decode_fn':
        lambda pred: anchor_encoder_decoder.decode_all_anchors(
            pred, num_anchors_per_layer),
        'num_anchors_per_layer':
        num_anchors_per_layer,
        'all_num_anchors_depth':
        all_num_anchors_depth
    }

    return image, {
        'shape': shape,
        'loc_targets': loc_targets,
        'cls_targets': cls_targets,
        'match_scores': match_scores
    }
def main(opt):
    """Train/evaluate a detection model with dataset-specific evaluation.

    Uses ``opt.eval`` (rather than ``opt.test``) as the evaluate-only switch,
    logs the learning rate each epoch, and when ``opt.run_dataset_eval`` is
    set parses the evaluator's ``metrics_summary.json`` (mean AP, TP errors,
    NDS) into the logger — a nuScenes-style metrics layout.
    """
    torch.manual_seed(opt.seed)
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.eval
    Dataset = get_dataset(opt.dataset)
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    print(opt)
    if not opt.not_set_cuda_env:
        os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')
    logger = Logger(opt)

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    optimizer = get_optimizer(opt, model)
    start_epoch = 0
    lr = opt.lr
    if opt.load_model != '':
        model, optimizer, start_epoch = load_model(
            model, opt.load_model, opt, optimizer)
    trainer = Trainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    if opt.val_intervals < opt.num_epochs or opt.eval:
        print('Setting up validation data...')
        val_loader = torch.utils.data.DataLoader(
            Dataset(opt, opt.val_split), batch_size=1, shuffle=False,
            num_workers=1, pin_memory=True)

        if opt.eval:
            # Evaluate-only mode: run validation once and exit.
            _, preds = trainer.val(0, val_loader)
            val_loader.dataset.run_eval(preds, opt.save_dir,
                                        n_plots=opt.eval_n_plots,
                                        render_curves=opt.eval_render_curves)
            return

    print('Setting up train data...')
    train_loader = torch.utils.data.DataLoader(
        Dataset(opt, opt.train_split), batch_size=opt.batch_size,
        shuffle=opt.shuffle_train, num_workers=opt.num_workers,
        pin_memory=True, drop_last=True
    )

    print('Starting training...')
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        mark = epoch if opt.save_all else 'last'

        # log learning rate (all param groups share one LR; read the first)
        for param_group in optimizer.param_groups:
            lr = param_group['lr']
            logger.scalar_summary('LR', lr, epoch)
            break

        # train one epoch
        log_dict_train, _ = trainer.train(epoch, train_loader)
        logger.write('epoch: {} |'.format(epoch))

        # log train results
        for k, v in log_dict_train.items():
            logger.scalar_summary('train_{}'.format(k), v, epoch)
            logger.write('{} {:8f} | '.format(k, v))

        # evaluate
        if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)),
                       epoch, model, optimizer)
            with torch.no_grad():
                log_dict_val, preds = trainer.val(epoch, val_loader)

                # evaluate val set using dataset-specific evaluator
                if opt.run_dataset_eval:
                    out_dir = val_loader.dataset.run_eval(
                        preds, opt.save_dir,
                        n_plots=opt.eval_n_plots,
                        render_curves=opt.eval_render_curves)

                    # log dataset-specific evaluation metrics
                    with open('{}/metrics_summary.json'.format(out_dir), 'r') as f:
                        metrics = json.load(f)
                    logger.scalar_summary('AP/overall',
                                          metrics['mean_ap']*100.0, epoch)
                    for k,v in metrics['mean_dist_aps'].items():
                        logger.scalar_summary('AP/{}'.format(k), v*100.0, epoch)
                    for k,v in metrics['tp_errors'].items():
                        logger.scalar_summary('Scores/{}'.format(k), v, epoch)
                    logger.scalar_summary('Scores/NDS', metrics['nd_score'],
                                          epoch)

            # log eval results
            for k, v in log_dict_val.items():
                logger.scalar_summary('val_{}'.format(k), v, epoch)
                logger.write('{} {:8f} | '.format(k, v))

        # save this checkpoint
        else:
            save_model(os.path.join(opt.save_dir, 'model_last.pth'),
                       epoch, model, optimizer)

        logger.write('\n')
        if epoch in opt.save_point:
            save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                       epoch, model, optimizer)

        # update learning rate
        if epoch in opt.lr_step:
            lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
            print('Drop LR to', lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
    logger.close()
def main(opt):
    """Standard training loop variant that checkpoints every epoch.

    Differences from sibling ``main`` variants: all parameters are forced
    trainable after loading, a per-epoch checkpoint is written both at the
    start and the end of each epoch, and the ``opt.save_point`` gate on the
    end-of-epoch save is commented out (saves unconditionally).
    """
    torch.manual_seed(opt.seed)
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test
    Dataset = get_dataset(opt.dataset)
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    print(opt)
    if not opt.not_set_cuda_env:
        os.environ["CUDA_VISIBLE_DEVICES"] = opt.gpus_str
    opt.device = torch.device("cuda" if opt.gpus[0] >= 0 else "cpu")
    logger = Logger(opt)

    print("Creating model...")
    model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    optimizer = get_optimizer(opt, model)
    start_epoch = 0
    if opt.load_model != "":
        model, optimizer, start_epoch = load_model(
            model, opt.load_model, opt, optimizer
        )
    # Unfreeze everything, whatever the checkpoint had frozen.
    for i, param in enumerate(model.parameters()):
        param.requires_grad = True
    trainer = Trainer(opt, model, optimizer)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    if opt.val_intervals < opt.num_epochs or opt.test:
        print("Setting up validation data...")
        val_loader = torch.utils.data.DataLoader(
            Dataset(opt, "val"),
            batch_size=1,
            shuffle=False,
            num_workers=1,
            pin_memory=True,
        )

        if opt.test:
            # Test-only mode: evaluate once and exit.
            _, preds = trainer.val(0, val_loader)
            val_loader.dataset.run_eval(preds, opt.save_dir)
            return

    print("Setting up train data...")
    train_loader = torch.utils.data.DataLoader(
        Dataset(opt, "train"),
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers,
        pin_memory=True,
        drop_last=True,
    )

    print("Starting training...")
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        # Pre-epoch checkpoint (same path is rewritten post-epoch below).
        save_model(
            os.path.join(opt.save_dir, "model_{}.pth".format(epoch)),
            epoch,
            model,
            optimizer,
        )
        mark = epoch if opt.save_all else "last"
        log_dict_train, _ = trainer.train(epoch, train_loader)
        logger.write("epoch: {} |".format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary("train_{}".format(k), v, epoch)
            logger.write("{} {:8f} | ".format(k, v))
        if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
            save_model(
                os.path.join(opt.save_dir, "model_{}.pth".format(mark)),
                epoch,
                model,
                optimizer,
            )
            with torch.no_grad():
                log_dict_val, preds = trainer.val(epoch, val_loader)
                if opt.eval_val:
                    val_loader.dataset.run_eval(preds, opt.save_dir)
            for k, v in log_dict_val.items():
                logger.scalar_summary("val_{}".format(k), v, epoch)
                logger.write("{} {:8f} | ".format(k, v))
        else:
            save_model(
                os.path.join(opt.save_dir, "model_last.pth"), epoch, model, optimizer
            )
        logger.write("\n")
        # if epoch in opt.save_point:
        save_model(
            os.path.join(opt.save_dir, "model_{}.pth".format(epoch)),
            epoch,
            model,
            optimizer,
        )
        if epoch in opt.lr_step:
            lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
            print("Drop LR to", lr)
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr
    logger.close()
def main(opt):
    """Training loop variant with selective freezing and optional subsampling.

    ``opt.fix_backbone`` / ``opt.fix_dla_up`` / ``opt.fix_ida_up`` freeze the
    corresponding model sub-modules; ``opt.using_randomly_half`` trains on a
    seeded random fraction (``opt.use_percent``) of the training set.
    """
    torch.manual_seed(opt.seed)
    torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test
    Dataset = get_dataset(opt.dataset)
    print(Dataset)
    opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
    print(opt)
    if not opt.not_set_cuda_env:
        os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')
    logger = Logger(opt)

    print('Creating model...')
    model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
    # Optionally freeze parts of the network before the optimizer is built,
    # so frozen parameters are excluded from gradient updates.
    if opt.fix_backbone:
        for param in model.backbone.parameters():
            param.requires_grad = False
    if opt.fix_dla_up:
        for param in model.neck.dla_up.parameters():
            param.requires_grad = False
    if opt.fix_ida_up:
        for param in model.neck.ida_up.parameters():
            param.requires_grad = False
    optimizer = get_optimizer(opt, model)
    start_epoch = 0
    if opt.load_model != '':
        model, optimizer, start_epoch = load_model(model, opt.load_model, opt,
                                                   optimizer)
    trainer = Trainer(opt, model, optimizer, logger)
    trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)

    if opt.val_intervals < opt.num_epochs or opt.test:
        print('Setting up validation data...')
        val_loader = torch.utils.data.DataLoader(Dataset(opt, 'val'),
                                                 batch_size=1,
                                                 shuffle=False,
                                                 num_workers=1,
                                                 pin_memory=True)

        if opt.test:
            # Test-only mode: evaluate once and exit.
            _, preds = trainer.val(0, val_loader)
            val_loader.dataset.run_eval(preds, opt.save_dir)
            return

    print('Setting up train data...')
    if opt.using_randomly_half:
        # Seeded random split: keep use_percent of the data, discard the rest.
        test_data = Dataset(opt, 'train')
        length = len(test_data)
        torch.random.manual_seed(opt.seed)
        actual_dataset, _ = torch.utils.data.random_split(
            test_data, [
                int(length * opt.use_percent),
                length - int(length * opt.use_percent)
            ])
    else:
        actual_dataset = Dataset(opt, 'train')
    train_loader = torch.utils.data.DataLoader(actual_dataset,
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.num_workers,
                                               pin_memory=True,
                                               drop_last=True)

    print('Starting training...')
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        mark = epoch if opt.save_all else 'last'
        log_dict_train, _ = trainer.train(epoch, train_loader)
        logger.write('epoch: {} |'.format(epoch))
        for k, v in log_dict_train.items():
            logger.scalar_summary('train_{}'.format(k), v, epoch)
            logger.write('{} {:8f} | '.format(k, v))
        if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
            save_model(os.path.join(opt.save_dir,
                                    'model_{}.pth'.format(mark)), epoch,
                       model, optimizer)
            with torch.no_grad():
                log_dict_val, preds = trainer.val(epoch, val_loader)
                if opt.eval_val:
                    val_loader.dataset.run_eval(preds, opt.save_dir)
            for k, v in log_dict_val.items():
                logger.scalar_summary('val_{}'.format(k), v, epoch)
                logger.write('{} {:8f} | '.format(k, v))
        else:
            save_model(os.path.join(opt.save_dir, 'model_last.pth'), epoch,
                       model, optimizer)
        logger.write('\n')
        if epoch in opt.save_point:
            save_model(
                os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
                epoch, model, optimizer)
        if epoch in opt.lr_step:
            lr = opt.lr * (0.1**(opt.lr_step.index(epoch) + 1))
            print('Drop LR to', lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
    logger.close()
def main(_):
    """Debug driver for the SSD input pipeline.

    Builds anchors and the dataset reader, benchmarks raw pipeline throughput
    until the input queue is exhausted, then dumps a few decoded batches to
    ./debug for visual inspection.

    Raises:
        ValueError: if --data_dir is not supplied.
    """
    print(tf.gfile.Glob('./debug/example_01?.jpg'))
    if not FLAGS.data_dir:
        raise ValueError(
            'You must supply the dataset directory with --data_dir')
    tf.logging.set_verbosity(tf.logging.DEBUG)
    with tf.Graph().as_default():
        global_step = slim.create_global_step()

        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        # Adapter: dataset passes (image, shape, labels, boxes); the
        # preprocessor takes (image, labels, boxes) plus shape kwargs.
        image_preprocessing_fn = lambda image_, shape_, glabels_, gbboxes_: preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)(
                image_, glabels_, gbboxes_,
                out_shape=[FLAGS.train_image_size] * 2,
                data_format=DATA_FORMAT)

        # Classic SSD300-style anchor layout for three feature maps.
        anchor_creator = anchor_manipulator.AnchorCreator(
            [FLAGS.train_image_size] * 2,
            layers_shapes=[(38, 38), (19, 19), (10, 10)],
            anchor_scales=[[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]],
            extra_anchor_scales=[[0.15], [0.35], [0.55]],
            anchor_ratios=[[2, .5], [2, .5, 3, 1. / 3], [2, .5, 3, 1. / 3]],
            layer_steps=[8, 16, 32])
        all_anchors = anchor_creator.get_all_anchors()[0]

        anchor_operator = anchor_manipulator.AnchorEncoder(
            all_anchors,
            num_classes=FLAGS.num_classes,
            ignore_threshold=0.,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        next_iter, _ = dataset_factory.get_dataset(
            FLAGS.dataset_name,
            FLAGS.dataset_split_name,
            FLAGS.data_dir,
            image_preprocessing_fn,
            file_pattern=None,
            reader=None,
            batch_size=FLAGS.batch_size,
            num_readers=FLAGS.num_readers,
            num_preprocessing_threads=FLAGS.num_preprocessing_threads,
            anchor_encoder=anchor_operator.encode_all_anchors)

        sess = tf.Session()
        sess.run(
            tf.group(tf.global_variables_initializer(),
                     tf.local_variables_initializer(),
                     tf.tables_initializer()))
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # Throughput benchmark: drain batches until the queue runs dry,
        # printing average seconds/batch every 10 batches.
        count = 0
        start_time = time.time()
        try:
            while not coord.should_stop():
                count += 1
                _ = sess.run([next_iter])
                if count % 10 == 0:
                    time_elapsed = time.time() - start_time
                    print('time: {}'.format(time_elapsed / 10.))
                    start_time = time.time()
        except tf.errors.OutOfRangeError:
            # NOTE(review): `log` is not defined in this block; presumably a
            # module-level logger exists — confirm, else use tf.logging.info.
            log.info('Queue Done!')
        finally:
            # Ask all queue-runner threads to stop before joining them.
            coord.request_stop()

        # Wait for threads to finish.
        coord.join(threads)

        # Debug dump: fetch a few batches and write the images to ./debug.
        for i in range(6):
            list_from_batch = sess.run(next_iter)
            # imsave('./debug/example_%03d.jpg' % (i,), list_from_batch[0][0])
            # imsave('./debug/example_%03d_.jpg' % (i,), list_from_batch[1][0])
            image = list_from_batch[-1]
            shape = list_from_batch[-2]
            glabels = list_from_batch[:len(all_anchors)]
            gtargets = list_from_batch[len(all_anchors):2 * len(all_anchors)]
            gscores = list_from_batch[2 * len(all_anchors):3 * len(all_anchors)]
            imsave('./debug/example_%03d.jpg' % (i, ), image[0])
            print(image.shape, shape.shape, glabels[0].shape,
                  gtargets[0].shape, gscores[0].shape)

        # BUG FIX: the session was previously closed BEFORE the debug-dump
        # loop above, so every sess.run(next_iter) in it hit a closed
        # session. Close only after the last use.
        sess.close()