# NOTE(review): flattened script fragment — original line breaks were lost, and the
# trailing TFTrainer(...) constructor call is truncated (it continues past this chunk),
# so the code below is preserved byte-for-byte rather than reformatted.
# What it does (from the visible calls): resolves log/model paths and hyper-params for
# the chosen SSD backbone; loads VOC 2007 train+validation (and VOC 2012, whose item
# count is added only when args.with_voc12 is set); prepends a "bg" background class to
# the label list and records the total label count in hyper_params; derives per-epoch
# step counts from item totals and args.batch_size, capping both train and eval at 10
# steps when args.smoke_test is set; then begins constructing a distributed TFTrainer.
ssd_log_path = io_utils.get_log_path(args.backbone) ssd_model_path = io_utils.get_model_path(args.backbone) hyper_params = train_utils.get_hyper_params(args.backbone) _, info = data_utils.get_dataset("voc/2007", "train+validation") _, voc_2012_info = data_utils.get_dataset("voc/2012", "train+validation") voc_2012_total_items = data_utils.get_total_item_size( voc_2012_info, "train+validation") train_total_items = data_utils.get_total_item_size(info, "train+validation") val_total_items = data_utils.get_total_item_size(info, "test") if args.with_voc12: train_total_items += voc_2012_total_items labels = data_utils.get_labels(info) labels = ["bg"] + labels hyper_params["total_labels"] = len(labels) step_size_train = train_utils.get_step_size(train_total_items, args.batch_size) step_size_val = train_utils.get_step_size(val_total_items, args.batch_size) num_train_steps = 10 if args.smoke_test else step_size_train num_eval_steps = 10 if args.smoke_test else step_size_val trainer = TFTrainer(model_creator=model_creator, data_creator=dataset_creator, num_replicas=args.num_replicas, use_gpu=args.use_gpu, verbose=True,
def main():
    """Entry point for the token-classification pipeline.

    Parses arguments, configures the device / distributed backend and logging,
    then runs training and/or testing as requested by the --do_train / --do_test
    string flags.

    Relies on names defined elsewhere in this module: config(), set_seed(),
    get_labels(), load_and_cache_examples(), train(), predict(), MODEL_NAMES,
    logger, and RobertaTokenizer.
    """
    args = config()
    # do_train / do_test arrive as strings; normalize once so every later
    # check can be a plain == "true" comparison.
    args.do_train = args.do_train.lower()
    args.do_test = args.do_test.lower()

    if (
        os.path.exists(args.output_dir)
        and os.listdir(args.output_dir)
        # BUG FIX: the original tested the bare string `args.do_train`, which is
        # truthy even when it equals "false" — so the guard raised for test-only
        # runs too. Compare against "true", matching the checks further down.
        and args.do_train == "true"
        and not args.overwrite_output_dir
    ):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(
                args.output_dir
            )
        )

    # Create output directory if needed (only the main process creates it).
    if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(args.output_dir)

    # Device / distributed setup: local_rank == -1 means single-process mode.
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs.
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Console logging: INFO on the main process, WARN on other ranks.
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    # File logging at DEBUG into <output_dir>/log.txt.
    # NOTE(review): every rank opens this handler, but only rank -1/0 creates
    # output_dir above — in distributed runs this assumes the directory already
    # exists on non-main ranks; confirm before relying on it.
    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s", "%m/%d/%Y %H:%M:%S")
    logging_fh = logging.FileHandler(os.path.join(args.output_dir, 'log.txt'))
    logging_fh.setLevel(logging.DEBUG)
    logging_fh.setFormatter(formatter)
    logger.addHandler(logging_fh)
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.fp16,
    )

    # Set seed
    set_seed(args)
    labels = get_labels(args.data_dir, args.dataset)
    # Use cross entropy ignore index as padding label id so that only real
    # label ids contribute to the loss later.
    pad_token_label_id = CrossEntropyLoss().ignore_index

    # Load pretrained model and tokenizer.
    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier()  # Make sure only the first process in distributed training will download model & vocab
    tokenizer = RobertaTokenizer.from_pretrained(
        args.tokenizer_name,
        do_lower_case=args.do_lower_case,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    if args.local_rank == 0:
        torch.distributed.barrier()  # Make sure only the first process in distributed training will download model & vocab

    logger.info("Training/evaluation parameters %s", args)

    # Training
    if args.do_train == "true":
        train_dataset = load_and_cache_examples(args, tokenizer, labels, pad_token_label_id, mode="train")
        global_step, tr_loss, best_results = train(args, train_dataset, tokenizer, labels, pad_token_label_id)
        logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)

    # Testing (main process only).
    if args.do_test == "true" and args.local_rank in [-1, 0]:
        best_test = [0, 0, 0]
        for tors in MODEL_NAMES:
            best_test = predict(args, tors, labels, pad_token_label_id, best_test)
# NOTE(review): flattened script fragment — original line breaks were lost. The trailing
# `else:` branch is truncated (its body lies past this chunk), and the inline `#` comment
# below comments out the remainder of this physical line in its current flattened form,
# so the code is preserved byte-for-byte rather than reformatted.
# What it does (from the visible calls): validates the requested backbone and selects the
# matching RPN get_model import (mobilenet_v2 vs vgg16); loads backbone hyper-params and
# the VOC 2007 test split; prepends a "bg" background class to the labels and records the
# total label count; fetches dtype/shape/padding specs for the tf.data pipeline; and, when
# use_custom_images is enabled, builds test_data from a custom image-directory generator
# instead of the VOC split.
use_custom_images = False custom_image_path = "data/images/" # If you have trained faster rcnn model you can load weights from faster rcnn model load_weights_from_frcnn = False backbone = args.backbone io_utils.is_valid_backbone(backbone) if backbone == "mobilenet_v2": from models.rpn_mobilenet_v2 import get_model else: from models.rpn_vgg16 import get_model hyper_params = train_utils.get_hyper_params(backbone) test_data, dataset_info = data_utils.get_dataset("voc/2007", "test") labels = data_utils.get_labels(dataset_info) labels = ["bg"] + labels hyper_params["total_labels"] = len(labels) img_size = hyper_params["img_size"] data_types = data_utils.get_data_types() data_shapes = data_utils.get_data_shapes() padding_values = data_utils.get_padding_values() if use_custom_images: img_paths = data_utils.get_custom_imgs(custom_image_path) total_items = len(img_paths) test_data = tf.data.Dataset.from_generator( lambda: data_utils.custom_data_generator(img_paths, img_size, img_size ), data_types, data_shapes) else:
# NOTE(review): flattened script fragment — original line breaks were lost, and the final
# `valid = dataset['validation'].map(...)` call is truncated mid-argument-list (it continues
# past this chunk), so the code is preserved byte-for-byte rather than reformatted.
# What it does (from the visible calls): prints the TF version and detected devices, enables
# mixed precision via enable_amp(), fixes image/batch constants for Cityscapes semantic
# segmentation (20 classes, 512x1024), builds trainId->label and categoryId->label lookup
# dicts from get_labels(), constructs a CityscapesLoader preprocessing pipeline, loads the
# cityscapes/semantic_segmentation TFDS dataset with shuffled files, and maps the train
# (and, truncated here, validation) splits through the pipeline's load functions.
print("Tensorflow version: ", tf.__version__) print(physical_devices, "\n") enable_amp() fine = True n_classes = 20 img_height = 512 img_width = 1024 BATCH_SIZE = 12 BUFFER_SIZE = 512 labels = get_labels() trainid2label = {label.trainId: label for label in labels} catid2label = {label.categoryId: label for label in labels} pipeline = CityscapesLoader(img_height=img_height, img_width=img_width, n_classes=n_classes) dataset, info = tfds.load(name='cityscapes/semantic_segmentation', data_dir='/workspace/tensorflow_datasets/', with_info=True, shuffle_files=True) train = dataset['train'].map(pipeline.load_image_train, num_parallel_calls=tf.data.AUTOTUNE) valid = dataset['validation'].map(pipeline.load_image_test,