def _decode_cfg_value(v): """Decodes a raw config value (e.g., from a yaml config files or command line argument) into a Python object. """ # Configs parsed from raw yaml will contain dictionary keys that need to be # converted to AttrDict objects if isinstance(v, dict): return AttrDict(v) # All remaining processing is only applied to strings if not isinstance(v, basestring): return v # Try to interpret `v` as a: # string, number, tuple, list, dict, boolean, or None try: v = literal_eval(v) # The following two excepts allow v to pass through when it represents a # string. # # Longer explanation: # The type of v is always a string (before calling literal_eval), but # sometimes it *represents* a string and other times a data structure, like # a list. In the case that v represents a string, what we got back from the # yaml parser is 'foo' *without quotes* (so, not '"foo"'). literal_eval is # ok with '"foo"', but will raise a ValueError if given 'foo'. In other # cases, like paths (v = 'foo/bar' and not v = '"foo/bar"'), literal_eval # will raise a SyntaxError. except ValueError: pass except SyntaxError: pass return v
def merge_cfg_from_file(cfg_filename, config):
    """Load a yaml config file and merge it into `config`.

    Args:
        cfg_filename: path of the yaml file to read.
        config: configuration object whose AttrDict sections are collected by
            ``_cls2dict``, merged with the file contents by
            ``_merge_a_into_b`` and written back through ``_dict2cls``.
    """
    with open(cfg_filename, 'r') as f:
        # NOTE(review): this used to be `yaml.load(f)`. Calling it without an
        # explicit Loader is deprecated since PyYAML 5.1, fails on 6.0+, and
        # can construct arbitrary Python objects. `safe_load` only builds
        # plain data types. If a config file relies on python-specific yaml
        # tags, this call has to be revisited.
        # An empty file parses to None; treat it as an empty mapping.
        yaml_cfg = AttrDict(yaml.safe_load(f) or {})
    _config = _cls2dict(config)
    _merge_a_into_b(yaml_cfg, _config)
    _dict2cls(_config, config)
def _cls2dict(config):
    """Gather the public, non-callable attributes of `config` in an AttrDict.

    Every name reported by ``dir(config)`` is read. Names starting with a
    double underscore and callable values are ignored. Each remaining value
    is asserted to be an ``AttrDict`` and stored under its attribute name.

    Returns:
        An ``AttrDict`` mapping attribute name to the attribute's value (the
        same object, not a copy).
    """
    collected = AttrDict()
    for attr_name in dir(config):
        attr_value = getattr(config, attr_name)
        # Skip dunder names and methods; only config sections are of interest.
        if attr_name.startswith("__") or callable(attr_value):
            continue
        assert isinstance(attr_value, AttrDict)
        collected[attr_name] = attr_value
    return collected
from tools.collections import AttrDict

# Root of this module's configuration tree. `cfg` is a second name bound to
# the very same AttrDict object, so changes through either name are shared.
__C = AttrDict()
cfg = __C

__C.net_type = 'mbv2'  # mbv2 / res
# Architecture description: bracketed entries separated by '|'.
# NOTE(review): each entry looks like [[in_ch, out_ch], first op, [extra ops],
# number of extra ops, stride] -- confirm against the code that parses it.
__C.net_config = """[[16, 16], 'mbconv_k3_t1', [], 0, 1]| [[16, 24], 'mbconv_k3_t6', [], 0, 2]| [[24, 48], 'mbconv_k7_t6', ['mbconv_k3_t3'], 1, 2]| [[48, 72], 'mbconv_k5_t6', ['mbconv_k3_t6', 'mbconv_k3_t3'], 2, 2]| [[72, 128], 'mbconv_k3_t6', ['mbconv_k3_t3', 'mbconv_k3_t3'], 2, 1]| [[128, 160], 'mbconv_k3_t6', ['mbconv_k7_t3', 'mbconv_k5_t6', 'mbconv_k7_t3'], 3, 2]| [[160, 176], 'mbconv_k3_t3', ['mbconv_k3_t6', 'mbconv_k3_t6', 'mbconv_k3_t6'], 3, 1]| [[176, 384], 'mbconv_k7_t6', [], 0, 1]| [[384, 1984], 'conv1_1']"""

# Section: train_params
__C.train_params = AttrDict()
__C.train_params.epochs = 240
__C.train_params.use_seed = True
__C.train_params.seed = 0

# Section: optim
__C.optim = AttrDict()
__C.optim.init_lr = 0.5
__C.optim.min_lr = 1e-5
__C.optim.lr_schedule = 'cosine'  # cosine poly
__C.optim.momentum = 0.9
__C.optim.weight_decay = 4e-5
__C.optim.use_grad_clip = False
__C.optim.grad_clip = 10
__C.optim.label_smooth = True
from tools.collections import AttrDict

# Root of this module's configuration tree. `cfg` is a second name bound to
# the very same AttrDict object.
__C = AttrDict()
cfg = __C

__C.net_type = 'mbv2'  # mbv2 / res
# Empty by default.
# NOTE(review): presumably filled in from another config or the command line
# -- confirm.
__C.net_config = ""

# Section: train_params
__C.train_params = AttrDict()
__C.train_params.epochs = 240
__C.train_params.use_seed = False
__C.train_params.seed = 0

# Section: optim
__C.optim = AttrDict()
__C.optim.init_lr = 0.5
__C.optim.min_lr = 1e-5
__C.optim.lr_schedule = 'cosine'  # cosine poly
__C.optim.momentum = 0.9
__C.optim.weight_decay = 4e-5
__C.optim.use_grad_clip = False
__C.optim.grad_clip = 10
__C.optim.label_smooth = True
__C.optim.smooth_alpha = 0.1
__C.optim.if_resume = False
# Nested section: optim.resume
__C.optim.resume = AttrDict()
__C.optim.resume.load_path = ''
__C.optim.resume.load_epoch = 0

# Section: data (no keys are set in the visible part of this script)
__C.data = AttrDict()
class Config(object):
    """Default configuration, grouped into AttrDict sections.

    Every section (MODEL, DATASET, RPN, MRCNN, DATA, ROIS, TEST, TRAIN, DEV,
    CTRL, TSNE, MISC) is a *class* attribute. The section objects are
    therefore shared by all instances and by every subclass that does not
    rebind them, and ``_set_value`` mutates them in place.
    """

    # ==================================
    MODEL = AttrDict()
    # Path to pretrained imagenet model
    MODEL.PRETRAIN_IMAGENET_MODEL = os.path.join('datasets/pretrain_model', "resnet50_imagenet.pth")
    # Path to pretrained weights file
    MODEL.PRETRAIN_COCO_MODEL = os.path.join('datasets/pretrain_model', 'mask_rcnn_coco.pth')
    MODEL.INIT_FILE_CHOICE = 'last'  # or file (xxx.pth)
    MODEL.INIT_MODEL = None  # set in 'utils.py'
    MODEL.BACKBONE = 'resnet101'
    MODEL.BACKBONE_STRIDES = []  # filled in by _set_value()
    MODEL.BACKBONE_SHAPES = []  # filled in by _set_value()

    # ==================================
    DATASET = AttrDict()
    # Number of classification classes (including background)
    DATASET.NUM_CLASSES = 81
    DATASET.YEAR = '2014'
    DATASET.PATH = 'datasets/coco'

    # ==================================
    RPN = AttrDict()
    # Length of square anchor side in pixels
    RPN.ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # Ratios of anchors at each cell (width/height)
    # A value of 1 represents a square anchor, and 0.5 is a wide anchor
    RPN.ANCHOR_RATIOS = [0.5, 1, 2]

    # Anchor stride
    # If 1 then anchors are created for each cell in the backbone feature map.
    # If 2, then anchors are created for every other cell, and so on (stride=2,3,4...).
    RPN.ANCHOR_STRIDE = 1

    # Non-max suppression threshold to filter RPN proposals.
    # You can reduce this during training to generate more proposals.
    RPN.NMS_THRESHOLD = 0.7

    # How many anchors per image to use for RPN training
    RPN.TRAIN_ANCHORS_PER_IMAGE = 256

    # ROIs kept after non-maximum suppression for RPN part
    RPN.PRE_NMS_LIMIT = 6000
    RPN.POST_NMS_ROIS_TRAINING = 2000
    RPN.POST_NMS_ROIS_INFERENCE = 1000
    RPN.TARGET_POS_THRES = .7
    RPN.TARGET_NEG_THRES = .3

    # ==================================
    MRCNN = AttrDict()
    # If enabled, resize instance masks to a smaller size to reduce
    # memory load. Recommended when using high-resolution images.
    MRCNN.USE_MINI_MASK = True
    MRCNN.MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask
    # Pooled ROIs
    MRCNN.POOL_SIZE = 7  # cls/bbox stream
    MRCNN.MASK_POOL_SIZE = 14  # mask stream
    MRCNN.MASK_SHAPE = [28, 28]

    # ==================================
    DATA = AttrDict()
    # Input image resize
    # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and
    # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't
    # be satisfied together the IMAGE_MAX_DIM is enforced.
    DATA.IMAGE_MIN_DIM = 800
    DATA.IMAGE_MAX_DIM = 1024
    # If True, pad images with zeros such that they're (max_dim by max_dim)
    DATA.IMAGE_PADDING = True  # currently, the False option is not supported

    # Image mean (RGB)
    DATA.MEAN_PIXEL = np.array([123.7, 116.8, 103.9])
    # Maximum number of ground truth instances to use in one image
    DATA.MAX_GT_INSTANCES = 100
    # Bounding box refinement standard deviation for RPN and final detections.
    DATA.BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
    DATA.IMAGE_SHAPE = []  # filled in by _set_value()

    # Quote from "roytseng-tw/Detectron.pytorch":
    # Number of Python threads to use for the data loader (warning: using too many
    # threads can cause GIL-based interference with Python Ops leading to *slower*
    # training; 4 seems to be the sweet spot in our experience)
    DATA.LOADER_WORKER_NUM = 2

    # ==================================
    ROIS = AttrDict()
    # Number of ROIs per image to feed to classifier/mask heads
    # The Mask RCNN paper uses 512 but often the RPN doesn't generate
    # enough positive proposals to fill this and keep a positive:negative
    # ratio of 1:3. You can increase the number of proposals by adjusting the RPN NMS threshold.
    ROIS.TRAIN_ROIS_PER_IMAGE = 200

    # Percent of positive ROIs used to train classifier/mask heads
    ROIS.ROI_POSITIVE_RATIO = 0.33

    # Eqn.(1) in FPN paper
    # useless when DEV.ASSIGN_BOX_ON_ALL_SCALE is True
    ROIS.ASSIGN_ANCHOR_BASE = 224.
    ROIS.METHOD = 'roi_align'  # or roi_pool

    # ==================================
    TEST = AttrDict()
    TEST.BATCH_SIZE = 0  # set in _set_value()
    # Max number of final detections
    TEST.DET_MAX_INSTANCES = 100
    # Minimum probability value to accept a detected instance
    # ROIs below this threshold are skipped
    TEST.DET_MIN_CONFIDENCE = 0
    # Non-maximum suppression threshold for detection
    TEST.DET_NMS_THRESHOLD = 0.3
    TEST.SAVE_IM = False

    # ==================================
    TRAIN = AttrDict()
    TRAIN.BATCH_SIZE = 6
    # Learning rate and momentum
    # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes
    # weights to explode. Likely due to differences in optimizer implementation.
    TRAIN.OPTIM_METHOD = 'sgd'
    TRAIN.INIT_LR = 0.01
    TRAIN.MOMENTUM = 0.9
    # Weight decay regularization
    TRAIN.WEIGHT_DECAY = 0.0001
    TRAIN.GAMMA = 0.1
    TRAIN.LR_POLICY = 'steps_with_decay'
    TRAIN.END2END = False
    # in epoch
    TRAIN.SCHEDULE = [6, 4, 3]
    TRAIN.LR_WARM_UP = False
    TRAIN.LR_WP_ITER = 500
    TRAIN.LR_WP_FACTOR = 1. / 3.

    TRAIN.CLIP_GRAD = True
    TRAIN.MAX_GRAD_NORM = 5.0

    # let bn learn and also apply the same weight decay when setting up optimizer
    TRAIN.BN_LEARN = False

    # evaluate mAP after each stage
    TRAIN.DO_VALIDATION = True
    TRAIN.SAVE_FREQ_WITHIN_EPOCH = 10
    TRAIN.FORCE_START_EPOCH = 0  # when you resume training and change the batch size, this is useful
    # apply OT loss in FPN heads
    TRAIN.FPN_OT_LOSS = False
    TRAIN.FPN_OT_LOSS_FAC = 1.

    # ==============================
    DEV = AttrDict()
    DEV.SWITCH = False
    DEV.INIT_BUFFER_WEIGHT = 'scratch'  # currently only support this
    # set to 1 if use all historic data
    DEV.BUFFER_SIZE = 1000
    # set to <= 0 if trained from the very first iter
    DEV.EFFECT_AFER_EP_PERCENT = 0.

    DEV.MULTI_UPSAMPLER = False  # does not affect much
    # if 1, standard conv
    DEV.UPSAMPLE_FAC = 2.

    DEV.LOSS_CHOICE = 'l1'
    DEV.OT_ONE_DIM_FORM = 'conv'  # effective if loss_choice is 'ot'
    DEV.LOSS_FAC = 0.5
    # compute meta_loss of small boxes on an instance or class level
    DEV.INST_LOSS = False

    DEV.FEAT_BRANCH_POOL_SIZE = 14

    # ignore regression loss (only for **DEBUG**);
    # doomed if you use it during deployment
    DEV.DIS_REG_LOSS = False

    # assign anchors on all scales and split anchor based on roi-pooling output size
    # if used, then ROIS.ASSIGN_ANCHOR_BASE is inactivated
    DEV.ASSIGN_BOX_ON_ALL_SCALE = False
    # provide a baseline (no meta_loss) to compare
    DEV.BASELINE = False

    DEV.BIG_SUPERVISE = False
    DEV.BIG_LOSS_CHOICE = 'ce'  # default setting (currently only support this)
    DEV.BIG_FC_INIT = 'scratch'  # or 'coco_pretrain'
    DEV.BIG_LOSS_FAC = 1.
    DEV.BIG_FC_INIT_LIST = dict()

    DEV.STRUCTURE = 'alpha'  # 'beta'
    DEV.DIS_UPSAMPLER = False
    DEV.BIG_FEAT_DETACH = True

    # merge compare_feat output into classifier
    DEV.CLS_MERGE_FEAT = False
    DEV.CLS_MERGE_MANNER = 'simple_add'  # 'linear_add'
    DEV.CLS_MERGE_FAC = .5

    # ==============================
    CTRL = AttrDict()
    CTRL.CONFIG_NAME = ''
    CTRL.PHASE = ''
    CTRL.DEBUG = None
    # train on minival and test also on minival
    CTRL.QUICK_VERIFY = False

    CTRL.SHOW_INTERVAL = 50
    CTRL.PROFILE_ANALYSIS = False  # show time for some pass

    # ==============================
    TSNE = AttrDict()
    TSNE.SKIP_INFERENCE = True  # skip the evaluation (compute mAP)
    TSNE.A_FEW = False
    TSNE.PERPLEXITY = 30
    TSNE.METRIC = 'euclidean'
    TSNE.N_TOPICS = 2
    TSNE.BATCH_SZ = 1024  # 1024  # bigger bs is more sparse
    TSNE.TOTAL_EP = 150
    TSNE.ELLIPSE = True
    TSNE.SAMPLE_CHOICE = 'set1'  # for detailed config, see 'def prepare_data()' in tools/tsne/prepare_data.py
    TSNE.FIG_FOLDER_SUX = 'debug5'  # custom folder name

    # ==============================
    MISC = AttrDict()
    MISC.SEED = 2000
    MISC.USE_VISDOM = False
    MISC.VIS = AttrDict()
    MISC.VIS.PORT = -1  # must be passed from configs on different servers

    # the following will be set somewhere else
    MISC.LOG_FILE = None
    MISC.DET_RESULT_FILE = None
    MISC.SAVE_IMAGE_DIR = None
    MISC.RESULT_FOLDER = None
    MISC.DEVICE_ID = []
    MISC.GPU_COUNT = -1

    def display(self, log_file, quiet=False):
        """Display *final* configuration values.

        Walks every public, non-callable attribute of the config. AttrDict
        sections are printed key by key; any other attribute is printed on a
        single line. All output goes through ``print_log``.

        Args:
            log_file: forwarded to ``print_log`` as the log destination.
            quiet: forwarded to ``print_log`` as ``quiet_termi``.
        """
        print_log("Configurations:", file=log_file, quiet_termi=quiet)
        for a in dir(self):
            if a.startswith("__"):
                continue
            value = getattr(self, a)
            if callable(value):
                continue
            if isinstance(value, AttrDict):
                print_log("{}:".format(a), log_file, quiet_termi=quiet)
                for key in value:
                    print_log("\t{:30}\t\t{}".format(key, value[key]), log_file, quiet_termi=quiet)
            else:
                print_log("{}\t{}".format(a, value), log_file, quiet_termi=quiet)
        print_log("\n", log_file, quiet_termi=quiet)

    @staticmethod
    def _discard_keys(section, keys):
        """Delete each of `keys` from `section`, ignoring keys already gone."""
        for key in keys:
            if key in section:
                del section[key]

    def _set_value(self):
        """Set values of computed attributes. Override all previous settings.

        Seeds ``random`` and ``torch``, applies the QUICK_VERIFY / DEBUG
        overrides, creates the result folder, derives the backbone strides
        and shapes, sets up the visdom bookkeeping, and finally removes
        config keys that are irrelevant for the chosen options.

        The sections are class-level objects shared between instances, so the
        key removal is done in a way that tolerates being run more than once.

        Raises:
            Exception: if ``MODEL.BACKBONE`` is not 'resnet101'.
            AssertionError: if visdom is enabled without a positive port.
        """
        random.seed(self.MISC.SEED)
        torch.manual_seed(self.MISC.SEED)

        if self.CTRL.QUICK_VERIFY:
            self.CTRL.SHOW_INTERVAL = 5
            self.TRAIN.SAVE_FREQ_WITHIN_EPOCH = 2

        if self.CTRL.DEBUG:
            self.CTRL.SHOW_INTERVAL = 1
            self.DATA.IMAGE_MIN_DIM = 320
            self.DATA.IMAGE_MAX_DIM = 512
            self.CTRL.PROFILE_ANALYSIS = False
            self.TSNE.A_FEW = True

        # set MISC.RESULT_FOLDER, 'results/base_101/train (or inference)/'
        self.MISC.RESULT_FOLDER = os.path.join(
            'results', self.CTRL.CONFIG_NAME.lower(), self.CTRL.PHASE)
        if not os.path.exists(self.MISC.RESULT_FOLDER):
            os.makedirs(self.MISC.RESULT_FOLDER)

        self.TEST.BATCH_SIZE = 2 * self.TRAIN.BATCH_SIZE

        # MUST be left **at the end**
        # The strides of each layer of the FPN Pyramid.
        if self.MODEL.BACKBONE == 'resnet101':
            self.MODEL.BACKBONE_STRIDES = [4, 8, 16, 32, 64]
        else:
            raise Exception('unknown backbone structure')

        # Input image size
        self.DATA.IMAGE_SHAPE = np.array(
            [self.DATA.IMAGE_MAX_DIM, self.DATA.IMAGE_MAX_DIM, 3])

        # Compute backbone size from input image size
        self.MODEL.BACKBONE_SHAPES = np.array(
            [[int(math.ceil(self.DATA.IMAGE_SHAPE[0] / stride)),
              int(math.ceil(self.DATA.IMAGE_SHAPE[1] / stride))]
             for stride in self.MODEL.BACKBONE_STRIDES])

        if self.MISC.USE_VISDOM:
            if self.CTRL.DEBUG:
                self.MISC.VIS.PORT = 2042
            assert self.MISC.VIS.PORT > 0, 'vis_port not designated!!!'

            print('\n[visdom is activated] remember to execute '
                  '**python -m visdom.server -port={:d}** on server (or pc)!\n'.format(self.MISC.VIS.PORT))

            self.MISC.VIS.LINE = 100
            self.MISC.VIS.TXT = 200
            self.MISC.VIS.IMG = 300
            self.MISC.VIS.LOSS_LEGEND = [
                'total_loss', 'rpn_cls', 'rpn_bbox',
                'mrcnn_cls', 'mrcnn_bbox', 'mrcnn_mask_loss']
            if self.DEV.SWITCH and not self.DEV.BASELINE:
                self.MISC.VIS.LOSS_LEGEND.append('meta_loss')
            if self.DEV.SWITCH and self.DEV.BIG_SUPERVISE:
                self.MISC.VIS.LOSS_LEGEND.append('big_loss')
            if self.TRAIN.FPN_OT_LOSS:
                self.MISC.VIS.LOSS_LEGEND.append('fpn_ot_loss')

        if self.MISC.GPU_COUNT == 8:
            self.DATA.LOADER_WORKER_NUM = 32
        elif self.MISC.GPU_COUNT == 4:
            self.DATA.LOADER_WORKER_NUM = 16

        if self.DEV.BIG_FC_INIT == 'coco_pretrain':
            self.DEV.BIG_FC_INIT_LIST = {
                # target network vs pretrain network
                'dev_roi.big_fc_layer.weight': 'classifier.linear_class.weight',
                'dev_roi.big_fc_layer.bias': 'classifier.linear_class.bias',
            }

        # TODO (low): add more here; delete some config for brevity
        # Guarded removal: a plain `del` raises KeyError the second time this
        # method runs, because the sections are shared class attributes.
        if not self.TRAIN.LR_WARM_UP:
            self._discard_keys(self.TRAIN, ('LR_WP_ITER', 'LR_WP_FACTOR'))
        if not self.DEV.BIG_SUPERVISE:
            self._discard_keys(
                self.DEV,
                ('BIG_LOSS_FAC', 'BIG_FC_INIT', 'BIG_LOSS_CHOICE', 'BIG_FC_INIT_LIST'))
        if self.DEV.LOSS_CHOICE != 'ot':
            self._discard_keys(self.DEV, ('OT_ONE_DIM_FORM',))
from tools.collections import AttrDict

# Root of this module's configuration tree. `cfg` is a second name bound to
# the very same AttrDict object.
__C = AttrDict()
cfg = __C

# Architecture description: bracketed entries separated by '|'.
# NOTE(review): each entry looks like [[in_ch, out_ch], first op, [extra ops],
# number of extra ops, stride] -- confirm against the code that parses it.
__C.net_config = """[[16, 16], 'mbconv_k3_t1', [], 0, 1]| [[16, 24], 'mbconv_k5_t3', ['mbconv_k5_t3', 'mbconv_k3_t3'], 2, 2]| [[24, 48], 'mbconv_k5_t6', [], 0, 2]| [[48, 80], 'mbconv_k5_t6', ['mbconv_k7_t3', 'mbconv_k5_t3', 'mbconv_k3_t3'], 3, 2]| [[80, 112], 'mbconv_k3_t3', ['mbconv_k3_t3'], 1, 1]| [[112, 160], 'mbconv_k7_t6', ['mbconv_k7_t3', 'mbconv_k7_t3'], 2, 2]| [[160, 352], 'mbconv_k5_t6', ['mbconv_k3_t3'], 1, 1]| [[352, 416], 'mbconv_k3_t3', [], 0, 1]| [[416, 480], 'mbconv_k3_t3', [], 0, 1]"""

# Section: train_params
__C.train_params = AttrDict()
__C.train_params.batch_size = 256
__C.train_params.num_workers = 8

# Section: optim
__C.optim = AttrDict()
__C.optim.last_dim = 1728
__C.optim.init_dim = 16
__C.optim.bn_momentum = 0.1
__C.optim.bn_eps = 0.001

# Section: data
__C.data = AttrDict()
__C.data.dataset = 'imagenet'  # cifar10 imagenet
from tools.collections import AttrDict

# Root of this module's configuration tree. `cfg` is a second name bound to
# the very same AttrDict object.
__C = AttrDict()
cfg = __C

# Empty by default.
# NOTE(review): presumably filled in from another config or the command line
# -- confirm.
__C.net_config = ""

# Section: train_params
__C.train_params = AttrDict()
__C.train_params.batch_size = 256
__C.train_params.num_workers = 8

# Section: optim
__C.optim = AttrDict()
__C.optim.init_dim = 16
__C.optim.bn_momentum = 0.1
__C.optim.bn_eps = 0.001

# Section: data
__C.data = AttrDict()
__C.data.dataset = 'imagenet'
__C.data.train_data_type = 'lmdb'
__C.data.val_data_type = 'img'
__C.data.patch_dataset = False
__C.data.num_examples = 1281167
# NOTE(review): looks like (channels, height, width) -- confirm with the
# data loader.
__C.data.input_size = (3, 224, 224)
from tools.collections import AttrDict

# Root of this module's configuration tree. `cfg` is a second name bound to
# the very same AttrDict object.
__C = AttrDict()
cfg = __C

# Section: train_params
__C.train_params = AttrDict()
__C.train_params.epochs = 300
__C.train_params.use_seed = False
__C.train_params.seed = 0

# Section: optim
__C.optim = AttrDict()
__C.optim.init_lr = 0.025
__C.optim.min_lr = 1e-5
__C.optim.lr_schedule = 'cosine'  # cosine poly
__C.optim.momentum = 0.9
__C.optim.weight_decay = 4e-5
__C.optim.use_grad_clip = False
__C.optim.grad_clip = 10
__C.optim.label_smooth = False  # False, True
__C.optim.smooth_alpha = 0.25
__C.optim.auxiliary = True
__C.optim.auxiliary_weight = 0.4
__C.optim.if_resume = False
# Nested section: optim.resume
__C.optim.resume = AttrDict()
__C.optim.resume.load_path = ''
__C.optim.resume.load_epoch = 0
from tools.collections import AttrDict

# Root of this module's configuration tree. `cfg` is a second name bound to
# the very same AttrDict object.
__C = AttrDict()
cfg = __C

__C.net_type = 'resnet'  # choose resnet or mobilenet

# Section: train_params
__C.train_params = AttrDict()
__C.train_params.epochs = 100
__C.train_params.use_seed = False
__C.train_params.seed = 0
__C.train_params.print_freq = 50

# Section: optim
__C.optim = AttrDict()
__C.optim.init_lr = 0.1
__C.optim.min_lr = 0
__C.optim.momentum = 0.9
__C.optim.weight_decay = 1e-4
__C.optim.use_grad_clip = False
__C.optim.grad_clip = 10
__C.optim.label_smooth = False
__C.optim.smooth_alpha = 0.1
__C.optim.if_resume = False
__C.optim.resume_path = ''

# Section: data
__C.data = AttrDict()
# Placeholder value; must be replaced with a real dataset location.
__C.data.data_path = 'PATH/to/DataSet'
__C.data.num_workers = 32
__C.data.batch_size = 256
from tools.collections import AttrDict

# Root of this module's configuration tree. `cfg` is a second name bound to
# the very same AttrDict object.
__C = AttrDict()
cfg = __C

# Empty by default.
# NOTE(review): presumably filled in from another config or the command line
# -- confirm.
__C.net_config = ""

# Section: data
__C.data = AttrDict()
__C.data.seed = 0
__C.data.num_workers = 16
__C.data.batch_size = 256
__C.data.dataset = 'imagenet'
__C.data.train_data_type = 'img'
__C.data.val_data_type = 'img'
__C.data.patch_dataset = False
__C.data.num_examples = 1281167
# NOTE(review): looks like (channels, height, width) -- confirm with the
# data loader.
__C.data.input_size = (3, 224, 224)
from tools.collections import AttrDict __C = AttrDict() search_cfg = __C __C.search_params = AttrDict() __C.search_params.arch_update_epoch = 10 __C.search_params.val_start_epoch = 120 __C.search_params.sample_policy = 'prob' # prob uniform __C.search_params.weight_sample_num = 1 __C.search_params.softmax_temp = 1. __C.search_params.adjoin_connect_nums = [] __C.search_params.net_scale = AttrDict() __C.search_params.net_scale.chs = [] __C.search_params.net_scale.fm_sizes = [] __C.search_params.net_scale.stage = [] __C.search_params.net_scale.num_layers = [] __C.search_params.PRIMITIVES_stack = [ 'mbconv_k3_t3', 'mbconv_k3_t6', 'mbconv_k5_t3', 'mbconv_k5_t6', 'mbconv_k7_t3', 'mbconv_k7_t6', 'skip_connect', ] __C.search_params.PRIMITIVES_head = [ 'mbconv_k3_t3', 'mbconv_k3_t6',
from tools.collections import AttrDict

# Root of this module's configuration tree. `cfg` is a second name bound to
# the very same AttrDict object.
__C = AttrDict()
cfg = __C

# Section: train_params
__C.train_params = AttrDict()
__C.train_params.epochs = 250
__C.train_params.use_seed = False
__C.train_params.seed = 0

# Section: optim
__C.optim = AttrDict()
__C.optim.init_lr = 0.5  # 0.5, 0.1
__C.optim.min_lr = 1e-5
__C.optim.lr_schedule = 'linear'  # cosine poly, linear
__C.optim.momentum = 0.9
__C.optim.weight_decay = 3e-5
__C.optim.use_grad_clip = False
__C.optim.grad_clip = 5
__C.optim.label_smooth = True
__C.optim.smooth_alpha = 0.1
__C.optim.auxiliary = True
__C.optim.auxiliary_weight = 0.4
__C.optim.if_resume = False
# Nested section: optim.resume
__C.optim.resume = AttrDict()
__C.optim.resume.load_path = ''
__C.optim.resume.load_epoch = 0