示例#1
0
def _decode_cfg_value(v):
    """Decodes a raw config value (e.g., from a yaml config files or command
    line argument) into a Python object.
    """
    # Configs parsed from raw yaml will contain dictionary keys that need to be
    # converted to AttrDict objects
    if isinstance(v, dict):
        return AttrDict(v)
    # All remaining processing is only applied to strings
    if not isinstance(v, basestring):
        return v
    # Try to interpret `v` as a:
    #   string, number, tuple, list, dict, boolean, or None
    try:
        v = literal_eval(v)
    # The following two excepts allow v to pass through when it represents a
    # string.
    #
    # Longer explanation:
    # The type of v is always a string (before calling literal_eval), but
    # sometimes it *represents* a string and other times a data structure, like
    # a list. In the case that v represents a string, what we got back from the
    # yaml parser is 'foo' *without quotes* (so, not '"foo"'). literal_eval is
    # ok with '"foo"', but will raise a ValueError if given 'foo'. In other
    # cases, like paths (v = 'foo/bar' and not v = '"foo/bar"'), literal_eval
    # will raise a SyntaxError.
    except ValueError:
        pass
    except SyntaxError:
        pass
    return v
示例#2
0
def merge_cfg_from_file(cfg_filename, config):
    """Load a yaml config file and merge it into `config`.

    The attributes of `config` are collected with `_cls2dict`, the parsed
    yaml content is merged into that collection by `_merge_a_into_b`, and the
    result is written back onto `config` through `_dict2cls`.

    Args:
        cfg_filename: Path of the yaml file to read.
        config: Object whose attributes receive the merged values.
    """
    with open(cfg_filename, 'r') as f:
        # safe_load instead of a bare yaml.load(f): the latter can build
        # arbitrary Python objects from the file and is rejected outright
        # (missing `Loader` argument) by PyYAML >= 6.
        # NOTE(review): config files relying on python-specific yaml tags
        # would need an explicit Loader instead - confirm none exist.
        # An empty file parses to None, which is treated as "no overrides".
        yaml_cfg = AttrDict(yaml.safe_load(f) or {})
    _config = _cls2dict(config)
    _merge_a_into_b(yaml_cfg, _config)
    _dict2cls(_config, config)
示例#3
0
def _cls2dict(config):
    """Gather the config sections of `config` into a single AttrDict.

    Every attribute reported by ``dir(config)`` is looked up; those whose
    name starts with a double underscore and those that are callable are
    ignored. Each remaining attribute must itself be an ``AttrDict`` and is
    stored under its attribute name.

    Returns:
        An ``AttrDict`` mapping attribute name to the attribute's value.
    """
    collected = AttrDict()
    for name in dir(config):
        section = getattr(config, name)
        if name.startswith("__") or callable(section):
            continue
        assert isinstance(section, AttrDict)
        collected[name] = section
    return collected
示例#4
0
from tools.collections import AttrDict

# Module-level default configuration. `__C` is the private handle used to
# fill in the values below; `cfg` is the same object under its public name.
__C = AttrDict()

cfg = __C

__C.net_type = 'mbv2'  # mbv2 / res
# Network description stored as one string; entries are separated by '|'.
# NOTE(review): each entry looks like
#   [[in_channels, out_channels], head_op, [stacked_ops], n_stacked, stride]
# but the parser is not visible here - confirm before relying on it.
__C.net_config = """[[16, 16], 'mbconv_k3_t1', [], 0, 1]|
[[16, 24], 'mbconv_k3_t6', [], 0, 2]|
[[24, 48], 'mbconv_k7_t6', ['mbconv_k3_t3'], 1, 2]|
[[48, 72], 'mbconv_k5_t6', ['mbconv_k3_t6', 'mbconv_k3_t3'], 2, 2]|
[[72, 128], 'mbconv_k3_t6', ['mbconv_k3_t3', 'mbconv_k3_t3'], 2, 1]|
[[128, 160], 'mbconv_k3_t6', ['mbconv_k7_t3', 'mbconv_k5_t6', 'mbconv_k7_t3'], 3, 2]|
[[160, 176], 'mbconv_k3_t3', ['mbconv_k3_t6', 'mbconv_k3_t6', 'mbconv_k3_t6'], 3, 1]|
[[176, 384], 'mbconv_k7_t6', [], 0, 1]|
[[384, 1984], 'conv1_1']"""

# Section: training-run parameters (epoch count and random seed handling).
__C.train_params = AttrDict()
__C.train_params.epochs = 240
__C.train_params.use_seed = True
__C.train_params.seed = 0

# Section: optimisation settings (learning rate, momentum, weight decay,
# gradient clipping, label smoothing).
__C.optim = AttrDict()
__C.optim.init_lr = 0.5
__C.optim.min_lr = 1e-5
__C.optim.lr_schedule = 'cosine'  # cosine poly
__C.optim.momentum = 0.9
__C.optim.weight_decay = 4e-5
__C.optim.use_grad_clip = False
__C.optim.grad_clip = 10
__C.optim.label_smooth = True
示例#5
0
from tools.collections import AttrDict

# Module-level default configuration. `__C` is the private handle used to
# fill in the values below; `cfg` is the same object under its public name.
__C = AttrDict()

cfg = __C

__C.net_type = 'mbv2'  # mbv2 / res
# Empty by default; presumably supplied by a config file or the caller.
__C.net_config = ""

# Section: training-run parameters (epoch count and random seed handling).
__C.train_params = AttrDict()
__C.train_params.epochs = 240
__C.train_params.use_seed = False
__C.train_params.seed = 0

# Section: optimisation settings (learning rate, momentum, weight decay,
# gradient clipping, label smoothing).
__C.optim = AttrDict()
__C.optim.init_lr = 0.5
__C.optim.min_lr = 1e-5
__C.optim.lr_schedule = 'cosine'  # cosine poly
__C.optim.momentum = 0.9
__C.optim.weight_decay = 4e-5
__C.optim.use_grad_clip = False
__C.optim.grad_clip = 10
__C.optim.label_smooth = True
__C.optim.smooth_alpha = 0.1

# Resume settings: a switch plus a nested section holding the checkpoint
# path and the epoch recorded for it.
__C.optim.if_resume = False
__C.optim.resume = AttrDict()
__C.optim.resume.load_path = ''
__C.optim.resume.load_epoch = 0

# Section: data settings (left empty in the part of the file shown here).
__C.data = AttrDict()
示例#6
0
class Config(object):
    """Default configuration, grouped into ``AttrDict`` sections.

    Every section (``MODEL``, ``DATASET``, ``RPN``, ...) is a class-level
    ``AttrDict``, so all instances share the same section objects and the
    changes made by `_set_value` are visible through every instance.

    `display` prints the current values; `_set_value` fills in the values
    that are derived from other settings.
    """

    # ==================================
    MODEL = AttrDict()
    # Path to pretrained imagenet model
    MODEL.PRETRAIN_IMAGENET_MODEL = os.path.join('datasets/pretrain_model', "resnet50_imagenet.pth")
    # Path to pretrained weights file
    MODEL.PRETRAIN_COCO_MODEL = os.path.join('datasets/pretrain_model', 'mask_rcnn_coco.pth')
    MODEL.INIT_FILE_CHOICE = 'last'  # or file (xxx.pth)
    MODEL.INIT_MODEL = None   # set in 'utils.py'
    MODEL.BACKBONE = 'resnet101'
    MODEL.BACKBONE_STRIDES = []   # set in _set_value()
    MODEL.BACKBONE_SHAPES = []    # set in _set_value()

    # ==================================
    DATASET = AttrDict()
    # Number of classification classes (including background)
    DATASET.NUM_CLASSES = 81
    DATASET.YEAR = '2014'
    DATASET.PATH = 'datasets/coco'

    # ==================================
    RPN = AttrDict()
    # Length of square anchor side in pixels
    RPN.ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # Ratios of anchors at each cell (width/height)
    # A value of 1 represents a square anchor, and 0.5 is a wide anchor
    RPN.ANCHOR_RATIOS = [0.5, 1, 2]

    # Anchor stride
    # If 1 then anchors are created for each cell in the backbone feature map.
    # If 2, then anchors are created for every other cell, and so on (stride=2,3,4...).
    RPN.ANCHOR_STRIDE = 1

    # Non-max suppression threshold to filter RPN proposals.
    # You can reduce this during training to generate more proposals.
    RPN.NMS_THRESHOLD = 0.7

    # How many anchors per image to use for RPN training
    RPN.TRAIN_ANCHORS_PER_IMAGE = 256

    # ROIs kept after non-maximum suppression for RPN part
    RPN.PRE_NMS_LIMIT = 6000
    RPN.POST_NMS_ROIS_TRAINING = 2000
    RPN.POST_NMS_ROIS_INFERENCE = 1000

    RPN.TARGET_POS_THRES = .7
    RPN.TARGET_NEG_THRES = .3

    # ==================================
    MRCNN = AttrDict()
    # If enabled, resize instance masks to a smaller size to reduce
    # memory load. Recommended when using high-resolution images.
    MRCNN.USE_MINI_MASK = True
    MRCNN.MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask
    # Pooled ROIs
    MRCNN.POOL_SIZE = 7         # cls/bbox stream
    MRCNN.MASK_POOL_SIZE = 14   # mask stream
    MRCNN.MASK_SHAPE = [28, 28]

    # ==================================
    DATA = AttrDict()
    # Input image resize
    # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and
    # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't
    # be satisfied together the IMAGE_MAX_DIM is enforced.
    DATA.IMAGE_MIN_DIM = 800
    DATA.IMAGE_MAX_DIM = 1024
    # If True, pad images with zeros such that they're (max_dim by max_dim)
    DATA.IMAGE_PADDING = True  # currently, the False option is not supported

    # Image mean (RGB)
    DATA.MEAN_PIXEL = np.array([123.7, 116.8, 103.9])

    # Maximum number of ground truth instances to use in one image
    DATA.MAX_GT_INSTANCES = 100

    # Bounding box refinement standard deviation for RPN and final detections.
    DATA.BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
    DATA.IMAGE_SHAPE = []   # set in _set_value()
    # Quote from "roytseng-tw/Detectron.pytorch":
    # Number of Python threads to use for the data loader (warning: using too many
    # threads can cause GIL-based interference with Python Ops leading to *slower*
    # training; 4 seems to be the sweet spot in our experience)
    DATA.LOADER_WORKER_NUM = 2

    # ==================================
    ROIS = AttrDict()
    # Number of ROIs per image to feed to classifier/mask heads
    # The Mask RCNN paper uses 512 but often the RPN doesn't generate
    # enough positive proposals to fill this and keep a positive:negative
    # ratio of 1:3. You can increase the number of proposals by adjusting the RPN NMS threshold.
    ROIS.TRAIN_ROIS_PER_IMAGE = 200

    # Percent of positive ROIs used to train classifier/mask heads
    ROIS.ROI_POSITIVE_RATIO = 0.33
    # Eqn.(1) in FPN paper
    # useless when DEV.ASSIGN_BOX_ON_ALL_SCALE is True
    ROIS.ASSIGN_ANCHOR_BASE = 224.
    ROIS.METHOD = 'roi_align'  # or roi_pool

    # ==================================
    TEST = AttrDict()
    TEST.BATCH_SIZE = 0   # set in _set_value()
    # Max number of final detections
    TEST.DET_MAX_INSTANCES = 100
    # Minimum probability value to accept a detected instance
    # ROIs below this threshold are skipped
    TEST.DET_MIN_CONFIDENCE = 0
    # Non-maximum suppression threshold for detection
    TEST.DET_NMS_THRESHOLD = 0.3
    TEST.SAVE_IM = False

    # ==================================
    TRAIN = AttrDict()
    TRAIN.BATCH_SIZE = 6
    # Learning rate and momentum
    # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes
    # weights to explode. Likely due to differences in optimizer implementation.
    TRAIN.OPTIM_METHOD = 'sgd'
    TRAIN.INIT_LR = 0.01
    TRAIN.MOMENTUM = 0.9
    # Weight decay regularization
    TRAIN.WEIGHT_DECAY = 0.0001
    TRAIN.GAMMA = 0.1
    TRAIN.LR_POLICY = 'steps_with_decay'
    TRAIN.END2END = False
    # in epoch
    TRAIN.SCHEDULE = [6, 4, 3]
    # LR_WP_ITER / LR_WP_FACTOR are removed by _set_value() when LR_WARM_UP is False
    TRAIN.LR_WARM_UP = False
    TRAIN.LR_WP_ITER = 500
    TRAIN.LR_WP_FACTOR = 1. / 3.

    TRAIN.CLIP_GRAD = True
    TRAIN.MAX_GRAD_NORM = 5.0

    # let bn learn and also apply the same weight decay when setting up optimizer
    TRAIN.BN_LEARN = False

    # evaluate mAP after each stage
    TRAIN.DO_VALIDATION = True
    TRAIN.SAVE_FREQ_WITHIN_EPOCH = 10
    TRAIN.FORCE_START_EPOCH = 0   # when you resume training and change the batch size, this is useful
    # apply OT loss in FPN heads
    TRAIN.FPN_OT_LOSS = False
    TRAIN.FPN_OT_LOSS_FAC = 1.

    # ==============================
    DEV = AttrDict()
    DEV.SWITCH = False
    DEV.INIT_BUFFER_WEIGHT = 'scratch'    # currently only support this
    # set to 1 if use all historic data
    DEV.BUFFER_SIZE = 1000
    # set to <= 0 if trained from the very first iter
    DEV.EFFECT_AFER_EP_PERCENT = 0.

    DEV.MULTI_UPSAMPLER = False   # does not affect much
    # if 1, standard conv
    DEV.UPSAMPLE_FAC = 2.

    DEV.LOSS_CHOICE = 'l1'
    # removed by _set_value() unless LOSS_CHOICE is 'ot'
    DEV.OT_ONE_DIM_FORM = 'conv'   # effective if loss_choice is 'ot'
    DEV.LOSS_FAC = 0.5
    # compute meta_loss of small boxes on an instance or class level
    DEV.INST_LOSS = False

    DEV.FEAT_BRANCH_POOL_SIZE = 14
    # ignore regression loss (only for **DEBUG**);
    # doomed if you use it during deployment
    DEV.DIS_REG_LOSS = False

    # assign anchors on all scales and split anchor based on roi-pooling output size
    # if used, then ROIS.ASSIGN_ANCHOR_BASE is inactivated
    DEV.ASSIGN_BOX_ON_ALL_SCALE = False
    # provide a baseline (no meta_loss) to compare
    DEV.BASELINE = False

    # the BIG_LOSS_* / BIG_FC_* entries are removed by _set_value() when
    # BIG_SUPERVISE is False
    DEV.BIG_SUPERVISE = False
    DEV.BIG_LOSS_CHOICE = 'ce'      # default setting (currently only support this)
    DEV.BIG_FC_INIT = 'scratch'     # or 'coco_pretrain'
    DEV.BIG_LOSS_FAC = 1.
    DEV.BIG_FC_INIT_LIST = dict()

    DEV.STRUCTURE = 'alpha'   # 'beta'
    DEV.DIS_UPSAMPLER = False
    DEV.BIG_FEAT_DETACH = True
    # merge compare_feat output into classifier
    DEV.CLS_MERGE_FEAT = False
    DEV.CLS_MERGE_MANNER = 'simple_add'   # 'linear_add'
    DEV.CLS_MERGE_FAC = .5

    # ==============================
    CTRL = AttrDict()
    CTRL.CONFIG_NAME = ''
    CTRL.PHASE = ''
    CTRL.DEBUG = None
    # train on minival and test also on minival
    CTRL.QUICK_VERIFY = False

    CTRL.SHOW_INTERVAL = 50
    CTRL.PROFILE_ANALYSIS = False  # show time for some pass

    # ==============================
    TSNE = AttrDict()
    TSNE.SKIP_INFERENCE = True    # skip the evaluation (compute mAP)
    TSNE.A_FEW = False
    TSNE.PERPLEXITY = 30
    TSNE.METRIC = 'euclidean'
    TSNE.N_TOPICS = 2
    TSNE.BATCH_SZ = 1024     # 1024    # bigger bs is more sparse
    TSNE.TOTAL_EP = 150
    TSNE.ELLIPSE = True
    TSNE.SAMPLE_CHOICE = 'set1'   # for detailed config, see 'def prepare_data()' in tools/tsne/prepare_data.py
    TSNE.FIG_FOLDER_SUX = 'debug5'   # custom folder name

    # ==============================
    MISC = AttrDict()
    MISC.SEED = 2000
    MISC.USE_VISDOM = False
    MISC.VIS = AttrDict()
    MISC.VIS.PORT = -1  # must be passed from configs on different servers
    # the following will be set somewhere else
    MISC.LOG_FILE = None
    MISC.DET_RESULT_FILE = None
    MISC.SAVE_IMAGE_DIR = None
    MISC.RESULT_FOLDER = None   # set in _set_value()
    MISC.DEVICE_ID = []
    MISC.GPU_COUNT = -1

    def display(self, log_file, quiet=False):
        """Display *final* configuration values.

        Every non-dunder, non-callable attribute is written through
        ``print_log``. An ``AttrDict`` section is printed as a header line
        followed by one line per key; any other attribute is printed on a
        single line.

        Args:
            log_file: Forwarded to ``print_log`` as its log target.
            quiet: Forwarded to ``print_log`` as ``quiet_termi``.
        """
        print_log("Configurations:", file=log_file, quiet_termi=quiet)
        for a in dir(self):
            if a.startswith("__"):
                continue
            value = getattr(self, a)
            if callable(value):
                continue
            if isinstance(value, AttrDict):
                print_log("{}:".format(a), log_file, quiet_termi=quiet)
                for key in value:
                    print_log("\t{:30}\t\t{}".format(key, value[key]), log_file, quiet_termi=quiet)
            else:
                print_log("{}\t{}".format(a, value), log_file, quiet_termi=quiet)
        print_log("\n", log_file, quiet_termi=quiet)

    def _set_value(self):
        """Set values of computed attributes. Override all previous settings.

        Side effects: seeds ``random`` and ``torch``, creates the result
        folder on disk if it is missing, and prints a reminder when visdom
        is enabled.

        The sections are class-level objects shared by all instances, and
        this method deletes some of their entries. The pruning step and the
        ``BIG_FC_INIT`` lookup are therefore written so that calling the
        method again (or from a second instance) does not fail on entries
        that are already gone.

        Raises:
            ValueError: If ``MODEL.BACKBONE`` is not ``'resnet101'``.
            AssertionError: If visdom is enabled without a positive port.
        """

        random.seed(self.MISC.SEED)
        torch.manual_seed(self.MISC.SEED)

        if self.CTRL.QUICK_VERIFY:
            self.CTRL.SHOW_INTERVAL = 5
            self.TRAIN.SAVE_FREQ_WITHIN_EPOCH = 2

        if self.CTRL.DEBUG:
            self.CTRL.SHOW_INTERVAL = 1
            self.DATA.IMAGE_MIN_DIM = 320
            self.DATA.IMAGE_MAX_DIM = 512
            self.CTRL.PROFILE_ANALYSIS = False
            self.TSNE.A_FEW = True

        # set MISC.RESULT_FOLDER, 'results/base_101/train (or inference)/'
        self.MISC.RESULT_FOLDER = os.path.join(
            'results', self.CTRL.CONFIG_NAME.lower(), self.CTRL.PHASE)
        if not os.path.exists(self.MISC.RESULT_FOLDER):
            os.makedirs(self.MISC.RESULT_FOLDER)

        self.TEST.BATCH_SIZE = 2 * self.TRAIN.BATCH_SIZE

        # The strides of each layer of the FPN Pyramid.
        # ValueError is still an Exception, so existing handlers keep working.
        if self.MODEL.BACKBONE == 'resnet101':
            self.MODEL.BACKBONE_STRIDES = [4, 8, 16, 32, 64]
        else:
            raise ValueError('unknown backbone structure')

        # Input image size; must come after the DEBUG override of
        # IMAGE_MAX_DIM above.
        self.DATA.IMAGE_SHAPE = np.array(
            [self.DATA.IMAGE_MAX_DIM, self.DATA.IMAGE_MAX_DIM, 3])

        # Compute backbone size from input image size.
        # float(stride) keeps this a true division on every Python version,
        # so the ceil is not applied to an already-floored quotient.
        self.MODEL.BACKBONE_SHAPES = np.array(
            [[int(math.ceil(self.DATA.IMAGE_SHAPE[0] / float(stride))),
              int(math.ceil(self.DATA.IMAGE_SHAPE[1] / float(stride)))]
             for stride in self.MODEL.BACKBONE_STRIDES])

        if self.MISC.USE_VISDOM:
            if self.CTRL.DEBUG:
                self.MISC.VIS.PORT = 2042

            assert self.MISC.VIS.PORT > 0, 'vis_port not designated!!!'

            print('\n[visdom is activated] remember to execute '
                  '**python -m visdom.server -port={:d}** on server (or pc)!\n'.format(self.MISC.VIS.PORT))
            self.MISC.VIS.LINE = 100
            self.MISC.VIS.TXT = 200
            self.MISC.VIS.IMG = 300
            self.MISC.VIS.LOSS_LEGEND = [
                'total_loss', 'rpn_cls', 'rpn_bbox',
                'mrcnn_cls', 'mrcnn_bbox', 'mrcnn_mask_loss']
            if self.DEV.SWITCH and not self.DEV.BASELINE:
                self.MISC.VIS.LOSS_LEGEND.append('meta_loss')
            if self.DEV.SWITCH and self.DEV.BIG_SUPERVISE:
                self.MISC.VIS.LOSS_LEGEND.append('big_loss')
            if self.TRAIN.FPN_OT_LOSS:
                self.MISC.VIS.LOSS_LEGEND.append('fpn_ot_loss')

        if self.MISC.GPU_COUNT == 8:
            self.DATA.LOADER_WORKER_NUM = 32
        elif self.MISC.GPU_COUNT == 4:
            self.DATA.LOADER_WORKER_NUM = 16

        # BIG_FC_INIT may already have been pruned by an earlier call, hence
        # the membership check before reading it.
        if 'BIG_FC_INIT' in self.DEV and self.DEV.BIG_FC_INIT == 'coco_pretrain':
            self.DEV.BIG_FC_INIT_LIST = {
                # target network vs pretrain network
                'dev_roi.big_fc_layer.weight': 'classifier.linear_class.weight',
                'dev_roi.big_fc_layer.bias': 'classifier.linear_class.bias',
            }

        # TODO (low): add more here; delete some config for brevity
        # Collect (section, keys) pairs for options whose feature is turned
        # off, then drop only the keys that are still present.
        stale = []
        if not self.TRAIN.LR_WARM_UP:
            stale.append((self.TRAIN, ('LR_WP_ITER', 'LR_WP_FACTOR')))
        if not self.DEV.BIG_SUPERVISE:
            stale.append((self.DEV, ('BIG_LOSS_FAC', 'BIG_FC_INIT',
                                     'BIG_LOSS_CHOICE', 'BIG_FC_INIT_LIST')))
        if self.DEV.LOSS_CHOICE != 'ot':
            stale.append((self.DEV, ('OT_ONE_DIM_FORM',)))
        for section, keys in stale:
            for key in keys:
                if key in section:
                    del section[key]
示例#7
0
from tools.collections import AttrDict

# Module-level default configuration. `__C` is the private handle used to
# fill in the values below; `cfg` is the same object under its public name.
__C = AttrDict()

cfg = __C

# Network description stored as one string; entries are separated by '|'.
# NOTE(review): each entry looks like
#   [[in_channels, out_channels], head_op, [stacked_ops], n_stacked, stride]
# but the parser is not visible here - confirm before relying on it.
__C.net_config = """[[16, 16], 'mbconv_k3_t1', [], 0, 1]|
[[16, 24], 'mbconv_k5_t3', ['mbconv_k5_t3', 'mbconv_k3_t3'], 2, 2]|
[[24, 48], 'mbconv_k5_t6', [], 0, 2]|
[[48, 80], 'mbconv_k5_t6', ['mbconv_k7_t3', 'mbconv_k5_t3', 'mbconv_k3_t3'], 3, 2]|
[[80, 112], 'mbconv_k3_t3', ['mbconv_k3_t3'], 1, 1]|
[[112, 160], 'mbconv_k7_t6', ['mbconv_k7_t3', 'mbconv_k7_t3'], 2, 2]|
[[160, 352], 'mbconv_k5_t6', ['mbconv_k3_t3'], 1, 1]|
[[352, 416], 'mbconv_k3_t3', [], 0, 1]|
[[416, 480], 'mbconv_k3_t3', [], 0, 1]"""

# Section: training-run parameters (batch size and loader worker count).
__C.train_params = AttrDict()

__C.train_params.batch_size = 256
__C.train_params.num_workers = 8

# Section: model dimension and batch-norm settings.
__C.optim = AttrDict()

__C.optim.last_dim = 1728
__C.optim.init_dim = 16
__C.optim.bn_momentum = 0.1
__C.optim.bn_eps = 0.001

# Section: dataset selection.
__C.data = AttrDict()

__C.data.dataset = 'imagenet'  # cifar10 imagenet
示例#8
0
from tools.collections import AttrDict

# Module-level default configuration. `__C` is the private handle used to
# fill in the values below; `cfg` is the same object under its public name.
__C = AttrDict()

cfg = __C

# Empty by default; presumably supplied by a config file or the caller.
__C.net_config = ""

# Section: training-run parameters (batch size and loader worker count).
__C.train_params = AttrDict()

__C.train_params.batch_size = 256
__C.train_params.num_workers = 8

# Section: model dimension and batch-norm settings.
__C.optim = AttrDict()

__C.optim.init_dim = 16
__C.optim.bn_momentum = 0.1
__C.optim.bn_eps = 0.001

# Section: dataset description (name, storage type per split, size, input
# shape).
__C.data = AttrDict()

__C.data.dataset = 'imagenet'
__C.data.train_data_type = 'lmdb'
__C.data.val_data_type = 'img'
__C.data.patch_dataset = False
__C.data.num_examples = 1281167
__C.data.input_size = (3, 224, 224)
from tools.collections import AttrDict

# Module-level default configuration. `__C` is the private handle used to
# fill in the values below; `cfg` is the same object under its public name.
__C = AttrDict()

cfg = __C

# Section: training-run parameters (epoch count and random seed handling).
__C.train_params = AttrDict()
__C.train_params.epochs = 300
__C.train_params.use_seed = False
__C.train_params.seed = 0

# Section: optimisation settings (learning rate, momentum, weight decay,
# gradient clipping).
__C.optim = AttrDict()
__C.optim.init_lr = 0.025
__C.optim.min_lr = 1e-5
__C.optim.lr_schedule = 'cosine'  # cosine poly
__C.optim.momentum = 0.9
__C.optim.weight_decay = 4e-5
__C.optim.use_grad_clip = False
__C.optim.grad_clip = 10


# Label smoothing switch and its strength.
__C.optim.label_smooth = False   # False, True
__C.optim.smooth_alpha = 0.25

# Auxiliary switch and its weight.
# NOTE(review): presumably an auxiliary loss/head - confirm at the use site.
__C.optim.auxiliary = True
__C.optim.auxiliary_weight = 0.4

# Resume settings: a switch plus a nested section holding the checkpoint
# path and the epoch recorded for it.
__C.optim.if_resume = False
__C.optim.resume = AttrDict()
__C.optim.resume.load_path = ''
__C.optim.resume.load_epoch = 0
from tools.collections import AttrDict

# Module-level default configuration. `__C` is the private handle used to
# fill in the values below; `cfg` is the same object under its public name.
__C = AttrDict()

cfg = __C

__C.net_type = 'resnet'  # choose resnet or mobilenet

# Section: training-run parameters (epoch count, seed handling, print
# frequency).
__C.train_params = AttrDict()
__C.train_params.epochs = 100
__C.train_params.use_seed = False
__C.train_params.seed = 0
__C.train_params.print_freq = 50

# Section: optimisation settings (learning rate, momentum, weight decay,
# gradient clipping, label smoothing).
__C.optim = AttrDict()
__C.optim.init_lr = 0.1
__C.optim.min_lr = 0
__C.optim.momentum = 0.9
__C.optim.weight_decay = 1e-4
__C.optim.use_grad_clip = False
__C.optim.grad_clip = 10
__C.optim.label_smooth = False
__C.optim.smooth_alpha = 0.1

# Resume settings: a switch and the checkpoint path.
__C.optim.if_resume = False
__C.optim.resume_path = ''

# Section: data loading settings. `data_path` is a placeholder that has to be
# replaced with a real dataset location.
__C.data = AttrDict()
__C.data.data_path = 'PATH/to/DataSet'
__C.data.num_workers = 32
__C.data.batch_size = 256
from tools.collections import AttrDict

# Module-level default configuration. `__C` is the private handle used to
# fill in the values below; `cfg` is the same object under its public name.
__C = AttrDict()

cfg = __C

# Empty by default; presumably supplied by a config file or the caller.
__C.net_config = ""

# Section: data loading and dataset description (seed, workers, batch size,
# dataset name, storage type per split, size, input shape).
__C.data = AttrDict()
__C.data.seed = 0
__C.data.num_workers = 16
__C.data.batch_size = 256
__C.data.dataset = 'imagenet'
__C.data.train_data_type = 'img'
__C.data.val_data_type = 'img'
__C.data.patch_dataset = False
__C.data.num_examples = 1281167
__C.data.input_size = (3, 224, 224)
示例#12
0
from tools.collections import AttrDict

# Module-level default search configuration. `__C` is the private handle used
# to fill in the values below; `search_cfg` is the same object under its
# public name.
__C = AttrDict()
search_cfg = __C

# Section: architecture-search parameters.
__C.search_params = AttrDict()
__C.search_params.arch_update_epoch = 10
__C.search_params.val_start_epoch = 120
__C.search_params.sample_policy = 'prob'  # prob uniform
__C.search_params.weight_sample_num = 1
__C.search_params.softmax_temp = 1.

# Empty by default; presumably filled in from a config file or at runtime.
__C.search_params.adjoin_connect_nums = []
__C.search_params.net_scale = AttrDict()
__C.search_params.net_scale.chs = []
__C.search_params.net_scale.fm_sizes = []
__C.search_params.net_scale.stage = []
__C.search_params.net_scale.num_layers = []

# Candidate operation names for the "stack" position.
# NOTE(review): names look like mbconv_k<kernel>_t<expansion> - confirm
# against the operation registry.
__C.search_params.PRIMITIVES_stack = [
    'mbconv_k3_t3',
    'mbconv_k3_t6',
    'mbconv_k5_t3',
    'mbconv_k5_t6',
    'mbconv_k7_t3',
    'mbconv_k7_t6',
    'skip_connect',
]
__C.search_params.PRIMITIVES_head = [
    'mbconv_k3_t3',
    'mbconv_k3_t6',
from tools.collections import AttrDict

# Module-level default configuration. `__C` is the private handle used to
# fill in the values below; `cfg` is the same object under its public name.
__C = AttrDict()

cfg = __C

# Section: training-run parameters (epoch count and random seed handling).
__C.train_params = AttrDict()
__C.train_params.epochs = 250
__C.train_params.use_seed = False
__C.train_params.seed = 0

# Section: optimisation settings (learning rate, momentum, weight decay,
# gradient clipping).
__C.optim = AttrDict()
__C.optim.init_lr = 0.5     # 0.5, 0.1
__C.optim.min_lr = 1e-5
__C.optim.lr_schedule = 'linear'  # cosine poly, linear

__C.optim.momentum = 0.9
__C.optim.weight_decay = 3e-5
__C.optim.use_grad_clip = False
__C.optim.grad_clip = 5

# Label smoothing switch and its strength.
__C.optim.label_smooth = True
__C.optim.smooth_alpha = 0.1

# Auxiliary switch and its weight.
# NOTE(review): presumably an auxiliary loss/head - confirm at the use site.
__C.optim.auxiliary = True
__C.optim.auxiliary_weight = 0.4

# Resume settings: a switch plus a nested section holding the checkpoint
# path and the epoch recorded for it.
__C.optim.if_resume = False
__C.optim.resume = AttrDict()
__C.optim.resume.load_path = ''
__C.optim.resume.load_epoch = 0