def __init__(self, observation_space: int, action_space: int):
    super().__init__(observation_space, action_space)
    self.name = 'DQNAgent'
    self.summary_checkpoint = 1000
    self.target_update_steps = 2500
    self.input_size = 64
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print('Utilizing device {}'.format(self.device))
    self.policy = DQNLSTM(self.input_size, action_space).to(self.device)
    self.target = DQNLSTM(self.input_size, action_space).to(self.device)
    # Sync the target network with the policy network's initial weights.
    self.target.load_state_dict(self.policy.state_dict())
    self.target.eval()
    self.optimizer = Adam(self.policy.parameters(), lr=0.00025)
    self.loss = SmoothL1Loss()
    self.replay_buffer = ReplayBuffer(buffer_size=50000, batch_size=32)
    self.steps_done = 0
    # Linear annealing schedule for epsilon-greedy exploration.
    self.eps_start = 0.99
    self.eps_end = 0.05
    self.eps_decay = (self.eps_start - self.eps_end) / 200000
    self.eps_threshold = self.eps_start
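# A minimal epsilon-greedy sketch showing how the annealing fields above are
# typically consumed. `select_action` is not part of the original snippet, and
# `self.action_space` being stored by the base class is an assumption.
import random

import torch


def select_action(self, state: torch.Tensor) -> int:
    # Anneal epsilon linearly from eps_start toward eps_end.
    self.eps_threshold = max(self.eps_end, self.eps_threshold - self.eps_decay)
    self.steps_done += 1
    if random.random() < self.eps_threshold:
        return random.randrange(self.action_space)  # explore
    with torch.no_grad():
        return int(self.policy(state.to(self.device)).argmax().item())  # exploit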
def create_loss(name, weight, ignore_index=None, pos_weight=None):
    if name == 'BCEWithLogitsLoss':
        return nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    elif name == 'BCEDiceLoss':
        return BCEDiceLoss(alpha=1, beta=1)
    elif name == 'CrossEntropyLoss':
        if ignore_index is None:
            ignore_index = -100  # use the default 'ignore_index' as defined in the CrossEntropyLoss
        return nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_index)
    elif name == 'WeightedCrossEntropyLoss':
        if ignore_index is None:
            ignore_index = -100  # use the default 'ignore_index' as defined in the CrossEntropyLoss
        return WeightedCrossEntropyLoss(ignore_index=ignore_index)
    elif name == 'PixelWiseCrossEntropyLoss':
        return PixelWiseCrossEntropyLoss(class_weights=weight, ignore_index=ignore_index)
    elif name == 'GeneralizedDiceLoss':
        return GeneralizedDiceLoss(sigmoid_normalization=False)
    elif name == 'DiceLoss':
        return DiceLoss(weight=weight, sigmoid_normalization=False)
    elif name == 'TagsAngularLoss':
        return TagsAngularLoss()
    elif name == 'MSELoss':
        return MSELoss()
    elif name == 'SmoothL1Loss':
        return SmoothL1Loss()
    elif name == 'L1Loss':
        return L1Loss()
    elif name == 'WeightedSmoothL1Loss':
        return WeightedSmoothL1Loss()
    else:
        raise RuntimeError(
            f"Unsupported loss function: '{name}'. Supported losses: {SUPPORTED_LOSSES}"
        )
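# A hedged usage sketch for create_loss above; the 5-D tensor shape is an
# assumption (typical N x C x D x H x W layout for 3-D segmentation volumes).
import torch

criterion = create_loss('SmoothL1Loss', weight=None)
pred = torch.randn(2, 1, 8, 8, 8, requires_grad=True)
target = torch.randn(2, 1, 8, 8, 8)
loss = criterion(pred, target)
loss.backward()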
def __init__(self, hparams: AttributeDict):
    super(LitModelLongitudinal, self).__init__()
    self.hparams = hparams
    self.model = UNet(
        in_channels=hparams.in_channels,
        out_classes=1,
        dimensions=3,
        padding_mode="zeros",
        activation=hparams.activation,
        conv_num_in_layer=[1, 2, 3, 3, 3],
        residual=False,
        out_channels_first_layer=16,
        kernel_size=5,
        normalization=hparams.normalization,
        downsampling_type="max",
        use_sigmoid=False,
        use_bias=True,
    )
    self.sigmoid = Sigmoid()
    if self.hparams.loss == "l2":
        self.criterion = MSELoss()
    elif self.hparams.loss == "l1":
        self.criterion = L1Loss()
    elif self.hparams.loss == "smoothl1":
        self.criterion = SmoothL1Loss()
    else:
        # Fail fast on an unknown loss name instead of hitting an
        # AttributeError later when self.criterion is first used.
        raise ValueError(f"Unsupported loss: '{self.hparams.loss}'")
    self.train_log_step = random.randint(1, 500)
    self.val_log_step = random.randint(1, 100)
    self.clip_min = self.hparams.clip_min
    self.clip_max = self.hparams.clip_max
def __init__(self, env, mode, pre_trained_model, tensorboard_writer=None):
    super(DQNAgent, self).__init__(env, mode, tensorboard_writer)
    self.agent_name = 'DQN' + str(self.agent_no)
    self.memory = ReplayMemory()
    self.network = DeepQNetwork(self.obs_space[0], self.action_space)
    if self.mode == 'play':
        self.network.load_params(pre_trained_model)
        self.network.eval()
    elif self.mode == 'train':
        self.eval_network = DeepQNetwork(self.obs_space[0], self.action_space)
        self.eval_network.eval()
        if pre_trained_model:
            self.eval_network.load_params(pre_trained_model)
        self.optimizer = optim.RMSprop(self.network.parameters(), lr=LR)
        self.loss_func = SmoothL1Loss()
    else:
        raise ValueError('Please set a valid mode for the agent (play or train)')
def get_loss_criterion(config):
    """
    Returns the loss function based on provided configuration

    :param config: (dict) a top level configuration object containing the 'loss' key
    :return: an instance of the loss function
    """
    assert 'loss' in config, 'Could not find loss function configuration'
    loss_config = config['loss']
    name = loss_config['name']

    ignore_index = loss_config.get('ignore_index', None)
    weight = loss_config.get('weight', None)
    if weight is not None:
        # convert to cuda tensor if necessary
        weight = torch.tensor(weight).to(config['device'])

    if name == 'BCEWithLogitsLoss':
        skip_last_target = loss_config.get('skip_last_target', False)
        if ignore_index is None and not skip_last_target:
            return nn.BCEWithLogitsLoss()
        else:
            return BCELossWrapper(nn.BCEWithLogitsLoss(), ignore_index=ignore_index,
                                  skip_last_target=skip_last_target)
    elif name == 'CrossEntropyLoss':
        if ignore_index is None:
            ignore_index = -100  # use the default 'ignore_index' as defined in the CrossEntropyLoss
        return nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_index)
    elif name == 'WeightedCrossEntropyLoss':
        if ignore_index is None:
            ignore_index = -100  # use the default 'ignore_index' as defined in the CrossEntropyLoss
        return WeightedCrossEntropyLoss(weight=weight, ignore_index=ignore_index)
    elif name == 'PixelWiseCrossEntropyLoss':
        return PixelWiseCrossEntropyLoss(class_weights=weight, ignore_index=ignore_index)
    elif name == 'GeneralizedDiceLoss':
        return GeneralizedDiceLoss(weight=weight, ignore_index=ignore_index)
    elif name == 'DiceLoss':
        sigmoid_normalization = loss_config.get('sigmoid_normalization', True)
        skip_last_target = loss_config.get('skip_last_target', False)
        return DiceLoss(weight=weight, ignore_index=ignore_index,
                        sigmoid_normalization=sigmoid_normalization,
                        skip_last_target=skip_last_target)
    elif name == 'TagsAngularLoss':
        tags_coefficients = loss_config['tags_coefficients']
        return TagsAngularLoss(tags_coefficients)
    elif name == 'MSEWithLogitsLoss':
        return MSEWithLogitsLoss()
    elif name == 'MSELoss':
        return MSELoss()
    elif name == 'SmoothL1Loss':
        return SmoothL1Loss()
    elif name == 'L1Loss':
        return L1Loss()
    elif name == 'ContrastiveLoss':
        return ContrastiveLoss(loss_config['delta_var'], loss_config['delta_dist'],
                               loss_config['norm'], loss_config['alpha'],
                               loss_config['beta'], loss_config['gamma'])
    elif name == 'WeightedSmoothL1Loss':
        return WeightedSmoothL1Loss(threshold=loss_config['threshold'],
                                    initial_weight=loss_config['initial_weight'],
                                    apply_below_threshold=loss_config.get('apply_below_threshold', True))
    else:
        raise RuntimeError(f"Unsupported loss function: '{name}'. Supported losses: {SUPPORTED_LOSSES}")
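# An illustrative config for get_loss_criterion above; the keys mirror exactly
# what the function reads, and 'cpu' as the device is an assumption for the sketch.
config = {
    'device': 'cpu',
    'loss': {
        'name': 'SmoothL1Loss',
    },
}
criterion = get_loss_criterion(config)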
def get_loss_criterion(config):
    """
    Returns the loss function based on provided configuration

    :param config: (dict) a top level configuration object containing the 'loss' key
    :return: an instance of the loss function
    """
    assert 'loss' in config, 'Could not find loss function configuration'
    loss_config = config['loss']
    name = loss_config['name']

    ignore_index = loss_config.get('ignore_index', None)
    weight = loss_config.get('weight', None)
    if weight is not None:
        # convert to cuda tensor if necessary
        weight = torch.tensor(weight).to(config['device'])

    if name == 'BCEWithLogitsLoss':
        skip_last_target = loss_config.get('skip_last_target', False)
        if ignore_index is None and not skip_last_target:
            return nn.BCEWithLogitsLoss()
        else:
            return BCELossWrapper(nn.BCEWithLogitsLoss(), ignore_index=ignore_index,
                                  skip_last_target=skip_last_target)
    elif name == 'CrossEntropyLoss':
        if ignore_index is None:
            ignore_index = -100  # use the default 'ignore_index' as defined in the CrossEntropyLoss
        return nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_index)
    elif name == 'WeightedCrossEntropyLoss':
        if ignore_index is None:
            ignore_index = -100  # use the default 'ignore_index' as defined in the CrossEntropyLoss
        return WeightedCrossEntropyLoss(weight=weight, ignore_index=ignore_index)
    elif name == 'PixelWiseCrossEntropyLoss':
        return PixelWiseCrossEntropyLoss(class_weights=weight, ignore_index=ignore_index)
    elif name == 'GeneralizedDiceLoss':
        return GeneralizedDiceLoss(weight=weight, ignore_index=ignore_index)
    elif name == 'DiceLoss':
        sigmoid_normalization = loss_config.get('sigmoid_normalization', True)
        skip_last_target = loss_config.get('skip_last_target', False)
        return DiceLoss(weight=weight, ignore_index=ignore_index,
                        sigmoid_normalization=sigmoid_normalization,
                        skip_last_target=skip_last_target)
    elif name == 'TagsAngularLoss':
        tags_coefficients = loss_config['tags_coefficients']
        return TagsAngularLoss(tags_coefficients)
    elif name == 'MSEWithLogitsLoss':
        return MSEWithLogitsLoss()
    elif name == 'MSELoss':
        return MSELoss()
    elif name == 'SmoothL1Loss':
        return SmoothL1Loss()
    elif name == 'L1Loss':
        return L1Loss()
    else:
        # Unknown loss names fall through to None; the caller must handle it.
        return None
def __init__(self, config):
    super().__init__(config)
    self.do_voken_cls = config.do_voken_cls
    self.do_voken_reg = config.do_voken_reg
    self.do_voken_ctr = config.do_voken_ctr
    self.shared_head = config.shared_head
    self.verbose = config.verbose
    if self.verbose:
        print(f"Model: do voken cls -- {self.do_voken_cls}, do voken reg -- {self.do_voken_reg},"
              f" do voken ctr -- {self.do_voken_ctr}")

    self.token_cls_loss_fct = CrossEntropyLoss()

    if self.shared_head:
        if self.verbose:
            print("Model: Using shared head for Voken and Token predictions.")
        self.cls = BertSharedHead(config)
        # Reinit the weight of the new head.
        self.init_weights()
    else:
        # Voken Classification
        if config.do_voken_cls:
            self.visual_cls_head = BertVLMClassificationHead(config)

        # Voken Regression
        if config.do_voken_reg:
            assert config.voken_dim is not None, "you need to set voken dim in the config."
            self.visual_reg_head = BertVLMRegressionHead(config)

        # Voken Contrastive
        if config.do_voken_ctr:
            assert config.voken_dim is not None, "you need to set voken dim in the config."
            self.visual_ctr_head = BertVLMContrastiveHeadNew(config)

    # Build voken feature embeddings if needed.
    if self.do_voken_ctr or self.do_voken_reg:
        # The voken emb will be preloaded by func "init_voken_feat_emb".
        self.voken_feat_emb = nn.Embedding(config.voken_size, config.voken_dim)
        # Freeze this embedding.
        for p in self.voken_feat_emb.parameters():
            p.requires_grad = False

    # Build loss functions.
    if config.do_voken_cls:
        # Voken Classification
        self.voken_cls_loss_fct = CrossEntropyLoss()
    if config.do_voken_reg:
        # Voken Regression
        self.voken_reg_loss_fct = SmoothL1Loss(reduction='none')
        # self.voken_reg_loss_fct = torch.nn.L1Loss(reduction='none')
    if config.do_voken_ctr:
        # Voken Contrastive
        self.voken_ctr_loss_fct = CrossEntropyLoss()
def _create_loss(name, loss_config, weight, ignore_index, pos_weight):
    if name == 'BCEWithLogitsLoss':
        return nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    elif name == 'BCEDiceLoss':
        alpha = loss_config.get('alpha', 1.)
        beta = loss_config.get('beta', 1.)
        return BCEDiceLoss(alpha, beta)
    elif name == 'CrossEntropyLoss':
        if ignore_index is None:
            ignore_index = -100  # use the default 'ignore_index' as defined in the CrossEntropyLoss
        return nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_index)
    elif name == 'WeightedCrossEntropyLoss':
        if ignore_index is None:
            ignore_index = -100  # use the default 'ignore_index' as defined in the CrossEntropyLoss
        return WeightedCrossEntropyLoss(ignore_index=ignore_index)
    elif name == 'PixelWiseCrossEntropyLoss':
        return PixelWiseCrossEntropyLoss(class_weights=weight, ignore_index=ignore_index)
    elif name == 'GeneralizedDiceLoss':
        sigmoid_normalization = loss_config.get('sigmoid_normalization', True)
        return GeneralizedDiceLoss(sigmoid_normalization=sigmoid_normalization)
    elif name == 'DiceLoss':
        sigmoid_normalization = loss_config.get('sigmoid_normalization', True)
        return DiceLoss(weight=weight, sigmoid_normalization=sigmoid_normalization)
    elif name == 'GaussianDiceLoss':
        sigmoid_normalization = loss_config.get('sigmoid_normalization', True)
        return GaussianDiceLoss(weight=weight, sigmoid_normalization=sigmoid_normalization)
    elif name == 'TemporalDiceLoss':
        sigmoid_normalization = loss_config.get('sigmoid_normalization', True)
        return TemporalDiceLoss(weight=weight, sigmoid_normalization=sigmoid_normalization)
    elif name == 'TagsAngularLoss':
        tags_coefficients = loss_config['tags_coefficients']
        return TagsAngularLoss(tags_coefficients)
    elif name == 'MSELoss':
        return MSELoss()
    elif name == 'SmoothL1Loss':
        return SmoothL1Loss()
    elif name == 'L1Loss':
        return L1Loss()
    elif name == 'ContrastiveLoss':
        return ContrastiveLoss(loss_config['delta_var'], loss_config['delta_dist'],
                               loss_config['norm'], loss_config['alpha'],
                               loss_config['beta'], loss_config['gamma'])
    elif name == 'WeightedSmoothL1Loss':
        return WeightedSmoothL1Loss(
            threshold=loss_config['threshold'],
            initial_weight=loss_config['initial_weight'],
            apply_below_threshold=loss_config.get('apply_below_threshold', True))
    else:
        raise RuntimeError(
            f"Unsupported loss function: '{name}'. Supported losses: {SUPPORTED_LOSSES}"
        )
def __init__(
    self,
    word_embeddings: TextFieldEmbedder,
    encoder: Seq2SeqEncoder,
    vocab: Vocabulary,
    dropout: float = 0.5,
    n_linear_layers=1,
) -> None:
    """
    :param word_embeddings: the embeddings to start with
    :param encoder: the seq2seq transformer of embeddings, can be an LSTM for example
    :param vocab: dataset input and output vocabulary
    """
    super(BaseTextClassifier, self).__init__(vocab)
    self.word_embeddings = word_embeddings
    self.encoder = encoder

    # Representation layer: the layer just above the final classifier and the
    # non-linearity (hidden[-1]). It is used to calculate the FID score and
    # similar metrics, which is why it is exposed as the self.representations
    # class attribute.
    self.representations = self.encoder

    if n_linear_layers > 0:
        extra_hiddens = []
        for k in range(n_linear_layers):
            extra_hiddens += [
                nn.Linear(self.encoder.get_output_dim(), self.encoder.get_output_dim()),
                nn.ReLU(True)
            ]
        self.extra_hiddens = nn.Sequential(*extra_hiddens)
    else:
        self.extra_hiddens = None

    self.hidden2label = torch.nn.Linear(
        in_features=encoder.get_output_dim(),
        out_features=vocab.get_vocab_size('labels'))
    # self.accuracy = CategoricalAccuracy()
    self.criterion = CrossEntropyLoss()
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "hinge-loss": Loss(HingeEmbeddingLoss()),
        "huber-loss": Loss(SmoothL1Loss()),
        "cross-entropy-loss": Loss(CrossEntropyLoss()),
        "confidence": Confidence()
    }
    self.dropout = nn.Dropout(dropout)
def __init__(self, actions):
    self.model = DroneQNet(2, IMG_W, IMG_H, len(actions))
    self.model.double()
    self.target_model = DroneQNet(2, IMG_W, IMG_H, len(actions))
    self.target_model.double()
    self.criterion = SmoothL1Loss()
    self.optimizer = Adam(self.model.parameters(), lr=0.001)
    self.gamma = 0.8
    self.train_iterations = 0
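# A hedged sketch (not in the original class) of one TD update with the pieces
# defined above; tensor names, shapes, and the bootstrapped target formula are
# assumptions, and inputs must be double precision to match model.double().
import torch

def train_step(self, states, actions, rewards, next_states, dones):
    # Q-values of the actions actually taken.
    q = self.model(states).gather(1, actions.unsqueeze(1)).squeeze(1)
    with torch.no_grad():
        # Bootstrapped target from the frozen target network.
        q_next = self.target_model(next_states).max(1)[0]
        target = rewards + self.gamma * q_next * (1 - dones)
    loss = self.criterion(q, target)
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
    self.train_iterations += 1
    return loss.item()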
def __init__(self, hparam):
    super(Stage1, self).__init__()
    self.backbone = BackBone()
    self.bbox_regress = ComponentRegress()
    self.criterion = SmoothL1Loss()
    self.args = hparam['args']
    self.hparam = hparam
    self.optimizer = None
    self.scheduler = None
    self.train_data = None
    self.val_data = None
    self.test_data = None
def stacked_mean_loss(model_out, epoch, consistency_rampup):
    base_vid_loss = SmoothL1Loss(reduction='mean')
    video_ages = torch.matmul(torch.exp(model_out), settings.CLASSES).view(-1)
    with torch.no_grad():
        means = torch.tensor(
            list(map(torch.mean, video_ages.split(settings.FRAMES_PER_VID))))
        target = torch.cat(
            list(map(lambda x: x.repeat(settings.FRAMES_PER_VID),
                     means))).to(dtype=torch.float32, device=settings.DEVICE)
        # Frames already within 8 of the per-video mean keep their own
        # prediction as the target; only the outliers are pulled toward it.
        idx = torch.abs(video_ages - target) < 8
        target[idx] = video_ages[idx]
    w = get_current_consistency_weight(epoch, consistency_rampup=consistency_rampup)
    return base_vid_loss(video_ages, target) * w
def SmoothL1Adam(learn_params, lr=0.001, amsgrad=True):
    """Returns a SmoothL1 loss function and an Adam optimizer.

    Arguments
    ---------
    :learn_params: The parameters to be learned during training
    :lr: The learning rate for the Adam optimizer
    :amsgrad: Used to specify whether to use the AMSGrad variant instead of
        the traditional Adam.
    """
    criterion = SmoothL1Loss()
    # Honor the caller's amsgrad flag rather than hard-coding it.
    optimizer = Adam(learn_params, lr=lr, amsgrad=amsgrad)
    return criterion, optimizer
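# Usage sketch for SmoothL1Adam above; nn.Linear stands in for a real model.
import torch.nn as nn

model = nn.Linear(10, 1)
criterion, optimizer = SmoothL1Adam(model.parameters(), lr=5e-4, amsgrad=False)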
def __init__(
    self,
    gamma: float = 2.0,
    pos_weight: float = 4.0,
    label_smoothing: Optional[float] = None,
    reduction: str = "mean",
    smooth: bool = True,
):
    super(CenterNetLoss, self).__init__()
    self.reduction = reduction
    self.cls_criterion = FocalLossWithLogits(
        gamma, label_smoothing=label_smoothing, reduction="none")
    self.loc_criterion = (SmoothL1Loss(reduction="none")
                          if smooth else L1Loss(reduction="none"))
    self.pos_weight = pos_weight
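# Why the `smooth` flag above matters: for small residuals SmoothL1Loss behaves
# quadratically (gradient shrinks with the error) while L1Loss keeps a
# constant-magnitude gradient; a quick standalone check:
import torch
from torch.nn import L1Loss, SmoothL1Loss

x = torch.tensor([0.1], requires_grad=True)
SmoothL1Loss()(x, torch.zeros(1)).backward()
print(x.grad)  # tensor([0.1000]) -- gradient proportional to the residual

y = torch.tensor([0.1], requires_grad=True)
L1Loss()(y, torch.zeros(1)).backward()
print(y.grad)  # tensor([1.]) -- constant-magnitude gradient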
def __init__(self, reduction: str = "mean"):
    """
    Compute a smooth L1 (Huber) loss. The preds and targets are real-valued
    tensors of the same shape.

    :param reduction: Specifies the reduction to apply to the output:
        `'none'` | `'mean'` | `'sum'`. `'none'`: no reduction will be applied,
        `'mean'`: the sum of the output will be divided by the number of
        elements in the output, `'sum'`: the output will be summed.
        Default: 'mean'.
    """
    super().__init__(reduction=reduction)
    if babilim.is_backend(babilim.PYTORCH_BACKEND):
        from torch.nn import SmoothL1Loss
        self.loss_fun = SmoothL1Loss(reduction="none")
    else:
        from tensorflow.keras.losses import huber
        self.loss_fun = huber
    self.delta = 1.0
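# SmoothL1Loss with the default beta matches the Huber loss with delta=1.0
# (the delta set above); a quick numerical check of that equivalence:
import torch
from torch.nn import SmoothL1Loss

x = torch.linspace(-3, 3, 7)
y = torch.zeros_like(x)
smooth = SmoothL1Loss(reduction='none')(x, y)
delta = 1.0
huber = torch.where(x.abs() < delta, 0.5 * x ** 2, delta * (x.abs() - 0.5 * delta))
assert torch.allclose(smooth, huber)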
def __init__(self, observation_space: int, action_space: int, cfg: dict):
    super().__init__(observation_space, action_space)
    self.name = 'OfflineDQNAgent'
    self.summary_checkpoint = cfg['SUMMARY_CHECKPOINT']
    self.batches_done = 0
    self.target_update_steps = cfg['TARGET_UPDATE_INTERVAL']
    self.gamma = cfg['GAMMA']
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print('Utilizing device {}'.format(self.device))
    self.policy = DQNDense(observation_space, action_space).to(self.device)
    self.target = DQNDense(observation_space, action_space).to(self.device)
    self.target.load_state_dict(self.policy.state_dict())
    self.target.eval()
    self.optimizer = Adam(self.policy.parameters(), lr=cfg['LEARNING_RATE'])
    self.loss = SmoothL1Loss()
def __init__(self, observation_space: int, action_space: int):
    super().__init__(observation_space, action_space)
    self.name = 'OfflineDQNAgent'
    self.summary_checkpoint = 1000
    self.batches_done = 0
    self.target_update_steps = 5000
    self.input_size = 16
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print('Utilizing device {}'.format(self.device))
    self.policy = DQNDense(self.input_size, action_space).to(self.device)
    self.target = DQNDense(self.input_size, action_space).to(self.device)
    # Sync the target network with the policy network's initial weights.
    self.target.load_state_dict(self.policy.state_dict())
    self.target.eval()
    self.optimizer = Adam(self.policy.parameters(), lr=0.00025)
    self.loss = SmoothL1Loss()
def get_loss_functions(config):
    def cross_entropy_loss(policy_logits, target_policy):
        # Soft-target cross entropy: target_policy is a distribution, not an index.
        loss = (-target_policy * LogSoftmax(dim=1)(policy_logits)).sum(1)
        return loss

    if config.policy_loss == 'CrossEntropyLoss':
        policy_loss = cross_entropy_loss
    else:
        raise NotImplementedError

    if not config.no_support:
        # With categorical (support-based) value/reward heads, scalars are
        # also trained with the cross entropy loss.
        scalar_loss = policy_loss
    else:
        if config.scalar_loss == 'MSE':
            scalar_loss = MSELoss(reduction='none')
        elif config.scalar_loss == 'Huber':
            scalar_loss = SmoothL1Loss(reduction='none')
        else:
            raise NotImplementedError

    return scalar_loss, policy_loss
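# Standalone illustration of the soft-target cross entropy defined above.
import torch
from torch.nn import LogSoftmax

logits = torch.randn(2, 4)
target = torch.softmax(torch.randn(2, 4), dim=1)  # rows are probability distributions
loss = (-target * LogSoftmax(dim=1)(logits)).sum(1)  # shape (2,): one value per sample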
def main():
    global args
    args = arguments.parse_args()
    experiment_env = create_experiment_dir()
    assert args.rois_per_batch % args.batch_size == 0, "Uneven number of rois per image"
    rois_per_image = args.rois_per_batch // args.batch_size
    train_data = VOCDataSetROIs("data", "train", rois_per_image, enabled_flip=True)
    dataloader = DataLoader(train_data,
                            batch_size=args.batch_size,
                            shuffle=True,
                            num_workers=5,
                            collate_fn=collate_rois_fn)

    # ################## MODEL BOOTSTRAP #####################
    print("[+] Bootstrapping model")
    if args.stage_2_path is not None:
        print("[+] Loading stage 2 weights")
        net = FasterRCNN(args.stage_2_path).cuda()
    net.train()
    if args.resume is not None:
        print("[+] Resuming from %s" % args.resume)
        checkpoint = torch.load(args.resume)
        net.load_state_dict(checkpoint["state_dict"])

    cross_entropy = CrossEntropyLoss(reduction='mean').cuda()
    smooth_l1_loss = SmoothL1Loss(reduction='sum').cuda()
    optimizer = opt.SGD(
        [params for params in net.parameters() if params.requires_grad],
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=0.0005)

    # ################## MODEL TRAINING #####################
    print("[+] Training model")
    start_epoch = 0 if args.resume is None else checkpoint["epoch"]
    for epoch in range(start_epoch, args.epoch):
        adjust_learning_rate(optimizer, epoch)
        train(net, cross_entropy, smooth_l1_loss, optimizer, dataloader,
              experiment_env, epoch)
def compute_loss(
    self,
    model_output: DigitDetectionModelOutput,
    model_target: DigitDetectionModelTarget,
) -> Optional[torch.Tensor]:
    # Bail out early when no anchors matched; there is nothing to regress.
    if len(model_target.matched_anchors) == 0:
        return None
    smooth = SmoothL1Loss()
    loss_box_regression = smooth(model_output.box_regression_output,
                                 model_target.box_regression_target)
    loss_classification = sigmoid_focal_loss(
        model_output.classification_output,
        model_target.classification_target,
        reduction='mean')
    # Normalize the summed loss by the number of matched anchors.
    return ((loss_box_regression + loss_classification)
            * model_output.classification_output.shape[1]
            / len(model_target.matched_anchors))
def __init__(self, config):
    super().__init__()

    # Configuration
    self.config = config

    # LXMERT backbone
    self.bert = LXMERTBase.from_pretrained(
        self.config.bert_model_name,
        config=BertConfig.from_dict(
            OmegaConf.to_container(self.config, resolve=True)),
        cache_dir=os.path.join(get_mmf_cache_dir(),
                               "distributed_{}".format(-1)),
    )

    self.num_labels = config.num_labels
    self.gqa_labels = config.gqa_labels
    self.task_mask_lm = config.task_mask_lm
    self.task_obj_predict = config.task_obj_predict
    self.task_matched = config.task_matched
    self.task_qa = config.task_qa
    self.visual_losses = config.visual_losses
    self.visual_loss_config = config.visual_loss_config

    # Pre-training heads
    self.cls = BertPreTrainingHeads(
        config, self.bert.embeddings.word_embeddings.weight)
    if self.task_obj_predict:
        self.obj_predict_head = BertVisualObjHead(config)
    if self.task_qa:
        self.answer_head = BertVisualAnswerHead(
            config, [self.num_labels, self.gqa_labels])

    # Loss functions
    self.loss_fcts = {
        "l2": SmoothL1Loss(reduction="none"),
        "ce": CrossEntropyLoss(ignore_index=-1, reduction="none"),
        "ce_lang": CrossEntropyLoss(ignore_index=-1),
    }
def _create_loss(name, loss_config, weight, ignore_index, pos_weight):
    if name == 'BCEWithLogitsLoss':
        return nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    elif name == 'BCEDiceLoss':
        alpha = loss_config.get('alpha', 1.)
        beta = loss_config.get('beta', 1.)
        return BCEDiceLoss(alpha, beta)
    elif name == 'CrossEntropyLoss':
        if ignore_index is None:
            ignore_index = -100  # use the default 'ignore_index' as defined in the CrossEntropyLoss
        return nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_index)
    elif name == 'WeightedCrossEntropyLoss':
        if ignore_index is None:
            ignore_index = -100  # use the default 'ignore_index' as defined in the CrossEntropyLoss
        return WeightedCrossEntropyLoss(ignore_index=ignore_index)
    elif name == 'PixelWiseCrossEntropyLoss':
        return PixelWiseCrossEntropyLoss(class_weights=weight, ignore_index=ignore_index)
    elif name == 'GeneralizedDiceLoss':
        normalization = loss_config.get('normalization', 'sigmoid')
        return GeneralizedDiceLoss(normalization=normalization)
    elif name == 'DiceLoss':
        normalization = loss_config.get('normalization', 'sigmoid')
        return DiceLoss(weight=weight, normalization=normalization)
    elif name == 'MSELoss':
        return MSELoss()
    elif name == 'SmoothL1Loss':
        return SmoothL1Loss()
    elif name == 'L1Loss':
        return L1Loss()
    elif name == 'WeightedSmoothL1Loss':
        return WeightedSmoothL1Loss(
            threshold=loss_config['threshold'],
            initial_weight=loss_config['initial_weight'],
            apply_below_threshold=loss_config.get('apply_below_threshold', True))
    else:
        raise RuntimeError(f"Unsupported loss function: '{name}'")
def prediction(self, dataset=None, sampler=None, model_path=None, batch_size=2,
               epochs=1, is_reports_output=True, log_dir=None):
    if model_path is None and hasattr(self, 'model_path'):
        model_path = self.model_path
        self.learned = True
    if not hasattr(self, 'learned') or not hasattr(self, 'model'):
        raise ValueError('no learned model available.')
    if dataset is None:
        if hasattr(self, 'dataset'):
            dataset = self.dataset
        else:
            raise ValueError('require dataset')

    Example = namedtuple('Example', ('pred', 'true'))
    criterion = SmoothL1Loss()

    def process(batch, model, iter_bar, step):
        input_ids, input_mask, label_id = batch
        logits = model(input_ids, input_mask)
        loss = criterion(logits.view(-1), label_id.view(-1))
        example = Example(logits.tolist(), label_id.tolist())
        return loss, example

    preds = self.helper.predict(process, self.model, dataset, model_file=model_path)
    with open("preds.json", "w") as f:
        json.dump(preds, f, indent=2, ensure_ascii=False)
def __init__(self, config):
    super().__init__()

    # Configuration
    self.config = config

    # LXMERT backbone
    self.bert = LXMERTBase.from_pretrained(
        self.config.bert_model_name,
        config=BertConfig.from_dict(self.config),
    )

    self.num_labels = config.num_labels
    self.gqa_labels = config.gqa_labels
    self.task_mask_lm = config.task_mask_lm
    self.task_obj_predict = config.task_obj_predict
    self.task_matched = config.task_matched
    self.task_qa = config.task_qa
    self.visual_losses = config.visual_losses
    self.visual_loss_config = config.visual_loss_config

    # Pre-training heads
    self.cls = BertPreTrainingHeads(
        config, self.bert.embeddings.word_embeddings.weight)
    if self.task_obj_predict:
        self.obj_predict_head = BertVisualObjHead(config)
    if self.task_qa:
        self.answer_head = BertVisualAnswerHead(
            config, [self.num_labels, self.gqa_labels])

    # Loss functions
    self.loss_fcts = {
        'l2': SmoothL1Loss(reduction='none'),
        'ce': CrossEntropyLoss(ignore_index=-1, reduction='none'),
        'ce_lang': CrossEntropyLoss(ignore_index=-1),
    }
def forward(self, input_ids, token_type_ids=None, attention_mask=None,
            masked_lm_labels=None, visual_feats=None, pos=None,
            obj_labels=None, matched_label=None, ans=None):
    (lang_output, visn_output), pooled_output = self.bert(
        input_ids, token_type_ids, attention_mask,
        visual_feats=(visual_feats, pos),
    )

    lang_prediction_scores, cross_relationship_score = self.cls(
        lang_output, pooled_output)
    if self.task_qa:
        answer_score = self.answer_head(pooled_output)
    else:
        # This answer_score would not be used anywhere,
        # just to keep a constant return function signature.
        answer_score = pooled_output[0][0]

    total_loss = 0.
    loss_fct = CrossEntropyLoss(ignore_index=-1)
    losses = ()
    if masked_lm_labels is not None and self.task_mask_lm:
        masked_lm_loss = loss_fct(
            lang_prediction_scores.view(-1, self.config.vocab_size),
            masked_lm_labels.view(-1)
        )
        total_loss += masked_lm_loss
        losses += (masked_lm_loss.detach(),)
    if matched_label is not None and self.task_matched:
        matched_loss = loss_fct(
            cross_relationship_score.view(-1, 2),
            matched_label.view(-1)
        )
        total_loss += matched_loss
        losses += (matched_loss.detach(),)
    if obj_labels is not None and self.task_obj_predict:
        loss_fcts = {
            'l2': SmoothL1Loss(reduction='none'),
            'ce': CrossEntropyLoss(ignore_index=-1, reduction='none')
        }
        total_visn_loss = 0.
        visn_prediction_scores_dict = self.obj_predict_head(visn_output)
        for key in VISUAL_CONFIG.visual_losses:
            label, mask_conf = obj_labels[key]
            output_dim, loss_fct_name, label_shape, weight = \
                VISUAL_CONFIG.visual_loss_config[key]
            visn_loss_fct = loss_fcts[loss_fct_name]
            visn_prediction_scores = visn_prediction_scores_dict[key]
            visn_loss = visn_loss_fct(
                visn_prediction_scores.view(-1, output_dim),
                label.view(*label_shape),
            )
            if visn_loss.dim() > 1:  # Regression losses
                visn_loss = visn_loss.mean(1)
            visn_loss = (visn_loss * mask_conf.view(-1)).mean() * weight
            total_visn_loss += visn_loss
            losses += (visn_loss.detach(),)
        total_loss += total_visn_loss
    if ans is not None and self.task_qa:
        answer_loss = loss_fct(
            answer_score.view(-1, self.num_answers),
            ans.view(-1)
        )
        # Since this GitHub version pre-trains with the QA loss from the
        # beginning, the "* 2" factor is excluded here to match the effect of
        # the QA losses. ("* 2" would compensate for > half of the data
        # having no label.)
        # Previous: (loss * 0) for 6 epochs, (loss * 2) for 6 epochs.
        #           (Used 10 instead of 6 in the EMNLP paper.)
        # Now:      (loss * 1) for 12 epochs.
        total_loss += answer_loss
        losses += (answer_loss.detach(),)
    return total_loss, torch.stack(losses).unsqueeze(0), answer_score.detach()
def __init__(self):
    """Wraps SmoothL1Loss as the criterion."""
    super(LossSplines, self).__init__()
    self.loss = SmoothL1Loss()
def forward(
    self,
    input_ids,
    token_type_ids=None,
    attention_mask=None,
    masked_lm_labels=None,
    visual_feats=None,
    pos=None,
    obj_labels=None,
    matched_label=None,
    ans=None,
):
    (lang_output, visn_output), pooled_output = self.bert(
        input_ids,
        token_type_ids,
        attention_mask,
        visual_feats=(visual_feats, pos),
    )

    lang_prediction_scores, cross_relationship_score = self.cls(
        lang_output, pooled_output)
    if self.task_qa:
        answer_score = self.answer_head(pooled_output)
    else:
        # This answer_score would not be used anywhere,
        # just to keep a constant return function signature.
        answer_score = pooled_output[0][0]

    total_loss = 0.0
    loss_fct = CrossEntropyLoss(ignore_index=-1)
    losses = ()
    if masked_lm_labels is not None and self.task_mask_lm:
        masked_lm_loss = loss_fct(
            lang_prediction_scores.view(-1, self.config.vocab_size),
            masked_lm_labels.view(-1),
        )
        total_loss += masked_lm_loss
        losses += (masked_lm_loss.detach(),)
    if matched_label is not None and self.task_matched:
        matched_loss = loss_fct(cross_relationship_score.view(-1, 2),
                                matched_label.view(-1))
        total_loss += matched_loss
        losses += (matched_loss.detach(),)
    if obj_labels is not None and self.task_obj_predict:
        loss_fcts = {
            "l2": SmoothL1Loss(reduction="none"),
            "ce": CrossEntropyLoss(ignore_index=-1, reduction="none"),
        }
        total_visn_loss = 0.0
        visn_prediction_scores_dict = self.obj_predict_head(visn_output)
        for key in VISUAL_CONFIG.visual_losses:
            label, mask_conf = obj_labels[key]
            (
                output_dim,
                loss_fct_name,
                label_shape,
                weight,
            ) = VISUAL_CONFIG.visual_loss_config[key]
            visn_loss_fct = loss_fcts[loss_fct_name]
            visn_prediction_scores = visn_prediction_scores_dict[key]
            visn_loss = visn_loss_fct(
                visn_prediction_scores.view(-1, output_dim),
                label.view(*label_shape),
            )
            if visn_loss.dim() > 1:  # Regression losses
                visn_loss = visn_loss.mean(1)
            visn_loss = (visn_loss * mask_conf.view(-1)).mean() * weight
            total_visn_loss += visn_loss
            losses += (visn_loss.detach(),)
        total_loss += total_visn_loss
    if ans is not None and self.task_qa:
        # Multiply by 2 because > half of the data will not have a label.
        answer_loss = (loss_fct(answer_score.view(-1, self.num_answers),
                                ans.view(-1)) * 2)
        total_loss += answer_loss
        losses += (answer_loss.detach(),)
    return total_loss, torch.stack(losses).unsqueeze(0), answer_score.detach()
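# The reduction='none' entries in loss_fcts above are reduced by hand so a
# per-position confidence mask can be applied; a standalone illustration of
# that masking pattern (shapes are illustrative only):
import torch
from torch.nn import SmoothL1Loss

pred = torch.randn(6, 4)
label = torch.randn(6, 4)
mask_conf = torch.tensor([1., 1., 0., 1., 0., 1.])      # which positions count
per_elem = SmoothL1Loss(reduction='none')(pred, label)  # (6, 4)
per_pos = per_elem.mean(1)  # regression losses: mean over the feature dim
loss = (per_pos * mask_conf).mean()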
"""
Utility dictionaries to map a string to a class
"""
import torch
from torch.nn import MSELoss, PoissonNLLLoss, SmoothL1Loss
from torch.optim import Adam, SGD

# (The model classes, RMSELoss/MAPELoss, the decoding helpers, and BertAdam
# are project-local imports.)

pytorch_model_dict = {
    "MultiAttnHeadSimple": MultiAttnHeadSimple,
    "SimpleTransformer": SimpleTransformer,
    "TransformerXL": TransformerXL,
    "DummyTorchModel": DummyTorchModel,
    "LSTM": LSTMForecast,
    "SimpleLinearModel": SimpleLinearModel,
    "CustomTransformerDecoder": CustomTransformerDecoder
}

pytorch_criterion_dict = {
    "MSE": MSELoss(),
    "SmoothL1Loss": SmoothL1Loss(),
    "PoissonNLLLoss": PoissonNLLLoss(),
    "RMSE": RMSELoss(),
    "MAPE": MAPELoss()
}

evaluation_functions_dict = {"NSE": "", "MSE": ""}

decoding_functions = {
    "greedy_decode": greedy_decode,
    "simple_decode": simple_decode
}

pytorch_opt_dict = {"Adam": Adam, "SGD": SGD, "BertAdam": BertAdam}

scikit_dict = {}
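# Usage sketch: resolve a criterion from the registry above by its config name.
import torch

criterion = pytorch_criterion_dict["SmoothL1Loss"]
loss = criterion(torch.randn(8, 1), torch.randn(8, 1))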
def __init__(self,
             word_embeddings: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             vocab: Vocabulary,
             dropout: float = 0.5,
             num_extra_layers: int = 0,
             dd_hidden_dim=None) -> None:
    """
    :param word_embeddings: the embeddings to start with
    :param encoder: the seq2seq transformer of embeddings, can be an LSTM for example
    :param vocab: dataset input and output vocabulary
    """
    super(DomainClassifier, self).__init__(vocab)
    self.word_embeddings = word_embeddings
    self.encoder = encoder
    if dd_hidden_dim is None:
        self.h_size = encoder.get_output_dim()
    else:
        self.h_size = dd_hidden_dim

    # Add extra hidden linear layers with ReLU on top of the encoder output.
    if num_extra_layers > 0:
        extra_hiddens = [
            nn.Linear(encoder.get_output_dim(), self.h_size),
            nn.ReLU(True)
        ]
        for k in range(num_extra_layers - 1):
            extra_hiddens += [
                nn.Linear(self.h_size, self.h_size),
                nn.ReLU(True)
            ]
        self.extra_hiddens = nn.Sequential(*extra_hiddens)
    else:
        self.extra_hiddens = None

    # Linear layer to calculate the domain class
    self.hidden2label = torch.nn.Linear(
        in_features=self.h_size,
        out_features=vocab.get_vocab_size('labels'))
    self.representations = (self.extra_hiddens[-2]
                            if self.extra_hiddens is not None else self.encoder)
    self.dropout = nn.Dropout(dropout)
    self.criterion = CrossEntropyLoss()
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "hinge-loss": Loss(HingeEmbeddingLoss()),
        "huber-loss": Loss(SmoothL1Loss()),
        "cross-entropy-loss": Loss(CrossEntropyLoss()),
        "perplexity": Confidence()
    }
def get_criterion(self):
    # Lazily create and cache the criterion on first use.
    if self.criterion is None:
        self.criterion = SmoothL1Loss()
    return self.criterion