Example #1
def getLoss(ltype, device):
    if ltype == "BinaryCrossentropy":
        return BinaryCrossentropy().to(device)
    if ltype == "BCEWithLogitsLoss":
        return BCEWithLogitsLoss().to(device)
    if ltype == "FocalBCEWithLogitsLoss":
        return FocalBCEWithLogitsLoss(device).to(device)
    raise ValueError(f"unknown loss type: {ltype}")
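A minimal usage sketch of the factory above (illustrative only; it assumes the loss classes are importable from this project's loss module and that CUDA may not be available):

import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
criterion = getLoss("BCEWithLogitsLoss", device)
logits = torch.randn(4, 1, device=device)                    # raw model outputs (no sigmoid)
targets = torch.randint(0, 2, (4, 1), device=device).float()  # float targets in {0., 1.}
loss = criterion(logits, targets)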
Example #2
def train():
    tb_writer = SummaryWriter('tb_output')
    device = 'cuda:0' if CONF['GPU'] else 'cpu'
    model: nn.Module = CaseModel()
    # tb_writer.add_graph(model)
    model.train()
    
    train_dataset = CasRelDataset(path_or_json=CONF['TRAIN_DATA_PATH'])
    eval_dataset = CasRelDataset(path_or_json=CONF['EVAL_DATA_PATH'])
    dataloader = DataLoader(train_dataset,
                            batch_size=CONF['batch_size'],
                            shuffle=True,
                            collate_fn=collate_casrel)
    loss_func = BCEWithLogitsLoss()
    best_loss = float('inf')
    optim = Adam(model.parameters(), lr=1e-5)
    global_steps = 0

    for epoch_num in range(Epochs):
        epoch_loss = 0.0
        model = model.to(device=device)
        model.train()  # metric() below may leave the model in eval mode
        for (batch_tokens,
             batch_mask,
             batch_sub_head,
             batch_sub_tail,
             batch_sub_head_arr,
             batch_sub_tail_arr,
             batch_obj_head_arr,
             batch_obj_tail_arr) in tqdm(dataloader, desc=f'Epoch {epoch_num:3.0f}/{Epochs}', total=len(dataloader)):
            (batch_tokens, batch_mask, batch_sub_head, batch_sub_tail,
             batch_sub_head_arr, batch_sub_tail_arr,
             batch_obj_head_arr, batch_obj_tail_arr) = [
                t.to(device) for t in (batch_tokens, batch_mask, batch_sub_head,
                                       batch_sub_tail, batch_sub_head_arr, batch_sub_tail_arr,
                                       batch_obj_head_arr, batch_obj_tail_arr)]
            sub_head_pred, sub_tail_pred, obj_head_pred, obj_tail_pred = model(batch_tokens,
                                                                               batch_mask,
                                                                               batch_sub_head,
                                                                               batch_sub_tail)
            sub_head_loss = loss_func(sub_head_pred.squeeze(), batch_sub_head_arr)
            sub_tail_loss = loss_func(sub_tail_pred.squeeze(), batch_sub_tail_arr)
            obj_head_loss = loss_func(obj_head_pred, batch_obj_head_arr)
            obj_tail_loss = loss_func(obj_tail_pred, batch_obj_tail_arr)
            loss = sub_head_loss + sub_tail_loss + obj_head_loss + obj_tail_loss
            epoch_loss += loss.item()  # use .item() so the graph is not retained across batches
            logger.info(f'batch loss: {loss.item():.4f}')
            global_steps += 1
            tb_writer.add_scalar('train_loss', loss.item(), global_steps)
            optim.zero_grad()
            loss.backward()
            optim.step()
        # end one epoch

        p, r, f = metric(model.to('cpu'), eval_dataset)
        logger.info(f'epoch:{epoch_num + 1:3.0f}, precision: {p:5.4f}, recall: {r:5.4f}, f1-score: {f:5.4f}')
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            save_model = CONF['SAVE_MODEL']
            if not os.path.exists(os.path.dirname(save_model)):
                os.makedirs(os.path.dirname(save_model))
            torch.save(model.state_dict(), save_model)
Example #3
    def __init__(self, args, num_features, time_length):
        """[summary]

        Args:
            args ([type]): [description]
            time_length (int): Total timesteps in dataset.
        """
        super(DySAT, self).__init__()
        self.args = args
        if args.window < 0:
            self.num_time_steps = time_length
        else:
            self.num_time_steps = min(time_length, args.window + 1)  # window = 0 => only self.
        self.num_features = num_features

        self.structural_head_config = list(map(int, args.structural_head_config.split(",")))
        self.structural_layer_config = list(map(int, args.structural_layer_config.split(",")))
        self.temporal_head_config = list(map(int, args.temporal_head_config.split(",")))
        self.temporal_layer_config = list(map(int, args.temporal_layer_config.split(",")))
        self.spatial_drop = args.spatial_drop
        self.temporal_drop = args.temporal_drop

        self.structural_attn, self.temporal_attn = self.build_model()

        self.bceloss = BCEWithLogitsLoss()
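The constructor only builds the layers; a sketch of how self.bceloss is commonly applied for link prediction with negative sampling (an assumption about usage, not the original DySAT code; pos_score and neg_score are hypothetical tensors of raw pair scores):

    def get_link_loss(self, pos_score, neg_score):
        # Sketch only: BCE-with-logits over positive and sampled negative node pairs.
        pos_labels = torch.ones_like(pos_score)
        neg_labels = torch.zeros_like(neg_score)
        return self.bceloss(pos_score, pos_labels) + self.bceloss(neg_score, neg_labels)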
Example #4
    def __init__(self,
                 weights,
                 per_image=False,
                 skip_empty=True,
                 channel_weights=[1, 0.1, 0.1],
                 channel_losses=None):
        super().__init__()
        self.weights = weights
        self.bce = BCEWithLogitsLoss()
        self.dice = DiceLoss(per_image=per_image)
        self.gdl = GeneralizedDice()
        self.jaccard = JaccardLoss(per_image=per_image)
        self.focal = FocalLoss2d()
        self.mapping = {
            'bce': self.bce,
            'dice': self.dice,
            'gdl': self.gdl,
            'focal': self.focal,
            'jaccard': self.jaccard
        }
        self.expect_sigmoid = {'dice', 'focal', 'jaccard', "gdl"}
        self.per_channel = {'dice', 'jaccard', "gdl"}
        self.values = {}
        self.channel_weights = channel_weights
        self.channel_losses = channel_losses
        self.skip_empty = skip_empty
    def forward(self,
                input_ids,
                attention_mask=None,
                token_type_ids=None,
                position_ids=None,
                head_mask=None,
                labels=None):

        outputs = self.bert(input_ids,
                            attention_mask=attention_mask,
                            token_type_ids=token_type_ids,
                            position_ids=position_ids,
                            head_mask=head_mask)

        pooled_output = outputs[1]

        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        # add hidden states and attention if they are here
        outputs = (logits, ) + outputs[2:]

        if labels is not None:
            if self.num_labels == 1:
                # single label/logit: binary classification with BCEWithLogitsLoss
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits.view(-1), labels.view(-1))
            else:
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels),
                                labels.view(-1))
            outputs = (loss, ) + outputs

        return outputs  # (loss), logits, (hidden_states), (attentions)
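A small illustration of the target dtypes each branch above expects (illustrative tensors, not part of the original model):

import torch

# num_labels == 1: BCEWithLogitsLoss wants float targets in {0., 1.} with the same shape as the logits
binary_labels = torch.tensor([0., 1., 1., 0.])

# num_labels > 1: CrossEntropyLoss wants integer class indices
multiclass_labels = torch.tensor([2, 0, 1, 3])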
    def __init__(self,
                 weights,
                 per_image=False,
                 skip_empty=False,
                 channel_weights=[1, 0.2, 0.1],
                 channel_losses=None):
        super().__init__()
        self.weights = weights
        self.bce = BCEWithLogitsLoss()
        self.dice = DiceLoss()
        self.lcdice = LogCoshDiceLoss()
        self.jaccard = JaccardLoss(per_image=per_image)
        self.focal = BinaryFocalLoss()
        self.mapping = {
            'bce': self.bce,
            'dice': self.dice,
            'lcdice': self.lcdice,
            'focal': self.focal,
            'jaccard': self.jaccard
        }
        self.expect_sigmoid = {'dice', 'jaccard', 'lcdice'}
        self.per_channel = {'dice', 'jaccard', 'lcdice', "focal", "bce"}
        self.values = {}
        self.channel_weights = channel_weights
        self.channel_losses = channel_losses
        self.skip_empty = skip_empty
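Neither variant of this combined loss shows its forward pass; a minimal sketch of how such a mapping is typically evaluated (treating weights as a name-to-coefficient dict and the sigmoid handling are assumptions, not the original implementation):

    def forward(self, outputs, targets):
        # Sketch only: weighted sum of the configured losses.
        probs = torch.sigmoid(outputs)  # losses listed in self.expect_sigmoid want probabilities
        total = 0.0
        for name, weight in self.weights.items():
            inp = probs if name in self.expect_sigmoid else outputs
            value = self.mapping[name](inp, targets)
            self.values[name] = value
            total = total + weight * value
        return total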
Example #7
    def __init__(self, loss_names, loss_weights, device, num_classes):
        """
        :param loss_names: list of loss names,
            possible losses=['jaccard', 'nlll', 'crossentropy', 'smooth_jaccard', 'focal', 'dice']
        :param loss_weights: list of weight coefficients for each loss from loss_names.
        :param device: execution device.
        :param num_classes: number of classes in training data.
        """
        super(MultiLoss, self).__init__()
        assert len(loss_names) == len(loss_weights)
        self.device = device
        self.losses = dict()
        self.loss_weights = dict()
        self.num_classes = num_classes

        for loss, weight in zip(loss_names, loss_weights):
            loss = loss.lower()

            if loss == 'jaccard':
                if self.num_classes > 1:
                    self.losses[loss] = JaccardLossMulti()
                else:
                    self.losses[loss] = JaccardLoss()

            elif loss == 'nlll':
                # NLLLoss expects log-probabilities (a log-softmax output layer);
                # use CrossEntropyLoss when the network emits raw logits
                self.losses[loss] = NLLLoss()

            elif loss == 'crossentropy':
                # CrossEntropyLoss works on raw logits; for the binary (single-logit)
                # case use BCEWithLogitsLoss instead
                if self.num_classes > 1:
                    self.losses[loss] = CrossEntropyLoss(reduction='mean')
                else:
                    self.losses[loss] = BCEWithLogitsLoss(reduction='mean')

            elif loss == 'smooth_jaccard':
                if self.num_classes > 1:
                    raise ValueError("ERROR: for multiclass case loss is not implemented.")
                else:
                    self.losses[loss] = SmoothJaccardLoss()

            elif loss == 'focal':
                if self.num_classes > 1:
                    self.losses[loss] = FocalLossMulti(size_average=False)
                else:
                    self.losses[loss] = FocalLossBinary(size_average=False)

            elif loss == 'dice' and self.num_classes < 2:
                self.losses[loss] = DiceLoss()

            else:
                raise ValueError(loss)

            self.loss_weights[loss] = weight
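The snippet ends with the constructor; a matching forward pass might look like the sketch below (the simple weighted sum is an assumption based on loss_weights, not the original code):

    def forward(self, outputs, targets):
        # Sketch only: weighted sum over the configured losses.
        total = 0.0
        for name, loss_fn in self.losses.items():
            total = total + self.loss_weights[name] * loss_fn(outputs, targets)
        return total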
    def __init__(self, adapterClassifier, id2label, lr) -> None:
        super().__init__()

        self.classifier = adapterClassifier
        self.id2label = id2label

        self.lr = lr
        self.criterion = BCEWithLogitsLoss(pos_weight=torch.full((len(id2label),), 1.))

        self.sig = Sigmoid()

        self.declare_metrics(self.id2label)
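The pos_weight here is initialised to all ones, which is a no-op; for imbalanced multi-label data it is usually derived from label frequencies. A hedged sketch (compute_pos_weight is a hypothetical helper; label_matrix is a multi-hot tensor of shape (num_samples, num_labels)):

import torch

def compute_pos_weight(label_matrix):
    # pos_weight per label = (#negatives / #positives)
    positives = label_matrix.sum(dim=0).clamp(min=1)
    negatives = label_matrix.shape[0] - positives
    return negatives / positives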
Example #9
    def __init__(self, device, alpha=1, gamma=2, reduce=True):
        super(FocalBCEWithLogitsLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduce = reduce
        self.bce = BCEWithLogitsLoss(reduction='none').to(device)
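Only the constructor is shown; a forward pass consistent with the standard focal-loss formulation (alpha * (1 - p_t)^gamma * BCE) might look like this sketch (an assumption, not the class's original code):

    def forward(self, logits, targets):
        bce_loss = self.bce(logits, targets)   # unreduced, element-wise BCE
        p_t = torch.exp(-bce_loss)             # probability assigned to the true class
        focal = self.alpha * (1 - p_t) ** self.gamma * bce_loss
        return focal.mean() if self.reduce else focal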
Example #10
def train_binary_classifier(true_inputs,
                            false_inputs,
                            encoder,
                            params,
                            num_val_samples=1000):

    outputmodelname = params.outputmodelname + "_binary_clf"
    if params.load_binary_clf:
        binary_classifier = BinaryClassifier(params.embedding_dim, 512, 0.,
                                             0.).to(encoder.device)
        checkpoint = torch.load(os.path.join(params.outputdir,
                                             outputmodelname),
                                map_location=params.device)
        binary_classifier.load_state_dict(checkpoint["model_state_dict"])
        return binary_classifier

    inputs = true_inputs + false_inputs
    t = ([1] * len(true_inputs)) + ([0] * len(false_inputs))

    # get validation set
    indices = list(range(len(inputs)))
    inputs, t = np.array(inputs), np.array(t)
    shuffle(indices)
    val_inputs = inputs[indices[-num_val_samples:]]
    val_targets = t[indices[-num_val_samples:]]
    inputs = inputs[indices[:-num_val_samples]]
    t = t[indices[:-num_val_samples]]
    indices = list(range(len(inputs)))

    binary_classifier = BinaryClassifier(
        params.embedding_dim, 512, params.dropout_binary,
        params.gaussian_noise_binary).to(encoder.device)
    opt = torch.optim.Adam(binary_classifier.parameters(), lr=params.lr_bclf)
    freeze(encoder)
    encoder.eval()
    loss_f = BCEWithLogitsLoss()

    def save_clf():
        checkpoint = {"model_state_dict": binary_classifier.state_dict()}
        torch.save(checkpoint, os.path.join(params.outputdir, outputmodelname))

    best_acc = evaluate(val_inputs, val_targets, encoder, binary_classifier,
                        params)
    bsize = params.batch_size
    correct = 0.
    for e in range(params.n_epochs_binary):

        # shuffle data in each epoch
        shuffle(indices)
        inputs = inputs[indices]
        t = t[indices]

        binary_classifier.train()
        losses = []
        for idx in range(0, len(inputs), bsize):
            ib = inputs[idx:idx + bsize]
            tb = t[idx:idx + bsize]

            tb = torch.tensor(tb, device=encoder.device).view(-1, 1).float()
            with torch.no_grad():
                embeddings = encoder(ib)
            preds = binary_classifier(embeddings)
            # preds are raw logits (BCEWithLogitsLoss is used), so threshold at 0, not 0.5
            acc = ((preds > 0) == tb.bool()).sum().item()
            loss = loss_f(preds, tb)
            correct += acc

            opt.zero_grad()
            loss.backward()
            opt.step()
            losses.append(loss.item())

            if (idx / bsize) % params.log_freq == 0:
                avg_loss = np.array(losses[-params.log_freq:]).mean()
                print(
                    "Binary classification step {}<->{}: loss {} ; t-acc: {}, v-acc: {}"
                    .format(e, idx, avg_loss,
                            correct / float(params.log_freq * bsize),
                            best_acc))
                correct = 0.

        val_acc = evaluate(val_inputs, val_targets, encoder, binary_classifier,
                           params)
        if val_acc > best_acc:
            best_acc = val_acc
            save_clf()
        print("Loss in epoch {}: {}".format(e, np.array(losses).mean()))

    return binary_classifier
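The evaluate helper used above is not shown; a sketch consistent with how it is called (signature and batching are assumptions):

import torch

def evaluate(inputs, targets, encoder, classifier, params, batch_size=256):
    # Sketch only: accuracy of the binary classifier on frozen encoder embeddings.
    classifier.eval()
    correct = 0
    with torch.no_grad():
        for idx in range(0, len(inputs), batch_size):
            embeddings = encoder(inputs[idx:idx + batch_size])
            logits = classifier(embeddings).view(-1)
            preds = (logits > 0).float()  # threshold raw logits at 0
            tb = torch.tensor(targets[idx:idx + batch_size],
                              device=logits.device, dtype=torch.float32)
            correct += (preds == tb).sum().item()
    return correct / len(inputs)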
Example #11
def main():
    gpu_num = int(sys.argv[1])
    random_seed = (int(time.time()) * (gpu_num + 1)) % (2 ** 31 - 1)

    np.random.seed(random_seed)
    random.seed(random_seed)
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed(random_seed)

    HYPERPARAMETERS = {
        'batch_size': choice([4096, 8192, 16384]),  # [8192//2, 8192*2]
        'nn_encoder_out': choice(list(range(10, 100))),  # [20,40]
        'enc_hidden_layer_k': choice(np.linspace(0.5, 4.0, 8)),
        # [0.5, 4] divisor of 'nn_encoder_out': each feature encoder's hidden layer size is
        # nn_encoder_out / k, e.g. nn_encoder_out=30 with k=2 gives 15
        'n_splits': 10,  # n folds
        'optimizer': 'adam',  # ['RMSprop','adam']
        'lr': choice(np.linspace(0.001, 0.01, 10)),  # [0.01,0.001]
        'use_dropout': choice([True,False]),
        'use_bn': choice([True,False]),
        'lr_sheduler_factor': choice(np.linspace(0.1, 0.9, 9)),  # [0.1,0.9]
        'lr_sheduler_patience': choice(list(range(3, 15))),  # [3,15]
        'lr_sheduler_min_lr': 0.0001,  # not very important, but doesn't have to be too small
        'max_epoch': 9999,  # we want to use early_stop so just need to be big
        'early_stop_wait': 20,  # larger is better but slower; 20 is a reasonable default
        'upsampling_times': choice(list(range(3, 20))),  # [3,20] more = slower
        'upsampling_class_balancer': choice(list(range(2, 10)))  # [2, 9]
    }

    ans = {}
    for i in range(6):
        with open(f"../output/hpo_logs_{i}.json" ,"r") as f:
            for item in f.readlines():
                d = eval(item)
                ans[d["target"]] = d["params"]

    score = sorted(ans)[-gpu_num - 1]
    params = ans[score]

    params['batch_size'] = int(params['batch_size'])
    params['nn_encoder_out'] = int(params['nn_encoder_out'])
    params['lr_sheduler_patience'] = int(params['lr_sheduler_patience'])
    params['upsampling_times'] = int(params['upsampling_times'])
    params['upsampling_class_balancer'] = int(params['upsampling_class_balancer'])
    params['upsampling_class_balancer'] = min(params['upsampling_class_balancer'],
                                              params['upsampling_times'])
    params['use_bn'] = params['use_bn'] > 0.5
    params['use_dropout'] = params['use_dropout'] > 0.5

    for key in params:
        HYPERPARAMETERS[key] = params[key]

    with open(f"log_{gpu_num}.txt", "a") as f:
        for key in HYPERPARAMETERS:
            f.write(key + " " + str(HYPERPARAMETERS[key]) + "\n")
            print(key, HYPERPARAMETERS[key])

    print(score)
    print("\nSEED:", random_seed)
    print("GPU:", gpu_num, "\n")

    input_path = "../input/"
    output_path = "../output/"

    print("torch:", torch.__version__)
    print("loading data...")

    train_df = pd.read_csv(input_path + 'train.csv.zip')

    label = train_df.target
    train = train_df.drop(['ID_code', 'target'], axis=1)
    cols = train.columns

    test = pd.read_csv(input_path + 'test.csv.zip')
    test = test.drop(['ID_code'], axis=1)

    test_filtered = pd.read_pickle(input_path + 'test_filtered.pkl')
    test_filtered = test_filtered.loc[:, train.columns]

    train_test = pd.concat([train, test_filtered]).reset_index(drop=True)

    vcs_train_test = {}

    for col in tqdm(train.columns):
        vcs_train_test[col] = train_test.loc[:, col].value_counts()

    generate_features(test, vcs_train_test, cols)

    ups = UpsamplingPreprocessor(HYPERPARAMETERS['upsampling_times'], HYPERPARAMETERS['upsampling_class_balancer'])

    loss_f = BCEWithLogitsLoss()

    batch_size = HYPERPARAMETERS['batch_size']
    N_IN = 2

    gpu = torch.device(f'cuda:{gpu_num % 4}')
    cpu = torch.device('cpu')

    folds = StratifiedKFold(n_splits=HYPERPARAMETERS['n_splits'], shuffle=True, random_state=42)
    oof = np.zeros(len(train))
    predictions = np.zeros(len(test))
    for fold_, (trn_idx, val_idx) in enumerate(folds.split(train.values, label)):
        print("Fold {}".format(fold_))

        X_train, Train_label = ups.fit_transform(train.loc[trn_idx], label.loc[trn_idx])
        X_val, Val_label = train.loc[val_idx], label.loc[val_idx]
        generate_features(X_train, vcs_train_test, cols)
        generate_features(X_val, vcs_train_test, cols)
        cols_new = X_train.columns
        scaler = StandardScaler()
        X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=cols_new)
        X_val = pd.DataFrame(scaler.transform(X_val), columns=cols_new)
        test_new = pd.DataFrame(scaler.transform(test), columns=cols_new)

        train_tensors = []
        val_tensors = []
        test_tensors = []

        for fff in range(200):
            cols_to_use = [f'var_{fff}', f'var_{fff}_1_flag']
            train_t = X_train.loc[:, cols_to_use].values
            val_t = X_val.loc[:, cols_to_use].values
            test_t = test_new.loc[:, cols_to_use].values
            train_tensors.append(torch.tensor(train_t, requires_grad=False, device=cpu, dtype=torch.float32))
            val_tensors.append(torch.tensor(val_t, requires_grad=False, device=cpu, dtype=torch.float32))

            test_tensors.append(torch.tensor(test_t, requires_grad=False, device=gpu, dtype=torch.float32))

        train_tensors = torch.cat(train_tensors, 1).view((-1, 200, N_IN))
        val_tensors = torch.cat(val_tensors, 1).view((-1, 200, N_IN))
        test_tensors = torch.cat(test_tensors, 1).view((-1, 200, N_IN))
        # Train_label / Val_label may be numpy arrays (after upsampling) or pandas Series
        y_train = Train_label.values if hasattr(Train_label, "values") else Train_label
        y_train_t = torch.tensor(y_train, requires_grad=False, device=cpu, dtype=torch.float32)

        y_val = Val_label.values if hasattr(Val_label, "values") else Val_label
        y_val_t = torch.tensor(y_val, requires_grad=False, device=cpu, dtype=torch.float32)

        nn = NN(D_in=N_IN,
                enc_out=HYPERPARAMETERS['nn_encoder_out'],
                enc_hidden_layer_k=HYPERPARAMETERS['enc_hidden_layer_k'],
                use_dropout=HYPERPARAMETERS['use_dropout'],
                use_BN=HYPERPARAMETERS['use_bn']).to(gpu)

        if HYPERPARAMETERS['optimizer'] == 'adam':
            optimizer = Adam(params=nn.parameters(), lr=HYPERPARAMETERS['lr'])
        elif HYPERPARAMETERS['optimizer'] == 'RMSprop':
            optimizer = RMSprop(params=nn.parameters(), lr=HYPERPARAMETERS['lr'])

        scheduler = ReduceLROnPlateau(optimizer, 'max', factor=HYPERPARAMETERS['lr_sheduler_factor'],
                                      patience=HYPERPARAMETERS['lr_sheduler_patience'],
                                      min_lr=HYPERPARAMETERS['lr_sheduler_min_lr'], verbose=True)

        best_AUC = 0
        early_stop = 0

        for epoch in tqdm(range(HYPERPARAMETERS['max_epoch'])):
            nn.train()
            dl = batch_iter(train_tensors, y_train_t, batch_size=batch_size)
            for data, label_t in dl:
                pred = nn(data.to(gpu))
                loss = loss_f(pred, torch.unsqueeze(label_t.to(gpu), -1))
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            with torch.no_grad():
                nn.eval()
                blobs = []
                for batch in torch.split(val_tensors, batch_size):
                    blob = nn(batch.to(gpu)).data.cpu().numpy().flatten()
                    blobs.append(blob)
                val_pred = np.concatenate(blobs)
                AUC = roc_auc_score(label[val_idx], val_pred)
                print('EPOCH {}'.format(epoch))
                print('LOSS: ', loss_f(torch.tensor(val_pred), y_val_t))
                print('AUC: ', AUC)
                scheduler.step(AUC)

                if AUC > best_AUC:
                    early_stop = 0
                    best_AUC = AUC
                    torch.save(nn, output_path + f'best_auc_nn_{gpu_num}.pkl')
                else:
                    early_stop += 1
                    print('SCORE IS NOT THE BEST. Early stop counter: {}'.format(early_stop))

                if early_stop == HYPERPARAMETERS['early_stop_wait']:
                    print(f'EARLY_STOPPING NOW, BEST AUC = {best_AUC}')
                    break
                print('=' * 50)

            best_model = torch.load(output_path + f'best_auc_nn_{gpu_num}.pkl')

        with torch.no_grad():
            best_model.eval()
            blobs = []
            for batch in torch.split(val_tensors, batch_size):
                blob = best_model(batch.to(gpu)).data.cpu().numpy().flatten()
                blobs.append(blob)

            oof[val_idx] = np.concatenate(blobs)

            auc = round(roc_auc_score(Val_label, oof[val_idx]), 5)
            with open(f"log_{gpu_num}.txt", "a") as f:
                f.write(str(fold_) + " " + str(auc) + "\n")

            blobs = []
            for batch in torch.split(test_tensors, batch_size):
                blob = best_model(batch).data.cpu().numpy().flatten()
                blobs.append(blob)
        predictions_test = np.concatenate(blobs)

        predictions += predictions_test / folds.n_splits

    auc = round(roc_auc_score(label, oof), 5)
    print("CV score: {:<8.5f}".format(auc))

    with open(f"log_{gpu_num}.txt", "a") as f:
        f.write("OOF " + str(auc) + "\n")

    np.save(output_path + f"nn_{gpu_num}_{auc}_oof.npy", oof)
    np.save(output_path + f"nn_{gpu_num}_{auc}_test.npy", predictions)
Example #12
        loss += loss_class_laso_out
        loss += loss_recon
        # don't include subtraction for now (a little weird)
        # loss += loss_class_S
        # loss += loss_recon_S
        loss += loss_class_U
        loss += loss_recon_U
        loss += loss_class_I
        loss += loss_recon_I

        return loss


if __name__ == "__main__":
    # RUN FOR SANITY CHECK
    from torch.nn.modules.loss import BCEWithLogitsLoss, MSELoss
    laso = LaSO()
    laso_loss = LaSOLoss(BCEWithLogitsLoss(), MSELoss())

    laso.zero_grad()
    # this should be None
    print("Classifier fully connected layer gradient",
          laso.classifier_model.fc.weight.grad)

    x = laso(torch.randn((8, 3, 244, 244)).to(device))
    loss = laso_loss(x, torch.randint(0, 2, (8, 20)).to(device))

    loss.backward()
    # this should be nonzero
    print("Classifier fully connected layer gradient",
          laso.classifier_model.fc.weight.grad)
    def __init__(self, weight=None, size_average=True):
        super().__init__()
        self.bce = BCEWithLogitsLoss()