Example #1
0
def train(dataroot, classifier_name='cnn'):
    balance = get_balancing_technique()
    K = 10
    fold_prefix = str(K) + ('bal_fold_{}.csv' if balance == 'explicit'
                            else 'r_fold_{}.csv')

    class_weight = get_class_weights(dataroot)

    classifier_args, config = get_args(classifier_name, class_weight)
    pre_fingerprint = join(dataroot, 'c_{}'.format(classifier_name))
    fingerprint = join(pre_fingerprint + config, 'K_{}'.format(K))
    print(fingerprint)
    folds_data = load_folds(dataroot, fold_prefix, K)
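    # K-fold cross-validation: in each round the fold at test_index is held out
    # and the classifier is trained on the remaining K-1 folds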
    for test_index in range(K):
        print('-----------{}----------'.format(test_index))
        X_train = np.concatenate(
            [fold[0] for i, fold in enumerate(folds_data) if i != test_index],
            axis=0)
        y_train = np.concatenate(
            [fold[1] for i, fold in enumerate(folds_data) if i != test_index],
            axis=0)

        logdir = join(fingerprint, 'log', '{}'.format(test_index))
        ensure_dir(logdir)
        classifier_args['runs_dir'] = logdir
        clf = get_classifier(classifier_args)
        clf.fit(X_train, y_train)
        modelname = join(classifier_args['runs_dir'], 'model.pkl')
        pickle.dump(clf, open(modelname, 'wb'))
Example #2
0
def train(dataroot, classifier_name='cnn'):
    balance = get_balancing_technique()
    K = 10
    fold_prefix = '{}bal_fold_{}.csv' if balance == 'explicit' else '{}r_fold_{}.csv'

    class_weight = get_class_weights(dataroot)

    classifier_args, config = get_args(classifier_name, class_weight)
    pre_fingerprint = join(dataroot, 'c_{}'.format(classifier_name))
    fingerprint = join(pre_fingerprint + config, 'K_{}'.format(K))
    print(fingerprint)
    num_epochs = 40
    for test_index in range(K):
        print('-----------{}----------'.format(test_index))
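        # hold out one fold for testing; the first of the remaining folds becomes
        # the validation set and the rest are used for training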
        dev_indices = [i for i in range(K) if i != test_index]
        val_index = dev_indices[0]
        train_indices = dev_indices[1:]
        val_csv = join(dataroot, fold_prefix.format(K, val_index))
        list_of_train_csvs = [
            join(dataroot, fold_prefix.format(K, i)) for i in train_indices
        ]

        logdir = join(fingerprint, 'log', '{}'.format(test_index))
        ensure_dir(logdir)
        classifier_args['runs_dir'] = logdir
        clf = get_classifier(classifier_args)
        clf.fit(list_of_train_csvs, val_csv, num_epochs)
    def train(self, epochs=500):
        opt = tf.keras.optimizers.Adam()
        loss = tf.keras.losses.categorical_crossentropy
        X, Y = self.get_dataset()

        self.compile(optimizer=opt, loss=loss)
        self.fit(
            X,
            Y,
            batch_size=64,
            epochs=epochs,
            class_weight=get_class_weights(
                np.argmax(Y, axis=1), smooth_factor=0.05
            )  # weight under-represented classes almost equally
        )
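
# The call above presumes a frequency-based get_class_weights helper with label
# smoothing. A minimal sketch of such a helper (an illustrative assumption, not
# necessarily the implementation behind this example):
from collections import Counter


def get_class_weights(y, smooth_factor=0.0):
    """Return a {class: weight} dict that up-weights rare classes.

    smooth_factor adds a fraction of the majority count to every class, pulling
    the weights closer together (with smooth_factor=0.05 the under-represented
    classes end up weighted almost equally, as the comment above notes).
    """
    counter = Counter(y)
    if smooth_factor > 0:
        p = max(counter.values()) * smooth_factor
        for k in counter:
            counter[k] += p
    majority = max(counter.values())
    return {cls: majority / count for cls, count in counter.items()}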
def main():
    parser = options.get_parser('Trainer')
    options.add_dataset_args(parser)
    options.add_preprocessing_args(parser)
    options.add_model_args(parser)
    options.add_optimization_args(parser)
    options.add_checkpoint_args(parser)
    
    args = parser.parse_args()
    print(args)
    
    args.cuda = not args.disable_cuda and torch.cuda.is_available()
    
    # checkpoint
    checkpoint_dir = os.path.dirname(args.checkpoint)
    if not os.path.isdir(checkpoint_dir):
        os.mkdir(checkpoint_dir)
    
    # load dataset
    train_raw_corpus, val_raw_corpus, test_raw_corpus = utils.load_corpus(args.processed_dir)
    assert train_raw_corpus and val_raw_corpus and test_raw_corpus, 'Corpus not found, please run preprocess.py to obtain corpus!'
    train_corpus = [(line.sent, line.type, line.p1, line.p2) for line in train_raw_corpus]
    val_corpus = [(line.sent, line.type, line.p1, line.p2) for line in val_raw_corpus]
    test_corpus = [(line.sent, line.type, line.p1, line.p2) for line in test_raw_corpus]
    
    
    start_epoch = 0
    caseless = args.caseless
    batch_size = args.batch_size
    num_epoch = args.num_epoch
    
    # preprocessing
    sents = [tup[0] for tup in train_corpus + val_corpus]
    feature_map = utils.build_vocab(sents, min_count=args.min_count, caseless=caseless)
    ##
#    target_map = {c:i for i, c in enumerate(['null', 'true'])}
    target_map = ddi2013.target_map
    train_features, train_targets = utils.build_corpus(train_corpus, feature_map, target_map, caseless)
    val_features, val_targets = utils.build_corpus(val_corpus, feature_map, target_map, caseless)
    test_features, test_targets = utils.build_corpus(test_corpus, feature_map, target_map, caseless)
    
    class_weights = torch.Tensor(utils.get_class_weights(train_targets)) if args.class_weight else None
    train_loader = utils.construct_bucket_dataloader(train_features, train_targets, feature_map['PAD'], batch_size, args.position_bound, is_train=True)
    val_loader = utils.construct_bucket_dataloader(val_features, val_targets, feature_map['PAD'], batch_size, args.position_bound, is_train=False)
    test_loader = utils.construct_bucket_dataloader(test_features, test_targets, feature_map['PAD'], batch_size, args.position_bound, is_train=False)
    print('Preprocessing done! Vocab size: {}'.format(len(feature_map)))
    
    # build model
    vocab_size = len(feature_map)
    tagset_size = len(target_map)
    model = utils.build_model(args, vocab_size, tagset_size)
    
    # loss
    criterion = utils.build_loss(args, class_weights=class_weights)
    
    # load states
    if os.path.isfile(args.load_checkpoint):
        print('Loading checkpoint file from {}...'.format(args.load_checkpoint))
        checkpoint_file = torch.load(args.load_checkpoint)
        start_epoch = checkpoint_file['epoch'] + 1
        model.load_state_dict(checkpoint_file['state_dict'])
    #    optimizer.load_state_dict(checkpoint_file['optimizer'])
    else:
        print('no checkpoint file found: {}, train from scratch...'.format(args.load_checkpoint))
        if not args.rand_embedding:
            pretrained_word_embedding, in_doc_word_indices = utils.load_word_embedding(args.emb_file, feature_map, args.embedding_dim)
            print(pretrained_word_embedding.size())
            print(vocab_size)
            model.load_pretrained_embedding(pretrained_word_embedding)
            if args.disable_fine_tune:
                model.update_part_embedding(in_doc_word_indices) # update only non-pretrained words
        model.rand_init(init_embedding=args.rand_embedding)
    
    # trainer
    trainer = SeqTrainer(args, model, criterion)
    
    if os.path.isfile(args.load_checkpoint):
        dev_prec, dev_rec, dev_f1, _ = evaluate(trainer, val_loader, target_map, cuda=args.cuda)
        test_prec, test_rec, test_f1, _ = evaluate(trainer, test_loader, target_map, cuda=args.cuda)
        print('checkpoint dev_prec: {:.4f}, dev_rec: {:.4f}, dev_f1: {:.4f}, test_prec: {:.4f}, test_rec: {:.4f}, test_f1: {:.4f}'.format(
            dev_prec, dev_rec, dev_f1, test_prec, test_rec, test_f1))
    
    track_list = []
    best_f1 = float('-inf')
    patience_count = 0
    start_time = time.time()
    
    
    for epoch in range(start_epoch, num_epoch):
        epoch_loss = train(train_loader, trainer, epoch)
    
        # update lr
        trainer.lr_step()
           
        dev_prec, dev_rec, dev_f1, dev_loss = evaluate(trainer, val_loader, target_map, cuda=args.cuda)
        if dev_f1 >= best_f1:
            patience_count = 0
            best_f1 = dev_f1
    
            test_prec, test_rec, test_f1, _ = evaluate(trainer, test_loader, target_map, cuda=args.cuda)
    
            track_list.append({'epoch': epoch, 'loss': epoch_loss, 
                'dev_prec': dev_prec, 'dev_rec': dev_rec, 'dev_f1': dev_f1, 'dev_loss': dev_loss, 
                'test_prec': test_prec, 'test_rec': test_rec, 'test_f1': test_f1})
            print('epoch: {}, loss: {:.4f}, dev_f1: {:.4f}, dev_loss: {:.4f}, test_f1: {:.4f}\tsaving...'.format(epoch, epoch_loss, dev_f1, dev_loss, test_f1))
    
            try:
                utils.save_checkpoint({
                            'epoch': epoch,
                            'state_dict': model.state_dict(),
                            'optimizer': trainer.optimizer.state_dict(),
                            'f_map': feature_map,
                            't_map': target_map,
                        }, {'track_list': track_list,
                            'args': vars(args)
                            }, args.checkpoint + '_lstm')
            except Exception as inst:
                print(inst)
        else:
            patience_count += 1
            track_list.append({'epoch': epoch,'loss': epoch_loss, 'dev_prec': dev_prec, 'dev_rec': dev_rec, 'dev_f1': dev_f1, 'dev_loss': dev_loss})
            print('epoch: {}, loss: {:.4f}, dev_f1: {:.4f}, dev_loss: {:.4f}'.format(epoch, epoch_loss, dev_f1, dev_loss))
    
        print('epoch: {} in {} take: {} s'.format(epoch, args.num_epoch, time.time() - start_time))
        if patience_count >= args.patience:
            break
Example #5
0
def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("-t", "--train", required=True, help="Train type")
    args = vars(ap.parse_args())

    #parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    TRAIN = args["train"]
    if (TRAIN not in ["SEG", "CLASS"]):
        raise ValueError(f"{TRAIN} not defined")

    # base config
    base_dir = cp["BASE"].get("base_dir")

    # train config
    output_dir = cp[TRAIN].get("output_dir")
    base_model_name = cp[TRAIN].get("model_name")

    use_base_model_weights = cp[TRAIN].getboolean("use_base_model_weights")
    use_trained_model_weights = cp[TRAIN].getboolean(
        "use_trained_model_weights")
    use_best_weights = cp[TRAIN].getboolean("use_best_weights")

    output_weights_name = cp[TRAIN].get("output_weights_name")

    epochs = cp[TRAIN].getint("epochs")
    batch_size = cp[TRAIN].getint("batch_size")
    initial_learning_rate = cp[TRAIN].getfloat("initial_learning_rate")

    generator_workers = cp[TRAIN].getint("generator_workers")
    image_dimension = cp[TRAIN].getint("image_dimension")

    train_steps = cp[TRAIN].get("train_steps")
    validation_steps = cp[TRAIN].get("validation_steps")

    patience_reduce_lr = cp[TRAIN].getint("patience_reduce_lr")
    patience_early_stop = cp[TRAIN].getint("patience_early_stop")
    min_lr = cp[TRAIN].getfloat("min_lr")

    dataset_csv_dir = cp[TRAIN].get("dataset_csv_dir")

    show_model_summary = cp[TRAIN].getboolean("show_model_summary")

    if (TRAIN == "CLASS"):
        positive_weights_multiply = cp[TRAIN].getfloat(
            "positive_weights_multiply")
        class_names = cp[TRAIN].get("class_names").split(",")
        mask_folder = cp[TRAIN].get("mask_folder")
        patch_size = cp[TRAIN].getint("patch_size")
        N = cp["TEST"].getint("N")
    else:
        num_classes = cp[TRAIN].getint("num_classes")
        class_names = None

    current_epoch = 0
    # check output_dir, create it if not exists
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # if previously trained weights are used, never re-split
    if use_trained_model_weights:
        # resuming mode
        print("** use trained model weights **")
        # load training status for resuming
        training_stats_file = os.path.join(output_dir, ".training_stats.json")

        if os.path.isfile(training_stats_file):
            training_stats = json.load(open(training_stats_file))
            initial_learning_rate = training_stats['lr']
            current_epoch = training_stats['epoch']
        else:
            training_stats = {}
    else:
        # start over
        training_stats = {}

    print(f"backup config file to {output_dir}")
    shutil.copy(config_file,
                os.path.join(output_dir,
                             os.path.split(config_file)[1]))

    datasets = ["train", "val", "test"]
    for dataset in datasets:
        shutil.copy(os.path.join(dataset_csv_dir, f"{dataset}.csv"),
                    output_dir)

    # get train/dev sample counts
    train_counts, train_pos_counts = get_class_counts(
        os.path.join(output_dir, "train.csv"), class_names)
    val_counts, _ = get_class_counts(os.path.join(output_dir, "val.csv"),
                                     class_names)

    # compute steps
    if train_steps == "auto":
        train_steps = int(train_counts / batch_size)
    else:
        try:
            train_steps = int(train_steps)
        except ValueError:
            raise ValueError(
                f"train_steps: {train_steps} is invalid, please use 'auto' or integer."
            )

    print(f"** train_steps: {train_steps} **")

    if validation_steps == "auto":
        validation_steps = int(val_counts / batch_size)
    else:
        try:
            validation_steps = int(validation_steps)
        except ValueError:
            raise ValueError(
                f"validation_steps: {validation_steps} is invalid, please use 'auto' or integer."
            )

    print(f"** validation_steps: {validation_steps} **")

    class_weights = None
    if (TRAIN == "CLASS"):
        # compute class weights
        print("** compute class weights from training data **")
        class_weights = get_class_weights(
            train_counts,
            train_pos_counts,
            multiply=positive_weights_multiply,
        )

    print("** class_weights **")
    print(class_weights)

    print("** load model **")

    if use_trained_model_weights:
        if use_best_weights:
            model_weights_file = os.path.join(output_dir,
                                              f"best_{output_weights_name}")
        else:
            model_weights_file = os.path.join(output_dir, output_weights_name)
    else:
        model_weights_file = None

    print("** compile model **")
    METRICS = [
        TruePositives(name='tp'),
        FalsePositives(name='fp'),
        TrueNegatives(name='tn'),
        FalseNegatives(name='fn'),
        Accuracy(name='accuracy'),
        Precision(name='precision'),
        Recall(name='recall'),
        AUC(name='auc'),
    ]
    optimizer = Adam(lr=initial_learning_rate)

    if (TRAIN == "CLASS"):
        model = Resnet18(input_shape=(N, patch_size, patch_size, 3),
                         weights_path=model_weights_file,
                         N=N,
                         nb_classes=len(class_names))
        model.compile(optimizer=optimizer,
                      loss="categorical_crossentropy",
                      metrics=METRICS)
        checkpoint_monitor = 'val_loss'
    else:
        model = Densenet103(nb_classes=num_classes - 1,
                            weights_path=model_weights_file,
                            input_shape=(image_dimension, image_dimension, 1))
        model.compile(optimizer=optimizer, loss="binary_crossentropy")
        checkpoint_monitor = 'val_loss'

    if show_model_summary:
        print(model.summary())

    print("** create image generators **")

    if (TRAIN == "CLASS"):
        train_sequence = classification_gen(
            dataset_csv_file=os.path.join(output_dir, "train.csv"),
            class_names=class_names,
            N=N,
            batch_size=batch_size,
            normalization_func=imageNet_preprocessing,
            target_size=(image_dimension, image_dimension),
            patch_size=(patch_size, patch_size),
            augmenter=augmenter,
            base_dir=base_dir,
            mask_folder=mask_folder,
            steps=train_steps,
        )

        validation_sequence = classification_gen(
            dataset_csv_file=os.path.join(output_dir, "val.csv"),
            class_names=class_names,
            N=N,
            batch_size=batch_size,
            normalization_func=imageNet_preprocessing,
            target_size=(image_dimension, image_dimension),
            patch_size=(patch_size, patch_size),
            augmenter=augmenter,
            base_dir=base_dir,
            steps=validation_steps,
            mask_folder=mask_folder,
            shuffle_on_epoch_end=False,
        )
    else:
        train_sequence = segmentation_gen(
            dataset_csv_file=os.path.join(output_dir, "train.csv"),
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            base_dir=base_dir,
            steps=train_steps,
        )

        validation_sequence = segmentation_gen(
            dataset_csv_file=os.path.join(output_dir, "val.csv"),
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            base_dir=base_dir,
            steps=validation_steps,
            shuffle_on_epoch_end=False,
        )

    output_weights_path = os.path.join(output_dir, output_weights_name)
    print(f"** set output weights path to: {output_weights_path} **")

    checkpoint = ModelCheckpoint(output_weights_path,
                                 save_weights_only=True,
                                 save_best_only=True,
                                 verbose=1,
                                 monitor=checkpoint_monitor)

    if (TRAIN == "CLASS"):
        performance_callback = MultipleClassAUROC(
            sequence=validation_sequence,
            class_names=class_names,
            weights_path=output_weights_path,
            stats=training_stats,
            workers=generator_workers)
    else:
        performance_callback = Jaccard(sequence=validation_sequence,
                                       weights_path=output_weights_path,
                                       stats=training_stats,
                                       workers=generator_workers)

    callbacks = [
        checkpoint,
        performance_callback,
        TensorBoard(log_dir=os.path.join(output_dir, "logs"),
                    batch_size=batch_size),
        ReduceLROnPlateau(monitor='loss',
                          factor=0.1,
                          patience=patience_reduce_lr,
                          verbose=1,
                          mode="min",
                          min_lr=min_lr),
        EarlyStopping(
            monitor="val_loss",
            min_delta=0,
            patience=patience_early_stop,
            verbose=0,
            mode="min",
            baseline=None,
            restore_best_weights=False,
        ),
    ]

    print("** start training **")

    history = model.fit_generator(
        generator=train_sequence,
        initial_epoch=current_epoch,
        epochs=epochs,
        class_weight=class_weights,
        validation_data=validation_sequence,
        callbacks=callbacks,
        workers=generator_workers,
        shuffle=False,
    )

    # dump history
    print("** dump history **")
    with open(os.path.join(output_dir, "history.pkl"), "wb") as f:
        pickle.dump({"history": history.history}, f)
    print("** done! **")
        for layer in trained_net.layers:
            if 'l1' in layer.name:
                layer.trainable = True
            if 'l2' in layer.name:
                layer.trainable = False
            if 'l3' in layer.name:
                layer.trainable = True
    else:
        for layer in trained_net.layers:
            if 'l1' in layer.name:
                layer.trainable = False
            if 'l2' in layer.name:
                layer.trainable = True
            if 'l3' in layer.name:
                layer.trainable = True
    class_weights = get_class_weights(y_t_c)
    trained_net.compile(optimizer=tf.keras.optimizers.Adam(LR_TARGET),
                        loss=losses,
                        loss_weights=loss_weights,
                        metrics=['accuracy'])
    trained_net.fit(x_t_c, [y_t_c, y_t_c, y_t_c],
                    batch_size=64,
                    epochs=1,
                    verbose=0,
                    class_weight=class_weights)

    # evaluation
    res_t = trained_net.evaluate(X_T_VAL, [Y_T_VAL, Y_T_VAL, Y_T_VAL],
                                 verbose=0)
    print('\nStep:', step, 'Acc on target domain test data:', res_t[-1],
          'Len y_t_p:', len(y_t_p), '\n')
Example #7
0
    img_file, label_file, h_enc, w_enc, h_dec, w_dec)

# create single head datasets
train_single_ds = filelist_train.shuffle(n_train).map(map_single).cache().batch(batch_size).repeat()
val_single_ds = filelist_val.map(map_single).cache().batch(batch_size).repeat()
test_single_ds = filelist_test.map(map_single).cache().batch(batch_size).repeat()

# create double head datasets
train_double_ds = filelist_train.shuffle(n_train).map(map_double).cache().batch(batch_size).repeat()
val_double_ds = filelist_val.map(map_double).cache().batch(batch_size).repeat()
test_double_ds = filelist_test.map(map_double).cache().batch(batch_size).repeat()

# get class weights
label_filelist = tf.data.Dataset.list_files(label_pattern, shuffle=False)
label_ds = label_filelist.map(lambda x: map_label(x, h_dec, w_dec))
class_weights = get_class_weights(label_ds).tolist()


def train_enet():

    Enet = EnetModel(C=12, MultiObjective=True, l2=1e-3)
    # train the encoder first: freeze the decoder layers
    for layer in Enet.layers[-6:]:
        layer.trainable = False

    n_epochs = 60
    adam_optimizer = tf.keras.optimizers.Adam(learning_rate=5e-4)
    Enet.compile(optimizer=adam_optimizer,
                 loss=['sparse_categorical_crossentropy',
                       'sparse_categorical_crossentropy'],
                 metrics=['accuracy', 'accuracy'],
                 loss_weights=[1.0, 0.0])
def cross_validate(model_params, train_params, feature_type, naming):

    # Get trail list
    cross_val_splits = utils.get_cross_val_splits()

    # Cross-Validation Result
    result = []

    # Cross Validation
    for split_idx, split in enumerate(cross_val_splits):
        feature_dir = os.path.join(raw_feature_dir, split['name'])
        test_trail_list = split['test']
        train_trail_list = split['train']

        split_naming = naming + '_split_{}'.format(split_idx + 1)

        trained_model_file = utils.get_tcn_model_file(split_naming)
        log_dir = utils.get_tcn_log_sub_dir(split_naming)

        # Model
        model = EncoderDecoderNet(**model_params)
        model = model.cuda()

        print(model)

        n_layers = len(model_params['encoder_params']['layer_sizes'])

        # Dataset
        train_dataset = RawFeatureDataset(dataset_name,
                                          feature_dir,
                                          train_trail_list,
                                          feature_type=feature_type,
                                          encode_level=n_layers,
                                          sample_rate=sample_rate,
                                          sample_aug=False,
                                          normalization=[None, None])

        test_norm = [train_dataset.get_means(), train_dataset.get_stds()]
        test_dataset = RawFeatureDataset(dataset_name,
                                         feature_dir,
                                         test_trail_list,
                                         feature_type=feature_type,
                                         encode_level=n_layers,
                                         sample_rate=sample_rate,
                                         sample_aug=False,
                                         normalization=test_norm)
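        # per-class weights for the loss, estimated from the training split's labels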

        loss_weights = utils.get_class_weights(train_dataset)
        #loss_weights = None

        if train_params is not None:
            train_model(model,
                        train_dataset,
                        test_dataset,
                        **train_params,
                        loss_weights=loss_weights,
                        trained_model_file=trained_model_file,
                        log_dir=log_dir)
            #log_dir=None)

        model.load_state_dict(torch.load(trained_model_file))

        acc, edit, _, f_scores = test_model(model,
                                            test_dataset,
                                            loss_weights=loss_weights,
                                            plot_naming=split_naming)

        result.append(
            [acc, edit, f_scores[0], f_scores[1], f_scores[2], f_scores[3]])

    result = np.array(result)

    return result
Example #9
0
def train(FLAGS):

    # -------------------- Defining the hyperparameters --------------------
    batch_size = FLAGS.batch_size  #
    epochs = FLAGS.epochs  #
    training_type = FLAGS.training_type  #
    learning_rate = FLAGS.learning_rate  #
    save_every = FLAGS.save_every  #
    num_classes = FLAGS.num_classes  #
    weight_decay = FLAGS.weight_decay
    img_pattern = FLAGS.img_pattern  #
    label_pattern = FLAGS.label_pattern  #
    img_pattern_val = FLAGS.img_pattern_val  #
    label_pattern_val = FLAGS.label_pattern_val  #
    tb_logs = FLAGS.tensorboard_logs  #
    img_width = FLAGS.img_width  #
    img_height = FLAGS.img_height  #
    save_model = FLAGS.save_model  #
    cache_train = FLAGS.cache_train  #
    cache_val = FLAGS.cache_val  #
    cache_test = FLAGS.cache_test  #
    print('[INFO]Defined all the hyperparameters successfully!')

    # setup tensorboard
    log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                                          histogram_freq=1)

    # encoder and decoder dimensions
    h_enc = img_height // 8
    w_enc = img_width // 8
    h_dec = img_height
    w_dec = img_width

    # create (img,label) string tensor lists
    filelist_train = preprocess_img_label(img_pattern, label_pattern)
    filelist_val = preprocess_img_label(img_pattern_val, label_pattern_val)

    # training dataset size
    n_train = tf.data.experimental.cardinality(filelist_train).numpy()
    n_val = tf.data.experimental.cardinality(filelist_val).numpy()

    # define mapping functions for single and double head nets
    map_single = partial(map_singlehead, h_img=h_dec, w_img=w_dec)
    map_double = partial(map_doublehead,
                         h_enc=h_enc,
                         w_enc=w_enc,
                         h_dec=h_dec,
                         w_dec=w_dec)

    # create dataset
    if training_type == 0 or training_type == 1:
        map_fn = map_double
    else:
        map_fn = map_single
    train_ds = filelist_train.shuffle(n_train).map(map_fn).cache(
        cache_train).batch(batch_size).repeat()
    val_ds = filelist_val.map(map_fn).cache(cache_val).batch(
        batch_size).repeat()

    # final training and validation datasets

    # -------------------- get the class weights --------------------
    print('[INFO]Starting to define the class weights...')
    label_filelist = tf.data.Dataset.list_files(label_pattern, shuffle=False)
    label_ds = label_filelist.map(lambda x: process_label(x, h_dec, w_dec))
    class_weights = get_class_weights(label_ds).tolist()
    print('[INFO]Fetched all class weights successfully!')

    # -------------------- instantiate model --------------------
    if training_type == 0 or training_type == 1:
        Enet = EnetModel(C=num_classes, MultiObjective=True, l2=weight_decay)
    else:
        Enet = EnetModel(C=num_classes, l2=weight_decay)
    print('[INFO]Model Instantiated!')

    # -------------------- start training --------------------
    adam_optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    # -- two stages training --
    if training_type == 0:

        # freeze decoder layers
        for layer in Enet.layers[-6:]:
            layer.trainable = False

        # compile encoder: only the first objective matters
        Enet.compile(optimizer=adam_optimizer,
                     loss=[
                         'sparse_categorical_crossentropy',
                         'sparse_categorical_crossentropy'
                     ],
                     metrics=['accuracy', 'accuracy'],
                     loss_weights=[1.0, 0.0])

        # train encoder
        Enet.fit(x=train_ds,
                 epochs=epochs,
                 steps_per_epoch=n_train // batch_size,
                 validation_data=val_ds,
                 validation_steps=n_val // batch_size // 5,
                 class_weight=class_weights,
                 callbacks=[tensorboard_callback])

        # freeze encoder and unfreeze decoder
        for layer in Enet.layers[-6:]:
            layer.trainable = True
        for layer in Enet.layers[:-6]:
            layer.trainable = False

        # compile model: only the second objective matters
        Enet.compile(optimizer=adam_optimizer,
                     loss=[
                         'sparse_categorical_crossentropy',
                         'sparse_categorical_crossentropy'
                     ],
                     metrics=['accuracy', 'accuracy'],
                     loss_weights=[0.0, 1.0])

        # train decoder
        enet_hist = Enet.fit(x=train_ds,
                             epochs=epochs,
                             steps_per_epoch=n_train // batch_size,
                             validation_data=val_ds,
                             validation_steps=n_val // batch_size // 5,
                             class_weight=class_weights,
                             callbacks=[tensorboard_callback])

    # -- simultaneous double-objective training --
    elif training_type == 1:

        # compile model
        Enet.compile(optimizer=adam_optimizer,
                     loss=[
                         'sparse_categorical_crossentropy',
                         'sparse_categorical_crossentropy'
                     ],
                     metrics=['accuracy', 'accuracy'],
                     loss_weights=[0.5, 0.5])

        # fit model
        print('train: ', n_train, 'batch: ', batch_size)
        enet_hist = Enet.fit(x=train_ds,
                             epochs=epochs,
                             steps_per_epoch=n_train // batch_size,
                             validation_data=val_ds,
                             validation_steps=n_val // batch_size // 5,
                             class_weight=class_weights,
                             callbacks=[tensorboard_callback])

    # -- end to end training --
    else:

        # compile model
        Enet.compile(optimizer=adam_optimizer,
                     loss=['sparse_categorical_crossentropy'],
                     metrics=['accuracy'])

        enet_hist = Enet.fit(x=train_ds,
                             epochs=epochs,
                             steps_per_epoch=n_train // batch_size,
                             validation_data=val_ds,
                             validation_steps=n_val // batch_size // 5,
                             class_weight=class_weights,
                             callbacks=[tensorboard_callback])

    # -------------------- save model --------------------
    Enet.save_weights(save_model)
def main():
    parser = options.get_parser('Generator')
    options.add_dataset_args(parser)
    options.add_preprocessing_args(parser)
    options.add_model_args(parser)
    options.add_optimization_args(parser)
    options.add_checkpoint_args(parser)
    options.add_generation_args(parser)

    args = parser.parse_args()

    model_path = args.load_checkpoint + '.model'
    args_path = args.load_checkpoint + '.json'
    with open(args_path, 'r') as f:
        _args = json.load(f)['args']
    [setattr(args, k, v) for k, v in _args.items()]

    args.cuda = not args.disable_cuda and torch.cuda.is_available()

    print(args)

    if args.cuda:
        torch.backends.cudnn.benchmark = True

    # increase recursion depth
    sys.setrecursionlimit(10000)

    # load dataset
    train_raw_corpus, val_raw_corpus, test_raw_corpus = utils.load_corpus(
        args.processed_dir, ddi=False)
    assert train_raw_corpus and val_raw_corpus and test_raw_corpus, 'Corpus not found, please run preprocess.py to obtain corpus!'
    train_corpus = [(line.sent, line.type, line.p1, line.p2)
                    for line in train_raw_corpus]
    val_corpus = [(line.sent, line.type, line.p1, line.p2)
                  for line in val_raw_corpus]

    caseless = args.caseless
    batch_size = args.batch_size

    # build vocab
    sents = [tup[0] for tup in train_corpus + val_corpus]
    feature_map = utils.build_vocab(sents,
                                    min_count=args.min_count,
                                    caseless=caseless)
    target_map = ddi2013.target_map

    # get class weights
    _, train_targets = utils.build_corpus(train_corpus, feature_map,
                                          target_map, caseless)
    class_weights = torch.Tensor(
        utils.get_class_weights(train_targets)) if args.class_weight else None

    # load datasets
    _, _, test_loader = utils.load_datasets(args.processed_dir,
                                            args.train_size,
                                            args,
                                            feature_map,
                                            dataloader=True)

    # build model
    vocab_size = len(feature_map)
    tagset_size = len(target_map)
    model = RelationTreeModel(vocab_size, tagset_size, args)

    # loss
    criterion = utils.build_loss(args, class_weights=class_weights)

    # load states
    assert os.path.isfile(model_path), "Checkpoint not found!"
    print('Loading checkpoint file from {}...'.format(model_path))
    checkpoint_file = torch.load(model_path)
    model.load_state_dict(checkpoint_file['state_dict'])

    # trainer
    trainer = TreeTrainer(args, model, criterion)

    # predict
    y_true, y_pred, treelists, f1_by_len = predict(trainer,
                                                   test_loader,
                                                   target_map,
                                                   cuda=args.cuda)

    # assign words to roots
    for tup, treelist in zip(test_raw_corpus, treelists):
        for t in treelist:
            t.idx = tup.sent[t.idx] if t.idx < len(tup.sent) else None

    # prediction
    print('Predicting...')
    # write result: sent_id|e1|e2|ddi|type
    with open(args.predict_file, 'w') as f:
        for tup, pred in zip(test_raw_corpus, y_pred):
            ddi = 0 if pred == 'null' else 1
            f.write('|'.join([tup.sent_id, tup.e1, tup.e2, str(ddi), pred]))
            f.write('\n')

    def print_info(f, tup, target, pred, root):
        f.write('{}\n'.format(' '.join(tup.sent)))
        f.write('{}\n'.format(' | '.join(
            [tup.sent_id, tup.e1, tup.e2, target, pred])))
        f.write('{}\n\n'.format(root))

    # error analysis
    print('Analyzing...')
    with open(args.error_file, 'w') as f:
        f.write(' | '.join(['sent_id', 'e1', 'e2', 'target', 'pred']))
        f.write('\n')
        for tup, target, pred, treelist in zip(test_raw_corpus, y_true, y_pred,
                                               treelists):
            if target != pred:
                print_info(f, tup, target, pred, treelist[-1])

    # attention
    print('Writing attention scores...')
    with open(args.correct_file, 'w') as f:
        f.write(' | '.join(['target', 'sent', 'att_weight']))
        f.write('\n')
        for tup, target, pred, treelist in zip(test_raw_corpus, y_true, y_pred,
                                               treelists):
            if target == pred and target != 'null':
                print_info(f, tup, target, pred, treelist[-1])
def main():
    parser = options.get_parser('Trainer')
    options.add_dataset_args(parser)
    options.add_preprocessing_args(parser)
    options.add_model_args(parser)
    options.add_optimization_args(parser)
    options.add_checkpoint_args(parser)
    
    args = parser.parse_args()
    print(args)
    
    args.cuda = not args.disable_cuda and torch.cuda.is_available()
    torch.manual_seed(5)
    
    if args.cuda:
        torch.backends.cudnn.benchmark = True
    
    # increase recursion depth
    sys.setrecursionlimit(10000)
    # checkpoint
    checkpoint_dir = os.path.dirname(args.checkpoint)
    if not os.path.isdir(checkpoint_dir):
        os.mkdir(checkpoint_dir)
    
    # load dataset
    train_raw_corpus, val_raw_corpus, test_raw_corpus = utils.load_corpus(args.processed_dir, ddi=False)
    assert train_raw_corpus and val_raw_corpus and test_raw_corpus, 'Corpus not found, please run preprocess.py to obtain corpus!'
    train_corpus = [(line.sent, line.type, line.p1, line.p2) for line in train_raw_corpus]
    val_corpus = [(line.sent, line.type, line.p1, line.p2) for line in val_raw_corpus]    
    
    start_epoch = 0
    caseless = args.caseless
    batch_size = args.batch_size
    num_epoch = args.num_epoch
    
    # build vocab
    sents = [tup[0] for tup in train_corpus + val_corpus]
    feature_map = utils.build_vocab(sents, min_count=args.min_count, caseless=caseless)
    target_map = ddi2013.target_map
    
    # get class weights
    _, train_targets = utils.build_corpus(train_corpus, feature_map, target_map, caseless)
    class_weights = torch.Tensor(utils.get_class_weights(train_targets)) if args.class_weight else None
        
    train_loader, val_loader, test_loader = utils.load_datasets(args.processed_dir, args.train_size, args, feature_map, dataloader=True)            
    
    # build model
    vocab_size = len(feature_map)
    tagset_size = len(target_map)
    model = RelationTreeModel(vocab_size, tagset_size, args)
    
    # loss
    criterion = utils.build_loss(args, class_weights=class_weights)
    
    # load states
    if os.path.isfile(args.load_checkpoint):
        print('Loading checkpoint file from {}...'.format(args.load_checkpoint))
        checkpoint_file = torch.load(args.load_checkpoint)
        start_epoch = checkpoint_file['epoch'] + 1
        model.load_state_dict(checkpoint_file['state_dict'])
    #    optimizer.load_state_dict(checkpoint_file['optimizer'])
    else:
        print('no checkpoint file found: {}, train from scratch...'.format(args.load_checkpoint))
        if not args.rand_embedding:
            pretrained_word_embedding, in_doc_word_indices = utils.load_word_embedding(args.emb_file, feature_map, args.embedding_dim)
            print(pretrained_word_embedding.size())
            print(vocab_size)
            model.load_pretrained_embedding(pretrained_word_embedding)
            if args.disable_fine_tune:
                model.update_part_embedding(in_doc_word_indices) # update only non-pretrained words
        model.rand_init(init_embedding=args.rand_embedding)
    
    # trainer
    trainer = TreeTrainer(args, model, criterion)
    
    best_f1 = float('-inf')
    
    if os.path.isfile(args.load_checkpoint):
        dev_prec, dev_rec, dev_f1, _ = evaluate(trainer, val_loader, target_map, cuda=args.cuda)
        test_prec, test_rec, test_f1, _ = evaluate(trainer, test_loader, target_map, cuda=args.cuda)
        best_f1 = dev_f1
        print('checkpoint dev_prec: {:.4f}, dev_rec: {:.4f}, dev_f1: {:.4f}, test_prec: {:.4f}, test_rec: {:.4f}, test_f1: {:.4f}'.format(
            dev_prec, dev_rec, dev_f1, test_prec, test_rec, test_f1))
        
    track_list = []
    
    patience_count = 0
    start_time = time.time()
    q = mp.Queue()
    
    # set start methods
    try:
        mp.set_start_method('spawn')
    except RuntimeError:
        pass

    for epoch in range(start_epoch, num_epoch):
        epoch_loss = train(train_loader, trainer, epoch)
#        processes = []
#        for rank in range(args.num_processes):
#            p = mp.Process(target=train, args=(train_loader, trainer, epoch, q))
#            p.start()
#            processes.append(p)
#        for p in processes:
#            p.join()
#        
#        epoch_loss = q.get()

                
        # update lr
        trainer.lr_step(epoch_loss)
        
        dev_prec, dev_rec, dev_f1, dev_loss = evaluate(trainer, val_loader, target_map, cuda=args.cuda)
        test_prec, test_rec, test_f1, _ = evaluate(trainer, test_loader, target_map, cuda=args.cuda)
        if dev_f1 >= best_f1:
            patience_count = 0
            best_f1 = dev_f1
    
            track_list.append({'epoch': epoch, 'loss': epoch_loss, 
                'dev_prec': dev_prec, 'dev_rec': dev_rec, 'dev_f1': dev_f1, 'dev_loss': dev_loss, 
                'test_prec': test_prec, 'test_rec': test_rec, 'test_f1': test_f1})
            print('epoch: {}, loss: {:.4f}, dev_f1: {:.4f}, dev_loss: {:.4f}, test_f1: {:.4f}\tsaving...'.format(epoch, epoch_loss, dev_f1, dev_loss, test_f1))
    
            try:
                utils.save_checkpoint({
                            'epoch': epoch,
                            'state_dict': model.state_dict(),
                            'optimizer': trainer.optimizer.state_dict(),
                            'f_map': feature_map,
                            't_map': target_map,
                        }, {'track_list': track_list,
                            'args': vars(args)
                            }, args.checkpoint)
            except Exception as inst:
                print(inst)
        else:
            patience_count += 1
            track_list.append({'epoch': epoch,'loss': epoch_loss, 'dev_prec': dev_prec, 'dev_rec': dev_rec, 'dev_f1': dev_f1, 'dev_loss': dev_loss})
            print('epoch: {}, loss: {:.4f}, dev_f1: {:.4f}, dev_loss: {:.4f}, test_f1: {:.4f}'.format(epoch, epoch_loss, dev_f1, dev_loss, test_f1))
    
        print('epoch: {} in {} take: {} s'.format(epoch, args.num_epoch, time.time() - start_time))
        if patience_count >= args.patience:
            break
Example #12
0
FLAGS.setDefaults()

model_factory = ModelFactory()

# load training and test set file names

train_generator = get_generator(FLAGS.train_csv, FLAGS, augmenter)
test_generator = get_generator(FLAGS.test_csv, FLAGS)

class_weights = None
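# choose the class-balancing scheme: per-label weights for multi-label
# classification, count-based weights otherwise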
if FLAGS.use_class_balancing:
    if FLAGS.multi_label_classification:
        class_weights = get_multilabel_class_weights(
            train_generator.y, FLAGS.positive_weights_multiply)
    else:
        class_weights = get_class_weights(train_generator.get_class_counts(),
                                          FLAGS.positive_weights_multiply)

# load classifier from saved weights or get a new one
training_stats = {}
learning_rate = FLAGS.learning_rate

if FLAGS.load_model_path != '' and FLAGS.load_model_path is not None:
    visual_model = load_model(FLAGS.load_model_path)
    if FLAGS.show_model_summary:
        visual_model.summary()
    training_stats_file = os.path.join(FLAGS.save_model_path,
                                       ".training_stats.json")
    if os.path.isfile(training_stats_file):
        training_stats = json.load(open(training_stats_file))
        learning_rate = training_stats['lr']
        print("Will continue from learning rate: {}".format(learning_rate))
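
# The positive_weights_multiply factor used above boosts the weight assigned to
# the (typically rarer) positive label of each class. A hypothetical sketch of a
# count-based helper implementing that idea (illustrative only, not the actual
# implementation behind these examples):
def get_class_weights(total_count, positive_counts, multiply=1.0):
    """positive_counts: {class_name: number of positive samples}."""
    def single_class_weight(pos):
        neg = total_count - pos
        denom = neg * multiply + pos
        # the two weights sum to 1 per class; multiply > 1 shifts mass to positives
        return {0: pos / denom, 1: neg * multiply / denom}

    return {name: single_class_weight(pos) for name, pos in positive_counts.items()}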
Example #13
0
if args.name != '':
    config['model']['name'] = args.name
if args.max_iter > 0:
    config['train']['max_iter'] = args.max_iter
if args.batch_size > 0:
    config['train']['batch_size'] = args.batch_size

num_classes = int(config['model']['num_classes'])
batch_size = int(config['train']['batch_size'])

train_data = VFDataset([1, 3, 5, 8])
valid_data = VFDataset([4, 6])
test_data = VFDataset([2, 7])

class_weights = get_class_weights(num_classes=num_classes,
                                  train_data=train_data,
                                  valid_data=valid_data)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

device = torch.device('cuda')
Model = model_dict[args.model]
model = Model(n_classes=num_classes).to(device)
model = nn.DataParallel(model)
trainer = Trainer(model, config, train_loader, valid_loader, class_weights,
                  device)

trainer.train(vis_data=test_data)
accuracy, precisions, recalls, IoUs = trainer.eval(test_loader)
Example #14
0
def classification(variant='1_hour',
                   structureless=False,
                   batch_size=8,
                   x_size=12,
                   h_size=8,
                   emo_size=8,
                   top_sizes=(16, 16),
                   p_drop=0.1,
                   verbose=1,
                   bi=True,
                   deep=True,
                   lr_tree=0.05,
                   lr_top=0.01,
                   decay_tree=0.003,
                   decay_top=0.006,
                   epochs=60,
                   cuda_id=-1):

    data_dir = '../data/'
    out_dir = '../results/'
    graphs_dir = data_dir + 'graphs_all/'
    cascade_size_file = data_dir + 'cascade_size.csv'

    device = set_device(cuda_id)

    if structureless:
        x_size = x_size - 2

    train_ids = np.array([
        ID.split('_')[0] for ID in os.listdir(graphs_dir)
        if variant in ID and 'test' not in ID
    ])
    test_ids = np.unique([
        ID.split('_')[0] for ID in os.listdir(graphs_dir)
        if variant + '_test' in ID
    ])

    train_set = CascadeData(train_ids,
                            graphs_dir,
                            cascade_size_file,
                            variant=variant,
                            categorical=True,
                            structureless=structureless)
    test_set = CascadeData(test_ids,
                           graphs_dir,
                           cascade_size_file,
                           test=True,
                           variant=variant,
                           categorical=True,
                           structureless=structureless)

    _, weights_all = get_class_weights(train_set)
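    # the per-sample weights feed a WeightedRandomSampler, re-balancing the
    # classes at sampling time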
    weighted_sampler = WeightedRandomSampler(weights_all, len(weights_all))

    train_generator = DataLoader(train_set,
                                 collate_fn=cascade_batcher(device),
                                 batch_size=batch_size,
                                 num_workers=8,
                                 sampler=weighted_sampler)
    test_generator = DataLoader(test_set,
                                collate_fn=cascade_batcher(device),
                                batch_size=batch_size,
                                num_workers=8)

    deep_tree = DeepTreeLSTMClassifier(x_size,
                                       4,
                                       emo_size,
                                       h_size=h_size,
                                       top_sizes=top_sizes,
                                       bi=bi,
                                       deep=deep,
                                       pd=p_drop)

    criterion = nn.CrossEntropyLoss()
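    # separate optimizers and LR schedulers for the tree-LSTM (bottom_net) and
    # the classification head (top_net)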
    optimizer_tree = th.optim.Adam(deep_tree.bottom_net.parameters(),
                                   lr=lr_tree,
                                   weight_decay=decay_tree)
    optimizer_top = th.optim.Adam(deep_tree.top_net.parameters(),
                                  lr=lr_top,
                                  weight_decay=decay_top)
    scheduler_tree = th.optim.lr_scheduler.StepLR(optimizer_tree,
                                                  step_size=10,
                                                  gamma=0.8)
    scheduler_top = th.optim.lr_scheduler.StepLR(optimizer_top,
                                                 step_size=10,
                                                 gamma=0.8)

    callbacks = [
        EarlyStopping(patience=10),
        ExperimentLogger(out_dir, filename='logs_classification.csv')
    ]

    model_trainer = DeepTreeTrainer(deep_tree)
    model_trainer.compile(optimizer_tree,
                          optimizer_top,
                          criterion,
                          scheduler_tree=scheduler_tree,
                          scheduler_top=scheduler_top,
                          callbacks=callbacks,
                          metrics=['mul_acc'])
    model_trainer.fit(train_generator,
                      test_generator,
                      epochs,
                      cuda_id,
                      verbose=verbose)
    return deep_tree
train_do_ds = tf_dataset_generator(train_path,
                                   process_path_double_obj,
                                   batch_size=8)
val_do_ds = tf_dataset_generator(val_path,
                                 process_path_double_obj,
                                 batch_size=8)
test_do_ds = tf_dataset_generator(test_path,
                                  process_path_double_obj,
                                  batch_size=8)

list_ds = tf.data.Dataset.list_files(train_path + '/*')
data_set = list_ds.map(process_path_double_obj,
                       num_parallel_calls=tf.data.experimental.AUTOTUNE)

# get class weights
class_weights = get_class_weights(
    tf_dataset_generator(train_path, process_path, train=False, cache=False))

# define single headed model
EnetSingle = EnetModel(C=12)
for img, iml in train_ds.take(1):
    img_test = img
    iml_test = iml
img_out = EnetSingle(img_test)

# define double headed model
EnetDouble = EnetModel(C=12, MultiObjective=True)
for img, iml in train_do_ds.take(1):
    img_do_test = img
    iml_do_test = iml
img_do_out = EnetDouble(img_do_test)
Example #16
0
def train_model(model,
                train_dl,
                val_dl,
                epochs: int = 10,
                lr: float = 3e-4,
                name: str = 'no_name',
                mcat_ratio: float = 0.1,
                ema: float = 0.99,
                pbar_width: int = None,
                use_wandb: bool = True,
                overwrite_model: bool = True):
    ''' Train a given model. 
    
    INPUT
        model: torch.nn.Module
            The model we would like to train
        train_dl: torch.utils.data.DataLoader
            A dataloader containing the training set
        val_dl : torch.utils.data.DataLoader
            A dataloader containing the validation set
        epochs: int = 10
            The number of epochs to train for
        lr: float = 3e-4
            The learning rate used
        name: str = 'no_name'
            The name of the training run, used for wandb purposes
        mcat_ratio: float = 0.1
            How much the master category loss is prioritised over the
            category loss
        ema: float = 0.99
            The factor used in computing the exponential moving averages of
            the loss and sample-average F1 scores. Roughly corresponds to
            taking the average of the previous 1 / (1 - ema) batches
        pbar_width: int = None
            The width of the progress bar. If running in a Jupyter notebook
            then this should be set to ~1000
        use_wandb: bool = True
            Whether to record training metrics online with Weights & Biases
        overwrite_model: bool = True
            Whether to overwrite existing models when saving

    OUTPUT
        The trained model
    '''
    from sklearn.metrics import f1_score
    import warnings
    from pathlib import Path

    print(f'Training on {len(train_dl) * train_dl.batch_size:,d} samples '\
          f'and validating on {len(val_dl) * val_dl.batch_size:,d} samples.')
    print(f'Number of trainable parameters: {model.trainable_params():,d}')

    # Sign into wandb and log metrics from model
    if use_wandb:
        import wandb
        config = {
            'name': name,
            'mcat_ratio': mcat_ratio,
            'epochs': epochs,
            'lr': lr,
            'batch_size': train_dl.batch_size,
            'ema': ema,
            'vectors': train_dl.vectors,
            'dropout': model.params['dropout'],
            'nlayers': model.params['nlayers'],
            'dim': model.params['dim'],
            'boom_dim': model.params['boom_dim'],
            'emb_dim': model.params['vocab'].vectors.shape[1],
        }
        wandb.init(project='scholarly', config=config)
        wandb.watch(model)

    weights = get_class_weights(train_dl,
                                pbar_width=model.pbar_width,
                                data_dir=model.data_dir)
    criterion = NestedBCELoss(**weights,
                              mcat_ratio=mcat_ratio,
                              data_dir=model.data_dir)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    mcat_masks = get_mcat_masks(data_dir=model.data_dir)

    if model.is_cuda():
        mcat_masks = mcat_masks.cuda()
        criterion = criterion.cuda()

    avg_loss, avg_cat_f1, avg_mcat_f1, best_score = 0, 0, 0, 0
    for epoch in range(epochs):
        with tqdm(total=len(train_dl) * train_dl.batch_size,
                  ncols=model.pbar_width) as pbar:
            model.train()

            for idx, (x_train, y_train) in enumerate(train_dl):
                optimizer.zero_grad()

                if model.is_cuda():
                    x_train = x_train.cuda()
                    y_train = y_train.cuda()

                # Get cat predictions
                y_hat = model(x_train)
                preds = torch.sigmoid(y_hat)

                # Get master cat predictions
                my_hat, my_train = cats2mcats(y_hat,
                                              y_train,
                                              masks=mcat_masks,
                                              data_dir=model.data_dir)
                mpreds = torch.sigmoid(my_hat)

                # Calculate loss and perform backprop
                loss = criterion(y_hat, y_train)
                loss.backward()
                optimizer.step()

                # Compute f1 scores
                with warnings.catch_warnings():
                    warnings.simplefilter('ignore')
                    cat_f1 = f1_score(preds.cpu() > 0.5,
                                      y_train.cpu(),
                                      average='samples')
                    mcat_f1 = f1_score(mpreds.cpu() > 0.5,
                                       my_train.cpu(),
                                       average='samples')

                # Keep track of the current iteration index
                iteration = epoch * len(train_dl) * train_dl.batch_size
                iteration += idx * train_dl.batch_size

                # Exponential moving averages of the loss and f1 scores
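                # (the division applies a bias correction so that early
                # averages are not skewed toward zero)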
                avg_loss = ema * avg_loss + (1 - ema) * float(loss)
                avg_loss /= 1 - ema**(iteration / (1 - ema) + 1)
                avg_cat_f1 = ema * avg_cat_f1 + (1 - ema) * float(cat_f1)
                avg_cat_f1 /= 1 - ema**(iteration / (1 - ema) + 1)
                avg_mcat_f1 = ema * avg_mcat_f1 + (1 - ema) * float(mcat_f1)
                avg_mcat_f1 /= 1 - ema**(iteration / (1 - ema) + 1)

                # Log wandb
                if use_wandb:
                    wandb.log({
                        'loss': avg_loss,
                        'cat f1': avg_cat_f1,
                        'mcat f1': avg_mcat_f1
                    })

                # Update the progress bar
                desc = f'Epoch {epoch:2d} - '\
                       f'loss {avg_loss:.4f} - '\
                       f'cat f1 {avg_cat_f1:.4f} - '\
                       f'mcat f1 {avg_mcat_f1:.4f}'
                pbar.set_description(desc)
                pbar.update(train_dl.batch_size)

            # Compute validation scores
            with torch.no_grad():
                model.eval()

                val_loss, val_cat_f1, val_mcat_f1 = 0, 0, 0
                y_vals, y_hats = [], []
                for x_val, y_val in val_dl:

                    if model.is_cuda():
                        x_val = x_val.cuda()
                        y_val = y_val.cuda()

                    # Get cat predictions
                    y_hat = model(x_val)
                    preds = torch.sigmoid(y_hat)

                    # Get mcat predictions
                    my_hat, my_val = cats2mcats(y_hat,
                                                y_val,
                                                masks=mcat_masks,
                                                data_dir=model.data_dir)
                    mpreds = torch.sigmoid(my_hat)

                    # Collect the true and predicted labels
                    y_vals.append(y_val)
                    y_hats.append(preds > 0.5)

                    # Accumulate loss
                    val_loss += float(criterion(y_hat, y_val, weighted=False))

                    # Accumulate f1 scores
                    with warnings.catch_warnings():
                        warnings.simplefilter('ignore')
                        val_cat_f1 += f1_score(preds.cpu() > 0.5,
                                               y_val.cpu(),
                                               average='samples')
                        val_mcat_f1 += f1_score(mpreds.cpu() > 0.5,
                                                my_val.cpu(),
                                                average='samples')

                # Concatenate the true and predicted labels
                y_val = torch.cat(y_vals, dim=0)
                y_hat = torch.cat(y_hats, dim=0)

                # Compute the average loss and f1 scores
                val_loss /= len(val_dl)
                val_cat_f1 /= len(val_dl)
                val_mcat_f1 /= len(val_dl)

                # Log wandb
                if use_wandb:
                    wandb.log({
                        'val loss': val_loss,
                        'val cat f1': val_cat_f1,
                        'val mcat f1': val_mcat_f1
                    })

                # If the current cat f1 score is the best so far, then
                # replace the stored model with the current one
                if val_cat_f1 > best_score:
                    model_fname = f'model_{val_cat_f1 * 100:.2f}.pt'
                    best_score = val_cat_f1
                    data = {
                        'params': model.params,
                        'state_dict': model.state_dict(),
                        'scores': model.evaluate(val_dl, output_dict=True)
                    }

                    if overwrite_model:
                        for f in get_path(model.data_dir).glob('model*.pt'):
                            f.unlink()

                    with warnings.catch_warnings():
                        warnings.simplefilter('ignore')
                        path = get_path(model.data_dir) / model_fname
                        torch.save(data, path)

                    # Save the model's state dict to wandb directory
                    if use_wandb:
                        if overwrite_model:
                            for f in Path(wandb.run.dir).glob('model*.pt'):
                                f.unlink()
                        torch.save(data, Path(wandb.run.dir) / model_fname)
                        wandb.save(model_fname)

                # Update progress bar
                desc = f'Epoch {epoch:2d} - '\
                       f'loss {avg_loss:.4f} - '\
                       f'cat f1 {avg_cat_f1:.4f} - '\
                       f'mcat f1 {avg_mcat_f1:.4f} - '\
                       f'val_loss {val_loss:.4f} - '\
                       f'val cat f1 {val_cat_f1:.4f} - '\
                       f'val mcat f1 {val_mcat_f1:.4f}'
                pbar.set_description(desc)

    return model
                      mode='test')
c_test.validate_corpus()

# Sentences (x_test), labels (y_test) and POS tags (z_pos)
x_test, y_test, z_pos = features.generate_input_and_labels(c_test.sentences,
                                                           Vectors=embeddings)

# POS Tags to numerical sequences
pos_tokenizer = Tokenizer()
pos_tokenizer.fit_on_texts(z_pos)
pos_sequences = pos_tokenizer.texts_to_sequences(z_pos)
z_test = to_categorical(pos_sequences)

# TODO: these class weights should presumably come from the training set, not the test set
class_weights = list(
    utils.get_class_weights(c_test.label_list, WEIGHT_SMOOTHING).values())
print('loss_weight {}'.format(class_weights))

# Load model and Embeddings
model = load_model('naacl_metaphor.h5',
                   custom_objects={
                       'loss': utils.weighted_categorical_crossentropy(class_weights),
                       'f1': utils.f1,
                       'precision': utils.precision,
                       'recall': utils.recall
                   })
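
For reference, the custom loss above comes from a factory function. A minimal sketch of such a weighted categorical cross-entropy, written against tf.keras and assumed for illustration only (the project's utils.weighted_categorical_crossentropy may differ), could look like this:

import numpy as np
import tensorflow.keras.backend as K

def weighted_categorical_crossentropy(weights):
    """Return a categorical cross-entropy loss that scales each class term by `weights`."""
    weights = K.constant(np.asarray(weights, dtype='float32'))

    def loss(y_true, y_pred):
        # Normalise the predictions and clip to avoid log(0)
        y_pred = y_pred / K.sum(y_pred, axis=-1, keepdims=True)
        y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
        # Cross-entropy per sample, with each class term rescaled by its weight
        return -K.sum(y_true * K.log(y_pred) * weights, axis=-1)

    return loss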
Example #18
File: train.py  Project: ben-heil/whistl
def train_with_erm(classifier, train_loader, tune_loader, num_epochs, logger=None, save_file=None):
    ''' Train the provided classifier on the data from train_loader, evaluating the performance
    along the way with the data from tune_loader

    Arguments
    ---------
    classifier: torch.nn.Module
        The model to train
    train_loader: torch.utils.data.DataLoader
        The DataLoader containing training data
    tune_loader: torch.utils.data.DataLoader
        The DataLoader containing tuning data
    num_epochs: int
        The number of times the model should be trained on all the data
    logger: logging.Logger
        The python logger object to handle printing logs
    save_file: string or Path object
        The file to save the model to. If save_file is None, the model won't be saved

    Returns
    -------
    results: dict
        A dictionary containing lists tracking different loss metrics across epochs
    '''
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    classifier = classifier.to(device)
    optimizer = optim.Adam(classifier.parameters(), lr=1e-5)

    class_weights = utils.get_class_weights(train_loader)

    # Calculate baseline tune set prediction accuracy (just pick the largest class)
    tune_label_counts, _ = utils.get_value_counts(tune_loader)
    baseline = max(tune_label_counts.values()) / len(tune_loader.dataset)

    results = {'train_loss': [], 'tune_loss': [], 'train_acc': [], 'tune_acc': [],
               'baseline': baseline}
    try:
        best_tune_loss = None

        for epoch in tqdm_notebook(range(num_epochs)):
            train_loss = 0
            train_correct = 0
            # Set training mode
            classifier = classifier.train()
            for batch in train_loader:
                expression, labels, ids = batch
                expression = expression.float().to(device)
                labels = labels.float().to(device)

                # Get weights to handle the class imbalance
                batch_weights = [class_weights[int(label)] for label in labels]
                batch_weights = torch.FloatTensor(batch_weights).to(device)

                loss_function = nn.BCEWithLogitsLoss(weight=batch_weights)
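                # `weight` in BCEWithLogitsLoss rescales each element of the
                # loss, so every sample in the batch contributes in proportion
                # to the weight of its class computed above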

                # Standard update step
                optimizer.zero_grad()
                output = classifier(expression)
                loss = loss_function(output, labels)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                train_correct += utils.count_correct(output, labels)

            # Disable the gradient and switch into model evaluation mode
            with torch.no_grad():
                classifier = classifier.eval()

                tune_loss = 0
                tune_correct = 0
                for tune_batch in tune_loader:
                    expression, labels, ids = tune_batch
                    expression = expression.float().to(device)
                    tune_labels = labels.float().to(device)

                    batch_weights = [class_weights[int(label)] for label in labels]
                    batch_weights = torch.FloatTensor(batch_weights).to(device)

                    loss_function = nn.BCEWithLogitsLoss(weight=batch_weights)

                    tune_output = classifier(expression)
                    loss = loss_function(tune_output, tune_labels)
                    tune_loss += loss.item()
                    tune_correct += utils.count_correct(tune_output, tune_labels)

                # Save the model
                if save_file is not None:
                    if best_tune_loss is None or tune_loss < best_tune_loss:
                        best_tune_loss = tune_loss
                        torch.save(classifier, save_file)

            train_accuracy = train_correct / len(train_loader.dataset)
            tune_accuracy = tune_correct / len(tune_loader.dataset)

            if logger is not None:
                logger.info('Epoch {}'.format(epoch))
                logger.info('Train loss: {}'.format(train_loss / len(train_loader.dataset)))
                logger.info('Tune loss: {}'.format(tune_loss / len(tune_loader.dataset)))
                logger.info('Train accuracy: {}'.format(train_accuracy))
                logger.info('Tune accuracy: {}'.format(tune_accuracy))
                logger.info('Baseline accuracy: {}'.format(baseline))

            results['train_loss'].append(train_loss / len(train_loader.dataset))
            results['tune_loss'].append(tune_loss / len(tune_loader.dataset))
            results['train_acc'].append(train_accuracy)
            results['tune_acc'].append(tune_accuracy)
    except Exception as e:
        # Log the error if a logger was provided
        if logger is not None:
            logger.error(e, exc_info=True)
    finally:
        results = utils.add_study_ids_to_results(results, train_dirs, tune_dirs)
        return results
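
A hedged usage sketch for train_with_erm follows. The toy classifier, tensors and DataLoaders are illustrative assumptions, and the call still relies on the project's own utils module (get_class_weights, count_correct, add_study_ids_to_results) and on the train_dirs/tune_dirs names referenced in the finally block being defined in the surrounding module:

import logging
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# Toy stand-ins: 64 samples of 5000 expression features, binary labels, integer ids
expression = torch.randn(64, 5000)
labels = torch.randint(0, 2, (64,)).float()
ids = torch.arange(64)

train_loader = DataLoader(TensorDataset(expression, labels, ids), batch_size=16)
tune_loader = DataLoader(TensorDataset(expression, labels, ids), batch_size=16)

# Hypothetical classifier producing one logit per sample
classifier = nn.Sequential(nn.Linear(5000, 1), nn.Flatten(start_dim=0))

logger = logging.getLogger('whistl')
results = train_with_erm(classifier, train_loader, tune_loader,
                         num_epochs=2, logger=logger, save_file=None)
print(results['tune_acc'])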