def train(dataroot, classifier_name='cnn'):
    balance = get_balancing_technique()
    K = 10
    fold_prefix = str(K) + 'bal_fold_{}.csv' if balance == 'explicit' else str(K) + 'r_fold_{}.csv'
    class_weight = get_class_weights(dataroot)
    classifier_args, config = get_args(classifier_name, class_weight)
    pre_fingerprint = join(dataroot, 'c_{}'.format(classifier_name))
    fingerprint = join(pre_fingerprint + config, 'K_{}'.format(K))
    print(fingerprint)

    folds_data = load_folds(dataroot, fold_prefix, K)
    for test_index in range(K):
        print('-----------{}----------'.format(test_index))
        X_train = np.concatenate(
            [fold[0] for i, fold in enumerate(folds_data) if i != test_index], axis=0)
        y_train = np.concatenate(
            [fold[1] for i, fold in enumerate(folds_data) if i != test_index], axis=0)

        logdir = join(fingerprint, 'log', '{}'.format(test_index))
        ensure_dir(logdir)
        classifier_args['runs_dir'] = logdir

        clf = get_classifier(classifier_args)
        clf.fit(X_train, y_train)

        modelname = join(classifier_args['runs_dir'], 'model.pkl')
        pickle.dump(clf, open(modelname, 'wb'))
def train(dataroot, classifier_name='cnn'):
    balance = get_balancing_technique()
    K = 10
    fold_prefix = '{}bal_fold_{}.csv' if balance == 'explicit' else '{}r_fold_{}.csv'
    class_weight = get_class_weights(dataroot)
    classifier_args, config = get_args(classifier_name, class_weight)
    pre_fingerprint = join(dataroot, 'c_{}'.format(classifier_name))
    fingerprint = join(pre_fingerprint + config, 'K_{}'.format(K))
    print(fingerprint)

    num_epochs = 40
    for test_index in range(K):
        print('-----------{}----------'.format(test_index))
        dev_indices = [i for i in range(K) if i != test_index]
        val_index = dev_indices[0]
        train_indices = dev_indices[1:]

        val_csv = join(dataroot, fold_prefix.format(K, val_index))
        list_of_train_csvs = [join(dataroot, fold_prefix.format(K, i)) for i in train_indices]

        logdir = join(fingerprint, 'log', '{}'.format(test_index))
        ensure_dir(logdir)
        classifier_args['runs_dir'] = logdir

        clf = get_classifier(classifier_args)
        clf.fit(list_of_train_csvs, val_csv, num_epochs)
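Both `train()` variants above defer the balancing itself to a `get_class_weights(dataroot)` helper that is not shown. A minimal sketch of what such a helper could look like, assuming the labels live in a CSV under `dataroot` with a `Label` column (both the file name and the column name are hypothetical, not taken from the project):

import numpy as np
import pandas as pd
from os.path import join
from sklearn.utils.class_weight import compute_class_weight


def get_class_weights(dataroot, label_csv='labels.csv', label_col='Label'):
    """Balanced inverse-frequency weights keyed by class label (sketch only)."""
    labels = pd.read_csv(join(dataroot, label_csv))[label_col].values
    classes = np.unique(labels)
    weights = compute_class_weight(class_weight='balanced', classes=classes, y=labels)
    return dict(zip(classes, weights))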
def train(self, epochs=500):
    opt = tf.keras.optimizers.Adam()
    loss = tf.keras.losses.categorical_crossentropy
    X, Y = self.get_dataset()
    self.compile(optimizer=opt, loss=loss)
    self.fit(
        X, Y,
        batch_size=64,
        epochs=epochs,
        class_weight=get_class_weights(
            np.argmax(Y, axis=1), smooth_factor=0.05
        )  # weight under-represented classes almost equally
    )
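The `smooth_factor` behaviour assumed by the call above is not shown in the snippet. A minimal sketch of one common implementation (an assumption, not necessarily this project's helper): the majority count is divided by each class count after padding every count by `smooth_factor` times the majority, so rare classes are boosted "almost equally" rather than explosively.

from collections import Counter


def get_class_weights(labels, smooth_factor=0.0):
    """Majority-count / class-count weights with optional smoothing (sketch only)."""
    counts = Counter(labels)
    if smooth_factor > 0:
        pad = max(counts.values()) * smooth_factor
        for cls in counts:
            counts[cls] += pad
    majority = max(counts.values())
    return {cls: float(majority) / count for cls, count in counts.items()}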
def main():
    parser = options.get_parser('Trainer')
    options.add_dataset_args(parser)
    options.add_preprocessing_args(parser)
    options.add_model_args(parser)
    options.add_optimization_args(parser)
    options.add_checkpoint_args(parser)

    args = parser.parse_args()
    print(args)

    args.cuda = not args.disable_cuda and torch.cuda.is_available()

    # checkpoint
    checkpoint_dir = os.path.dirname(args.checkpoint)
    if not os.path.isdir(checkpoint_dir):
        os.mkdir(checkpoint_dir)

    # load dataset
    train_raw_corpus, val_raw_corpus, test_raw_corpus = utils.load_corpus(args.processed_dir)
    assert train_raw_corpus and val_raw_corpus and test_raw_corpus, 'Corpus not found, please run preprocess.py to obtain corpus!'
    train_corpus = [(line.sent, line.type, line.p1, line.p2) for line in train_raw_corpus]
    val_corpus = [(line.sent, line.type, line.p1, line.p2) for line in val_raw_corpus]
    test_corpus = [(line.sent, line.type, line.p1, line.p2) for line in test_raw_corpus]

    start_epoch = 0
    caseless = args.caseless
    batch_size = args.batch_size
    num_epoch = args.num_epoch

    # preprocessing
    sents = [tup[0] for tup in train_corpus + val_corpus]
    feature_map = utils.build_vocab(sents, min_count=args.min_count, caseless=caseless)
    # target_map = {c: i for i, c in enumerate(['null', 'true'])}
    target_map = ddi2013.target_map
    train_features, train_targets = utils.build_corpus(train_corpus, feature_map, target_map, caseless)
    val_features, val_targets = utils.build_corpus(val_corpus, feature_map, target_map, caseless)
    test_features, test_targets = utils.build_corpus(test_corpus, feature_map, target_map, caseless)

    class_weights = torch.Tensor(utils.get_class_weights(train_targets)) if args.class_weight else None
    train_loader = utils.construct_bucket_dataloader(train_features, train_targets, feature_map['PAD'],
                                                     batch_size, args.position_bound, is_train=True)
    val_loader = utils.construct_bucket_dataloader(val_features, val_targets, feature_map['PAD'],
                                                   batch_size, args.position_bound, is_train=False)
    test_loader = utils.construct_bucket_dataloader(test_features, test_targets, feature_map['PAD'],
                                                    batch_size, args.position_bound, is_train=False)
    print('Preprocessing done! Vocab size: {}'.format(len(feature_map)))

    # build model
    vocab_size = len(feature_map)
    tagset_size = len(target_map)
    model = utils.build_model(args, vocab_size, tagset_size)

    # loss
    criterion = utils.build_loss(args, class_weights=class_weights)

    # load states
    if os.path.isfile(args.load_checkpoint):
        print('Loading checkpoint file from {}...'.format(args.load_checkpoint))
        checkpoint_file = torch.load(args.load_checkpoint)
        start_epoch = checkpoint_file['epoch'] + 1
        model.load_state_dict(checkpoint_file['state_dict'])
        # optimizer.load_state_dict(checkpoint_file['optimizer'])
    else:
        print('no checkpoint file found: {}, train from scratch...'.format(args.load_checkpoint))
        if not args.rand_embedding:
            pretrained_word_embedding, in_doc_word_indices = utils.load_word_embedding(
                args.emb_file, feature_map, args.embedding_dim)
            print(pretrained_word_embedding.size())
            print(vocab_size)
            model.load_pretrained_embedding(pretrained_word_embedding)
            if args.disable_fine_tune:
                model.update_part_embedding(in_doc_word_indices)  # update only non-pretrained words
        model.rand_init(init_embedding=args.rand_embedding)

    # trainer
    trainer = SeqTrainer(args, model, criterion)

    if os.path.isfile(args.load_checkpoint):
        dev_prec, dev_rec, dev_f1, _ = evaluate(trainer, val_loader, target_map, cuda=args.cuda)
        test_prec, test_rec, test_f1, _ = evaluate(trainer, test_loader, target_map, cuda=args.cuda)
        print('checkpoint dev_prec: {:.4f}, dev_rec: {:.4f}, dev_f1: {:.4f}, '
              'test_prec: {:.4f}, test_rec: {:.4f}, test_f1: {:.4f}'.format(
                  dev_prec, dev_rec, dev_f1, test_prec, test_rec, test_f1))

    track_list = []
    best_f1 = float('-inf')
    patience_count = 0
    start_time = time.time()

    for epoch in range(start_epoch, num_epoch):
        epoch_loss = train(train_loader, trainer, epoch)

        # update lr
        trainer.lr_step()

        dev_prec, dev_rec, dev_f1, dev_loss = evaluate(trainer, val_loader, target_map, cuda=args.cuda)
        if dev_f1 >= best_f1:
            patience_count = 0
            best_f1 = dev_f1
            test_prec, test_rec, test_f1, _ = evaluate(trainer, test_loader, target_map, cuda=args.cuda)
            track_list.append({'epoch': epoch, 'loss': epoch_loss,
                               'dev_prec': dev_prec, 'dev_rec': dev_rec, 'dev_f1': dev_f1,
                               'dev_loss': dev_loss, 'test_prec': test_prec,
                               'test_rec': test_rec, 'test_f1': test_f1})
            print('epoch: {}, loss: {:.4f}, dev_f1: {:.4f}, dev_loss: {:.4f}, test_f1: {:.4f}\tsaving...'.format(
                epoch, epoch_loss, dev_f1, dev_loss, test_f1))
            try:
                utils.save_checkpoint({
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'optimizer': trainer.optimizer.state_dict(),
                    'f_map': feature_map,
                    't_map': target_map,
                }, {'track_list': track_list,
                    'args': vars(args)
                    }, args.checkpoint + '_lstm')
            except Exception as inst:
                print(inst)
        else:
            patience_count += 1
            track_list.append({'epoch': epoch, 'loss': epoch_loss,
                               'dev_prec': dev_prec, 'dev_rec': dev_rec, 'dev_f1': dev_f1,
                               'dev_loss': dev_loss})
            print('epoch: {}, loss: {:.4f}, dev_f1: {:.4f}, dev_loss: {:.4f}'.format(
                epoch, epoch_loss, dev_f1, dev_loss))

        print('epoch: {} in {} take: {} s'.format(epoch, args.num_epoch, time.time() - start_time))
        if patience_count >= args.patience:
            break
def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("-t", "--train", required=True, help="Train type")
    args = vars(ap.parse_args())

    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    TRAIN = args["train"]
    if TRAIN not in ["SEG", "CLASS"]:
        raise ValueError(f"{TRAIN} not defined")

    # base config
    base_dir = cp["BASE"].get("base_dir")

    # train config
    output_dir = cp[TRAIN].get("output_dir")
    base_model_name = cp[TRAIN].get("model_name")
    use_base_model_weights = cp[TRAIN].getboolean("use_base_model_weights")
    use_trained_model_weights = cp[TRAIN].getboolean("use_trained_model_weights")
    use_best_weights = cp[TRAIN].getboolean("use_best_weights")
    output_weights_name = cp[TRAIN].get("output_weights_name")
    epochs = cp[TRAIN].getint("epochs")
    batch_size = cp[TRAIN].getint("batch_size")
    initial_learning_rate = cp[TRAIN].getfloat("initial_learning_rate")
    generator_workers = cp[TRAIN].getint("generator_workers")
    image_dimension = cp[TRAIN].getint("image_dimension")
    train_steps = cp[TRAIN].get("train_steps")
    validation_steps = cp[TRAIN].get("validation_steps")
    patience_reduce_lr = cp[TRAIN].getint("patience_reduce_lr")
    patience_early_stop = cp[TRAIN].getint("patience_early_stop")
    min_lr = cp[TRAIN].getfloat("min_lr")
    dataset_csv_dir = cp[TRAIN].get("dataset_csv_dir")
    show_model_summary = cp[TRAIN].getboolean("show_model_summary")

    if TRAIN == "CLASS":
        positive_weights_multiply = cp[TRAIN].getfloat("positive_weights_multiply")
        class_names = cp[TRAIN].get("class_names").split(",")
        mask_folder = cp[TRAIN].get("mask_folder")
        patch_size = cp[TRAIN].getint("patch_size")
        N = cp["TEST"].getint("N")
    else:
        num_classes = cp[TRAIN].getint("num_classes")
        class_names = None

    current_epoch = 0

    # check output_dir, create it if not exists
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # if previously trained weights are used, never re-split
    if use_trained_model_weights:
        # resuming mode
        print("** use trained model weights **")
        # load training status for resuming
        training_stats_file = os.path.join(output_dir, ".training_stats.json")
        if os.path.isfile(training_stats_file):
            training_stats = json.load(open(training_stats_file))
            initial_learning_rate = training_stats['lr']
            current_epoch = training_stats['epoch']
        else:
            training_stats = {}
    else:
        # start over
        training_stats = {}

    print(f"backup config file to {output_dir}")
    shutil.copy(config_file, os.path.join(output_dir, os.path.split(config_file)[1]))

    datasets = ["train", "val", "test"]
    for dataset in datasets:
        shutil.copy(os.path.join(dataset_csv_dir, f"{dataset}.csv"), output_dir)

    # get train/dev sample counts
    train_counts, train_pos_counts = get_class_counts(os.path.join(output_dir, "train.csv"), class_names)
    val_counts, _ = get_class_counts(os.path.join(output_dir, "val.csv"), class_names)

    # compute steps
    if train_steps == "auto":
        train_steps = int(train_counts / batch_size)
    else:
        try:
            train_steps = int(train_steps)
        except ValueError:
            raise ValueError(f"train_steps: {train_steps} is invalid, please use 'auto' or integer.")
    print(f"** train_steps: {train_steps} **")

    if validation_steps == "auto":
        validation_steps = int(val_counts / batch_size)
    else:
        try:
            validation_steps = int(validation_steps)
        except ValueError:
            raise ValueError(f"validation_steps: {validation_steps} is invalid, please use 'auto' or integer.")
    print(f"** validation_steps: {validation_steps} **")

    class_weights = None
    if TRAIN == "CLASS":
        # compute class weights
        print("** compute class weights from training data **")
        class_weights = get_class_weights(
            train_counts,
            train_pos_counts,
            multiply=positive_weights_multiply,
        )
        print("** class_weights **")
        print(class_weights)

    print("** load model **")
    if use_trained_model_weights:
        if use_best_weights:
            model_weights_file = os.path.join(output_dir, f"best_{output_weights_name}")
        else:
            model_weights_file = os.path.join(output_dir, output_weights_name)
    else:
        model_weights_file = None

    print("** compile model **")
    METRICS = [
        TruePositives(name='tp'),
        FalsePositives(name='fp'),
        TrueNegatives(name='tn'),
        FalseNegatives(name='fn'),
        Accuracy(name='accuracy'),
        Precision(name='precision'),
        Recall(name='recall'),
        AUC(name='auc'),
    ]
    optimizer = Adam(lr=initial_learning_rate)

    if TRAIN == "CLASS":
        model = Resnet18(input_shape=(N, patch_size, patch_size, 3),
                         weights_path=model_weights_file,
                         N=N,
                         nb_classes=len(class_names))
        model.compile(optimizer=optimizer,
                      loss="categorical_crossentropy",
                      metrics=METRICS)
        checkpoint_monitor = 'val_loss'
    else:
        model = Densenet103(nb_classes=num_classes - 1,
                            weights_path=model_weights_file,
                            input_shape=(image_dimension, image_dimension, 1))
        model.compile(optimizer=optimizer, loss="binary_crossentropy")
        checkpoint_monitor = 'val_loss'

    if show_model_summary:
        print(model.summary())

    print("** create image generators **")
    if TRAIN == "CLASS":
        train_sequence = classification_gen(
            dataset_csv_file=os.path.join(output_dir, "train.csv"),
            class_names=class_names,
            N=N,
            batch_size=batch_size,
            normalization_func=imageNet_preprocessing,
            target_size=(image_dimension, image_dimension),
            patch_size=(patch_size, patch_size),
            augmenter=augmenter,
            base_dir=base_dir,
            mask_folder=mask_folder,
            steps=train_steps,
        )
        validation_sequence = classification_gen(
            dataset_csv_file=os.path.join(output_dir, "val.csv"),
            class_names=class_names,
            N=N,
            batch_size=batch_size,
            normalization_func=imageNet_preprocessing,
            target_size=(image_dimension, image_dimension),
            patch_size=(patch_size, patch_size),
            augmenter=augmenter,
            base_dir=base_dir,
            steps=validation_steps,
            mask_folder=mask_folder,
            shuffle_on_epoch_end=False,
        )
    else:
        train_sequence = segmentation_gen(
            dataset_csv_file=os.path.join(output_dir, "train.csv"),
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            base_dir=base_dir,
            steps=train_steps,
        )
        validation_sequence = segmentation_gen(
            dataset_csv_file=os.path.join(output_dir, "val.csv"),
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            base_dir=base_dir,
            steps=validation_steps,
            shuffle_on_epoch_end=False,
        )

    output_weights_path = os.path.join(output_dir, output_weights_name)
    print(f"** set output weights path to: {output_weights_path} **")

    checkpoint = ModelCheckpoint(output_weights_path,
                                 save_weights_only=True,
                                 save_best_only=True,
                                 verbose=1,
                                 monitor=checkpoint_monitor)

    if TRAIN == "CLASS":
        performance_callback = MultipleClassAUROC(sequence=validation_sequence,
                                                  class_names=class_names,
                                                  weights_path=output_weights_path,
                                                  stats=training_stats,
                                                  workers=generator_workers)
    else:
        performance_callback = Jaccard(sequence=validation_sequence,
                                       weights_path=output_weights_path,
                                       stats=training_stats,
                                       workers=generator_workers)

    callbacks = [
        checkpoint,
        performance_callback,
        TensorBoard(log_dir=os.path.join(output_dir, "logs"), batch_size=batch_size),
        ReduceLROnPlateau(monitor='loss',
                          factor=0.1,
                          patience=patience_reduce_lr,
                          verbose=1,
                          mode="min",
                          min_lr=min_lr),
        EarlyStopping(
            monitor="val_loss",
            min_delta=0,
            patience=patience_early_stop,
            verbose=0,
            mode="min",
            baseline=None,
            restore_best_weights=False,
        ),
    ]

    print("** start training **")
    history = model.fit_generator(
        generator=train_sequence,
        initial_epoch=current_epoch,
        epochs=epochs,
        class_weight=class_weights,
        validation_data=validation_sequence,
        callbacks=callbacks,
        workers=generator_workers,
        shuffle=False,
    )

    # dump history
    print("** dump history **")
    with open(os.path.join(output_dir, "history.pkl"), "wb") as f:
        pickle.dump({"history": history.history}, f)

    print("** done! **")
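The `get_class_weights(train_counts, train_pos_counts, multiply=...)` call above implies a per-class positive/negative weighting for a multi-label problem. A hedged sketch of one such scheme (an assumption about the helper, not its actual source): each class gets a `{0: w_neg, 1: w_pos}` pair, with the effective positive weight boosted by the configurable multiplier.

import numpy as np


def get_class_weights(total_counts, class_positive_counts, multiply):
    """Per-class {0: w_neg, 1: w_pos} dictionaries for a multi-label problem (sketch only)."""
    def get_single_class_weight(pos_counts, total_counts):
        denominator = (total_counts - pos_counts) * multiply + pos_counts
        return {
            0: pos_counts / denominator,
            1: (denominator - pos_counts) / denominator,
        }

    class_names = list(class_positive_counts.keys())   # assumes a {class_name: positive_count} dict
    label_counts = np.array(list(class_positive_counts.values()))
    class_weights = []
    for i, _ in enumerate(class_names):
        class_weights.append(get_single_class_weight(label_counts[i], total_counts))
    return class_weights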
# (excerpt: this first loop sits inside one branch of an if/else that selects
#  which blocks to fine-tune; the condition itself is not shown here)
    for layer in trained_net.layers:
        if 'l1' in layer.name:
            layer.trainable = True
        if 'l2' in layer.name:
            layer.trainable = False
        if 'l3' in layer.name:
            layer.trainable = True
else:
    for layer in trained_net.layers:
        if 'l1' in layer.name:
            layer.trainable = False
        if 'l2' in layer.name:
            layer.trainable = True
        if 'l3' in layer.name:
            layer.trainable = True

class_weights = get_class_weights(y_t_c)
trained_net.compile(optimizer=tf.keras.optimizers.Adam(LR_TARGET),
                    loss=losses,
                    loss_weights=loss_weights,
                    metrics=['accuracy'])
trained_net.fit(x_t_c, [y_t_c, y_t_c, y_t_c],
                batch_size=64,
                epochs=1,
                verbose=0,
                class_weight=class_weights)

# evaluation
res_t = trained_net.evaluate(X_T_VAL, [Y_T_VAL, Y_T_VAL, Y_T_VAL], verbose=0)
print('\nStep:', step, 'Acc on target domain test data:', res_t[-1],
      'Len y_t_p:', len(y_t_p), '\n')
img_file, label_file, h_enc, w_enc, h_dec, w_dec)  # (truncated: tail of a mapping-helper definition whose first part is not shown)

# create single head datasets
train_single_ds = filelist_train.shuffle(n_train).map(map_single).cache().batch(batch_size).repeat()
val_single_ds = filelist_val.map(map_single).cache().batch(batch_size).repeat()
test_single_ds = filelist_test.map(map_single).cache().batch(batch_size).repeat()

# create double head datasets
train_double_ds = filelist_train.shuffle(n_train).map(map_double).cache().batch(batch_size).repeat()
val_double_ds = filelist_val.map(map_double).cache().batch(batch_size).repeat()
test_double_ds = filelist_test.map(map_double).cache().batch(batch_size).repeat()

# get class weights
label_filelist = tf.data.Dataset.list_files(label_pattern, shuffle=False)
label_ds = label_filelist.map(lambda x: map_label(x, h_dec, w_dec))
class_weights = get_class_weights(label_ds).tolist()


def train_enet():
    Enet = EnetModel(C=12, MultiObjective=True, l2=1e-3)

    # Train Encoder
    for layer in Enet.layers[-6:]:
        layer.trainable = False

    n_epochs = 60
    adam_optimizer = tf.keras.optimizers.Adam(learning_rate=5e-4)
    Enet.compile(optimizer=adam_optimizer,
                 loss=['sparse_categorical_crossentropy', 'sparse_categorical_crossentropy'],
                 metrics=['accuracy', 'accuracy'],
                 loss_weights=[1.0, 0.0])
def cross_validate(model_params, train_params, feature_type, naming):
    # Get trail list
    cross_val_splits = utils.get_cross_val_splits()

    # Cross-Validation Result
    result = []

    # Cross Validation
    for split_idx, split in enumerate(cross_val_splits):
        feature_dir = os.path.join(raw_feature_dir, split['name'])
        test_trail_list = split['test']
        train_trail_list = split['train']

        split_naming = naming + '_split_{}'.format(split_idx + 1)
        trained_model_file = utils.get_tcn_model_file(split_naming)
        log_dir = utils.get_tcn_log_sub_dir(split_naming)

        # Model
        model = EncoderDecoderNet(**model_params)
        model = model.cuda()
        print(model)

        n_layers = len(model_params['encoder_params']['layer_sizes'])

        # Dataset
        train_dataset = RawFeatureDataset(dataset_name,
                                          feature_dir,
                                          train_trail_list,
                                          feature_type=feature_type,
                                          encode_level=n_layers,
                                          sample_rate=sample_rate,
                                          sample_aug=False,
                                          normalization=[None, None])
        test_norm = [train_dataset.get_means(), train_dataset.get_stds()]
        test_dataset = RawFeatureDataset(dataset_name,
                                         feature_dir,
                                         test_trail_list,
                                         feature_type=feature_type,
                                         encode_level=n_layers,
                                         sample_rate=sample_rate,
                                         sample_aug=False,
                                         normalization=test_norm)

        loss_weights = utils.get_class_weights(train_dataset)
        # loss_weights = None

        if train_params is not None:
            train_model(model, train_dataset, test_dataset,
                        **train_params,
                        loss_weights=loss_weights,
                        trained_model_file=trained_model_file,
                        log_dir=log_dir)
                        # log_dir=None)

        model.load_state_dict(torch.load(trained_model_file))
        acc, edit, _, f_scores = test_model(model, test_dataset,
                                            loss_weights=loss_weights,
                                            plot_naming=split_naming)

        result.append([acc, edit, f_scores[0], f_scores[1], f_scores[2], f_scores[3]])

    result = np.array(result)
    return result
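Since `cross_validate` returns one row per split with columns `[acc, edit, f1_a, f1_b, f1_c, f1_d]`, a typical way to summarise it is the mean and standard deviation across splits. The argument values below are placeholders, not names from the project:

import numpy as np

result = cross_validate(model_params, train_params,
                        feature_type='sensor',    # placeholder value
                        naming='tcn_baseline')    # placeholder value
mean, std = result.mean(axis=0), result.std(axis=0)
for name, m, s in zip(['acc', 'edit', 'f1_a', 'f1_b', 'f1_c', 'f1_d'], mean, std):
    print('{}: {:.2f} +/- {:.2f}'.format(name, m, s))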
def train(FLAGS):
    # -------------------- Defining the hyperparameters --------------------
    batch_size = FLAGS.batch_size
    epochs = FLAGS.epochs
    training_type = FLAGS.training_type
    learning_rate = FLAGS.learning_rate
    save_every = FLAGS.save_every
    num_classes = FLAGS.num_classes
    weight_decay = FLAGS.weight_decay
    img_pattern = FLAGS.img_pattern
    label_pattern = FLAGS.label_pattern
    img_pattern_val = FLAGS.img_pattern_val
    label_pattern_val = FLAGS.label_pattern_val
    tb_logs = FLAGS.tensorboard_logs
    img_width = FLAGS.img_width
    img_height = FLAGS.img_height
    save_model = FLAGS.save_model
    cache_train = FLAGS.cache_train
    cache_val = FLAGS.cache_val
    cache_test = FLAGS.cache_test
    print('[INFO]Defined all the hyperparameters successfully!')

    # setup tensorboard
    log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

    # encoder and decoder dimensions
    h_enc = img_height // 8
    w_enc = img_width // 8
    h_dec = img_height
    w_dec = img_width

    # create (img, label) string tensor lists
    filelist_train = preprocess_img_label(img_pattern, label_pattern)
    filelist_val = preprocess_img_label(img_pattern_val, label_pattern_val)

    # training dataset size
    n_train = tf.data.experimental.cardinality(filelist_train).numpy()
    n_val = tf.data.experimental.cardinality(filelist_val).numpy()

    # define mapping functions for single and double head nets
    map_single = partial(map_singlehead, h_img=h_dec, w_img=w_dec)
    map_double = partial(map_doublehead, h_enc=h_enc, w_enc=w_enc, h_dec=h_dec, w_dec=w_dec)

    # create dataset
    if training_type == 0 or training_type == 1:
        map_fn = map_double
    else:
        map_fn = map_single

    # final training and validation datasets
    train_ds = filelist_train.shuffle(n_train).map(map_fn).cache(cache_train).batch(batch_size).repeat()
    val_ds = filelist_val.map(map_fn).cache(cache_val).batch(batch_size).repeat()

    # -------------------- get the class weights --------------------
    print('[INFO]Starting to define the class weights...')
    label_filelist = tf.data.Dataset.list_files(label_pattern, shuffle=False)
    label_ds = label_filelist.map(lambda x: process_label(x, h_dec, w_dec))
    class_weights = get_class_weights(label_ds).tolist()
    print('[INFO]Fetched all class weights successfully!')

    # -------------------- instantiate model --------------------
    if training_type == 0 or training_type == 1:
        Enet = EnetModel(C=num_classes, MultiObjective=True, l2=weight_decay)
    else:
        Enet = EnetModel(C=num_classes, l2=weight_decay)
    print('[INFO]Model Instantiated!')

    # -------------------- start training --------------------
    adam_optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    # -- two stages training --
    if training_type == 0:
        # freeze decoder layers
        for layer in Enet.layers[-6:]:
            layer.trainable = False

        # compile encoder: only the first objective matters
        Enet.compile(optimizer=adam_optimizer,
                     loss=['sparse_categorical_crossentropy', 'sparse_categorical_crossentropy'],
                     metrics=['accuracy', 'accuracy'],
                     loss_weights=[1.0, 0.0])

        # train encoder
        Enet.fit(x=train_ds,
                 epochs=epochs,
                 steps_per_epoch=n_train // batch_size,
                 validation_data=val_ds,
                 validation_steps=n_val // batch_size // 5,
                 class_weight=class_weights,
                 callbacks=[tensorboard_callback])

        # freeze encoder and unfreeze decoder
        for layer in Enet.layers[-6:]:
            layer.trainable = True
        for layer in Enet.layers[:-6]:
            layer.trainable = False

        # compile model: only the second objective matters
        Enet.compile(optimizer=adam_optimizer,
                     loss=['sparse_categorical_crossentropy', 'sparse_categorical_crossentropy'],
                     metrics=['accuracy', 'accuracy'],
                     loss_weights=[0.0, 1.0])

        # train decoder
        enet_hist = Enet.fit(x=train_ds,
                             epochs=epochs,
                             steps_per_epoch=n_train // batch_size,
                             validation_data=val_ds,
                             validation_steps=n_val // batch_size // 5,
                             class_weight=class_weights,
                             callbacks=[tensorboard_callback])

    # -- simultaneous double objective training --
    elif training_type == 1:
        # compile model
        Enet.compile(optimizer=adam_optimizer,
                     loss=['sparse_categorical_crossentropy', 'sparse_categorical_crossentropy'],
                     metrics=['accuracy', 'accuracy'],
                     loss_weights=[0.5, 0.5])

        # fit model
        print('train: ', n_train, 'batch: ', batch_size)
        enet_hist = Enet.fit(x=train_ds,
                             epochs=epochs,
                             steps_per_epoch=n_train // batch_size,
                             validation_data=val_ds,
                             validation_steps=n_val // batch_size // 5,
                             class_weight=class_weights,
                             callbacks=[tensorboard_callback])

    # -- end to end training --
    else:
        # compile model
        Enet.compile(optimizer=adam_optimizer,
                     loss=['sparse_categorical_crossentropy'],
                     metrics=['accuracy'])

        enet_hist = Enet.fit(x=train_ds,
                             epochs=epochs,
                             steps_per_epoch=n_train // batch_size,
                             validation_data=val_ds,
                             validation_steps=n_val // batch_size // 5,
                             class_weight=class_weights,
                             callbacks=[tensorboard_callback])

    # -------------------- save model --------------------
    Enet.save_weights(save_model)
def main():
    parser = options.get_parser('Generator')
    options.add_dataset_args(parser)
    options.add_preprocessing_args(parser)
    options.add_model_args(parser)
    options.add_optimization_args(parser)
    options.add_checkpoint_args(parser)
    options.add_generation_args(parser)

    args = parser.parse_args()

    model_path = args.load_checkpoint + '.model'
    args_path = args.load_checkpoint + '.json'
    with open(args_path, 'r') as f:
        _args = json.load(f)['args']
    [setattr(args, k, v) for k, v in _args.items()]

    args.cuda = not args.disable_cuda and torch.cuda.is_available()
    print(args)

    if args.cuda:
        torch.backends.cudnn.benchmark = True

    # increase recursion depth
    sys.setrecursionlimit(10000)

    # load dataset
    train_raw_corpus, val_raw_corpus, test_raw_corpus = utils.load_corpus(args.processed_dir, ddi=False)
    assert train_raw_corpus and val_raw_corpus and test_raw_corpus, 'Corpus not found, please run preprocess.py to obtain corpus!'
    train_corpus = [(line.sent, line.type, line.p1, line.p2) for line in train_raw_corpus]
    val_corpus = [(line.sent, line.type, line.p1, line.p2) for line in val_raw_corpus]

    caseless = args.caseless
    batch_size = args.batch_size

    # build vocab
    sents = [tup[0] for tup in train_corpus + val_corpus]
    feature_map = utils.build_vocab(sents, min_count=args.min_count, caseless=caseless)
    target_map = ddi2013.target_map

    # get class weights
    _, train_targets = utils.build_corpus(train_corpus, feature_map, target_map, caseless)
    class_weights = torch.Tensor(utils.get_class_weights(train_targets)) if args.class_weight else None

    # load datasets
    _, _, test_loader = utils.load_datasets(args.processed_dir, args.train_size, args, feature_map, dataloader=True)

    # build model
    vocab_size = len(feature_map)
    tagset_size = len(target_map)
    model = RelationTreeModel(vocab_size, tagset_size, args)

    # loss
    criterion = utils.build_loss(args, class_weights=class_weights)

    # load states
    assert os.path.isfile(model_path), "Checkpoint not found!"
    print('Loading checkpoint file from {}...'.format(model_path))
    checkpoint_file = torch.load(model_path)
    model.load_state_dict(checkpoint_file['state_dict'])

    # trainer
    trainer = TreeTrainer(args, model, criterion)

    # predict
    y_true, y_pred, treelists, f1_by_len = predict(trainer, test_loader, target_map, cuda=args.cuda)

    # assign words to roots
    for tup, treelist in zip(test_raw_corpus, treelists):
        for t in treelist:
            t.idx = tup.sent[t.idx] if t.idx < len(tup.sent) else None

    # prediction
    print('Predicting...')
    # write result: sent_id|e1|e2|ddi|type
    with open(args.predict_file, 'w') as f:
        for tup, pred in zip(test_raw_corpus, y_pred):
            ddi = 0 if pred == 'null' else 1
            f.write('|'.join([tup.sent_id, tup.e1, tup.e2, str(ddi), pred]))
            f.write('\n')

    def print_info(f, tup, target, pred, root):
        f.write('{}\n'.format(' '.join(tup.sent)))
        f.write('{}\n'.format(' | '.join([tup.sent_id, tup.e1, tup.e2, target, pred])))
        f.write('{}\n\n'.format(root))

    # error analysis
    print('Analyzing...')
    with open(args.error_file, 'w') as f:
        f.write(' | '.join(['sent_id', 'e1', 'e2', 'target', 'pred']))
        f.write('\n')
        for tup, target, pred, treelist in zip(test_raw_corpus, y_true, y_pred, treelists):
            if target != pred:
                print_info(f, tup, target, pred, treelist[-1])

    # attention
    print('Writing attention scores...')
    with open(args.correct_file, 'w') as f:
        f.write(' | '.join(['target', 'sent', 'att_weight']))
        f.write('\n')
        for tup, target, pred, treelist in zip(test_raw_corpus, y_true, y_pred, treelists):
            if target == pred and target != 'null':
                print_info(f, tup, target, pred, treelist[-1])
def main():
    parser = options.get_parser('Trainer')
    options.add_dataset_args(parser)
    options.add_preprocessing_args(parser)
    options.add_model_args(parser)
    options.add_optimization_args(parser)
    options.add_checkpoint_args(parser)

    args = parser.parse_args()
    print(args)

    args.cuda = not args.disable_cuda and torch.cuda.is_available()
    torch.manual_seed(5)

    if args.cuda:
        torch.backends.cudnn.benchmark = True

    # increase recursion depth
    sys.setrecursionlimit(10000)

    # checkpoint
    checkpoint_dir = os.path.dirname(args.checkpoint)
    if not os.path.isdir(checkpoint_dir):
        os.mkdir(checkpoint_dir)

    # load dataset
    train_raw_corpus, val_raw_corpus, test_raw_corpus = utils.load_corpus(args.processed_dir, ddi=False)
    assert train_raw_corpus and val_raw_corpus and test_raw_corpus, 'Corpus not found, please run preprocess.py to obtain corpus!'
    train_corpus = [(line.sent, line.type, line.p1, line.p2) for line in train_raw_corpus]
    val_corpus = [(line.sent, line.type, line.p1, line.p2) for line in val_raw_corpus]

    start_epoch = 0
    caseless = args.caseless
    batch_size = args.batch_size
    num_epoch = args.num_epoch

    # build vocab
    sents = [tup[0] for tup in train_corpus + val_corpus]
    feature_map = utils.build_vocab(sents, min_count=args.min_count, caseless=caseless)
    target_map = ddi2013.target_map

    # get class weights
    _, train_targets = utils.build_corpus(train_corpus, feature_map, target_map, caseless)
    class_weights = torch.Tensor(utils.get_class_weights(train_targets)) if args.class_weight else None

    train_loader, val_loader, test_loader = utils.load_datasets(args.processed_dir, args.train_size, args,
                                                                feature_map, dataloader=True)

    # build model
    vocab_size = len(feature_map)
    tagset_size = len(target_map)
    model = RelationTreeModel(vocab_size, tagset_size, args)

    # loss
    criterion = utils.build_loss(args, class_weights=class_weights)

    # load states
    if os.path.isfile(args.load_checkpoint):
        print('Loading checkpoint file from {}...'.format(args.load_checkpoint))
        checkpoint_file = torch.load(args.load_checkpoint)
        start_epoch = checkpoint_file['epoch'] + 1
        model.load_state_dict(checkpoint_file['state_dict'])
        # optimizer.load_state_dict(checkpoint_file['optimizer'])
    else:
        print('no checkpoint file found: {}, train from scratch...'.format(args.load_checkpoint))
        if not args.rand_embedding:
            pretrained_word_embedding, in_doc_word_indices = utils.load_word_embedding(
                args.emb_file, feature_map, args.embedding_dim)
            print(pretrained_word_embedding.size())
            print(vocab_size)
            model.load_pretrained_embedding(pretrained_word_embedding)
            if args.disable_fine_tune:
                model.update_part_embedding(in_doc_word_indices)  # update only non-pretrained words
        model.rand_init(init_embedding=args.rand_embedding)

    # trainer
    trainer = TreeTrainer(args, model, criterion)

    best_f1 = float('-inf')
    if os.path.isfile(args.load_checkpoint):
        dev_prec, dev_rec, dev_f1, _ = evaluate(trainer, val_loader, target_map, cuda=args.cuda)
        test_prec, test_rec, test_f1, _ = evaluate(trainer, test_loader, target_map, cuda=args.cuda)
        best_f1 = dev_f1
        print('checkpoint dev_prec: {:.4f}, dev_rec: {:.4f}, dev_f1: {:.4f}, '
              'test_prec: {:.4f}, test_rec: {:.4f}, test_f1: {:.4f}'.format(
                  dev_prec, dev_rec, dev_f1, test_prec, test_rec, test_f1))

    track_list = []
    patience_count = 0
    start_time = time.time()
    q = mp.Queue()

    # set start method
    try:
        mp.set_start_method('spawn')
    except RuntimeError:
        pass

    for epoch in range(start_epoch, num_epoch):
        epoch_loss = train(train_loader, trainer, epoch)
        # processes = []
        # for rank in range(args.num_processes):
        #     p = mp.Process(target=train, args=(train_loader, trainer, epoch, q))
        #     p.start()
        #     processes.append(p)
        # for p in processes:
        #     p.join()
        #
        # epoch_loss = q.get()

        # update lr
        trainer.lr_step(epoch_loss)

        dev_prec, dev_rec, dev_f1, dev_loss = evaluate(trainer, val_loader, target_map, cuda=args.cuda)
        test_prec, test_rec, test_f1, _ = evaluate(trainer, test_loader, target_map, cuda=args.cuda)
        if dev_f1 >= best_f1:
            patience_count = 0
            best_f1 = dev_f1
            track_list.append({'epoch': epoch, 'loss': epoch_loss,
                               'dev_prec': dev_prec, 'dev_rec': dev_rec, 'dev_f1': dev_f1,
                               'dev_loss': dev_loss, 'test_prec': test_prec,
                               'test_rec': test_rec, 'test_f1': test_f1})
            print('epoch: {}, loss: {:.4f}, dev_f1: {:.4f}, dev_loss: {:.4f}, test_f1: {:.4f}\tsaving...'.format(
                epoch, epoch_loss, dev_f1, dev_loss, test_f1))
            try:
                utils.save_checkpoint({
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'optimizer': trainer.optimizer.state_dict(),
                    'f_map': feature_map,
                    't_map': target_map,
                }, {'track_list': track_list,
                    'args': vars(args)
                    }, args.checkpoint)
            except Exception as inst:
                print(inst)
        else:
            patience_count += 1
            track_list.append({'epoch': epoch, 'loss': epoch_loss,
                               'dev_prec': dev_prec, 'dev_rec': dev_rec, 'dev_f1': dev_f1,
                               'dev_loss': dev_loss})
            print('epoch: {}, loss: {:.4f}, dev_f1: {:.4f}, dev_loss: {:.4f}, test_f1: {:.4f}'.format(
                epoch, epoch_loss, dev_f1, dev_loss, test_f1))

        print('epoch: {} in {} take: {} s'.format(epoch, args.num_epoch, time.time() - start_time))
        if patience_count >= args.patience:
            break
FLAGS.setDefaults()
model_factory = ModelFactory()

# load training and test set file names
train_generator = get_generator(FLAGS.train_csv, FLAGS, augmenter)
test_generator = get_generator(FLAGS.test_csv, FLAGS)

class_weights = None
if FLAGS.use_class_balancing:
    if FLAGS.multi_label_classification:
        class_weights = get_multilabel_class_weights(train_generator.y, FLAGS.positive_weights_multiply)
    else:
        class_weights = get_class_weights(train_generator.get_class_counts(),
                                          FLAGS.positive_weights_multiply)

# load classifier from saved weights or get a new one
training_stats = {}
learning_rate = FLAGS.learning_rate
if FLAGS.load_model_path != '' and FLAGS.load_model_path is not None:
    visual_model = load_model(FLAGS.load_model_path)
    if FLAGS.show_model_summary:
        visual_model.summary()

    training_stats_file = os.path.join(FLAGS.save_model_path, ".training_stats.json")
    if os.path.isfile(training_stats_file):
        training_stats = json.load(open(training_stats_file))
        learning_rate = training_stats['lr']
        print("Will continue from learning rate: {}".format(learning_rate))
if args.name != '':
    config['model']['name'] = args.name
if args.max_iter > 0:
    config['train']['max_iter'] = args.max_iter
if args.batch_size > 0:
    config['train']['batch_size'] = args.batch_size

num_classes = int(config['model']['num_classes'])
batch_size = int(config['train']['batch_size'])

train_data = VFDataset([1, 3, 5, 8])
valid_data = VFDataset([4, 6])
test_data = VFDataset([2, 7])

class_weights = get_class_weights(num_classes=num_classes, train_data=train_data, valid_data=valid_data)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

device = torch.device('cuda')
Model = model_dict[args.model]
model = Model(n_classes=num_classes).to(device)
model = nn.DataParallel(model)

trainer = Trainer(model, config, train_loader, valid_loader, class_weights, device)
trainer.train(vis_data=test_data)
accuracy, precisions, recalls, IoUs = trainer.eval(test_loader)
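The snippet above hands `class_weights` to its `Trainer` without showing how they are consumed. A minimal sketch of the usual pattern inside such a trainer, assuming `class_weights` is an iterable of per-class floats ordered by class index (the batch variables below are placeholders):

import torch
import torch.nn as nn

# assumption: class_weights is ordered by class index
weight = torch.as_tensor(list(class_weights), dtype=torch.float32, device=device)
criterion = nn.CrossEntropyLoss(weight=weight)
loss = criterion(model(images), targets)  # images/targets: one batch from train_loader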
def classification(variant='1_hour', structureless=False, batch_size=8, x_size=12, h_size=8,
                   emo_size=8, top_sizes=(16, 16), p_drop=0.1, verbose=1, bi=True, deep=True,
                   lr_tree=0.05, lr_top=0.01, decay_tree=0.003, decay_top=0.006, epochs=60,
                   cuda_id=-1):
    data_dir = '../data/'
    out_dir = '../results/'
    graphs_dir = data_dir + 'graphs_all/'
    cascade_size_file = data_dir + 'cascade_size.csv'

    device = set_device(cuda_id)

    if structureless:
        x_size = x_size - 2

    train_ids = np.array([ID.split('_')[0] for ID in os.listdir(graphs_dir)
                          if variant in ID and 'test' not in ID])
    test_ids = np.unique([ID.split('_')[0] for ID in os.listdir(graphs_dir)
                          if variant + '_test' in ID])

    train_set = CascadeData(train_ids, graphs_dir, cascade_size_file, variant=variant,
                            categorical=True, structureless=structureless)
    test_set = CascadeData(test_ids, graphs_dir, cascade_size_file, test=True, variant=variant,
                           categorical=True, structureless=structureless)

    _, weights_all = get_class_weights(train_set)
    weighted_sampler = WeightedRandomSampler(weights_all, len(weights_all))

    train_generator = DataLoader(train_set, collate_fn=cascade_batcher(device), batch_size=batch_size,
                                 num_workers=8, sampler=weighted_sampler)
    test_generator = DataLoader(test_set, collate_fn=cascade_batcher(device), batch_size=batch_size,
                                num_workers=8)

    deep_tree = DeepTreeLSTMClassifier(x_size, 4, emo_size, h_size=h_size, top_sizes=top_sizes,
                                       bi=bi, deep=deep, pd=p_drop)

    criterion = nn.CrossEntropyLoss()
    optimizer_tree = th.optim.Adam(deep_tree.bottom_net.parameters(), lr=lr_tree, weight_decay=decay_tree)
    optimizer_top = th.optim.Adam(deep_tree.top_net.parameters(), lr=lr_top, weight_decay=decay_top)
    scheduler_tree = th.optim.lr_scheduler.StepLR(optimizer_tree, step_size=10, gamma=0.8)
    scheduler_top = th.optim.lr_scheduler.StepLR(optimizer_top, step_size=10, gamma=0.8)

    callbacks = [EarlyStopping(patience=10),
                 ExperimentLogger(out_dir, filename='logs_classification.csv')]

    model_trainer = DeepTreeTrainer(deep_tree)
    model_trainer.compile(optimizer_tree, optimizer_top, criterion,
                          scheduler_tree=scheduler_tree, scheduler_top=scheduler_top,
                          callbacks=callbacks, metrics=['mul_acc'])
    model_trainer.fit(train_generator, test_generator, epochs, cuda_id, verbose=verbose)

    return deep_tree
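Unlike most snippets here, this one balances classes by resampling: `get_class_weights(train_set)` is expected to return per-class and per-sample weights for the `WeightedRandomSampler`. A hedged sketch of a helper with that shape, assuming each dataset item is a `(graph, label)` pair with an integer label (an assumption about the dataset, not taken from the project):

import numpy as np


def get_class_weights(dataset):
    """Return (per-class weights, per-sample weights) for WeightedRandomSampler (sketch only)."""
    # assumption: dataset[i] yields (graph, label) with an integer class label
    labels = np.array([int(dataset[i][1]) for i in range(len(dataset))])
    class_weights = 1.0 / np.bincount(labels)
    sample_weights = class_weights[labels]
    return class_weights, sample_weights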
train_do_ds = tf_dataset_generator(train_path, process_path_double_obj, batch_size=8)
val_do_ds = tf_dataset_generator(val_path, process_path_double_obj, batch_size=8)
test_do_ds = tf_dataset_generator(test_path, process_path_double_obj, batch_size=8)

list_ds = tf.data.Dataset.list_files(train_path + '/*')
data_set = list_ds.map(process_path_double_obj, num_parallel_calls=tf.data.experimental.AUTOTUNE)

# get class weights
class_weights = get_class_weights(
    tf_dataset_generator(train_path, process_path, train=False, cache=False))

# define single headed model
EnetSingle = EnetModel(C=12)
for img, iml in train_ds.take(1):
    img_test = img
    iml_test = iml
img_out = EnetSingle(img_test)

# define double headed model
EnetDouble = EnetModel(C=12, MultiObjective=True)
for img, iml in train_do_ds.take(1):
    img_do_test = img
    iml_do_test = iml
img_do_out = EnetDouble(img_do_test)
def train_model(model, train_dl, val_dl, epochs: int = 10, lr: float = 3e-4,
                name: str = 'no_name', mcat_ratio: float = 0.1, ema: float = 0.99,
                pbar_width: int = None, use_wandb: bool = True,
                overwrite_model: bool = True):
    ''' Train a given model.

    INPUT
        model: torch.nn.Module
            The model we would like to train
        train_dl: torch.utils.data.DataLoader
            A dataloader containing the training set
        val_dl: torch.utils.data.DataLoader
            A dataloader containing the validation set
        epochs: int = 10
            The amount of epochs to train
        lr: float = 3e-4
            The learning rate used
        name: str = 'no_name'
            The name of the training run, used for wandb purposes
        mcat_ratio: float = 0.1
            How much the master category loss is prioritised over the
            category loss
        ema: float = 0.99
            The factor used in computing the exponential moving averages of
            the loss and sample-average F1 scores. Roughly corresponds to
            taking the average of the previous 1 / (1 - ema) many batches
        pbar_width: int = None
            The width of the progress bar. If running in a Jupyter notebook
            then this should be set to ~1000
        use_wandb: bool = True
            Whether to use the Weights & Biases online performance recording
        overwrite_model: bool = True
            Whether to overwrite existing models when saving

    OUTPUT
        The trained model
    '''
    from sklearn.metrics import f1_score
    import warnings
    from pathlib import Path

    print(f'Training on {len(train_dl) * train_dl.batch_size:,d} samples '
          f'and validating on {len(val_dl) * val_dl.batch_size:,d} samples.')
    print(f'Number of trainable parameters: {model.trainable_params():,d}')

    # Sign into wandb and log metrics from model
    if use_wandb:
        import wandb
        config = {
            'name': name,
            'mcat_ratio': mcat_ratio,
            'epochs': epochs,
            'lr': lr,
            'batch_size': train_dl.batch_size,
            'ema': ema,
            'vectors': train_dl.vectors,
            'dropout': model.params['dropout'],
            'nlayers': model.params['nlayers'],
            'dim': model.params['dim'],
            'boom_dim': model.params['boom_dim'],
            'emb_dim': model.params['vocab'].vectors.shape[1],
        }
        wandb.init(project='scholarly', config=config)
        wandb.watch(model)

    weights = get_class_weights(train_dl, pbar_width=model.pbar_width, data_dir=model.data_dir)
    criterion = NestedBCELoss(**weights, mcat_ratio=mcat_ratio, data_dir=model.data_dir)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    mcat_masks = get_mcat_masks(data_dir=model.data_dir)

    if model.is_cuda():
        mcat_masks = mcat_masks.cuda()
        criterion = criterion.cuda()

    avg_loss, avg_cat_f1, avg_mcat_f1, best_score = 0, 0, 0, 0
    for epoch in range(epochs):
        with tqdm(total=len(train_dl) * train_dl.batch_size, ncols=model.pbar_width) as pbar:
            model.train()

            for idx, (x_train, y_train) in enumerate(train_dl):
                optimizer.zero_grad()

                if model.is_cuda():
                    x_train = x_train.cuda()
                    y_train = y_train.cuda()

                # Get cat predictions
                y_hat = model(x_train)
                preds = torch.sigmoid(y_hat)

                # Get master cat predictions
                my_hat, my_train = cats2mcats(y_hat, y_train, masks=mcat_masks, data_dir=model.data_dir)
                mpreds = torch.sigmoid(my_hat)

                # Calculate loss and perform backprop
                loss = criterion(y_hat, y_train)
                loss.backward()
                optimizer.step()

                # Compute f1 scores
                with warnings.catch_warnings():
                    warnings.simplefilter('ignore')
                    cat_f1 = f1_score(preds.cpu() > 0.5, y_train.cpu(), average='samples')
                    mcat_f1 = f1_score(mpreds.cpu() > 0.5, my_train.cpu(), average='samples')

                # Keep track of the current iteration index
                iteration = epoch * len(train_dl) * train_dl.batch_size
                iteration += idx * train_dl.batch_size

                # Exponentially moving average of loss and f1 scores
                avg_loss = ema * avg_loss + (1 - ema) * float(loss)
                avg_loss /= 1 - ema ** (iteration / (1 - ema) + 1)
                avg_cat_f1 = ema * avg_cat_f1 + (1 - ema) * float(cat_f1)
                avg_cat_f1 /= 1 - ema ** (iteration / (1 - ema) + 1)
                avg_mcat_f1 = ema * avg_mcat_f1 + (1 - ema) * float(mcat_f1)
                avg_mcat_f1 /= 1 - ema ** (iteration / (1 - ema) + 1)

                # Log wandb
                if use_wandb:
                    wandb.log({
                        'loss': avg_loss,
                        'cat f1': avg_cat_f1,
                        'mcat f1': avg_mcat_f1
                    })

                # Update the progress bar
                desc = f'Epoch {epoch:2d} - '\
                       f'loss {avg_loss:.4f} - '\
                       f'cat f1 {avg_cat_f1:.4f} - '\
                       f'mcat f1 {avg_mcat_f1:.4f}'
                pbar.set_description(desc)
                pbar.update(train_dl.batch_size)

            # Compute validation scores
            with torch.no_grad():
                model.eval()

                val_loss, val_cat_f1, val_mcat_f1 = 0, 0, 0
                y_vals, y_hats = [], []
                for x_val, y_val in val_dl:
                    if model.is_cuda():
                        x_val = x_val.cuda()
                        y_val = y_val.cuda()

                    # Get cat predictions
                    y_hat = model(x_val)
                    preds = torch.sigmoid(y_hat)

                    # Get mcat predictions
                    my_hat, my_val = cats2mcats(y_hat, y_val, masks=mcat_masks, data_dir=model.data_dir)
                    mpreds = torch.sigmoid(my_hat)

                    # Collect the true and predicted labels
                    y_vals.append(y_val)
                    y_hats.append(preds > 0.5)

                    # Accumulate loss
                    val_loss += float(criterion(y_hat, y_val, weighted=False))

                    # Accumulate f1 scores
                    with warnings.catch_warnings():
                        warnings.simplefilter('ignore')
                        val_cat_f1 += f1_score(preds.cpu() > 0.5, y_val.cpu(), average='samples')
                        val_mcat_f1 += f1_score(mpreds.cpu() > 0.5, my_val.cpu(), average='samples')

                # Concatenate the true and predicted labels
                y_val = torch.cat(y_vals, dim=0)
                y_hat = torch.cat(y_hats, dim=0)

                # Compute the average loss and f1 scores
                val_loss /= len(val_dl)
                val_cat_f1 /= len(val_dl)
                val_mcat_f1 /= len(val_dl)

                # Log wandb
                if use_wandb:
                    wandb.log({
                        'val loss': val_loss,
                        'val cat f1': val_cat_f1,
                        'val mcat f1': val_mcat_f1
                    })

                # If the current cat f1 score is the best so far, then
                # replace the stored model with the current one
                if val_cat_f1 > best_score:
                    model_fname = f'model_{val_cat_f1 * 100:.2f}.pt'
                    best_score = val_cat_f1
                    data = {
                        'params': model.params,
                        'state_dict': model.state_dict(),
                        'scores': model.evaluate(val_dl, output_dict=True)
                    }

                    if overwrite_model:
                        for f in get_path(model.data_dir).glob('model*.pt'):
                            f.unlink()

                    with warnings.catch_warnings():
                        warnings.simplefilter('ignore')
                        path = get_path(model.data_dir) / model_fname
                        torch.save(data, path)

                    # Save the model's state dict to wandb directory
                    if use_wandb:
                        if overwrite_model:
                            for f in Path(wandb.run.dir).glob('model*.pt'):
                                f.unlink()
                        torch.save(data, Path(wandb.run.dir) / model_fname)
                        wandb.save(model_fname)

                # Update progress bar
                desc = f'Epoch {epoch:2d} - '\
                       f'loss {avg_loss:.4f} - '\
                       f'cat f1 {avg_cat_f1:.4f} - '\
                       f'mcat f1 {avg_mcat_f1:.4f} - '\
                       f'val_loss {val_loss:.4f} - '\
                       f'val cat f1 {val_cat_f1:.4f} - '\
                       f'val mcat f1 {val_mcat_f1:.4f}'
                pbar.set_description(desc)

    return model
                 mode='test')  # (truncated: tail of the test-corpus constructor, its first arguments are not shown)
c_test.validate_corpus()

# Sentences, Labels, POS Tags - I could just use better variable names
x_test, y_test, z_pos = features.generate_input_and_labels(c_test.sentences, Vectors=embeddings)

# POS Tags to numerical sequences
pos_tokenizer = Tokenizer()
pos_tokenizer.fit_on_texts(z_pos)
pos_sequences = pos_tokenizer.texts_to_sequences(z_pos)
z_test = to_categorical(pos_sequences)

# TODO: These weights should presumably come from the train split instead
class_weights = list(utils.get_class_weights(c_test.label_list, WEIGHT_SMOOTHING).values())
print('loss_weight {}'.format(class_weights))

# Load model and Embeddings
model = load_model('naacl_metaphor.h5',
                   custom_objects={
                       'loss': utils.weighted_categorical_crossentropy(class_weights),
                       'f1': utils.f1,
                       'precision': utils.precision,
                       'recall': utils.recall
                   })
def train_with_erm(classifier, train_loader, tune_loader, num_epochs, logger=None, save_file=None):
    '''Train the provided classifier on the data from train_loader, evaluating
    the performance along the way with the data from tune_loader

    Arguments
    ---------
    classifier: torch.nn.Module
        The model to train
    train_loader: torch.utils.data.DataLoader
        The DataLoader containing training data
    tune_loader: torch.utils.data.DataLoader
        The DataLoader containing tuning data
    num_epochs: int
        The number of times the model should be trained on all the data
    logger: logging.Logger
        The python logger object to handle printing logs
    save_file: string or Path object
        The file to save the model to. If save_file is None, the model won't be saved

    Returns
    -------
    results: dict
        A dictionary containing lists tracking different loss metrics across epochs
    '''
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    classifier = classifier.to(device)

    optimizer = optim.Adam(classifier.parameters(), lr=1e-5)
    class_weights = utils.get_class_weights(train_loader)

    # Calculate baseline tune set prediction accuracy (just pick the largest class).
    # Note: train_dataset, tune_dataset, train_dirs and tune_dirs are assumed to be
    # module-level objects backing the two loaders.
    tune_label_counts, _ = utils.get_value_counts(tune_loader)
    baseline = max(list(tune_label_counts.values())) / len(tune_dataset)

    results = {'train_loss': [], 'tune_loss': [],
               'train_acc': [], 'tune_acc': [],
               'baseline': baseline}

    try:
        best_tune_loss = None

        for epoch in tqdm_notebook(range(num_epochs)):
            train_loss = 0
            train_correct = 0

            # Set training mode
            classifier = classifier.train()
            for batch in train_loader:
                expression, labels, ids = batch
                expression = expression.float().to(device)
                labels = labels.float().to(device)

                # Get weights to handle the class imbalance
                batch_weights = [class_weights[int(label)] for label in labels]
                batch_weights = torch.FloatTensor(batch_weights).to(device)
                loss_function = nn.BCEWithLogitsLoss(weight=batch_weights)

                # Standard update step
                optimizer.zero_grad()
                output = classifier(expression)
                loss = loss_function(output, labels)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                train_correct += utils.count_correct(output, labels)

            # Disable the gradient and switch into model evaluation mode
            with torch.no_grad():
                classifier = classifier.eval()

                tune_loss = 0
                tune_correct = 0
                for tune_batch in tune_loader:
                    expression, labels, ids = tune_batch
                    expression = expression.float().to(device)
                    tune_labels = labels.float().to(device)

                    batch_weights = [class_weights[int(label)] for label in labels]
                    batch_weights = torch.FloatTensor(batch_weights).to(device)
                    loss_function = nn.BCEWithLogitsLoss(weight=batch_weights)

                    tune_output = classifier(expression)
                    loss = loss_function(tune_output, tune_labels)

                    tune_loss += loss.item()
                    tune_correct += utils.count_correct(tune_output, tune_labels)

            # Save the model
            if save_file is not None:
                if best_tune_loss is None or tune_loss < best_tune_loss:
                    best_tune_loss = tune_loss
                    torch.save(classifier, save_file)

            train_accuracy = train_correct / len(train_dataset)
            tune_accuracy = tune_correct / len(tune_dataset)

            if logger is not None:
                logger.info('Epoch {}'.format(epoch))
                logger.info('Train loss: {}'.format(train_loss / len(train_dataset)))
                logger.info('Tune loss: {}'.format(tune_loss / len(tune_dataset)))
                logger.info('Train accuracy: {}'.format(train_accuracy))
                logger.info('Tune accuracy: {}'.format(tune_accuracy))
                logger.info('Baseline accuracy: {}'.format(baseline))

            results['train_loss'].append(train_loss / len(train_dataset))
            results['tune_loss'].append(tune_loss / len(tune_dataset))
            results['train_acc'].append(train_accuracy)
            results['tune_acc'].append(tune_accuracy)

    except Exception as e:
        # Print error
        logger.error(e, exc_info=True)
    finally:
        results = utils.add_study_ids_to_results(results, train_dirs, tune_dirs)
        return results