def run(ini_file='tinyimg.ini',
        data_in_dir='./../../dataset',
        model_cfg='../cfg/vgg-tiny.cfg',
        model_out_dir='./models',
        epochs=30,
        lr=3.0e-5,
        batch_sz=256,
        num_worker=4,
        log_freq=20,
        use_gpu=True):
    # Step 1: parse config
    cfg = parse_cfg(ini_file,
                    data_in_dir=data_in_dir,
                    model_cfg=model_cfg,
                    model_out_dir=model_out_dir,
                    epochs=epochs,
                    lr=lr,
                    batch_sz=batch_sz,
                    log_freq=log_freq,
                    num_worker=num_worker,
                    use_gpu=use_gpu)
    print_cfg(cfg)

    # Step 2: create datasets and loaders
    train_ds, val_ds = build_train_val_datasets(cfg, in_memory=True)
    train_loader, val_loader = DLFactory.create_train_val_dataloader(
        cfg, train_ds, val_ds)

    # Step 3: create model
    model = MFactory.create_model(cfg)

    # Step 4: train/validate
    # This demonstrates that our approach integrates easily with our app framework.
    device = get_device(cfg)
    data = DataBunch(train_loader, val_loader, device=device)
    learn = Learner(data,
                    model,
                    loss_func=torch.nn.CrossEntropyLoss(),
                    metrics=accuracy)
    # callback_fns=[partial(EarlyStoppingCallback, monitor='accuracy',
    #                       min_delta=0.01, patience=2)])
    # lr_find(learn, start_lr=1e-7, end_lr=10)
    # learn.recorder.plot()
    # lrs_losses = [(lr, loss) for lr, loss in
    #               zip(learn.recorder.lrs, learn.recorder.losses)]
    # min_lr = min(lrs_losses[10:-5], key=lambda x: x[1])[0]
    # lr = min_lr / 10.0
    # plt.show()
    # print(f'Minimal lr is {min_lr}; proposed initial lr {lr}')
    # fit_one_cycle(learn, epochs, lr)
    learn.fit(epochs, lr)
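# A minimal sketch of the LR-finder flow hinted at by the commented-out block
# in run() above, assuming the fastai v1 lr_find/Recorder API used there. The
# skip of the first 10 and last 5 sweep points mirrors the commented code; the
# divide-by-10 heuristic for the initial LR is an assumption, not a project rule.
def propose_init_lr(learn, start_lr=1e-7, end_lr=10):
    lr_find(learn, start_lr=start_lr, end_lr=end_lr)
    # Pair each learning rate tried during the sweep with its recorded loss.
    lrs_losses = list(zip(learn.recorder.lrs, learn.recorder.losses))
    # Drop the noisy head and tail of the sweep before taking the minimum.
    min_lr = min(lrs_losses[10:-5], key=lambda x: x[1])[0]
    print(f'Minimal lr is {min_lr}; proposed initial lr {min_lr / 10.0}')
    return min_lr / 10.0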
def get_score():
    print('Make Train Features.')
    with open(args.temporary_file, 'rb') as f:
        x_train, x_feat_train, y_train_o, y_aux_train, embedding_matrix = \
            pickle.load(f)

    def power_mean(series, p=-5):
        # Generalized power mean; p=-5 pulls the mean toward the worst value.
        total = sum(np.power(series, p))
        return np.power(total / len(series), 1 / p)

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    # Weight factors, in order: all, sub, s&t, !s&t, s&!t, !s&!t
    # (s = comment mentions a subgroup, t = comment is toxic).
    weight_factor = list(map(float, args.weight_factor.split(',')))
    identity_factor_1 = list(map(float, args.identity_factor_1.split(',')))
    identity_factor_2 = list(map(float, args.identity_factor_2.split(',')))
    model_factor = list(map(int, args.model_factor.split(',')))
    print('weight_factor =', weight_factor)
    print('identity_factor_1 =', identity_factor_1)
    print('identity_factor_2 =', identity_factor_2)
    print('model_factor =', model_factor)

    train = read_competision_file(train=True)
    identity_columns = [
        'male', 'female', 'homosexual_gay_or_lesbian', 'christian', 'jewish',
        'muslim', 'black', 'white', 'psychiatric_or_mental_illness'
    ]
    # Per-identity masks for the three bias-AUC subsets: subgroup,
    # BPSN (background positive, subgroup negative) and
    # BNSP (background negative, subgroup positive).
    index_subgroup, index_bpsn, index_bnsp = dict(), dict(), dict()
    for col in identity_columns:
        index_subgroup[col] = (train[col].fillna(0).values >= 0.5).astype(bool)
        index_bpsn[col] = (
            (((train['target'].values < 0.5).astype(bool).astype(int) +
              (train[col].fillna(0).values >= 0.5).astype(bool).astype(int)) >
             1).astype(bool)) + (
                 (((train['target'].values >= 0.5).astype(bool).astype(int) +
                   (train[col].fillna(0).values < 0.5).astype(bool).astype(int))
                  > 1).astype(bool))
        index_bnsp[col] = (
            (((train['target'].values >= 0.5).astype(bool).astype(int) +
              (train[col].fillna(0).values >= 0.5).astype(bool).astype(int)) >
             1).astype(bool)) + (
                 (((train['target'].values < 0.5).astype(bool).astype(int) +
                   (train[col].fillna(0).values < 0.5).astype(bool).astype(int))
                  > 1).astype(bool))

    # Per-sample loss weights.
    # Overall
    weights = np.ones((len(x_train), )) * weight_factor[0]
    # Subgroup (any identity mentioned)
    weights += (train[identity_columns].fillna(0).values >= 0.5).sum(
        axis=1).astype(bool).astype(int) * weight_factor[1]
    # s&t
    weights += (((train['target'].values >= 0.5).astype(bool).astype(int) +
                 (train[identity_columns].fillna(0).values >= 0.5).sum(
                     axis=1).astype(bool).astype(int)) >
                1).astype(bool).astype(int) * weight_factor[2]
    # !s&t
    weights += (((train['target'].values >= 0.5).astype(bool).astype(int) +
                 (train[identity_columns].fillna(0).values < 0.5).sum(
                     axis=1).astype(bool).astype(int)) >
                1).astype(bool).astype(int) * weight_factor[3]
    # s&!t
    weights += (((train['target'].values < 0.5).astype(bool).astype(int) +
                 (train[identity_columns].fillna(0).values >= 0.5).sum(
                     axis=1).astype(bool).astype(int)) >
                1).astype(bool).astype(int) * weight_factor[4]
    # !s&!t
    weights += (((train['target'].values < 0.5).astype(bool).astype(int) +
                 (train[identity_columns].fillna(0).values < 0.5).sum(
                     axis=1).astype(bool).astype(int)) >
                1).astype(bool).astype(int) * weight_factor[5]

    # Additional per-identity boosts: id1 = identity & toxic,
    # id2 = identity & non-toxic.
    index_id1, index_id2 = dict(), dict()
    for col in identity_columns:
        index_id1[col] = (
            ((train[col].fillna(0).values >= 0.5).astype(bool).astype(int) +
             (train['target'].values >= 0.5).astype(bool).astype(int)) >
            1).astype(bool)
        index_id2[col] = (
            ((train[col].fillna(0).values >= 0.5).astype(bool).astype(int) +
             (train['target'].values < 0.5).astype(bool).astype(int)) >
            1).astype(bool)
    for col, id1 in zip(identity_columns, identity_factor_1):
        weights[index_id1[col]] += id1
    for col, id2 in zip(identity_columns, identity_factor_2):
        weights[index_id2[col]] += id2
    loss_weight = 1.0 / weights.mean()

    aux_impact_factor = list(map(float, args.aux_impact_factor.split(',')))
    aux_identity_factor = list(map(float, args.aux_identity_factor.split(',')))
    print('aux_impact_factor =', aux_impact_factor)
    print('aux_identity_factor =', aux_identity_factor)

    # Auxiliary-loss weights for the four target x identity cells.
    weights_aux = np.ones((len(x_train), ))
    weights_aux[(train['target'].values >= 0.5).astype(int) +
                (train[identity_columns].fillna(0).values < 0.5).sum(
                    axis=1).astype(bool).astype(int) > 1] = aux_identity_factor[0]
    weights_aux[(train['target'].values >= 0.5).astype(int) +
                (train[identity_columns].fillna(0).values >= 0.5).sum(
                    axis=1).astype(bool).astype(int) > 1] = aux_identity_factor[1]
    weights_aux[(train['target'].values < 0.5).astype(int) +
                (train[identity_columns].fillna(0).values < 0.5).sum(
                    axis=1).astype(bool).astype(int) > 1] = aux_identity_factor[2]
    weights_aux[(train['target'].values < 0.5).astype(int) +
                (train[identity_columns].fillna(0).values >= 0.5).sum(
                    axis=1).astype(bool).astype(int) > 1] = aux_identity_factor[3]

    # Pack target, main loss weight and aux loss weight as label columns;
    # y_aux_train is appended per fold below.
    y_train = np.vstack([y_train_o, weights, weights_aux]).T
    del train

    def custom_loss_aux(data, targets):
        '''Weighted BCE on the 'target' column plus weighted BCE on the six
        auxiliary outputs. Target layout: col 0 = target, col 1 = sample
        weight, col 2 = aux sample weight, cols 3-8 = aux targets.'''
        bce_loss_1 = nn.BCEWithLogitsLoss(weight=targets[:, 1:2])(
            data[:, :1], targets[:, :1])
        bce_loss_aux_1 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 1:2], targets[:, 3:4])
        bce_loss_aux_2 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 2:3], targets[:, 4:5])
        bce_loss_aux_3 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 3:4], targets[:, 5:6])
        bce_loss_aux_4 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 4:5], targets[:, 6:7])
        bce_loss_aux_5 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 5:6], targets[:, 7:8])
        bce_loss_aux_6 = nn.BCEWithLogitsLoss(weight=targets[:, 2:3])(
            data[:, 6:7], targets[:, 8:9])
        return ((bce_loss_1 * loss_weight) +
                (bce_loss_aux_1 * aux_impact_factor[0]) +
                (bce_loss_aux_2 * aux_impact_factor[1]) +
                (bce_loss_aux_3 * aux_impact_factor[2]) +
                (bce_loss_aux_4 * aux_impact_factor[3]) +
                (bce_loss_aux_5 * aux_impact_factor[4]) +
                (bce_loss_aux_6 * aux_impact_factor[5]))

    from sklearn.model_selection import KFold, train_test_split
    from sklearn.metrics import classification_report, roc_auc_score

    batch_size = args.batch_size
    lr = args.learning_ratio
    max_features = np.max(x_train)

    kf = KFold(n_splits=5, random_state=12, shuffle=True)
    final_epoch_score_cv = dict()
    final_fold_count = 0
    for fold_id, (big_index, small_index) in enumerate(kf.split(y_train)):
        final_fold_count += 1
        # args.minimize trades evaluation fidelity for speed.
        if args.minimize == 1:
            train_index, test_index = train_test_split(
                np.arange(len(y_train)), test_size=0.5,
                random_state=1234, shuffle=True)
        elif args.minimize == 2:
            train_index, test_index = train_test_split(
                np.arange(len(y_train)), test_size=0.666,
                random_state=1234, shuffle=True)
        elif args.minimize == 3:
            train_index, test_index = big_index[:25600], small_index[:12800]
        else:
            train_index, test_index = big_index, small_index
        if len(args.model_file) > 0:
            # Final-model mode: train on all rows.
            train_index = np.arange(len(x_train))

        if args.use_feats_url:
            x_train_train = np.hstack(
                [x_feat_train[train_index], x_train[train_index]])
            x_train_test = np.hstack(
                [x_feat_train[test_index], x_train[test_index]])
            feats_nums = x_feat_train.shape[1]
        else:
            x_train_train = x_train[train_index]
            x_train_test = x_train[test_index]
            feats_nums = 0

        x_train_torch = torch.tensor(x_train_train, dtype=torch.long)
        x_test_torch = torch.tensor(x_train_test, dtype=torch.long)
        y_train_torch = torch.tensor(
            np.hstack([y_train, y_aux_train])[train_index],
            dtype=torch.float32)
        y_test_torch = torch.tensor(
            np.hstack([y_train, y_aux_train])[test_index],
            dtype=torch.float32)
        train_dataset = data.TensorDataset(x_train_torch, y_train_torch)
        valid_dataset = data.TensorDataset(x_test_torch, y_test_torch)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
        valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=False)
        databunch = DataBunch(train_dl=train_loader, valid_dl=valid_loader)

        checkpoint_predictions = []
        # Note: shadows the sample-weight array above, which is already
        # baked into y_train at this point.
        weights = []
        seed_everything(args.random_seed + fold_id)
        num_units = list(map(int, args.num_units.split(',')))
        model = get_model(model_factor, num_units[0], num_units[1],
                          embedding_matrix, max_features,
                          y_aux_train.shape[-1], args.num_words, feats_nums)
        model = model.cuda(device=cuda)
        if args.optimizer == 'Nadam':
            from NadamLocal import Nadam
            learn = Learner(databunch, model, loss_func=custom_loss_aux,
                            opt_func=Nadam)
        else:
            learn = Learner(databunch, model, loss_func=custom_loss_aux)
        all_test_preds = []
        checkpoint_weights = [2**epoch for epoch in range(args.num_epochs)]
        test_loader = valid_loader

        # One training phase per epoch, decaying the LR by 0.6 each epoch.
        n = len(learn.data.train_dl)
        phases = [(TrainingPhase(n).schedule_hp('lr', lr * (0.6**i)))
                  for i in range(args.num_epochs)]
        sched = GeneralScheduler(learn, phases)
        learn.callbacks.append(sched)

        final_epoch_score = 0
        for global_epoch in range(args.num_epochs):
            print("Fold#", fold_id, "epoch#", global_epoch)
            learn.fit(1)
            if args.minimize < 2 or (args.minimize >= 2 and global_epoch
                                     == int(args.num_epochs - 1)):
                test_preds = np.zeros((len(test_index), 7))
                for i, x_batch in enumerate(test_loader):
                    X = x_batch[0].cuda()
                    y_pred = sigmoid(learn.model(X).detach().cpu().numpy())
                    test_preds[i * batch_size:(i + 1) * batch_size, :] = y_pred
                all_test_preds.append(test_preds)
                prediction_one = test_preds[:, 0].flatten()
                # Checkpoint ensemble: average epoch snapshots with
                # exponentially growing weights.
                checkpoint_predictions.append(prediction_one)
                weights.append(2**global_epoch)
                predictions = np.average(checkpoint_predictions,
                                         weights=weights, axis=0)
                y_true = (y_train[test_index, 0]).reshape((-1, )).astype(int)
                roc_sub, roc_bpsn, roc_bnsp = [], [], []
                roc_sub_one, roc_bpsn_one, roc_bnsp_one = [], [], []
                for col in identity_columns:
                    if args.vervose:
                        print("Subgroup#", col, ":")
                        print(classification_report(
                            y_true[index_subgroup[col][test_index]],
                            (predictions[index_subgroup[col][test_index]] >=
                             0.5).astype(int)))
                    if args.minimize < 2:
                        roc_sub.append(roc_auc_score(
                            y_true[index_subgroup[col][test_index]],
                            predictions[index_subgroup[col][test_index]]))
                    roc_sub_one.append(roc_auc_score(
                        y_true[index_subgroup[col][test_index]],
                        prediction_one[index_subgroup[col][test_index]]))
                    if args.vervose:
                        print("BPSN#", col, ":")
                        print(classification_report(
                            y_true[index_bpsn[col][test_index]],
                            (predictions[index_bpsn[col][test_index]] >=
                             0.5).astype(int)))
                    if args.minimize < 2:
                        roc_bpsn.append(roc_auc_score(
                            y_true[index_bpsn[col][test_index]],
                            predictions[index_bpsn[col][test_index]]))
                    roc_bpsn_one.append(roc_auc_score(
                        y_true[index_bpsn[col][test_index]],
                        prediction_one[index_bpsn[col][test_index]]))
                    if args.vervose:
                        print("BNSP#", col, ":")
                        print(classification_report(
                            y_true[index_bnsp[col][test_index]],
                            (predictions[index_bnsp[col][test_index]] >=
                             0.5).astype(int)))
                    if args.minimize < 2:
                        roc_bnsp.append(roc_auc_score(
                            y_true[index_bnsp[col][test_index]],
                            predictions[index_bnsp[col][test_index]]))
                    roc_bnsp_one.append(roc_auc_score(
                        y_true[index_bnsp[col][test_index]],
                        prediction_one[index_bnsp[col][test_index]]))

                if args.minimize < 2:
                    roc_all = roc_auc_score(y_true, predictions)
                    pm_roc_sub = power_mean(roc_sub)
                    pm_roc_bpsn = power_mean(roc_bpsn)
                    pm_roc_bnsp = power_mean(roc_bnsp)
                    final_epoch_score = (roc_all + pm_roc_sub + pm_roc_bpsn +
                                         pm_roc_bnsp) / 4
                roc_all_one = roc_auc_score(y_true, prediction_one)
                pm_roc_sub_one = power_mean(roc_sub_one)
                pm_roc_bpsn_one = power_mean(roc_bpsn_one)
                pm_roc_bnsp_one = power_mean(roc_bnsp_one)
                final_epoch_score_one = (roc_all_one + pm_roc_sub_one +
                                         pm_roc_bpsn_one + pm_roc_bnsp_one) / 4
                if args.minimize >= 2:
                    return final_epoch_score_one
                if args.vervose:
                    print("roc_sub:", pm_roc_sub)
                    print("roc_bpsn:", pm_roc_bpsn)
                    print("roc_bnsp:", pm_roc_bnsp)
                    print("final score:", (roc_all + pm_roc_sub + pm_roc_bpsn +
                                           pm_roc_bnsp) / 4)
                if global_epoch not in final_epoch_score_cv:
                    final_epoch_score_cv[global_epoch] = []
                final_epoch_score_cv[global_epoch].append(
                    (final_epoch_score, final_epoch_score_one))

        if len(args.model_file) > 0:
            # Final-model mode: persist weights plus a small JSON sidecar
            # describing the architecture, then stop after this fold.
            if args.model_file.endswith('.bz2'):
                model_file = args.model_file
            else:
                model_file = args.model_file + '.bz2'
            model_json_file = model_file[:-4] + '.json'
            model.save_model(model_file)
            with open(model_json_file, 'w') as pf:
                pf.write('{')
                pf.write('"model_factor":[' +
                         ','.join(list(map(str, model_factor))) + ']')
                pf.write(',')
                pf.write('"num_units":[' +
                         ','.join(list(map(str, num_units))) + ']')
                pf.write(',')
                pf.write('"num_aux_targets":%d' % y_aux_train.shape[-1])
                pf.write(',')
                pf.write('"feats_nums":%d' % feats_nums)
                pf.write(',')
                pf.write('"max_seq_len":%d' % args.num_words)
                pf.write('}')
            break
        if args.minimize > 0:
            break
    return final_epoch_score_cv
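# Toy illustration of the bias-metric combination computed in get_score().
# The formula matches the code above: the plain mean of the overall AUC and
# the p=-5 power means of the per-identity subgroup, BPSN and BNSP AUCs.
# All AUC values below are made up for the example.
import numpy as np

def power_mean(series, p=-5):
    total = sum(np.power(series, p))
    return np.power(total / len(series), 1 / p)

roc_all = 0.96                    # overall AUC
roc_sub = [0.91, 0.88, 0.93]      # per-identity subgroup AUCs
roc_bpsn = [0.90, 0.85, 0.92]     # background-positive, subgroup-negative
roc_bnsp = [0.94, 0.92, 0.95]     # background-negative, subgroup-positive
final = (roc_all + power_mean(roc_sub) + power_mean(roc_bpsn) +
         power_mean(roc_bnsp)) / 4
# The negative exponent pulls each power mean toward its worst subgroup AUC.
print('final score:', final)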
def train(train_dataset: torch.utils.data.Dataset,
          test_dataset: torch.utils.data.Dataset,
          training_config: dict = train_config,
          global_config: dict = global_config):
    """
    Template training routine. Takes a training and a test dataset wrapped
    as torch.utils.data.Dataset type and two corresponding generic configs
    for both global path settings and training settings.
    Returns the fitted fastai.train.Learner object which can be used to
    assess the resulting metrics and error curves etc.
    """
    for path in global_config.values():
        create_dirs(path)

    # wrap datasets with DataLoader classes
    train_loader = torch.utils.data.DataLoader(
        train_dataset, **training_config["DATA_LOADER_CONFIG"])
    test_loader = torch.utils.data.DataLoader(
        test_dataset, **training_config["DATA_LOADER_CONFIG"])
    databunch = DataBunch(train_loader, test_loader)

    # instantiate model and learner
    if training_config["WEIGHTS"] is None:
        model = training_config["MODEL"](**training_config["MODEL_CONFIG"])
    else:
        model = load_model(training_config["MODEL"],
                           training_config["MODEL_CONFIG"],
                           training_config["WEIGHTS"],
                           training_config["DEVICE"])

    learner = Learner(databunch,
                      model,
                      metrics=training_config["METRICS"],
                      path=global_config["ROOT_PATH"],
                      model_dir=global_config["WEIGHT_DIR"],
                      loss_func=training_config["LOSS"])

    # model name & paths
    name = "_".join([training_config["DATE"], training_config["SESSION_NAME"]])
    modelpath = os.path.join(global_config["WEIGHT_DIR"], name)

    if training_config["MIXED_PRECISION"]:
        learner.to_fp16()

    learner.save(modelpath)
    torch.backends.cudnn.benchmark = True

    cbs = [
        SaveModelCallback(learner),
        LearnerTensorboardWriter(
            learner,
            Path(os.path.join(global_config["LOG_DIR"], "tensorboardx")),
            name),
        TerminateOnNaNCallback()
    ]

    # perform training iteration
    try:
        if training_config["ONE_CYCLE"]:
            learner.fit_one_cycle(training_config["EPOCHS"],
                                  max_lr=training_config["LR"],
                                  callbacks=cbs)
        else:
            learner.fit(training_config["EPOCHS"],
                        lr=training_config["LR"],
                        callbacks=cbs)
    # save model files even when training is interrupted
    except KeyboardInterrupt:
        learner.save(modelpath)
        raise
    learner.save(modelpath)

    val_loss = min(learner.recorder.val_losses)
    val_metrics = learner.recorder.metrics

    # log using the logging tool
    logger = log.Log(training_config, run_name=training_config['SESSION_NAME'])
    logger.log_metric('Validation Loss', val_loss)
    logger.log_metrics(val_metrics)
    logger.end_run()

    # write csv log file
    log_content = training_config.copy()
    log_content["VAL_LOSS"] = val_loss
    log_content["VAL_METRICS"] = val_metrics
    log_path = os.path.join(global_config["LOG_DIR"],
                            training_config["LOGFILE"])
    write_log(log_path, log_content)

    return learner, log_content, name
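# A hypothetical config pair for the train() template above. The keys are
# exactly those the routine reads; every value is an illustrative assumption,
# not a project default.
import torch
import torchvision

example_global_config = {
    "ROOT_PATH": "./runs",
    "WEIGHT_DIR": "weights",
    "LOG_DIR": "./runs/logs",
}
example_train_config = {
    "DATA_LOADER_CONFIG": {"batch_size": 64, "shuffle": True, "num_workers": 4},
    "MODEL": torchvision.models.resnet18,  # any callable returning an nn.Module
    "MODEL_CONFIG": {"num_classes": 10},
    "WEIGHTS": None,                       # or a checkpoint path for load_model()
    "DEVICE": "cuda",
    "METRICS": [accuracy],                 # fastai metric, as in run() above
    "LOSS": torch.nn.CrossEntropyLoss(),
    "MIXED_PRECISION": True,
    "ONE_CYCLE": True,
    "EPOCHS": 20,
    "LR": 1e-3,
    "DATE": "2019-08-01",
    "SESSION_NAME": "baseline",
    "LOGFILE": "train_log.csv",
}
# learner, log_content, name = train(train_ds, test_ds,
#                                    example_train_config,
#                                    example_global_config)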
                          valid_dl=valid_loader, collate_fn=train_collator)

y_train_torch = get_y_train_torch(weights)
databunch = get_databunch(y_train_torch)

logging.info("training model 1: para, crawl, w2v...")
embedding_matrix = np.concatenate(
    [para_matrix, crawl_matrix, w2v_matrix, char_matrix], axis=1)
seed_everything(42)
model = NeuralNet(embedding_matrix, output_aux_sub=subgroup_target.shape[1])
learn = Learner(databunch, model, loss_func=custom_loss)
cb = OneCycleScheduler(learn, lr_max=0.001)
learn.callbacks.append(cb)
learn.fit(EPOCHS)
save_nn_without_embedding_weights(learn.model, "./models/Notebook_100_1.bin")

logging.info("training model 2: glove, crawl, w2v...")
embedding_matrix = np.concatenate(
    [glove_matrix, crawl_matrix, w2v_matrix, char_matrix], axis=1)
seed_everything(43)
model = NeuralNet(embedding_matrix, output_aux_sub=subgroup_target.shape[1])
learn = Learner(databunch, model, loss_func=custom_loss)
cb = OneCycleScheduler(learn, lr_max=0.001)
learn.callbacks.append(cb)
learn.fit(EPOCHS)
save_nn_without_embedding_weights(learn.model, "./models/Notebook_100_2.bin")
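# save_nn_without_embedding_weights() is referenced above but not defined in
# this snippet. A minimal sketch of what such a helper might do, assuming the
# frozen pretrained embedding lives in parameters whose names contain
# "embedding" (hypothetical; the real attribute names may differ):
import torch

def save_nn_without_embedding_weights(model, path):
    # Drop the large, frozen embedding tensors so the checkpoint stays small;
    # they can be rebuilt from the pretrained matrices at load time.
    state = {k: v for k, v in model.state_dict().items()
             if 'embedding' not in k}
    torch.save(state, path)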