def train_execution(self):
    """Load a pretrained torchvision backbone, then train and save the model.

    Picks the backbone from ``self.arch``, trains the classifier head for
    ``self.epochs`` epochs with Adam and a StepLR decay schedule, and
    persists the result via ``save_model``.

    Raises:
        ValueError: if ``self.arch`` is not one of vgg16, vgg19,
            densenet121.  (The original code only printed a message and
            fell through, which left ``self.model`` unset/stale and
            crashed later with an AttributeError.)
    """
    # Dispatch table instead of an if/elif chain.
    arch_loaders = {
        'vgg19': models.vgg19,
        'vgg16': models.vgg16,
        'densenet121': models.densenet121,
    }
    if self.arch not in arch_loaders:
        raise ValueError(
            "Sorry {} is not a valid model for this exercise. "
            "Please use vgg16, vgg19, or densenet121".format(self.arch))
    self.model = arch_loaders[self.arch](pretrained=True)

    # Map the CLI-style device name onto the torch device string.
    self.device = 'cuda' if self.device == 'gpu' else 'cpu'

    # get the model's loss function and optimizer
    self.model_definition()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(self.model.classifier.parameters(), lr=self.learning_rate)

    # get images/data
    train_dataloader, valid_dataloader, test_dataloader, train_datasets = get_data(self.data_dir)

    # decay the learning rate by 10x every 7 epochs as training goes through
    decay_schedule = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    # train and print validation score
    for e in range(self.epochs):
        print('epoch {}/{}'.format(e + 1, self.epochs))
        self.train(train_dataloader, self.device, criterion, optimizer,
                   valid_dataloader, decay_schedule)

    save_model(self.model, self.save_dir, self.arch, self.hidden_units,
               self.dropout, self.epochs, self.learning_rate, train_datasets)
def get_model_data():
    '''
    This is the only function needed to run
    to return a dataframe useable in modeling
    '''
    # Pull the raw data, then push it through each cleaning step in order.
    df = prepare.get_data()
    for cleaning_step in (remove_columns, fix_nas, encode_categorical_columns):
        df = cleaning_step(df)
    # Drop any rows that still contain missing values.
    return df.dropna()
def pretrain(C, data, num_epoch=10, pos_lim=300, neg_lim=300, model=None):
    """Pre-train a model on the given data fold and return it.

    Builds a fresh model from the config when none is supplied, then runs
    ``num_epoch`` passes of ``train`` tagged "PT" on device 0.
    """
    gpu_id = 0
    datasets, lab_num = get_data(C, fold=data, files=True,
                                 pos_lim=pos_lim, neg_lim=neg_lim)
    trainset, devset, testset = datasets

    if model is None:
        model = get_model(C, lab_num)
    optimer, loss_func = get_others(C, model)

    for ep in range(num_epoch):
        model, _ = train(C, model, trainset, loss_func, optimer, ep, "PT", gpu_id)

    return model
def main():
    """Reload a pickled model, fine-tune it briefly, and report test AUCs.

    The fold/run id is parsed from the trailing number of ``C.save_name``
    (e.g. ``.../3``).
    """
    # NOTE(review): pickle.load on a model file is unsafe for untrusted
    # input — acceptable only for locally produced checkpoints.
    with open(os.path.join(C.save_path, C.save_name + ".pkl"), "rb") as fil:
        model = pickle.load(fil)

    run_id = int(re.search(r"/(\d+)$", C.save_name).group(1))

    loss_func = tc.nn.CrossEntropyLoss()
    optimer = tc.optim.Adam(params=model.parameters(), lr=1e-3, weight_decay=1e-8)

    C.uniform_sample = True
    C.grad_clip = -1
    C.bs = 10

    (trainset, devset, testset), lab_num = get_data(C, fold=run_id)

    model, train_loss = train(C, model, trainset, loss_func, optimer, 0, run_id, 0)
    print(train_loss)

    troc_auc, tprc_auc = evaluate(C, model, testset, loss_func, 0, run_id, 0, "Test")
    print(troc_auc, tprc_auc)
    # Removed a leftover pdb.set_trace() and three duplicated evaluate()
    # calls — debugging residue that halted the script at a breakpoint and
    # then re-ran the same evaluation.
def kfold(C , k = 10 , choose_one = [] , p_model = None):
    """Run k-fold training of an ensemble and log per-fold / averaged test AUCs.

    For each fold: builds C.ensemble models (optionally warm-started from
    p_model), trains them jointly, evaluates the ensemble on dev and test
    every epoch, keeps the test scores from the best epoch (by dev PRC-AUC
    or negative train loss), and saves the best ensemble.

    NOTE(review): ``choose_one = []`` is a mutable default argument; it is
    only read here, never mutated, so it is harmless — but ``None`` would
    be the conventional default.
    """
    # Optional molecular fingerprint features, only loaded when configured.
    if C.finger or C.mol2vec:
        finger_dict = load_fingers(C , C.data)
    else:
        finger_dict = None
    device = 0
    roc_aucs = []
    prc_aucs = []
    for run_id in range(k):
        if len(choose_one) > 0 and run_id not in choose_one:  # only run the selected fold(s)
            continue
        (trainset , devset , testset) , lab_num = get_data (C , fold = run_id)
        # Build the ensemble members (and their optimizers) for this fold.
        models = []
        optimers = []
        for j in range(C.ensemble):
            model = get_model (C , lab_num)
            if p_model is not None:
                copy_param(model , p_model)  # warm start from the pretrained model
            model = model.to(device)
            # NOTE(review): loss_func from the LAST iteration is the one
            # used below — presumably all members share the same loss.
            optimer , loss_func = get_others(C , model)
            models.append(model)
            optimers.append(optimer)
        ens_eval_m = EnsembleModel(models)
        E.log("%d th run starts on device %d\n" % (run_id , device))
        best_epoch = -1
        best_metric = -1
        tes_roc_auc = -1
        tes_prc_auc = -1
        for epoch_id in range(C.num_epoch):
            # Train each member for one epoch; average their train losses.
            train_loss = 0.
            for ens_id in range(C.ensemble):
                model , _train_loss = train(C, models[ens_id], trainset, loss_func, optimers[ens_id], epoch_id, "{0}-{1}".format(run_id , ens_id), device , finger_dict)
                train_loss += (_train_loss / C.ensemble)
            droc_auc , dprc_auc = evaluate(C, ens_eval_m, devset , loss_func, epoch_id, run_id, device, "Dev" , finger_dict)
            troc_auc , tprc_auc = evaluate(C, ens_eval_m, testset, loss_func, epoch_id, run_id, device, "Test", finger_dict)
            E.log("Epoch %d of run %d ended." % (epoch_id , run_id))
            E.log("Dev  Roc-Auc = %.4f Prc-Auc = %.4f" % (droc_auc , dprc_auc))
            E.log("Test Roc-Auc = %.4f Prc-Auc = %.4f" % (troc_auc , tprc_auc))
            E.log()
            # Model-selection metric: negative train loss or dev PRC-AUC.
            if C.train_loss_val:
                metric_val = -train_loss
            else:
                metric_val = dprc_auc
            # With C.no_valid set, every epoch "improves" → last epoch wins.
            if (best_epoch < 0 or metric_val > best_metric) or C.no_valid:
                best_epoch = epoch_id
                best_metric = metric_val
                tes_roc_auc = troc_auc
                tes_prc_auc = tprc_auc
                save_model(ens_eval_m , C.save_path , E.core.id , str(run_id))
        E.log("%d th run ends. \nbest epoch = %d" % (run_id , best_epoch))
        E.log("Best metric = %.4f" % (best_metric))
        E.log("Got Test Roc-Auc = %.4f Prc-Auc = %.4f" % (tes_roc_auc , tes_prc_auc))
        E.log()
        E["Test ROC-AUC"]["Best"].update(tes_roc_auc , run_id)
        E["Test PRC-AUC"]["Best"].update(tes_prc_auc , run_id)
        roc_aucs.append(tes_roc_auc)
        prc_aucs.append(tes_prc_auc)
        E.log("model saved.")
        E.log("--------------------------------------------------------------")
    # Mean and population standard deviation over the executed folds.
    roc_auc_avg = sum(roc_aucs) / len(roc_aucs)
    roc_auc_std = (sum([(x - roc_auc_avg) ** 2 for x in roc_aucs]) / len(roc_aucs)) ** 0.5
    prc_auc_avg = sum(prc_aucs) / len(prc_aucs)
    prc_auc_std = (sum([(x - prc_auc_avg) ** 2 for x in prc_aucs]) / len(prc_aucs)) ** 0.5
    E["Test ROC-AUC"].update("%.4f ± %.4f" % (roc_auc_avg , roc_auc_std))
    E["Test PRC-AUC"].update("%.4f ± %.4f" % (prc_auc_avg , prc_auc_std))
    E.log ("got avg test Roc-Auc = %.4f ± %.4f Prc-Auc = %.4f ± %.4f" % (
        roc_auc_avg , roc_auc_std , prc_auc_avg , prc_auc_std) )
    E.log("All run end!")
def eval_run(C , p_model = None):
    """Train an ensemble on the "test" fold, reload the best checkpoint,
    and write its predictions to ``to_upload.csv``.

    Like ``kfold`` but a single run: model selection must use the training
    loss (``C.train_loss_val``) because no dev metric is computed here —
    the ``assert False`` branch enforces that.
    """
    # Optional molecular fingerprint features, only loaded when configured.
    if C.finger or C.mol2vec:
        finger_dict = load_fingers(C , C.data)
    else:
        finger_dict = None
    device = 0
    (trainset , devset , testset) , lab_num = get_data (C , fold = "test")
    # Build the ensemble members (and their optimizers).
    models = []
    optimers = []
    for k in range(C.ensemble):
        model = get_model (C , lab_num)
        if p_model is not None:
            copy_param(model , p_model)  # warm start from the pretrained model
        model = model.to(device)
        # NOTE(review): loss_func from the LAST iteration is the one used
        # below — presumably all members share the same loss.
        optimer , loss_func = get_others(C , model)
        models.append(model)
        optimers.append(optimer)
    ens_eval_m = EnsembleModel(models)
    best_epoch = -1
    best_metric = -1
    for epoch_id in range(C.num_epoch):
        # Train each member for one epoch; average their train losses.
        train_loss = 0.
        for ens_id in range(C.ensemble):
            model , _train_loss = train(C, models[ens_id], trainset, loss_func, optimers[ens_id], epoch_id, "{0}-{1}".format(0 , ens_id), device , finger_dict)
            train_loss += (_train_loss / C.ensemble)
        E.log("Epoch %d ended." % (epoch_id))
        E.log()
        if C.train_loss_val:
            metric_val = -train_loss
        else:
            # No dev evaluation in this mode; any other setting is a
            # configuration error.
            assert False
        # With C.no_valid set, every epoch "improves" → last epoch wins.
        if (best_epoch < 0 or metric_val > best_metric) or C.no_valid:
            best_epoch = epoch_id
            best_metric = metric_val
            save_model(ens_eval_m , C.save_path , E.core.id , "eval")
    E.log("run ends. best epoch = %d" % (best_epoch))
    E.log("Best metric = %.4f" % (best_metric))
    E.log()
    E.log("model saved.")
    E.log("--------------------------------------------------------------")
    # Reload the checkpointed best ensemble and dump its test predictions.
    # NOTE(review): epoch_id here is the loop variable after the loop,
    # i.e. the last epoch index — presumably only used for logging.
    best_model = load_model(C.save_path , E.core.id , "eval")
    tot_pos_ps = evaluate(C, best_model, testset , loss_func, epoch_id, 0, device, "Dev" , finger_dict , ret_preds = True)
    save_pred(tot_pos_ps , C.data , "to_upload.csv")
    E.log("All run end!")
"""Fit a linear Prophet model on monthly mean inflated prices and
cross-validate it.  Script — runs on import."""
import pandas as pd
import numpy as np
from fbprophet import Prophet
from prepare import get_data, get_prepped, make_weighted, make_weighted_monthly
from fbprophet.diagnostics import cross_validation, performance_metrics
from sklearn.model_selection import ParameterGrid
from sklearn.preprocessing import StandardScaler
import pickle
import matplotlib.pyplot as plt
from predictions import get_model, store_model

# gets data without imputed weather values
data = get_data()

# creates new dataframe with Prophet-friendly column names
# ('ds' for the timestamp, 'y' for the target) — here the monthly mean
# of the inflated price.
df = pd.DataFrame()
df['y'] = data.resample('M').inflated.mean()
df = df.reset_index()
df = df.rename(columns={'date': 'ds'})

# creates linear prophet model to fit only on the inflated price
m = Prophet(growth='linear')
m.fit(df)
# Forecast 8 years of daily points beyond the training range.
future = m.make_future_dataframe(freq='D', periods=365 * 8)
forecast = m.predict(future)

# Rolling-origin cross validation over a 298-day horizon; the mean RMSE
# is computed for inspection (value recorded below) but not stored.
cv = cross_validation(m, horizon='298 days')
performance_metrics(cv).rmse.mean()
# RMSE: 135.51
# NOTE(review): this chunk begins mid-statement — the enclosing
# "from <module> import (..." line is outside the visible region; the two
# names below are its continuation.
apply_encoding, get_test_ids

import logging
logging.basicConfig(level=logging.INFO, format='[tfm-nuclei] - %(message)s')

# CLI: trained-model path, test-image dir, and square resize resolution.
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--model-dir", help="Ruta del modelo entrenado", type=str)
parser.add_argument("--test-dir", help="Ruta de imagenes de test", type=str)
parser.add_argument("--resolution", help="Redimensionar test, usar solo un entero. e.g: 128 para 128x128", type=int)
args = parser.parse_args()

model = load_trained_model(args.model_dir)
test = get_data(args.test_dir,args.test_dir,resolution=(args.resolution,args.resolution))

# Predict each test image individually (per-image rather than one batched
# call — the batched variant is left commented out below).
logging.info("Realizando predicciones sobre el conjunto de test")
predictions = np.asarray([model.predict(t) for t in tqdm(test)])
#predictions = model.predict(test)
logging.info("Completado")

# Upscale predictions back to each image's original resolution, split them
# into per-nucleus masks keyed by image id, then run-length encode.
resized_predictions = get_test_original_resolution(predictions, get_test_resolutions(args.test_dir))
individualized_masks_with_uid = get_predicted_mask_separated(get_test_ids(args.test_dir) ,resized_predictions)
encoded_masks = apply_encoding(individualized_masks_with_uid)

# Build the Kaggle-style submission CSV: one row per (image, encoded mask),
# with the RLE pixel list joined into a single space-separated string.
submission = pd.DataFrame(np.array(encoded_masks, dtype=object), columns=["ImageId", "EncodedPixels"])
submission['EncodedPixels'] = pd.Series(submission.EncodedPixels.values).apply(lambda x: ' '.join(str(y) for y in x))
submission.to_csv("arb_submission.csv", index=False)
logging.info(submission.head())
d_loss.append(0.5 * np.add(d_loss_real, d_loss_fake)) if (epoch % 3) == 0: g_loss.append( GAN.train_on_batch( np.array(X_train[batch * epoch:batch * epoch + batch_size]), np.zeros((batch_size, 1, 1, 1)))) logging.info("\t\t perdida discriminador --> %s" % (d_loss[epoch])) if (epoch % 3) == 0: logging.info("\t\t perdida generador --> %s" % (g_loss[int(epoch / 3)])) Y_t = get_masks(args.label, resolution=(args.resolution, args.resolution)) X_t = get_data(args.train, args.train, resolution=(args.resolution, args.resolution)) logging.info("Cargando red discriminadora") discriminator = get_discriminator(resolution=(args.resolution, args.resolution)) logging.info("Cargando red discriminadora") generator = get_generator(resolution=(args.resolution, args.resolution)) logging.info("Cargando red adversaria") GAN = get_gan(generator, discriminator) #train(GAN, generator, discriminator, X_t, Y_t) train_gan_augmented(GAN, generator, discriminator, X_t, Y_t) try: logging.info("Saving model weights to file") GAN.save(args.export_dir or "generated_model") except Exception as e: