Python Experiment.log_dataset_info примеры использования

Язык программирования: Python

Пространство имен/Пакет: comet_ml

Класс/Тип: Experiment

Метод/Функция: log_dataset_info

Примеров на hotexamples.com: 4

Python Experiment.log_dataset_info - 4 примера найдено. Это лучшие примеры Python кода для comet_ml.Experiment.log_dataset_info, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Experiment(30)

log_parameters(30)

log_image(30)

log_asset(30)

log_metric(30)

log_metrics(30)

log_multiple_params(30)

get_key(30)

log_other(30)

end(30)

set_name(30)

add_tags(30)

add_tag(30)

train(30)

log_figure(30)

log_dataset_hash(26)

set_model_graph(22)

test(21)

disable_mp(18)

validate(17)

set_step(16)

log_confusion_matrix(13)

log_code(11)

log_multiple_metrics(10)

log_epoch_end(9)

set_code(8)

log_text(8)

log_model(8)

log_current_epoch(7)

log_asset_folder(7)

log_asset_data(7)

log_parameter(6)

get_keras_callback(5)

context_manager(5)

log_table(5)

get_callback(5)

log_others(5)

get_parameter(5)

log_histogram_3d(5)

log_dataset_info(4)

log_html(4)

get_tags(4)

set_epoch(2)

log_dependency(2)

_set_optimizer(1)

close(1)

log_audio(1)

set_predictor(1)

set_num_of_epocs(1)

log_accuracy(1)

Пример #1

Показать файл

Файл: test_logging_optimizer.py Проект: pu-wei/gtsam

    def test_comet(self):
        """Test with a comet hook."""
        from comet_ml import Experiment
        comet = Experiment(project_name="Testing",
                           auto_output_logging="native")
        comet.log_dataset_info(name="Karcher", path="shonan")
        comet.add_tag("GaussNewton")
        comet.log_parameter("method", "GaussNewton")
        time = datetime.now()
        comet.set_name("GaussNewton-" + str(time.month) + "/" + str(time.day) +
                       " " + str(time.hour) + ":" + str(time.minute) + ":" +
                       str(time.second))

        # I want to do some comet thing here
        def hook(optimizer, error):
            comet.log_metric("Karcher error", error, optimizer.iterations())

        gtsam_optimize(self.optimizer, self.params, hook)
        comet.end()

        actual = self.optimizer.values()
        self.gtsamAssertEquals(actual.atRot3(KEY), self.expected)

Пример #2

Показать файл

Файл: clsm_pytorch.py Проект: tk1363704/CDSSM

def run(args, train, sparse_evidences, claims_dict):
    BATCH_SIZE = args.batch_size
    LEARNING_RATE = args.learning_rate
    DATA_SAMPLING = args.data_sampling
    NUM_EPOCHS = args.epochs
    MODEL = args.model
    RANDOMIZE = args.no_randomize
    PRINT = args.print

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    logger = Logger('./logs/{}'.format(time.localtime()))

    if MODEL:
        print("Loading pretrained model...")
        model = torch.load(MODEL)
        model.load_state_dict(torch.load(MODEL).state_dict())
    else:
        model = cdssm.CDSSM()
        model = model.cuda()
        model = model.to(device)

    # model = cdssm.CDSSM()
    # model = model.cuda()
    # model = model.to(device)

    if torch.cuda.device_count() > 0:
        print("Let's use", torch.cuda.device_count(), "GPU(s)!")
        model = nn.DataParallel(model)

    print("Created model with {:,} parameters.".format(
        putils.count_parameters(model)))

    # if MODEL:
    # print("TEMPORARY change to loading!")
    # model.load_state_dict(torch.load(MODEL).state_dict())

    print("Created dataset...")

    # use an 80/20 train/validate split!
    train_size = int(len(train) * 0.80)
    #test = int(len(train) * 0.5)
    train_dataset = pytorch_data_loader.WikiDataset(
        train[:train_size],
        claims_dict,
        data_sampling=DATA_SAMPLING,
        sparse_evidences=sparse_evidences,
        randomize=RANDOMIZE)
    val_dataset = pytorch_data_loader.WikiDataset(
        train[train_size:],
        claims_dict,
        data_sampling=DATA_SAMPLING,
        sparse_evidences=sparse_evidences,
        randomize=RANDOMIZE)

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=BATCH_SIZE,
                                  num_workers=0,
                                  shuffle=True,
                                  collate_fn=pytorch_data_loader.PadCollate())
    val_dataloader = DataLoader(val_dataset,
                                batch_size=BATCH_SIZE,
                                num_workers=0,
                                shuffle=True,
                                collate_fn=pytorch_data_loader.PadCollate())

    # Loss and optimizer
    criterion = torch.nn.NLLLoss()
    # criterion = torch.nn.SoftMarginLoss()
    # if torch.cuda.device_count() > 0:
    # print("Let's parallelize the backward pass...")
    # criterion = DataParallelCriterion(criterion)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=LEARNING_RATE,
                                 weight_decay=1e-3)

    OUTPUT_FREQ = max(int((len(train_dataset) / BATCH_SIZE) * 0.02), 20)
    parameters = {
        "batch size": BATCH_SIZE,
        "epochs": NUM_EPOCHS,
        "learning rate": LEARNING_RATE,
        "optimizer": optimizer.__class__.__name__,
        "loss": criterion.__class__.__name__,
        "training size": train_size,
        "data sampling rate": DATA_SAMPLING,
        "data": args.data,
        "sparse_evidences": args.sparse_evidences,
        "randomize": RANDOMIZE,
        "model": MODEL
    }
    experiment = Experiment(api_key="YLsW4AvRTYGxzdDqlWRGCOhee",
                            project_name="clsm",
                            workspace="moinnadeem")
    experiment.add_tag("train")
    experiment.log_asset("cdssm.py")
    experiment.log_dataset_info(name=args.data)
    experiment.log_parameters(parameters)

    model_checkpoint_dir = "models/saved_model"
    for key, value in parameters.items():
        if type(value) == str:
            value = value.replace("/", "-")
        if key != "model":
            model_checkpoint_dir += "_{}-{}".format(key.replace(" ", "_"),
                                                    value)

    print("Training...")
    beginning_time = time.time()
    best_loss = torch.tensor(float("inf"),
                             dtype=torch.float)  # begin loss at infinity

    for epoch in range(NUM_EPOCHS):
        beginning_time = time.time()
        mean_train_acc = 0.0
        train_running_loss = 0.0
        train_running_accuracy = 0.0
        model.train()
        experiment.log_current_epoch(epoch)

        with experiment.train():
            for train_batch_num, inputs in enumerate(train_dataloader):
                claims_tensors, claims_text, evidences_tensors, evidences_text, labels = inputs

                claims_tensors = claims_tensors.cuda()
                evidences_tensors = evidences_tensors.cuda()
                labels = labels.cuda()
                #claims = claims.to(device).float()
                #evidences = evidences.to(device).float()
                #labels = labels.to(device)

                y_pred = model(claims_tensors, evidences_tensors)

                y = (labels)
                # y = y.unsqueeze(0)
                # y = y.unsqueeze(0)
                # y_pred = parallel.gather(y_pred, 0)

                y_pred = y_pred.squeeze()
                # y = y.squeeze()

                loss = criterion(y_pred, torch.max(y, 1)[1])
                # loss = criterion(y_pred, y)

                y = y.float()
                binary_y = torch.max(y, 1)[1]
                binary_pred = torch.max(y_pred, 1)[1]
                accuracy = (binary_y == binary_pred).to("cuda")
                accuracy = accuracy.float()
                accuracy = accuracy.mean()
                train_running_accuracy += accuracy.item()
                mean_train_acc += accuracy.item()
                train_running_loss += loss.item()

                if PRINT:
                    for idx in range(len(y)):
                        print(
                            "Claim: {}, Evidence: {}, Prediction: {}, Label: {}"
                            .format(claims_text[0], evidences_text[idx],
                                    torch.exp(y_pred[idx]), y[idx]))

                if (train_batch_num %
                        OUTPUT_FREQ) == 0 and train_batch_num > 0:
                    elapsed_time = time.time() - beginning_time
                    binary_y = torch.max(y, 1)[1]
                    binary_pred = torch.max(y_pred, 1)[1]
                    print(
                        "[{}:{}:{:3f}s] training loss: {}, training accuracy: {}, training recall: {}"
                        .format(
                            epoch, train_batch_num /
                            (len(train_dataset) / BATCH_SIZE), elapsed_time,
                            train_running_loss / OUTPUT_FREQ,
                            train_running_accuracy / OUTPUT_FREQ,
                            recall_score(binary_y.cpu().detach().numpy(),
                                         binary_pred.cpu().detach().numpy())))

                    # 1. Log scalar values (scalar summary)
                    info = {
                        'train_loss': train_running_loss / OUTPUT_FREQ,
                        'train_accuracy': train_running_accuracy / OUTPUT_FREQ
                    }

                    for tag, value in info.items():
                        experiment.log_metric(tag,
                                              value,
                                              step=train_batch_num *
                                              (epoch + 1))
                        logger.scalar_summary(tag, value, train_batch_num + 1)

                    ## 2. Log values and gradients of the parameters (histogram summary)
                    for tag, value in model.named_parameters():
                        tag = tag.replace('.', '/')
                        logger.histo_summary(tag,
                                             value.detach().cpu().numpy(),
                                             train_batch_num + 1)
                        logger.histo_summary(tag + '/grad',
                                             value.grad.detach().cpu().numpy(),
                                             train_batch_num + 1)

                    train_running_loss = 0.0
                    beginning_time = time.time()
                    train_running_accuracy = 0.0
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        # del loss
        # del accuracy
        # del claims_tensors
        # del claims_text
        # del evidences_tensors
        # del evidences_text
        # del labels
        # del y
        # del y_pred
        # torch.cuda.empty_cache()

        print("Running validation...")
        model.eval()
        pred = []
        true = []
        avg_loss = 0.0
        val_running_accuracy = 0.0
        val_running_loss = 0.0
        beginning_time = time.time()
        with experiment.validate():
            for val_batch_num, val_inputs in enumerate(val_dataloader):
                claims_tensors, claims_text, evidences_tensors, evidences_text, labels = val_inputs

                claims_tensors = claims_tensors.cuda()
                evidences_tensors = evidences_tensors.cuda()
                labels = labels.cuda()

                y_pred = model(claims_tensors, evidences_tensors)

                y = (labels)
                # y_pred = parallel.gather(y_pred, 0)

                y_pred = y_pred.squeeze()

                loss = criterion(y_pred, torch.max(y, 1)[1])

                y = y.float()

                binary_y = torch.max(y, 1)[1]
                binary_pred = torch.max(y_pred, 1)[1]
                true.extend(binary_y.tolist())
                pred.extend(binary_pred.tolist())

                accuracy = (binary_y == binary_pred).to("cuda")

                accuracy = accuracy.float().mean()
                val_running_accuracy += accuracy.item()
                val_running_loss += loss.item()
                avg_loss += loss.item()

                if (val_batch_num % OUTPUT_FREQ) == 0 and val_batch_num > 0:
                    elapsed_time = time.time() - beginning_time
                    print(
                        "[{}:{}:{:3f}s] validation loss: {}, accuracy: {}, recall: {}"
                        .format(
                            epoch,
                            val_batch_num / (len(val_dataset) / BATCH_SIZE),
                            elapsed_time, val_running_loss / OUTPUT_FREQ,
                            val_running_accuracy / OUTPUT_FREQ,
                            recall_score(binary_y.cpu().detach().numpy(),
                                         binary_pred.cpu().detach().numpy())))

                    # 1. Log scalar values (scalar summary)
                    info = {'val_accuracy': val_running_accuracy / OUTPUT_FREQ}

                    for tag, value in info.items():
                        experiment.log_metric(tag,
                                              value,
                                              step=val_batch_num * (epoch + 1))
                        logger.scalar_summary(tag, value, val_batch_num + 1)

                    ## 2. Log values and gradients of the parameters (histogram summary)
                    for tag, value in model.named_parameters():
                        tag = tag.replace('.', '/')
                        logger.histo_summary(tag,
                                             value.detach().cpu().numpy(),
                                             val_batch_num + 1)
                        logger.histo_summary(tag + '/grad',
                                             value.grad.detach().cpu().numpy(),
                                             val_batch_num + 1)

                    val_running_accuracy = 0.0
                    val_running_loss = 0.0
                    beginning_time = time.time()

        # del loss
        # del accuracy
        # del claims_tensors
        # del claims_text
        # del evidences_tensors
        # del evidences_text
        # del labels
        # del y
        # del y_pred
        # torch.cuda.empty_cache()

        accuracy = accuracy_score(true, pred)
        print("[{}] mean accuracy: {}, mean loss: {}".format(
            epoch, accuracy, avg_loss / len(val_dataloader)))

        true = np.array(true).astype("int")
        pred = np.array(pred).astype("int")
        print(classification_report(true, pred))

        best_loss = torch.tensor(
            min(avg_loss / len(val_dataloader),
                best_loss.cpu().numpy()))
        is_best = bool((avg_loss / len(val_dataloader)) <= best_loss)

        putils.save_checkpoint(
            {
                "epoch": epoch,
                "model": model,
                "best_loss": best_loss
            },
            is_best,
            filename="{}_loss_{}".format(model_checkpoint_dir,
                                         best_loss.cpu().numpy()))

Пример #3

Показать файл

Файл: light_gbm_start.py Проект: gdreiman1/thesis

def comet_lgbm(save_path):
    from comet_ml import Experiment
    exp = Experiment(api_key="sqMrI9jc8kzJYobRXRuptF5Tj",
                            project_name="baseline", workspace="gdreiman1")
    exp.log_code = True
    
    import pickle
    import pandas as pd
    import lightgbm as lgb
    import numpy as np
    import sklearn
    import matplotlib.pyplot as plt
    from sklearn.metrics import precision_recall_fscore_support as prf
    #%%
    def single_roc(y_preds,y_true):
        
        from sklearn.metrics import roc_curve, auc,precision_recall_curve
        fpr, tpr, _ = roc_curve(y_true, y_preds)
        roc_auc = auc(fpr, tpr)
        plt.figure()
        lw = 2
        plt.plot(fpr, tpr, color='darkorange',
                 lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
        plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver operating characteristic example')
        
        precision, recall, thresholds = precision_recall_curve(y_true, y_preds)
        plt.plot(recall, precision, color='blue',
                 lw=lw, label='Precision vs Recall')
        # show the plot
        plt.legend(loc="lower right")
        plt.show()
    def multi_roc(y_preds,y_true,name,n_classes):
        import collections
        nested_dict = lambda: collections.defaultdict(nested_dict)
        data_store = nested_dict()
        from sklearn.metrics import roc_curve, auc
        from scipy import interp
        from itertools import cycle
        lw = 2
        name_store = ['Active', 'Inactive', 'Inconclusive']
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        for i in range(n_classes):
            fpr[i], tpr[i], _ = roc_curve(y_true[:, i], y_preds[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])
        
        # Compute micro-average ROC curve and ROC area
        fpr["micro"], tpr["micro"], _ = roc_curve(y_true[:, i].ravel(), y_preds[:, i].ravel())
        roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
        # Compute macro-average ROC curve and ROC area
        
        # First aggregate all false positive rates
        all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
        
        # Then interpolate all ROC curves at this points
        mean_tpr = np.zeros_like(all_fpr)
        for i in range(n_classes):
            mean_tpr += interp(all_fpr, fpr[i], tpr[i])
        
        # Finally average it and compute AUC
        mean_tpr /= n_classes
        
        fpr["macro"] = all_fpr
        tpr["macro"] = mean_tpr
        roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
        
        # Plot all ROC curves
        plt.figure()
        plt.plot(fpr["micro"], tpr["micro"],
                 label='micro-average ROC curve (area = {0:0.2f})'
                       ''.format(roc_auc["micro"]),
                 color='deeppink', linestyle=':', linewidth=4)
        
        plt.plot(fpr["macro"], tpr["macro"],
                 label='macro-average ROC curve (area = {0:0.2f})'
                       ''.format(roc_auc["macro"]),
                 color='navy', linestyle=':', linewidth=4)
        
        colors = cycle(['aqua', 'darkorange', 'cornflowerblue','green'])
        for i, color in zip(range(n_classes), colors):
            plt.plot(fpr[i], tpr[i], color=color, lw=lw,
                     label='ROC curve of '+ name_store[i]+'(area = {1:0.2f})'
                     ''.format(i, roc_auc[i]))
        
        plt.plot([0, 1], [0, 1], 'k--', lw=lw)
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        #plt.title('Multi-class ROC for '+name+' Split= '+str(count+1))
        plt.title('Multi-class ROC for '+name)
    
        plt.legend(loc="lower right")
        #plt.show()
    #%%
    #save_path = r'C:\Users\gdrei\Dropbox\UCL\Thesis\May_13\AID_1345083_processed.pkl'
    model_type = 'lgbm'
    #get data cleaned
    pickle_off = open(save_path,'rb')
    activity_table=pickle.load(pickle_off)
    pickle_off.close()
    #get length of MFP
    fp_length = len(activity_table.iloc[5]['MFP'])
    
    
    from sklearn.preprocessing import StandardScaler, LabelEncoder
    scaler = StandardScaler(copy = False)
    le = LabelEncoder()
    labels = le.fit_transform(activity_table['PUBCHEM_ACTIVITY_OUTCOME'])
    #split data:
    from sklearn.model_selection import StratifiedShuffleSplit
    splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.5, train_size=None, random_state=2562)
    X_mfp = np.concatenate(np.array(activity_table['MFP'])).ravel()
    X_mfp = X_mfp.reshape((-1,fp_length))
    for train_ind, test_ind in splitter.split(X_mfp,labels):
        # standardize data
        X_train_molchars_std = scaler.fit_transform(np.array(activity_table.iloc[train_ind,4:]))
        X_test_molchars_std = scaler.transform(np.array(activity_table.iloc[test_ind,4:]))
        X_train = np.concatenate((X_mfp[train_ind,:],X_train_molchars_std),axis = 1)
        X_test = np.concatenate((X_mfp[test_ind,:],X_test_molchars_std),axis = 1)
        y_train = labels[train_ind]
        y_test = labels[test_ind]
        #X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X,labels,test_size = .5, shuffle = True, stratify = labels, random_state = 2562)
        bin_y_train, bin_y_test = [1 if x ==2 else x for x in y_train],[1 if x ==2 else x for x in y_test]
        
    #do light gbm
        
    #need to make a lib svm file
    train_data = lgb.Dataset(X_train,label=y_train)
    test_data = lgb.Dataset(X_test,label=y_test)
    #make model class
    lgbm_model = lgb.LGBMClassifier(boosting_type='gbdt', num_leaves=31, max_depth=-1, learning_rate=0.1, n_estimators=500, subsample_for_bin=200000, 
                                    objective='binary', is_unbalance=True, min_split_gain=0.0, min_child_weight=0.001, min_child_samples=20, subsample=1.0, 
                                    subsample_freq=0, colsample_bytree=1.0, reg_alpha=0.0, reg_lambda=0.0, random_state=None, n_jobs=-1, silent=True, 
                                    importance_type='split')
    #train model
    trained_mod = lgbm_model.fit(X_train,y_train)
    #predict classes and class_probs
    test_class_preds = lgbm_model.predict(X_test)
    test_prob_preds = lgbm_model.predict_proba(X_test)
    #calculate Class report
    class_rep = sklearn.metrics.classification_report(y_test,test_class_preds)
    
    print(class_rep)
    if len(set(y_test)) == 2:
        single_roc(test_prob_preds[:,1],y_test)
        prec,rec,f_1,supp = prf(y_test, test_class_preds, average=None)
    else:
        from tensorflow.keras.utils import to_categorical
        multi_roc(test_prob_preds,to_categorical(y_test),'',3)
        prec,rec,f_1,supp = prf(y_test, test_class_preds, average=None)
    
    
     #%% 
    '''Comet Saving Zone'''
    #get AID number
    import ntpath
    #get base file name
    folder,base = ntpath.split(save_path)
    #split file name at second _ assumes file save in AID_xxx_endinfo.pkl
    AID, _,end_info = base.rpartition('_')
    #save data location, AID info, and version info
    exp.log_dataset_info(name = AID, version = end_info, path = save_path)
    #save model params
    exp.log_parameters(trained_mod.get_params())
    #save metrics report to comet
    if len(f_1) == 2:
        for i,name in enumerate(['Active','Inactive']):
            exp.log_metric('f1 class '+name, f_1[i])
            exp.log_metric('Recall class'+name,rec[i])
            exp.log_metric('Precision class'+name, prec[i])
    else:
        for i,name in enumerate(['Active','Inconclusive','Inactive']):
            exp.log_metric('f1 class '+str(i), f_1[i])
            exp.log_metric('Recall class'+str(i),rec[i])
            exp.log_metric('Precision class'+str(i), prec[i])
        #exp.log_metric('f1 class '+str(i), f_1[i])
        #exp.log_metric('Recall class'+str(i),rec[i])
        #exp.log_metric('Precision class'+str(i), prec[i])
    exp.log_other('Classification Report',class_rep)
     #save model in data_folder with comet experiement number associated
    exp_num = exp.get_key()
    model_save = folder+'\\'+model_type+'_'+exp_num+'.pkl'
    pickle_on = open(model_save,'wb')
    pickle.dump(trained_mod,pickle_on)
    pickle_on.close()
    #log trained model location
    exp.log_other('Trained Model Path',model_save)
    #save some informatvie tags:
    tags = [AID,end_info,model_type]
    exp.add_tags(tags)
    #save ROC curve
    exp.log_figure(figure_name = 'ROC-Pres/Recall',figure=plt)
    plt.show()

    #tell comet that the experiement is over
    exp.end()

Пример #4

Показать файл

Файл: train_pinet.py Проект: noticeable/lane_detector

def training(dataset: str, log_comet_ml=False):
    print('Train PINet')

    # ------------------------------------------------------------
    # 1. get parameters
    network_params = NetworkParameters()
    training_params = TrainingParameters()

    # ------------------------------------------------------------
    # 2. build dataset -> output sample dict
    dataset_params = get_parameters(dataset)
    train_dataset, val_dataset = build_dataset(dataset_params)
    validate_fn = build_validate_fn(dataset_params)

    # ------------------------------------------------------------
    # 3. build dataloader -> output batch tensor
    train_generator = None
    if train_dataset is not None:
        train_generator = build_dataloader(train_dataset,
                                           training_params.batch_size)

    # ------------------------------------------------------------
    # 4. define network
    print('Get model, train from sketch')
    model = PINet()
    device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")  # start from gpu_0
    model.to(device)

    # ------------------------------------------------------------
    # 5. associate network model to a training instance
    # ToDo: resume training
    lane_detector = TrainerLaneDetector(model, network_params, training_params)
    # lane_detector.load_weights(p.model_path)
    lane_detector.training_mode()

    # ------------------------------------------------------------
    # 6. define logging enviroment
    if log_comet_ml:
        print('Logging with comet_ml')
        experiment = Experiment(api_key="7XBXyqF4ctwO6HnnHnBGuv7U3",
                                project_name="lanedetection",
                                workspace="masszhou")
        experiment.log_parameters(vars(network_params))
        experiment.log_parameters(vars(training_params))
        experiment.log_parameters(vars(dataset_params))
        experiment.log_dataset_info(name=dataset)
    else:
        experiment = None

    # ------------------------------------------------------------
    # 7. start training phase
    step = 0
    timestr = time.strftime("%Y%m%d-%H%M%S")
    for epoch in range(training_params.num_epochs):
        pbar = tqdm(total=len(train_dataset) / training_params.batch_size)
        loss_p = -1.0
        for batch in train_generator:
            # inputs -> ndarray[#batch, 3, 256, 512]
            # target_lanes -> List[ndarray] e.g. [[4, 48],..., ]
            # target_h -> List[ndarray] e.g. [[4, 48],..., ]
            # test_image -> ndarray [3, 256, 512]
            loss_p, metrics, outputs = lane_detector.train(batch,
                                                           epoch=epoch,
                                                           step=step)

            # parse confidence map from result
            outputs_last_block = outputs[-1]
            confidance, _, _ = outputs_last_block  # [8, 1, 32, 64]
            confidance = confidance[0].cpu().data.numpy()
            confidance = confidance.transpose((1, 2, 0))

            if log_comet_ml:
                experiment.log_metric("train total loss", loss_p)
                experiment.log_metric("learning rate", lane_detector.get_lr())
                experiment.log_metrics(metrics)
                if step % 500 == 0:
                    # log confidence output from first image in batch
                    experiment.log_image(confidance,
                                         name="image id:{}".format(
                                             batch["image_id"][0]))

            pbar.set_description(f'epoch {epoch}')
            pbar.set_postfix(total_loss=loss_p)
            pbar.update()
            step += 1
        pbar.close()

        # save model per epoch
        file_name = f"./tmp/{timestr}_epoch-{epoch}_totalstep-{step}_loss-{loss_p:.2f}.pth"
        lane_detector.save_model_v2(file_name)

        # if epoch % 10 == 0:
        if val_dataset is not None:
            file_name = f"./tmp/{timestr}_epoch-{epoch}_validation.json"
            scores = validate_fn(dataset=val_dataset,
                                 net=lane_detector,
                                 validate_file_name=file_name,
                                 logger=experiment)