    X_train = train_df[args.features.split()]
    X_test = test_df[args.features.split()]
    y_train = train_df[args.target]
    y_test = test_df[args.target]

    # set remote mlflow server
    mlflow.set_tracking_uri(args.tracking_uri)
    mlflow.set_experiment(args.experiment_name)

    with mlflow.start_run():
        params = {
            "n-estimators": args.n_estimators,
            "min-samples-leaf": args.min_samples_leaf,
            "features": args.features
        }
        mlflow.log_params(params)

        # TRAIN
        logging.info('training model')
        model = RandomForestRegressor(n_estimators=args.n_estimators,
                                      min_samples_leaf=args.min_samples_leaf,
                                      n_jobs=-1)

        model.fit(X_train, y_train)

        # Compute absolute error and log a few performance metrics
        logging.info('evaluating model')
        abs_err = np.abs(model.predict(X_test) - y_test)

        for q in [10, 50, 90]:
            logging.info(
Example #2
                                 shuffle=False,
                                 collate_fn=pad_sequences,
                                 drop_last=False)
    else:
        test_dataset = None
        test_loader = None

    mlflow.set_experiment(f"diplodatos.{args.language}")

    with mlflow.start_run():
        logging.info("Starting experiment")
        # Log all relevant hyperparameters
        mlflow.log_params({
            "model_type": "CNN",
            "embeddings": args.pretrained_embeddings,
            "dropout": args.dropout,
            "embeddings_size": args.embeddings_size,
            "epochs": args.epochs
        })
        device = torch.device(
            "cuda") if torch.cuda.is_available() else torch.device("cpu")

        logging.info("Building classifier")
        model = CNNClassifier(
            pretrained_embeddings_path=args.pretrained_embeddings,
            token_to_index=args.token_to_index,
            n_labels=train_dataset.n_labels,
            dropout=args.dropout,
            vector_size=args.embeddings_size,
            freeze_embedings=True,
            FILTERS_LENGTH=[2, 3, 4, 5],
Example #3
                                 shuffle=False,
                                 collate_fn=pad_sequences,
                                 drop_last=False)
    else:
        test_dataset = None
        test_loader = None

    mlflow.set_experiment(f"diplodatos.{args.language}")

    with mlflow.start_run():
        logging.info("Starting experiment")
        # Log all relevant hyperparameters
        mlflow.log_params({
            "model_type": "Multilayer Perceptron",
            "embeddings": args.pretrained_embeddings,
            "hidden_layers": args.hidden_layers,
            "dropout": args.dropout,
            "embeddings_size": args.embeddings_size,
            "epochs": args.epochs
        })
        device = torch.device(
            "cuda") if torch.cuda.is_available() else torch.device("cpu")

        logging.info("Building classifier")
        model = MLPClassifier(
            pretrained_embeddings_path=args.pretrained_embeddings,
            token_to_index=args.token_to_index,
            n_labels=train_dataset.n_labels,
            hidden_layers=args.hidden_layers,
            dropout=args.dropout,
            vector_size=args.embeddings_size,
            freeze_embedings=True  # This can be a hyperparameter
Example #4
 def log_params(cls, params):
     mlflow.log_params(params)
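
A minimal usage sketch for a thin wrapper like this; the enclosing class name (`MlflowLogger`) is an assumption:

import mlflow


class MlflowLogger:
    @classmethod
    def log_params(cls, params):
        mlflow.log_params(params)


# Hypothetical call site: log a flat dict of hyperparameters to the active run.
with mlflow.start_run():
    MlflowLogger.log_params({"n_estimators": 100, "min_samples_leaf": 3})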
Example #5
def train(args, yml_config):
    with strategy.scope():

        # @title Load tensorflow datasets: we use tensorflow flower dataset as an example
        batch_size = yml_config['finetuning']['batch']
        buffer_size = yml_config['finetuning']['buffer_size']

        # @title Load tensorflow datasets: we use tensorflow flower dataset as an example
        dataset_name = yml_config['data_src']

        if dataset_name == 'tf_flowers':
            tfds_dataset, tfds_info = tfds.load(dataset_name,
                                                split='train',
                                                with_info=True)
            num_images = tfds_info.splits['train'].num_examples
            num_classes = tfds_info.features['label'].num_classes

            x = tfds_dataset.map(_preprocess).batch(batch_size)
            x = tf1.data.make_one_shot_iterator(x).get_next()

        elif dataset_name == 'chest_xray':
            if args.xray_path == '':
                data_path = yml_config['dataset']['chest_xray']
            else:
                data_path = args.xray_path
            train_dataset, tfds_info = chest_xray.XRayDataSet(data_path,
                                                              config=None,
                                                              train=True)
            num_images = np.floor(
                yml_config['finetuning']['train_data_ratio'] *
                tfds_info['num_examples'])
            num_classes = tfds_info['num_classes']

        print(f"Training: {num_images} images...")

        def _preprocess(x):
            x['image'] = preprocess_image(x['image'],
                                          224,
                                          224,
                                          is_training=False,
                                          color_distort=False)
            return x

        x_ds = train_dataset \
            .take(num_images) \
            .map(_preprocess, deterministic=False) \
            .shuffle(buffer_size)\
            .batch(yml_config['finetuning']['batch'])

        x_iter = tf1.data.make_one_shot_iterator(x_ds)
        x_init = x_iter.make_initializer(x_ds)
        x = x_iter.get_next()

        print(f"{type(x)} {type(x['image'])} {x['image']} {x['label']}")
        # @title Load module and construct the computation graph
        learning_rate = yml_config['finetuning']['learning_rate']
        momentum = yml_config['finetuning']['momentum']
        weight_decay = yml_config['finetuning']['weight_decay']
        epoch_save_step = yml_config['finetuning']['epoch_save_step']
        load_saver = yml_config['finetuning'].get('load_ckpt')

        # Load the base network and set it to non-trainable (for speedup fine-tuning)
        hub_path = str(
            Path(yml_config['finetuning']['pretrained_build']).resolve())
        hub_path = os.path.join(hub_path, 'hub')
        module = hub.Module(hub_path,
                            trainable=yml_config['finetuning']['train_resnet'])

        if yml_config['finetuning']['pretrained_model'] == 'ChestXRay':
            key = module(inputs=x['image'],
                         signature="projection-head-1",
                         as_dict=True)
        else:
            key = module(inputs=x['image'], as_dict=True)

        # Attach a trainable linear layer to adapt for the new task.
        if dataset_name == 'tf_flowers':
            with tf1.variable_scope('head_supervised_new',
                                    reuse=tf1.AUTO_REUSE):
                logits_t = tf1.layers.dense(inputs=key['default'],
                                            units=num_classes,
                                            name='proj_head')
            loss_t = tf1.reduce_mean(
                input_tensor=tf1.nn.softmax_cross_entropy_with_logits(
                    labels=tf1.one_hot(x['label'], num_classes),
                    logits=logits_t))
        elif dataset_name == 'chest_xray':
            with tf1.variable_scope('head_supervised_new',
                                    reuse=tf1.AUTO_REUSE):
                logits_t = tf1.layers.dense(inputs=key['default'],
                                            units=num_classes)
                cross_entropy = weighted_cel(labels=x['label'],
                                             logits=logits_t,
                                             bound=3.0)
                loss_t = tf1.reduce_mean(tf1.reduce_sum(cross_entropy, axis=1))

        # Setup optimizer and training op.
        if yml_config['finetuning']['optimizer'] == 'adam':
            optimizer = tf1.train.AdamOptimizer(learning_rate)
        elif yml_config['finetuning']['optimizer'] == 'lars':
            optimizer = LARSOptimizer(learning_rate,
                                      momentum=momentum,
                                      weight_decay=weight_decay,
                                      exclude_from_weight_decay=[
                                          'batch_normalization', 'bias',
                                          'head_supervised'
                                      ])
        else:
            raise RuntimeError("Optimizer not supported")

        variables_to_train = tf1.trainable_variables()
        train_op = optimizer.minimize(
            loss_t,
            global_step=tf1.train.get_or_create_global_step(),
            var_list=variables_to_train)

        print('Variables to train:', variables_to_train)

        # Add ops to save and restore all the variables.
        sess = tf1.Session()
        Saver = tf1.train.Saver()  # Default saves all variables
        current_time = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
        directory = Path(args.output_dir)

        is_time_to_save_session = partial(model_ckpt.save_session,
                                          epoch_save_step,
                                          Saver,
                                          output=directory)
        if load_saver is not None:
            Saver.restore(sess, load_saver)
        else:
            sess.run(tf1.global_variables_initializer())

        # @title We fine-tune the new *linear layer* for just a few iterations.
        epochs = yml_config['finetuning']['epochs']

        # ===============Tensor board section ===============
        # with tf.name_scope('performance'):
        # tf_labels = tf1.placeholder(tf.int32, shape=[batch_size,num_classes], name='accuracy')
        tf_tot_acc_all_ph = tf1.placeholder(tf.float32,
                                            shape=None,
                                            name='accuracy_all_labels_ph')
        tf_tot_acc_all_summary = tf1.summary.scalar('accuracy_all_labels',
                                                    tf_tot_acc_all_ph)
        tf_tot_acc_per_class_ph = tf1.placeholder(tf.float32,
                                                  shape=None,
                                                  name='accuracy_per_class_ph')
        tf_tot_acc_per_class_summary = tf1.summary.scalar(
            'accuracy_per_class', tf_tot_acc_per_class_ph)
        tf_tot_acc_class_avg_ph = tf1.placeholder(
            tf.float32, shape=None, name='accuracy_per_class_averaged_ph')
        tf_tot_acc_class_avg_summary = tf1.summary.scalar(
            'accuracy_per_class_averaged', tf_tot_acc_class_avg_ph)
        tf_train_tot_loss_ph = tf1.placeholder(tf.float32,
                                               shape=None,
                                               name='train_tot_loss')
        tf_train_tot_loss_summary = tf1.summary.scalar('train_tot_loss',
                                                       tf_train_tot_loss_ph)
        tf_tot_auc_ph = tf1.placeholder(tf.float32, shape=None, name='auc_ph')
        tf_tot_auc_ph_summary = tf1.summary.scalar('auc', tf_tot_auc_ph)

        performance_summaries = tf1.summary.merge([
            tf_tot_acc_all_summary, tf_tot_acc_class_avg_summary,
            tf_train_tot_loss_summary, tf_tot_auc_ph_summary
        ])

        hyper_param = []
        print(
            f"yml_config[pretrained_build]= {yml_config['finetuning']['pretrained_build']} "
        )
        for item in yml_config['finetuning']:
            hyper_param.append(
                tf1.summary.text(
                    str(item),
                    tf.constant(str(yml_config['finetuning'][item])),
                    'HyperParam'))

        summ_writer = tf1.summary.FileWriter(directory / 'tb', sess.graph)
        tf.summary.record_if(yml_config['tensorboard'])
        # Limit the precision of floats...
        np.set_printoptions(formatter={'float': '{: 0.3f}'.format})
        with sess.as_default() as scope:
            if yml_config['mlflow']:

                # log params in MLFLOW
                if args.mlflow_dir is None:
                    mlflow.set_tracking_uri(yml_config['mlflow_path'])
                else:
                    mlflow.set_tracking_uri(args.mlflow_dir)

                mlflow.set_experiment('results')
                mlflow.start_run()
                # open the pickle file that contains the hyperparameters of the pretrained model
                fname = os.path.join(
                    yml_config['finetuning']['pretrained_build'],
                    'experiment_flags.p')
                if os.path.exists(fname):
                    with open(fname, 'rb') as f:
                        pretuned_params = pickle.load(f)
                    pretuned_params = {
                        'P-' + str(key).replace('/', '').replace(
                            '?', '').replace('$', ''): val
                        for key, val in pretuned_params.items()
                    }
                    mlflow.log_params(pretuned_params)

                # open the pickle file that contains the mAP metrics of the pretrained model
                fname = os.path.join(
                    yml_config['finetuning']['pretrained_build'],
                    'mAP_result.p')
                if os.path.exists(fname):
                    with open(fname, 'rb') as f:
                        pretuned_metric = pickle.load(f)
                    mlflow.log_metrics(pretuned_metric)

                finetuned_params = {
                    'F-' + str(key).replace('/', ''): val
                    for key, val in yml_config['finetuning'].items()
                }

                mlflow.log_param('TB_Timestamp', current_time)

                mlflow.log_params(finetuned_params)

            fname = os.path.join(directory, 'finetuning_hyper_params.txt')
            with open(fname, 'w') as f:
                for key, value in yml_config['finetuning'].items():
                    f.write('%s:%s\n' % (key, value))

            writer = tf1.summary.FileWriter('./log', sess.graph)
            for index, summary_op in enumerate(hyper_param):
                text = sess.run(summary_op)
                summ_writer.add_summary(text, index)

            n_iter = int(num_images / batch_size)
            print(f"Batch:{batch_size}, n_iter:{n_iter} ")

            # =============== Main Loop (epoch) - START ===============
            for it in range(epochs):
                start_time_epoch = time.time()
                # Init dataset iterator
                sess.run(x_init)
                # Accuracy all = All class must be Correct
                # Accuracy per class = Score for each class
                # Accuracy class average: the average of the accuracy per class
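                # Illustrative example (assumed values, 2 samples x 3 labels):
                #   pred   = [[1, 0, 1], [0, 1, 1]]
                #   labels = [[1, 0, 0], [0, 1, 1]]
                #   accuracy all       -> sample 0 has a wrong label, sample 1 is fully correct: (0 + 1) / 2 = 0.5
                #   accuracy per class -> [1.0, 1.0, 0.5] (fraction of samples correct for each label)
                #   accuracy class avg -> mean([1.0, 1.0, 0.5]) ≈ 0.83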
                tot_acc_all = 0.0
                tot_acc_per_class = 0.0
                tot_acc_class_avg = 0.0
                train_tot_loss = 0.0
                epoch_acc_all = 0.0
                epoch_acc_per_class = 0.0
                epoch_acc_class_avg = 0.0
                #show_one_image(x['image'][0].eval())

                # =============== Main Loop (iteration) - START ===============
                all_labels = []
                all_logits = []
                for step in range(n_iter):

                    start_time_iter = time.time()
                    _, loss, image, logits, labels = sess.run(
                        fetches=(train_op, loss_t, x['image'], logits_t,
                                 x['label']))
                    # tf_labels = tf.convert_to_tensor(labels)
                    train_tot_loss += loss
                    all_labels.extend(labels)
                    if dataset_name == 'tf_flowers':
                        pred = logits.argmax(-1)
                        correct = np.sum(pred == labels)
                        acc_per_class = np.array([correct / float(batch_size)])
                    elif dataset_name == 'chest_xray':
                        # # New compute
                        logits_sig = scipy.special.expit(logits)
                        all_logits.extend(logits_sig)
                        pred = (logits_sig > 0.5).astype(np.float32)
                        acc_all = np.mean(
                            np.min(np.equal(pred, labels).astype(np.float32),
                                   axis=1))
                        acc_per_class = np.mean(np.equal(pred, labels).astype(
                            np.float32),
                                                axis=0)
                        acc_class_avg = np.mean(acc_per_class)
                        tot_acc_all += acc_all
                        tot_acc_per_class += acc_per_class
                        tot_acc_class_avg += acc_class_avg

                    # roc_auc_score raises a ValueError ("Only one class present in y_true.
                    # ROC AUC score is not defined in that case") when a label has only one
                    # class in the batch, e.g. when every sample in the batch is positive for hernia.
                    try:
                        auc_cum = roc_auc_score(np.array(all_labels),
                                                np.array(all_logits))
                    except ValueError:
                        auc_cum = None

                    current_time_iter = time.time()
                    elapsed_time_iter = current_time_iter - start_time_iter

                    if yml_config['finetuning']['verbose_train_loop']:
                        print(
                            f"[Epoch {it + 1}/{epochs} Iter: {step}/{n_iter}] Model: {yml_config['finetuning']['pretrained_model']}, Total Loss: {train_tot_loss} Loss: {np.float32(loss)}"  # Batch Acc: {np.float32(acc_all)} "
                            f" AUC Cumulative: {auc_cum}")
                        print(f"Finished iteration:{step} in: " +
                              str(int(elapsed_time_iter)) + " sec")

                    # break if the logits explode (NaN)
                    if np.isnan(np.sum(logits)):
                        print("Loss has exploded: NaN")
                        break

                epoch_acc_all = (tot_acc_all / n_iter)
                epoch_acc_per_class = (tot_acc_per_class / n_iter)
                epoch_acc_class_avg = (tot_acc_class_avg / n_iter)

                try:
                    epoch_auc = roc_auc_score(np.array(all_labels),
                                              np.array(all_logits),
                                              average=None)
                    epoch_auc_mean = epoch_auc.mean()
                    aucs = dict(zip(chest_xray.XR_LABELS.keys(), epoch_auc))
                    auc_scores = {
                        'AUC ' + str(key): val
                        for key, val in aucs.items()
                    }

                except ValueError:
                    epoch_auc = None
                    epoch_auc_mean = None

                print(
                    f"[Epoch {it + 1}/{epochs} Model: {yml_config['finetuning']['pretrained_model']}, Loss: {train_tot_loss} "
                    f" Train AUC: {epoch_auc_mean} AOC/Class {epoch_auc},")

                # Is it time to save the session?
                is_time_to_save_session(it, sess)

                current_time_epoch = time.time()
                elapsed_time_iter = current_time_epoch - start_time_epoch
                print(f"Finished EPOCH:{it + 1} in: " +
                      str(int(elapsed_time_iter)) + " sec")

                # ===================== Write Tensorboard summary ===============================
                # Execute the summaries defined above

                summ = sess.run(performance_summaries,
                                feed_dict={
                                    tf_tot_acc_all_ph: epoch_acc_all,
                                    tf_tot_acc_class_avg_ph:
                                    epoch_acc_class_avg,
                                    tf_train_tot_loss_ph: train_tot_loss,
                                    tf_tot_auc_ph: epoch_auc_mean
                                })

                # Write the obtained summaries to the file, so it can be displayed in the TensorBoard
                summ_writer.add_summary(summ, it)

                # =============== Main Loop (epoch) - END ===============

            print(f"Training Done")

            if yml_config['mlflow']:
                mlflow.log_metric('Total Train Accuracy', epoch_acc_all)
                mlflow.log_metric('Total Train Accuracy per class',
                                  np.mean(epoch_acc_per_class))
                mlflow.log_metric('Total Train Loss', train_tot_loss)
                if epoch_auc is not None:
                    mlflow.log_metrics(auc_scores)

            fname_final = str(directory / f'final.ckpt')
            ckpt_pt = Saver.save(sess=sess, save_path=fname_final)
            print(f"Final Chekpoint Saved in {fname_final}")
            return directory
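
A hedged usage sketch for this fine-tuning entry point; the YAML file name, the argparse flags, and the globally defined `strategy` object are assumptions inferred from the attributes read above:

import argparse

import yaml

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--output_dir', default='finetune_output')
    parser.add_argument('--xray_path', default='')
    parser.add_argument('--mlflow_dir', default=None)
    args = parser.parse_args()

    # config.yml is assumed to define the 'data_src', 'dataset', 'finetuning',
    # 'mlflow', 'mlflow_path' and 'tensorboard' keys read inside train().
    with open('config.yml') as f:
        yml_config = yaml.safe_load(f)

    train(args, yml_config)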
Example #6
                solver=config.get('hyperparams').get('log_reg').get('solver'),
                random_state=config.get('random_state'),
                max_iter=config.get('hyperparams').get('log_reg').get('max_iter'),
                tol=config.get('hyperparams').get('log_reg').get('tol')
            )
        )
        param_grid = {
            'model__penalty': config.get('hyperparams').get('log_reg').get('penalty'),
            'model__C': config.get('hyperparams').get('log_reg').get('C')
        }
        gs_clf = GridSearchCV(
            pipeline, param_grid=param_grid,
            scoring=config.get('scoring'), cv=config.get('k'), verbose=1
        )
        gs_clf.fit(X_train, y_train)
        mlflow.log_params(gs_clf.best_params_)
        mlflow.log_metrics({
                '_'.join([config.get('scoring'), 'train']): gs_clf.score(X_train, y_train),
                '_'.join([config.get('scoring'), 'test']): gs_clf.score(X_test, y_test)
        })

        print('Plotting ROC curve...')
        img_fn = 'roc_{}.png'.format(RUN_NAME)
        plot_roc_curve(gs_clf, X_test, y_test, img_fn)
        
        # print('Plotting learning curve...')
        # img_fn = 'learning_curve_{}.png'.format(RUN_NAME)
        # plot_learning_curve(gs_clf, X_train, y_train, img_fn)

        print('Recording hyperparameters used...')
        txt_fn = 'hyperparams_{}.txt'.format(RUN_NAME)
Example #7
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--output_dir",
        default="output",
        type=str,
        help="The output directory where the model predictions and checkpoints "
        "will be written.")

    parser.add_argument("--bert_embeddings",
                        action='store_true',
                        help="Whether to use roberta embeddings.")
    parser.add_argument("--train",
                        action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--eval",
                        action='store_true',
                        help="Whether to run eval on the test set.")
    parser.add_argument("--dataset_name", type=str)
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=32,
        help="Number of updates steps to accumulate before performing a "
        "backward/update pass.")
    parser.add_argument("--weight_decay",
                        default=1e-5,
                        type=float,
                        help="Weight deay if we apply some.")
    parser.add_argument("--dataloader_workers", default=16, type=int)
    parser.add_argument("--num_epochs",
                        default=40,
                        type=int,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--seed",
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument("--batch_size",
                        default=128,
                        type=int,
                        help="Batch size per GPU/CPU.")
    parser.add_argument("--batch_size_eval",
                        default=256,
                        type=int,
                        help="Batch size per GPU/CPU.")
    parser.add_argument("--run_name", type=str, help="name of the mlflow run")
    parser.add_argument("--overwrite_output_dir",
                        action='store_true',
                        help="Overwrite the content of the output directory")

    args = parser.parse_args()
    mlflow.set_experiment("article2image")
    mlflow.start_run(run_name=args.run_name)
    mlflow.log_params(vars(args))

    if os.path.exists(args.output_dir) and os.listdir(
            args.output_dir) and not args.overwrite_output_dir and args.train:
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to "
            "overcome.".format(args.output_dir))

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    best_model_img = os.path.join(args.output_dir, "best_model_img.model")
    best_model_text = os.path.join(args.output_dir, "best_model_text.model")

    set_seed(args.seed)
    n_gpu = torch.cuda.device_count()
    batch_size = args.batch_size * max(1, n_gpu)
    batch_size_eval = args.batch_size_eval * max(1, n_gpu)

    test_dataloader = torch.utils.data.DataLoader(dataset=MyDataset(
        'val', args.dataset_name, args.bert_embeddings),
                                                  batch_size=batch_size_eval,
                                                  shuffle=False,
                                                  num_workers=4)
    input_size = 768 if args.bert_embeddings else 512
    img_model = torch.nn.DataParallel(ImageProjectModel()).cuda()
    text_model = torch.nn.DataParallel(TextProjectModel(input_size)).cuda()
    if args.train:
        train_dataloader = torch.utils.data.DataLoader(
            dataset=MyDataset('train', args.dataset_name,
                              args.bert_embeddings),
            batch_size=batch_size,
            shuffle=True,
            num_workers=args.dataloader_workers)

        optimizer = torch.optim.Adam(params=itertools.chain(
            img_model.parameters(), text_model.parameters()),
                                     lr=args.learning_rate,
                                     weight_decay=args.weight_decay)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               mode='min',
                                                               verbose=True,
                                                               patience=5)
        itr = 0
        triplet_loss = TripletLoss()
        best_loss = sys.maxsize
        for e in tqdm(range(1, args.num_epochs + 1), ascii=True, desc='Epoch'):
            img_model.train()
            text_model.train()
            with tqdm(total=len(train_dataloader),
                      ascii=True,
                      leave=False,
                      desc='iter') as pbar:
                for i, (images, articles_ids,
                        articles_mask) in enumerate(train_dataloader):
                    itr += 1

                    image_projections = img_model(images.float().cuda())
                    article_projections = text_model(articles_ids.cuda(),
                                                     articles_mask.cuda())
                    loss = triplet_loss(image_projections, article_projections)
                    if args.gradient_accumulation_steps > 1:
                        loss = loss / args.gradient_accumulation_steps

                    # accumulate gradients every iteration; update weights every N steps
                    loss.backward()
                    if (i + 1) % args.gradient_accumulation_steps == 0:
                        optimizer.step()
                        optimizer.zero_grad()
                    if itr % 100 == 0:
                        mlflow.log_metric(
                            "training loss",
                            loss.item() / max((len(image_projections) *
                                               len(article_projections) -
                                               len(image_projections)), 1),
                            itr)

                    pbar.update()
            img_model.eval()
            text_model.eval()
            losses = []
            with tqdm(total=len(test_dataloader),
                      ascii=True,
                      leave=False,
                      desc='eval') as pbar, torch.no_grad():
                for i, (images, articles_ids,
                        articles_mask) in enumerate(test_dataloader):
                    with torch.no_grad():
                        image_projections = img_model(images.float().cuda())
                        article_projections = text_model(
                            articles_ids.cuda(), articles_mask.cuda())
                        loss = triplet_loss(image_projections,
                                            article_projections)
                        if args.gradient_accumulation_steps > 1:
                            loss = loss / args.gradient_accumulation_steps

                    losses.append(loss.item() / max(
                        (len(image_projections) * len(article_projections) -
                         len(image_projections)), 1))

                    pbar.update()
            test_loss = np.mean(losses)
            mlflow.log_metric("test loss", test_loss, e)
            scheduler.step(test_loss)
            # save only the best model
            if test_loss < best_loss:
                best_loss = test_loss
                if os.path.exists(best_model_img):
                    os.remove(best_model_img)
                if os.path.exists(best_model_text):
                    os.remove(best_model_text)
                torch.save(img_model.state_dict(), best_model_img)
                torch.save(text_model.state_dict(), best_model_text)

    if args.eval:
        img_model.load_state_dict(torch.load(best_model_img))
        text_model.load_state_dict(torch.load(best_model_text))
        embeddings_img, embeddings_cap = compute_embeddings(
            img_model, text_model, test_dataloader, batch_size_eval)
        recall = t2i(embeddings_img, embeddings_cap)
        avg_recall = (recall[0] + recall[1] + recall[2]) / 3
        print("Average t2i Recall: %.1f" % avg_recall)
        print("Text to image: %.1f %.1f %.1f %.1f %.1f" % recall)
        mlflow.log_metric("R1", recall[0])
        mlflow.log_metric("R5", recall[1])
        mlflow.log_metric("R10", recall[2])
        mlflow.log_metric("MdR", recall[3])
        mlflow.log_metric("MeR", recall[4])
Example #8
def log_parameters(ax_experiment):
    arm_name = ax_experiment.fetch_data().df.iloc[-1, :]['arm_name']
    arm = ax_experiment.arms_by_name[arm_name]
    mlflow.log_params(arm.parameters)
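
A hedged sketch of calling this helper from an Ax optimization loop; the AxClient setup and the stand-in objective are assumptions, and the exact `create_experiment` arguments vary between Ax versions:

from ax.service.ax_client import AxClient

ax_client = AxClient()
ax_client.create_experiment(
    name="lr_tuning",
    parameters=[{"name": "lr", "type": "range", "bounds": [1e-5, 1e-1], "log_scale": True}],
)

for _ in range(10):
    parameters, trial_index = ax_client.get_next_trial()
    value = (parameters["lr"] - 0.01) ** 2  # stand-in objective
    ax_client.complete_trial(trial_index=trial_index, raw_data=value)
    log_parameters(ax_client.experiment)  # log the newest arm's parameters to MLflow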
Example #9
 def log_params(cls, params):
     try:
         mlflow.log_params(params)
     except ConnectionError:
         logger.warning("ConnectionError in logging params to MLFlow")
Example #10
    def run(self, args: argparse.Namespace) -> None:
        logger.info("Load config from %s", args.config)
        config = load_yaml(minato.cached_path(args.config), args.overrides)

        logger.info("Configuration: %s", str(config))
        builder = ConfigBuilder.build(config)
        model = builder.model
        train_file = args.train or builder.train_file
        validation_file = args.validation or builder.validation_file

        if not train_file:
            raise ConfigurationError("train file is required.")

        logger.info("Start training...")
        logger.info("Training data: %s", str(train_file))
        logger.info("Validation data: %s", str(validation_file))

        params = {
            "command": " ".join(sys.argv),
            "config_file": args.config,
            "train_file": train_file,
            "validation_file": validation_file,
            "serialization_dir": args.serialization_dir,
            "config": config,
        }

        with _mlflow_start_run():
            serialization_dir = args.serialization_dir
            if args.serialization_dir is None and mlflow is None:
                serialization_dir = "./output"

            with create_workdir(
                    serialization_dir,
                    exist_ok=args.force,
            ) as workdir:
                workdir = workdir.absolute()
                try:
                    with open(workdir / "config.yaml", "w") as f:
                        yaml.dump(config, f)

                    with open(workdir / "params.json", "w") as f:
                        json.dump(params, f, indent=2)

                    if mlflow is not None:
                        logger.info("Log params to mlflow")
                        mlflow.log_params(params)

                    metrics = model.train(train_file, validation_file, workdir)

                    if mlflow is not None:
                        logger.info("Log metrics to mlflow")
                        mlflow.log_metrics(metrics)

                    logger.info("Training completed")
                    logger.info("Training metrics: %s",
                                json.dumps(metrics, indent=2))

                    with open(workdir / "metrics.json", "w") as metrics_file:
                        json.dump(metrics, metrics_file)

                    with open(workdir / "model.pkl", "wb") as model_file:
                        pickle.dump(model, model_file)
                finally:
                    if mlflow is not None:
                        logger.info("Log metrics to mlflow")
                        mlflow.log_artifacts(str(workdir))

        logger.info("Done!")
Example #11
def train(
    model: str,
    experiment_name: str = None,
    data_dir=None,
    root_dir=None,
    best_metric="val_accuracy",
    **kwargs,
):
    """Base method to train a model. Will train the model input based on `MODEL_DICT` correspondance, and define the `experiment_name` in MlFlow tracking.

    Args:
        model (str): the model to train. Only two choices: `model1` or `model2`.
        experiment_name (str, optional): The experiment name to define in MlFlow tracking server. Defaults to None. If None, will be define with `model` value.
        best_metric (str, optional): The metrics on which performing evaluation of the model, and to check if performance has improved since best last model. Defaults to "val_accuracy".
    """
    _check_input(model)

    if experiment_name is None:
        experiment_name = model

    owd = os.getcwd()
    root_dir = Paths(root_dir=root_dir).root_dir
    os.chdir(root_dir)

    mlflow.set_experiment(experiment_name)
    tracker = MlFlowTracker(root_dir=root_dir)
    print(tracker.root_dir)

    timestamp = time.strftime("%Y%m%d%H%M")
    run_name = f"{experiment_name}_{timestamp}"

    learner = MODEL_DICT.get(model)(data_dir=data_dir)
    print(learner.name)

    version = tracker.get_new_version(experiment_name)
    logging.info(version)

    with mlflow.start_run(run_name=run_name):
        run_uuid = mlflow.active_run().info.run_uuid
        logging.info(f"MLflow Run ID: {run_uuid}")

        learner.train(**kwargs)

        # Get training params
        params = learner.get_params()

        # Log parameters
        mlflow.log_params(params)

        # calculate metrics
        metrics = {}
        for metric in learner.metrics:
            metrics[metric] = learner.history[metric][-1]
            metrics[f"val_{metric}"] = learner.history[f"val_{metric}"][-1]
        metrics["loss"] = learner.history["loss"][-1]
        metrics["val_loss"] = learner.history["val_loss"][-1]

        final_metric = metrics.get(best_metric)

        # log metrics
        mlflow.log_metrics(metrics)

        # log model
        model_name = learner.model.name
        X_train = learner.X_train
        y_pred = learner.predict(X_train)
        signature = infer_signature(X_train, y_pred)
        mlflow.keras.log_model(learner.model.model,
                               model_name,
                               signature=signature,
                               save_format="tf")

    models_path = Paths(root_dir=root_dir).model / "models"
    if not models_path.exists():
        models_path.mkdir()

    final_metric_best = tracker.get_best_model_metric(experiment_name,
                                                      metric=best_metric)

    if final_metric >= final_metric_best:
        logging.info(
            "Best model found. Saving to model dir to use with Tensorflow Serving"
        )
        model_path = os.path.join(str(models_path), model)
        if not os.path.exists(model_path):
            os.mkdir(model_path)
            logging.info(f"Folder ")
        if model == "model2":
            tfmodel = TFModel(learner.model.model)
            tf.saved_model.save(
                tfmodel.model,
                os.path.join(model_path, "0"),
                signatures={"serving_default": tfmodel.prediction},
            )
            print(tfmodel)
        else:
            learner.model.model.save(os.path.join(model_path, "0"))
        logging.info(f"Model exported at {model_path}.")
    else:
        logging.info(
            f"Model logged but best performance not improved for experiment {experiment_name} (current version: {version})."
        )

    os.chdir(owd)
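
A hedged usage sketch for this entry point; the extra keyword arguments are assumptions, since they are forwarded untouched to `learner.train(**kwargs)`:

# Train "model2", track the run under a custom MLflow experiment name, and
# keep the best model according to validation accuracy.
train(
    model="model2",
    experiment_name="model2_baseline",
    data_dir="data/processed",
    best_metric="val_accuracy",
    epochs=10,  # assumed to be accepted by learner.train(**kwargs)
)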
Example #12
def run_training_with_mlflow(mlflow_conf, 
                             wrapped_model,
                             train_dataloader, 
                             val_dataloader=None, 
                             test_dataloader=None,
                             **kwargs):
    """
    Function to run supervised training for classification

    Parameters
    ----------
    mlflow_conf: dict
        mlflow configuration, e.g., MLFLOW_URI
    wrapped_model: SKModel
        wrapped SKModel 
    train_dataloader:
        training dataloader
    val_dataloader:
        validation dataloader, optional
    test_dataloader:
        optional
    kwargs: dict of dicts, optional
        can contain `artifacts` to log with models, `model_path` to specify the model output path; remaining entries are used as experiment tags
        
    Returns
    -------
    tuple:
        (run_id, run_metrics, val_y, val_yhat, val_pred_proba, test_y, test_yhat, test_pred_proba)
    """
    tune = kwargs.get('tune', False)
    if tune:
        inner_cv = kwargs.get('inner_cv', C.DEFAULT_CV)
        h_search = kwargs.pop('h_search', None)
        if h_search is None:
            raise AttributeError('if tune is requested, h_search should be provided')
        scoring = kwargs.get('scoring', C.DEFAULT_SCORING_CLASSIFIER)
        
    model_path = kwargs.pop('model_path', 'model')
    # model_save_dir = Path(kwargs.get('model_save_dir', C.MODEL_SAVE_DIR))
    # model_save_dir.mkdir(parents=True, exist_ok=True)
    artifacts = kwargs.pop('artifacts', dict())

    mlflow_conf.setdefault('problem_type', 'classifier')
    mlflow_setup = setup_mlflow(**mlflow_conf)

    calculate_metrics = Metrics(mlflow_conf['problem_type'])
    log.debug(f"Mlflow setup: {mlflow_setup}")
    log.debug(f"Used metrics: {calculate_metrics}")

    experiment_name = mlflow_setup['experiment_name']

    experiment_tags = dict()
    experiment_tags.update(**kwargs)

    with mlflow.start_run():
        run_id = mlflow.active_run().info.run_id 
        _start_time = time.time()

        X_train, y_train = train_dataloader.get_data()
        
        if val_dataloader is not None:
            X_val, y_val = val_dataloader.get_data()
            outer_cv, _X, _y = get_predefined_split(X_train, y_train, X_val, y_val)
        else:
            warnings.warn("This path is untested...use with caution")
            outer_cv = kwargs.get('outer_cv', None)
            if outer_cv is None:
                warnings.warn(f'Neither validation, nor outer_cv provided. using KFold({C.DEFAULT_CV}) to get validation split')
                outer_cv = KFold(C.DEFAULT_CV)
            _X = X_train.values if hasattr(X_train, 'values') else X_train
            _y = y_train.values if hasattr(y_train, 'values') else y_train

        if test_dataloader is not None:
            X_test, y_test = test_dataloader.get_data()

        # mlflow.log_params(wrapped_model.model.get_params())
        if tune:
            m, gs = wrapped_model.tune(X=_X, y=_y,
                                       hyper_params=h_search,
                                       cv=inner_cv, 
                                       experiment_name=experiment_name, 
                                       scoring=scoring)
            
            mlflow.sklearn.log_model(m, experiment_name + '_model')
            mlflow.sklearn.log_model(gs, experiment_name + '_GridSearchCV')
            
            log.info(f"Experiment: {experiment_name} has finished hyperparameter tuning")
            log.info("Hyperparameter search space: " + str(h_search))
            # log params
            mlflow.log_params(wrapped_model.params)
            print(f"Best_params:\n {gs.best_params_}")
        else:
            wrapped_model.fit(X=X_train, y=y_train)#, Xstd = X_train_std)
        
            mlflow.sklearn.log_model(wrapped_model.model, experiment_name + '_model')
            mlflow.log_params(wrapped_model.params)
            log.info(f"Experiment: {experiment_name} has finished training")

        for split_id, (train_index, val_index) in enumerate(outer_cv.split(_X, _y)):
            if split_id >= 1:
                warnings.warn("Current logic for tune and implicit outer_cv not correct")
                break

            _X_train, _X_val = _X[train_index, :], _X[val_index, :]
            _y_train, _y_val = _y[train_index], _y[val_index]
            
            y_val_proba = wrapped_model.predict_proba(_X_val)
            if y_val_proba.ndim > 1:
                y_val_proba = y_val_proba[:,1]

            y_val_hat = wrapped_model.predict(_X_val)
            val_score = wrapped_model.score(_X_val, _y_val)

        if test_dataloader is not None:
            y_test_proba = wrapped_model.predict_proba(X_test)
            if y_test_proba.ndim > 1:
                y_test_proba = y_test_proba[:, 1]
            y_test_hat = wrapped_model.predict(X_test)
            test_score = wrapped_model.score(X_test, y_test)
        else:
            y_test = None
            y_test_hat = None
            y_test_proba = None
            test_score = None

        # Calculate metrics
        wrapped_model.metrics = calculate_metrics(y_val=y_val, 
                                             y_val_proba=y_val_proba, 
                                             y_val_hat=y_val_hat,
                                             val_score=val_score, 
                                             y_test=y_test, 
                                             y_test_proba=y_test_proba, 
                                             y_test_hat=y_test_hat,
                                             test_score=test_score
                                            )
        _end_time = time.time()
        run_time = (_end_time - _start_time)
        
        # log metrics
        mlflow.log_metrics(wrapped_model.metrics)

        experiment_tags.update(dict(run_time=run_time))
        if experiment_tags is not None:
            mlflow.set_tags(experiment_tags)

        # Other artifacts
        _tmp = {f"artifact/{art_name}": art_val 
                for art_name, art_val in six.iteritems(artifacts)}
        helper.log_artifacts(_tmp, run_id, mlflow_uri=mlflow_setup['mlflow_uri'], delete=True) 

        return (run_id,
                wrapped_model.metrics,
                y_val, y_val_hat, y_val_proba,
                y_test, y_test_hat, y_test_proba,
                )
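
A hedged invocation sketch; the `SKModel` wrapper, the MLflow configuration keys, and the dataloader interface (`.get_data()` returning `(X, y)`) are assumptions taken from the code above:

import numpy as np
from sklearn.ensemble import RandomForestClassifier


class ArrayDataLoader:
    # Minimal stand-in exposing the .get_data() interface used by the trainer.
    def __init__(self, X, y):
        self.X, self.y = X, y

    def get_data(self):
        return self.X, self.y


rng = np.random.default_rng(0)
X, y = rng.normal(size=(200, 4)), rng.integers(0, 2, size=200)
train_dl = ArrayDataLoader(X[:120], y[:120])
val_dl = ArrayDataLoader(X[120:160], y[120:160])
test_dl = ArrayDataLoader(X[160:], y[160:])

mlflow_conf = {"mlflow_uri": "mlruns", "experiment_name": "demo_classifier"}  # keys assumed
wrapped_model = SKModel(RandomForestClassifier(n_estimators=100))  # project-specific wrapper

run_id, run_metrics, *val_and_test_outputs = run_training_with_mlflow(
    mlflow_conf,
    wrapped_model,
    train_dataloader=train_dl,
    val_dataloader=val_dl,
    test_dataloader=test_dl,
    model_path="model",
)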
Example #13
def mlflow_callback(study, trial):
    trial_value = trial.value if trial.value is not None else float("nan")
    with mlflow.start_run(run_name=study.study_name):
        mlflow.log_params(trial.params)
        mlflow.log_metrics({"mean_squared_error": trial_value})
Example #14
 def before_pipeline_run(self, run_params: Dict[str, Any]) -> None:
     """Hook implementation to start an MLflow run
     with the same run_id as the Kedro pipeline run.
     """
     mlflow.start_run(run_name=run_params["run_id"])
     mlflow.log_params(run_params)
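
A hedged sketch of how a hook like this is usually declared and registered in a Kedro project; the class name is an assumption:

from typing import Any, Dict

import mlflow
from kedro.framework.hooks import hook_impl


class MLflowTrackingHooks:
    @hook_impl
    def before_pipeline_run(self, run_params: Dict[str, Any]) -> None:
        # Start an MLflow run with the same run_id as the Kedro pipeline run
        # and log the run parameters.
        mlflow.start_run(run_name=run_params["run_id"])
        mlflow.log_params(run_params)


# Registration, in src/<package_name>/settings.py:
# HOOKS = (MLflowTrackingHooks(),)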
Example #15
def main(params: dict):
    """
    Identify the class to which each image belongs.
    :param params: (dict) Parameters found in the yaml config file.

    """
    since = time.time()

    # MANDATORY PARAMETERS
    img_dir_or_csv = get_key_def('img_dir_or_csv_file',
                                 params['inference'],
                                 expected_type=str)
    state_dict = get_key_def('state_dict_path', params['inference'])
    task = get_key_def('task', params['global'], expected_type=str)
    if task not in ['classification', 'segmentation']:
        raise ValueError(
            f'Task should be either "classification" or "segmentation". Got {task}'
        )
    model_name = get_key_def('model_name', params['global'],
                             expected_type=str).lower()
    num_classes = get_key_def('num_classes',
                              params['global'],
                              expected_type=int)
    num_bands = get_key_def('number_of_bands',
                            params['global'],
                            expected_type=int)
    chunk_size = get_key_def('chunk_size',
                             params['inference'],
                             default=512,
                             expected_type=int)
    BGR_to_RGB = get_key_def('BGR_to_RGB',
                             params['global'],
                             expected_type=bool)

    # OPTIONAL PARAMETERS
    dontcare_val = get_key_def("ignore_index",
                               params["training"],
                               default=-1,
                               expected_type=int)
    num_devices = get_key_def('num_gpus',
                              params['global'],
                              default=0,
                              expected_type=int)
    default_max_used_ram = 25
    max_used_ram = get_key_def('max_used_ram',
                               params['global'],
                               default=default_max_used_ram,
                               expected_type=int)
    max_used_perc = get_key_def('max_used_perc',
                                params['global'],
                                default=25,
                                expected_type=int)
    scale = get_key_def('scale_data',
                        params['global'],
                        default=[0, 1],
                        expected_type=List)
    debug = get_key_def('debug_mode',
                        params['global'],
                        default=False,
                        expected_type=bool)
    raster_to_vec = get_key_def('ras2vec', params['inference'], False)

    # benchmark (i.e. when gpkgs are provided along with imagery)
    dontcare = get_key_def("ignore_index", params["training"], -1)
    targ_ids = get_key_def('target_ids',
                           params['sample'],
                           None,
                           expected_type=List)

    # SETTING OUTPUT DIRECTORY
    working_folder = Path(
        params['inference']['state_dict_path']).parent.joinpath(
            f'inference_{num_bands}bands')
    Path.mkdir(working_folder, parents=True, exist_ok=True)

    # mlflow logging
    mlflow_uri = get_key_def('mlflow_uri',
                             params['global'],
                             default=None,
                             expected_type=str)
    if mlflow_uri and not Path(mlflow_uri).is_dir():
        warnings.warn(f'Mlflow uri path is not valid: {mlflow_uri}')
        mlflow_uri = None
    # SETUP LOGGING
    import logging.config  # See: https://docs.python.org/2.4/lib/logging-config-fileformat.html
    if mlflow_uri:
        log_config_path = Path('utils/logging.conf').absolute()
        logfile = f'{working_folder}/info.log'
        logfile_debug = f'{working_folder}/debug.log'
        console_level_logging = 'INFO' if not debug else 'DEBUG'
        logging.config.fileConfig(log_config_path,
                                  defaults={
                                      'logfilename': logfile,
                                      'logfilename_debug': logfile_debug,
                                      'console_level': console_level_logging
                                  })

        # import only if mlflow uri is set
        from mlflow import log_params, set_tracking_uri, set_experiment, start_run, log_artifact, log_metrics
        if not Path(mlflow_uri).is_dir():
            logging.warning(
                f"Couldn't locate mlflow uri directory {mlflow_uri}. Directory will be created."
            )
            Path(mlflow_uri).mkdir()
        set_tracking_uri(mlflow_uri)
        exp_name = get_key_def('mlflow_experiment_name',
                               params['global'],
                               default='gdl-inference',
                               expected_type=str)
        set_experiment(f'{exp_name}/{working_folder.name}')
        run_name = get_key_def('mlflow_run_name',
                               params['global'],
                               default='gdl',
                               expected_type=str)
        start_run(run_name=run_name)
        log_params(params['global'])
        log_params(params['inference'])
    else:
        # set a console logger as default
        logging.basicConfig(level=logging.DEBUG)
        logging.info(
            'No logging folder set for mlflow. Logging will be limited to console'
        )

    if debug:
        logging.warning(
            f'Debug mode activated. Some debug features may mobilize extra disk space and '
            f'cause delays in execution.')

    # Assert that all items in target_ids are integers (ex.: to benchmark single-class model with multi-class labels)
    if targ_ids:
        for item in targ_ids:
            if not isinstance(item, int):
                raise ValueError(
                    f'Target id "{item}" in target_ids is {type(item)}, expected int.'
                )

    logging.info(f'Inferences will be saved to: {working_folder}\n\n')
    if not (0 <= max_used_ram <= 100):
        logging.warning(
            f'Max used ram parameter should be a percentage. Got {max_used_ram}. '
            f'Will set default value of {default_max_used_ram} %')
        max_used_ram = default_max_used_ram

    # AWS
    bucket = None
    bucket_file_cache = []
    bucket_name = get_key_def('bucket_name', params['global'])

    # list of GPU devices that are available and unused. If no GPUs, returns empty dict
    gpu_devices_dict = get_device_ids(num_devices,
                                      max_used_ram_perc=max_used_ram,
                                      max_used_perc=max_used_perc)
    if gpu_devices_dict:
        logging.info(
            f"Number of cuda devices requested: {num_devices}. Cuda devices available: {gpu_devices_dict}. "
            f"Using {list(gpu_devices_dict.keys())[0]}\n\n")
        device = torch.device(
            f'cuda:{list(range(len(gpu_devices_dict.keys())))[0]}')
    else:
        logging.warning(
            f"No Cuda device available. This process will only run on CPU")
        device = torch.device('cpu')

    # CONFIGURE MODEL
    num_classes_backgr = add_background_to_num_class(task, num_classes)
    model, loaded_checkpoint, model_name = net(model_name=model_name,
                                               num_bands=num_bands,
                                               num_channels=num_classes_backgr,
                                               dontcare_val=dontcare_val,
                                               num_devices=1,
                                               net_params=params,
                                               inference_state_dict=state_dict)
    try:
        model.to(device)
    except RuntimeError:
        logging.info(f"Unable to use device 0")
        device = torch.device(f'cuda' if gpu_devices_dict else 'cpu')
        model.to(device)

    # CREATE LIST OF INPUT IMAGES FOR INFERENCE
    list_img = list_input_images(img_dir_or_csv,
                                 bucket_name,
                                 glob_patterns=["*.tif", "*.TIF"])

    # VALIDATION: anticipate problems with imagery and label (if provided) before entering main for loop
    valid_gpkg_set = set()
    for info in tqdm(list_img, desc='Validating imagery'):
        # validate_raster(info['tif'], num_bands, meta_map)
        if 'gpkg' in info.keys(
        ) and info['gpkg'] and info['gpkg'] not in valid_gpkg_set:
            validate_num_classes(vector_file=info['gpkg'],
                                 num_classes=num_classes,
                                 attribute_name=info['attribute_name'],
                                 ignore_index=dontcare,
                                 target_ids=targ_ids)
            assert_crs_match(info['tif'], info['gpkg'])
            valid_gpkg_set.add(info['gpkg'])

    logging.info('Successfully validated imagery')
    if valid_gpkg_set:
        logging.info('Successfully validated label data for benchmarking')

    if task == 'classification':
        classifier(
            params, list_img, model, device, working_folder
        )  # FIXME: why don't we load from checkpoint in classification?

    elif task == 'segmentation':
        gdf_ = []
        gpkg_name_ = []

        # TODO: Add verifications?
        if bucket:
            bucket.download_file(
                loaded_checkpoint,
                "saved_model.pth.tar")  # TODO: is this still valid?
            model, _ = load_from_checkpoint("saved_model.pth.tar", model)
        else:
            model, _ = load_from_checkpoint(loaded_checkpoint, model)
        # LOOP THROUGH LIST OF INPUT IMAGES
        for info in tqdm(list_img,
                         desc='Inferring from images',
                         position=0,
                         leave=True):
            with start_run(run_name=Path(info['tif']).name, nested=True):
                img_name = Path(info['tif']).name
                local_gpkg = Path(
                    info['gpkg']
                ) if 'gpkg' in info.keys() and info['gpkg'] else None
                gpkg_name = local_gpkg.stem if local_gpkg else None
                if bucket:
                    local_img = f"Images/{img_name}"
                    bucket.download_file(info['tif'], local_img)
                    inference_image = f"Classified_Images/{img_name.split('.')[0]}_inference.tif"
                    if info['meta']:
                        if info['meta'] not in bucket_file_cache:
                            bucket_file_cache.append(info['meta'])
                            bucket.download_file(info['meta'],
                                                 info['meta'].split('/')[-1])
                        info['meta'] = info['meta'].split('/')[-1]
                else:  # FIXME: else statement should support img['meta'] integration as well.
                    local_img = Path(info['tif'])
                    Path.mkdir(working_folder.joinpath(local_img.parent.name),
                               parents=True,
                               exist_ok=True)
                    inference_image = working_folder.joinpath(
                        local_img.parent.name,
                        f"{img_name.split('.')[0]}_inference.tif")
                temp_file = working_folder.joinpath(
                    local_img.parent.name, f"{img_name.split('.')[0]}.dat")
                raster = rasterio.open(local_img, 'r')
                logging.info(f'Reading original image: {raster.name}')
                inf_meta = raster.meta
                label = None
                if local_gpkg:
                    logging.info(f'Burning label as raster: {local_gpkg}')
                    local_img = clip_raster_with_gpkg(raster, local_gpkg)
                    raster.close()
                    raster = rasterio.open(local_img, 'r')
                    logging.info(f'Reading clipped image: {raster.name}')
                    inf_meta = raster.meta
                    label = vector_to_raster(
                        vector_file=local_gpkg,
                        input_image=raster,
                        out_shape=(inf_meta['height'], inf_meta['width']),
                        attribute_name=info['attribute_name'],
                        fill=0,  # background value in rasterized vector.
                        target_ids=targ_ids)
                    if debug:
                        logging.debug(
                            f'Unique values in loaded label as raster: {np.unique(label)}\n'
                            f'Shape of label as raster: {label.shape}')
                pred, gdf = segmentation(param=params,
                                         input_image=raster,
                                         label_arr=label,
                                         num_classes=num_classes_backgr,
                                         gpkg_name=gpkg_name,
                                         model=model,
                                         chunk_size=chunk_size,
                                         device=device,
                                         scale=scale,
                                         BGR_to_RGB=BGR_to_RGB,
                                         tp_mem=temp_file,
                                         debug=debug)
                if gdf is not None:
                    gdf_.append(gdf)
                    gpkg_name_.append(gpkg_name)
                if local_gpkg:
                    pixelMetrics = ComputePixelMetrics(label, pred,
                                                       num_classes_backgr)
                    log_metrics(pixelMetrics.update(pixelMetrics.iou))
                    log_metrics(pixelMetrics.update(pixelMetrics.dice))
                pred = pred[np.newaxis, :, :].astype(np.uint8)
                inf_meta.update({
                    "driver": "GTiff",
                    "height": pred.shape[1],
                    "width": pred.shape[2],
                    "count": pred.shape[0],
                    "dtype": 'uint8',
                    "compress": 'lzw'
                })
                logging.info(
                    f'Successfully inferred on {img_name}\nWriting to file: {inference_image}'
                )
                with rasterio.open(inference_image, 'w+', **inf_meta) as dest:
                    dest.write(pred)
                del pred
                try:
                    temp_file.unlink()
                except OSError as e:
                    logging.warning(f'File Error: {temp_file}, {e.strerror}')
                if raster_to_vec:
                    start_vec = time.time()
                    inference_vec = working_folder.joinpath(
                        local_img.parent.name,
                        f"{img_name.split('.')[0]}_inference.gpkg")
                    ras2vec(inference_image, inference_vec)
                    end_vec = time.time() - start_vec
                    logging.info(
                        'Vectorization completed in {:.0f}m {:.0f}s'.format(
                            end_vec // 60, end_vec % 60))

        if len(gdf_) >= 1:
            if len(gdf_) != len(gpkg_name_):
                raise ValueError('benchmarking unable to complete')
            all_gdf = pd.concat(
                gdf_)  # Concatenate all GeoDataFrames into one
            all_gdf.reset_index(drop=True, inplace=True)
            gdf_x = gpd.GeoDataFrame(all_gdf)
            bench_gpkg = working_folder / "benchmark.gpkg"
            gdf_x.to_file(bench_gpkg, driver="GPKG", index=False)
            logging.info(
                f'Successfully wrote benchmark geopackage to: {bench_gpkg}')
        # log_artifact(working_folder)
    time_elapsed = time.time() - since
    logging.info('Inference Script completed in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
Example #16
def evaluate_model(
    model: keras.Model,
    thr: float,
    dpath: Path,
    rpath: Path,
    vids: Tuple[str],
    batch_size: int,
    bands: Tuple[int] = (4, 3, 2, 5),
    bands_names: Tuple[str] = ("red", "green", "blue", "nir"),
    img_ids: List[str] = None,
    resize: bool = False,
    normalize: bool = True,
    standardize: bool = False,
    metric_fns: List[Callable] = [
        normalized_mutual_info_score, adjusted_rand_score
    ],
    mlflow: bool = False,
    run_name: str = None,
) -> Tuple[Dict, List]:
    """
    Get evaluation metrics for the given model on the L8CCA test set.

    :param model: trained model used to make predictions.
    :param thr: threshold to be used during evaluation.
    :param dpath: path to the dataset.
    :param rpath: path to the directory where results
                  and artifacts should be logged.
    :param vids: tuple of ids of images which should be used to create
                 visualisations. If it contains '*', visualisations will be
                 created for all images in the dataset.
    :param batch_size: size of generated batches; only one batch is loaded
                       into memory at a time.
    :param bands: band numbers to load.
    :param bands_names: names of the bands to load. Should have the same
                        number of elements as bands.
    :param img_ids: if given, process only these images.
    :param resize: whether to resize the loaded image to the ground truth.
    :param normalize: whether to normalize the image.
    :param standardize: whether to standardize the image.
    :param metric_fns: non-TensorFlow metric functions used to evaluate
                       the model. Must be of the form
                       func(labels_true, labels_pred).
    :param mlflow: whether to use MLflow.
    :param run_name: name of the run.
    :return: evaluation metrics and evaluation times for scenes.
    """
    Path(rpath).mkdir(parents=True, exist_ok=False)
    if mlflow:
        setup_mlflow(run_name)
        params = dict(locals())
        del params["model"]
        del params["metric_fns"]
        log_params(params)
    metrics = {}
    scene_times = []
    for metric_fn in model.metrics:
        if type(metric_fn) is str:
            metric_name = metric_fn
        else:
            metric_name = metric_fn.__name__
        metrics[f"L8CCA_{metric_name}"] = {}
    for metric_fn in metric_fns:
        metrics[f"L8CCA_{metric_fn.__name__}"] = {}

    for tname in os.listdir(dpath):
        tpath = dpath / tname
        for img_id in os.listdir(tpath):
            if img_ids is not None:
                if img_id not in img_ids:
                    continue
            print(f"Processing {tname}-{img_id}", flush=True)
            img_path = tpath / img_id
            img_pred, scene_time = get_img_pred(path=img_path,
                                                model=model,
                                                batch_size=batch_size,
                                                bands=bands,
                                                bands_names=bands_names,
                                                resize=resize,
                                                normalize=normalize,
                                                standardize=standardize)
            scene_times.append(scene_time)
            img_gt = load_l8cca_gt(path=img_path)
            img_pred = unpad(img_pred, img_gt.shape)
            img_metrics = get_metrics_tf(
                np.expand_dims(img_gt, axis=0),
                np.expand_dims((img_pred > thr), axis=0), model.metrics)
            for metric_fn in model.metrics:
                if type(metric_fn) is str:
                    metric_name = metric_fn
                else:
                    metric_name = metric_fn.__name__
                metrics[f"L8CCA_{metric_name}"][img_id] = img_metrics[
                    f"{metric_name}"]
            for metric_fn in metric_fns:
                metrics[f"L8CCA_{metric_fn.__name__}"][img_id] = metric_fn(
                    img_gt.reshape(-1), (img_pred > thr).reshape(-1))
            print("Average inference time: " +
                  f"{ sum(scene_times) / len(scene_times) } seconds")
            if img_id in vids or "*" in vids:
                print(f"Creating visualisation for {img_id}")
                img_vis = build_rgb_scene_img(img_path, img_id)
                save_vis(img_id, img_vis, img_pred > thr, img_gt, rpath)

            if img_metrics["jaccard_index_metric"] < 0.6:
                print(f"Will make insights for {img_id}", flush=True)
                y_gt = img_gt.ravel()
                y_pred = np.round(img_pred.ravel(), decimals=5)

                make_roc(y_gt, y_pred, rpath / img_id, thr_marker=thr)
                make_precision_recall(y_gt,
                                      y_pred,
                                      rpath / img_id,
                                      thr_marker=thr)

                # Make histogram with more rounded predictions
                # for performance reasons
                y_pred = np.round(y_pred, decimals=2)
                make_activation_hist(y_pred, rpath / img_id)

    return metrics, scene_times
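
# A hypothetical invocation of evaluate_model (the model, paths, threshold and
# run name below are illustrative placeholders, not values from the original
# project):
#
# metrics, times = evaluate_model(
#     model=trained_model,
#     thr=0.5,
#     dpath=Path("datasets/L8CCA"),
#     rpath=Path("results/l8cca-eval"),
#     vids=("*",),
#     batch_size=32,
#     mlflow=True,
#     run_name="l8cca-eval",
# )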
Example #17
def train(config, run_time):
    # mlflow_path = os.path.join(os.getcwd(), 'mlflow')
    # uri = f'file://{mlflow_path}'
    # mlflow.set_tracking_uri(uri)

    with mlflow.start_run():
        print('Starting model training')
        dpl = DataPipeline(config, run_time)

        # TODO: convert to DB
        dpl.read_data('interim_train_data')
        train_data = dpl._data
        print('train:', train_data.shape)

        # TODO: convert to DB
        dpl.read_data('interim_test_data')
        test_data = dpl._data
        print(test_data.shape)

        print('Getting model selection')
        model_selection = int(config.get('DEFAULT', 'model_selection'))

        # splitting data
        print('splitting X, y data')
        X_train = train_data['comment_text']
        y_train = train_data.iloc[:, 1:7]
        X_test = test_data['comment_text']
        y_test = test_data.iloc[:, 1:7]

        # get model params
        print('getting model params')
        model_params = _get_model_params(config,
                                         model_selection,
                                         no_defaults=True)
        model_params['run_time'] = run_time
        print('model_params', model_params)

        # init model
        print('model init')
        model = BiLSTM(X_train=X_train,
                       y_train=y_train,
                       validation_data=None,
                       **model_params)

        # train model
        print('training model')
        model.train()

        # evaluate TODO: create another split from train
        print('evaluating model')
        evaluation = model.evaluate(X_test, y_test)
        print('evaluation', evaluation)

        # save
        print('saving model')
        model.save_model(mlflow, model_params['save_path'])

        print('logging params')
        mlflow.log_params(model_params)
        print(model._history.history)
        print('logging metrics')
        mlflow.log_metrics({'loss': evaluation[0], 'accuracy': evaluation[1]})

        mlflow.end_run()  # redundant: the enclosing `with` block already ends the run
    return
Example #18
            pretrained_embeddings_path=args.pretrained_embeddings,
            token_to_index=args.token_to_index,
            n_labels=train_dataset.n_labels,
            dropout=args.dropout,
            batch_size=args.batch_size,
            vector_size=args.embeddings_size,
            filter_count=args.filter_count,
            filters_length=args.filters_length,
            freeze_embedings=True  # This can be a hyperparameter
        )
        mlflow.log_params({
            "model_type": "Convolutional Neural Network",
            "embeddings": args.pretrained_embeddings,
            "batch_size": args.batch_size,
            "filter_count": args.filter_count,
            "filters_length": args.filters_length,
            "dropout": args.dropout,
            "embeddings_size": args.embeddings_size,
            "epochs": args.epochs,
            "comments": args.comments[:249]
        })
        logging.info(str(model))

        model = model.to(device)
        loss = nn.CrossEntropyLoss()
        optimizer = optim.Adam(
            model.parameters(),
            lr=1e-3,  # This can be a hyperparameter
            weight_decay=1e-5  # This can be a hyperparameter
        )
Example #19
def log_config():
    mlflow.log_param("seed", Config.seed)
    mlflow.log_param("n_splits", Config.n_splits)
    mlflow.log_param("bert_model", Config.bert_model)
    mlflow.log_params({f"lgb_{k}": v for k, v in Config.lgb_params.items()})
    mlflow.log_params({f"cat_{k}": v for k, v in Config.cat_params.items()})
Example #20
def main(c, r):
    r.scores = {}

    with blocktimer('Preprocess', level=INFO):
        # unpack feature set list. set[i]={name: cols}
        for name, col_list in c.feature.set.items():
            in_train_path = f'data/feature/{name}_train.pkl'
            in_test_path = f'data/feature/{name}_test.pkl'
            cols = col_list
            train = pd.read_pickle(in_train_path)
            test = pd.read_pickle(in_test_path)
            logger.debug(f'Loaded feature {name}')

        if c.runtime.use_small_data:
            frac = 0.001
            train = train.sample(frac=frac, random_state=42)
            test = test.sample(frac=frac, random_state=42)

        logger.debug(f'train.shape: {train.shape}, test.shape: {test.shape}')

        # Split into X, y
        X_train = train.drop(c.feature.target, axis=1)
        y_train = train[c.feature.target].copy(deep=True)
        X_test = test
        del train, test

    with blocktimer('Tune hyper params', level=INFO):
        '''
        Run optimization
        '''
        mlflow.log_param('type', c.model.type)
        mlflow.log_param('num_boost_round', c.train.num_boost_round)
        mlflow.log_param('early_stopping_rounds',
                         c.train.early_stopping_rounds)

        f = partial(objective,
                    X_train=X_train,
                    y_train=y_train,
                    X_test=X_test,
                    cols=cols,
                    c=c)
        opt = optuna.create_study(
            direction='maximize',
            study_name=
            f'{experiment_type}_{c.runtime.version}{c.runtime.dsize}',
            storage=
            f'sqlite:///data/optimization/{experiment_type}_{c.runtime.version}{c.runtime.dsize}.db',
            load_if_exists=True)
        opt.optimize(f, n_trials=c.optimize.n_trials)
        trial = opt.best_trial

        r.optimize = {}
        r.scores.best_trial = trial.number
        r.scores.best_score = trial.value
        r.optimize.best_params = trial.params
        tuned_params = c.model.params.copy()
        tuned_params.update(trial.params)
        r.model.tuned_params = tuned_params

        logger.debug(f'Best trial: {trial.number}')
        logger.debug(f'Best score: {trial.value}')
        logger.debug(f'Best params: {trial.params}')

        mlflow.log_metric('best_trial', trial.number)
        mlflow.log_metric('best_score', trial.value)
        mlflow.log_params(trial.params)

        return r
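
# The `objective` function used above is not shown in this snippet. Based on
# the functools.partial call, Optuna passes only the `trial` argument, so the
# function is expected to have roughly this shape (the body is a hypothetical
# placeholder, not the original implementation):
#
# def objective(trial, X_train, y_train, X_test, cols, c):
#     params = dict(c.model.params)
#     params['max_depth'] = trial.suggest_int('max_depth', 3, 12)
#     score = run_cross_validation(X_train[cols], y_train, params)  # hypothetical helper
#     return score  # the study maximizes this value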
Example #21
from __future__ import print_function  #dev1
# from sklearn import *
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn import svm
from sklearn import metrics
import mlflow
import mlflow.sklearn

if __name__ == '__main__':
    # mlflow.create_experiment("mlflowproject_demo1")
    # mlflow.set_experiment("mlflowproject_demo1")
    X, y = load_iris(return_X_y=True)
    clf = svm.SVC(kernel='linear', C=10)
    scores = cross_val_score(clf, X, y, cv=5, scoring='f1_macro')
    mlflow.log_params({'kernel': 'linear', 'C': 10})
    mlflow.log_metrics({"score": scores.mean(), 'score2': scores[0]})
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    # mlflow.sklearn.log_model(clf, "iris classification model")
    print("Model saved in run %s" % mlflow.active_run().info.run_uuid)
    mlflow.sklearn.log_model(clf, "SVM_CLF_model")

    # mlflow.sklearn.save_model(clf, 'iris_SVM_clf')
    # saved this model for mlflow
    # adding random line for testing

    # mdl = mlflow.pyfunc.load_model(model_path)
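
    # A minimal sketch (not part of the original example) of loading the
    # logged model back from the run that was just created. Note that `clf`
    # was only cloned inside cross_val_score, so the logged estimator is
    # still unfitted and would need a `fit` call before predicting.
    model_uri = f"runs:/{mlflow.active_run().info.run_id}/SVM_CLF_model"
    loaded_clf = mlflow.sklearn.load_model(model_uri)
    print("Reloaded estimator:", loaded_clf)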
Example #22
def log_params(self, params):
    mlflow.log_params(params)
Example #23
            mlflow.tensorflow.autolog()

            #         mlflow.log_params(experiment_config)

            print(
                f'BEGINNING: DATASET:{args.dataset_name}|MODEL:{args.model_name}|bsz:{args.batch_size}|lr:{args.base_learning_rate}|Color_type={args.color_type}|regularizer={regularizer}'
            )
            print('-' * 30)

            trainer = main(experiment_config, experiment_dir)

            history = trainer.history

            histories.append((args.dataset_name, args.model_name, history))

            mlflow.log_params(args.__dict__)
            for k, v in trainer.configs.items():
                mlflow.log_params(v)
                print('logged ', k)
            mlflow_log_history(history)

#     #########################################
#     #########################################
#     for model_name in model_names:
#         for dataset_name in dataset_names:
#             for lr in learning_rates:
#                 for bsz in batch_sizes:
#                     with mlflow.start_run(run_name=f'{args.model_name}-{args.dataset_name}-{color_type}-lr_{args.base_learning_rate}-bsz_{args.batch_size}', nested=True):
#                         for regularizer in regularizations:
#                             args.batch_size = bsz
#                             args.base_learning_rate = lr
Example #24
def add_params(self, params_dict):
    if self.use_mlflow:
        mlflow.log_params(params_dict)
Example #25
def train(config):
    mlflow.set_experiment(config['dataset'])
    mlflow.log_params(config)
    print('Random seed: %d' % int(config['seed']))
    torch.manual_seed(config['seed'])
    print("Training {} epochs".format(config['nepochs']))
    torch.backends.cudnn.benchmark = True
    dataset = S3dDataset(root=config['root'],
                         npoints=config['npoints'],
                         train=True,
                         load=True)
    test_dataset = S3dDataset(root=config['root'],
                              npoints=config['npoints'],
                              train=False,
                              load=True)
    num_classes = dataset.num_classes
    if config['balance']:
        train_weights = get_weights(dataset,
                                    'train',
                                    root=config['root'],
                                    n_classes=num_classes)
        test_weights = get_weights(test_dataset,
                                   'test',
                                   root=config['root'],
                                   n_classes=num_classes)
        dataloader = torch.utils.data.DataLoader(
            dataset,
            batch_sampler=BatchSampler(WeightedRandomSampler(
                weights=train_weights, num_samples=len(dataset)),
                                       batch_size=config['batchsize'],
                                       drop_last=True),
            num_workers=config['workers'])
        test_dataloader = torch.utils.data.DataLoader(
            test_dataset,
            batch_sampler=BatchSampler(WeightedRandomSampler(
                weights=test_weights, num_samples=len(test_dataset)),
                                       batch_size=config['batchsize'],
                                       drop_last=True),
            num_workers=config['workers'])
    else:
        dataloader = torch.utils.data.DataLoader(
            dataset,
            batch_size=config['batchsize'],
            shuffle=True,
            num_workers=config['workers'],
            drop_last=True)
        test_dataloader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=config['batchsize'],
            shuffle=True,
            num_workers=config['workers'],
            drop_last=True)
    print('number of classes: %d' % num_classes)
    print('train set size: %d | test set size: %d' %
          (len(dataset), len(test_dataset)))
    os.makedirs(config['outf'], exist_ok=True)
    blue = lambda x: '\033[94m' + x + '\033[0m'
    yellow = lambda x: '\033[93m' + x + '\033[0m'
    red = lambda x: '\033[91m' + x + '\033[0m'
    classifier = PointNetSeg(k=num_classes)
    model_epoch_cumulatiove_base = 0
    if config.get('model'):
        print('Loading model from: {}'.format(config.get('model')))
        classifier.load_state_dict(torch.load(config['model']))
    elif config.get('continue'):
        model_path, model_epoch_cumulatiove_base = get_path_of_last_model(
            config)
        if model_path:
            print('Loading model from: {}'.format(model_path))
            classifier.load_state_dict(torch.load(model_path))
        # model_path_dir = ...
        # run_id = "96771d893a5e46159d9f3b49bf9013e2"
        # pytorch_model = mlflow.pytorch.load_model(
        #     "runs:/" + run_id + "/" + model_path_dir)
    optimizer = optim.SGD(classifier.parameters(),
                          lr=config['lr'],
                          momentum=config['momentum'])
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    classifier.to(device)
    if config.get('mgpu'):
        classifier = torch.nn.DataParallel(classifier,
                                           device_ids=config['gpuids'])
    num_batch = len(dataset) / config['batchsize']
    for epoch in range(config['nepochs']):
        train_acc_epoch, train_iou_epoch, test_acc_epoch, test_iou_epoch = [], [], [], []
        try:
            for i, data in enumerate(dataloader):
                t0 = time.time()
                points, labels = data
                points = points.transpose(2, 1)
                points, labels = points.to(device), labels.to(device)
                optimizer.zero_grad()
                classifier = classifier.train()
                pred, _ = classifier(points)
                pred = pred.view(-1, num_classes)
                labels = labels.view(-1, 1)[:, 0]
                if config['weight']:
                    # unique labels, counts
                    u, c = torch.unique(labels, return_counts=True)
                    print(u, c)
                    w = c.sum().float() / c  # weights
                    w = w / w.sum()  # normalized weights
                    # filling missing labels weights with zeros
                    w = torch.zeros(num_classes).scatter_(0, u, w)
                    print(w)
                    loss = F.nll_loss(pred, labels, weight=w)
                else:
                    loss = F.nll_loss(pred, labels)
                loss.backward()
                optimizer.step()
                time_per_step = time.time() - t0
                t0 = time.time()
                pred_choice = pred.data.max(1)[1]
                correct = pred_choice.eq(labels.data).cpu().sum()
                train_acc = correct.item() / float(
                    config['batchsize'] * config['npoints'])
                train_iou = correct.item() / float(2 * config['batchsize'] *
                                                   config['npoints'] -
                                                   correct.item())
                train_acc_epoch.append(train_acc)
                train_iou_epoch.append(train_iou)
                mlflow.log_metric('train_acc', train_acc)
                mlflow.log_metric('train_iou', train_iou)
                mlflow.log_metric('train_loss', loss.item())
                log_metric_to_mlflow(labels.data.numpy(),
                                     pred_choice.numpy(),
                                     'train',
                                     target_names=[
                                         'board', 'floor', 'door', 'bookcase',
                                         'column', 'ceiling', 'wall', 'stairs',
                                         'beam', 'chair', 'clutter', 'table',
                                         'window', 'sofa'
                                     ],
                                     cm_plot=False,
                                     cm_norm='true',
                                     root=config['root'],
                                     ds_name=config['dataset'],
                                     verbose=0)
                time_per_log = time.time() - t0
                mlflow.log_metric('time_per_step', time_per_step)
                mlflow.log_metric('time_per_log', time_per_log)
                print(
                    'epoch %d: %d/%d | train loss: %f | train acc: %f | train iou: %f'
                    % (epoch + 1, i + 1, num_batch + 1, loss.item(), train_acc,
                       train_iou))
                if (i + 1) % 10 == 0:
                    j, data = next(enumerate(test_dataloader, 0))
                    points, labels = data
                    points = points.transpose(2, 1)
                    points, labels = points.to(device), labels.to(device)
                    classifier = classifier.eval()
                    with torch.no_grad():
                        pred, _ = classifier(points)
                    pred = pred.view(-1, num_classes)
                    labels = labels.view(-1, 1)[:, 0]
                    loss = F.nll_loss(pred, labels)
                    pred_choice = pred.data.max(1)[1]
                    correct = pred_choice.eq(labels.data).cpu().sum()
                    test_acc = correct.item() / float(
                        config['batchsize'] * config['npoints'])
                    test_iou = correct.item() / float(2 * config['batchsize'] *
                                                      config['npoints'] -
                                                      correct.item())
                    test_acc_epoch.append(test_acc)
                    test_iou_epoch.append(test_iou)
                    mlflow.log_metric('test_acc', test_acc)
                    mlflow.log_metric('test_iou', test_iou)
                    mlflow.log_metric('test_loss', loss.item())
                    mlflow.log_metric('train_acc', train_acc)
                    mlflow.log_metric('train_iou', train_iou)
                    mlflow.log_metric('train_loss', loss.item())
                    log_metric_to_mlflow(labels.data.numpy(),
                                         pred_choice.numpy(),
                                         'test',
                                         target_names=[
                                             'board', 'floor', 'door',
                                             'bookcase', 'column', 'ceiling',
                                             'wall', 'stairs', 'beam', 'chair',
                                             'clutter', 'table', 'window',
                                             'sofa'
                                         ],
                                         cm_plot=True,
                                         root=config['root'],
                                         ds_name=config['dataset'],
                                         verbose=0)
                    # mlflow.pytorch.log_model(classifier, 'model')  # mlflow has no attribute 'pytorch'
                    # mlflow.pytorch.save_model(classifier, os.path.join(
                    #     config['outf'], '{}_model_{}.pth'.format(
                    #         config['dataset'],model_epoch_cumulatiove_base + epoch))
                    torch.save(
                        classifier.state_dict(),
                        os.path.join(
                            config['outf'], '{}_model_{}.pth'.format(
                                config['dataset'],
                                model_epoch_cumulatiove_base + epoch)))
                    print(
                        blue(
                            'epoch %d: %d/%d | test loss: %f | test acc: %f | test iou: %f'
                        ) % (epoch + 1, i + 1, num_batch + 1, loss.item(),
                             test_acc, test_iou))
            print(
                yellow('epoch %d | mean train acc: %f | mean train IoU: %f') %
                (epoch + 1, np.mean(train_acc_epoch),
                 np.mean(train_iou_epoch)))
            print(
                red('epoch %d | mean test acc: %f | mean test IoU: %f') %
                (epoch + 1, np.mean(test_acc_epoch), np.mean(test_iou_epoch)))
        except KeyboardInterrupt:
            print('User interruption')
            break
        finally:
            torch.save(
                classifier.state_dict(),
                os.path.join(
                    config['outf'], '{}_model_{}.pth'.format(
                        config['dataset'],
                        model_epoch_cumulatiove_base + epoch)))
Example #26
def main(args: DictConfig):

    # Non-strict access to fields
    OmegaConf.set_struct(args, False)

    # Adding default estimator params
    default_names, _, _, default_values, _, _, _ = \
        inspect.getfullargspec(instantiate(args.estimator, context_size=0).__class__.__init__)
    if default_values is not None:
        args.estimator['defaults'] = {
            n: str(v)
            for (n, v) in zip(
                default_names[len(default_names) -
                              len(default_values):], default_values)
        }
    logger.info(OmegaConf.to_yaml(args, resolve=True))

    # Data-generating DAG
    data_path = hydra.utils.to_absolute_path(
        f'{ROOT_PATH}/{args.data.relative_path}')
    exp_name = args.data.relative_path.split('/')[-1]
    adjacency_matrix = np.load(
        f'{data_path}/DAG{args.data.sample_ind}.npy').astype(int)
    if exp_name == 'sachs_2005':
        var_names = np.load(f'{data_path}/sachs-header.npy')
    else:
        var_names = [f'x{i}' for i in range(len(adjacency_matrix))]
    dag = DirectedAcyclicGraph(adjacency_matrix, var_names)

    # Experiment tracking
    mlflow.set_tracking_uri(args.exp.mlflow_uri)
    mlflow.set_experiment(exp_name)

    # Checking if run exist
    if check_existing_hash(args, exp_name):
        logger.info('Skipping existing run.')
        return
    else:
        logger.info('No runs found - performing one.')

    # Loading Train-test data
    data = np.load(f'{data_path}/data{args.data.sample_ind}.npy')
    if args.data.standard_normalize:
        standard_normalizer = StandardScaler()
        data = standard_normalizer.fit_transform(data)
    data_train, data_test = train_test_split(data,
                                             test_size=args.data.test_ratio,
                                             random_state=args.data.split_seed)
    train_df = pd.DataFrame(data_train, columns=dag.var_names)
    test_df = pd.DataFrame(data_test, columns=dag.var_names)

    mlflow.start_run()
    mlflow.log_params(flatten_dict(args))
    mlflow.log_param('data_generator/dag/n', len(var_names))
    mlflow.log_param('data_generator/dag/m', int(adjacency_matrix.sum()))
    mlflow.log_param('data/n_train', len(train_df))
    mlflow.log_param('data/n_test', len(test_df))

    # Saving artifacts
    train_df.to_csv(
        hydra.utils.to_absolute_path(f'{mlflow.get_artifact_uri()}/train.csv'),
        index=False)
    test_df.to_csv(
        hydra.utils.to_absolute_path(f'{mlflow.get_artifact_uri()}/test.csv'),
        index=False)
    dag.plot_dag()
    plt.savefig(
        hydra.utils.to_absolute_path(f'{mlflow.get_artifact_uri()}/dag.png'))
    if len(dag.var_names) <= 20:
        df = pd.concat([train_df, test_df],
                       keys=['train',
                             'test']).reset_index().drop(columns=['level_1'])
        g = sns.pairplot(df, plot_kws={'alpha': 0.25}, hue='level_0')
        g.fig.suptitle(exp_name)
        plt.savefig(
            hydra.utils.to_absolute_path(
                f'{mlflow.get_artifact_uri()}/data.png'))

    metrics = {}

    for var_ind, target_var in enumerate(dag.var_names):

        var_results = {}

        # Considering all the variables for input
        input_vars = [var for var in dag.var_names if var != target_var]
        y_train = train_df.loc[:, target_var].values
        X_train = train_df.loc[:, input_vars].values
        y_test = test_df.loc[:, target_var].values
        X_test = test_df.loc[:, input_vars].values

        # Initialising risks
        risks = {}
        for risk in args.predictors.risks:
            risks[risk] = getattr(importlib.import_module('sklearn.metrics'),
                                  risk)

        # Fitting predictive model
        models = {}
        for pred_model in args.predictors.pred_models:
            logger.info(
                f'Fitting {pred_model._target_} for target = {target_var} and inputs {input_vars}'
            )
            model = instantiate(pred_model)
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            models[pred_model._target_] = model
            for risk, risk_func in risks.items():
                var_results[f'test_{risk}_{pred_model._target_}'] = risk_func(
                    y_test, y_pred)

        sampler = instantiate(args.estimator.sampler,
                              X_train=X_train,
                              fit_method=args.estimator.fit_method,
                              fit_params=args.estimator.fit_params)

        # =================== Relative feature importance ===================
        # 1. G = MB(target_var), FoI = input_vars / MB(target_var)
        G_vars_1 = list(dag.get_markov_blanket(target_var))
        fsoi_vars_1 = [
            var for var in input_vars
            if var not in list(dag.get_markov_blanket(target_var))
        ]
        prefix_1 = 'mb'

        # 2. G = input_vars / MB(target_var), FoI = MB(target_var)
        fsoi_vars_2 = list(dag.get_markov_blanket(target_var))
        G_vars_2 = [
            var for var in input_vars
            if var not in list(dag.get_markov_blanket(target_var))
        ]
        prefix_2 = 'non_mb'

        for (G_vars, fsoi_vars, prefix) in zip([G_vars_1, G_vars_2],
                                               [fsoi_vars_1, fsoi_vars_2],
                                               [prefix_1, prefix_2]):
            G = search_nonsorted(input_vars, G_vars)
            fsoi = search_nonsorted(input_vars, fsoi_vars)

            rfi_gof_metrics = {}
            for f, f_var in zip(fsoi, fsoi_vars):
                estimator = sampler.train([f], G)

                # GoF diagnostics
                rfi_gof_results = {}
                if estimator is not None:

                    rfi_gof_results[f'rfi/gof/{prefix}_mean_log_lik'] = \
                        estimator.log_prob(inputs=X_test[:, f], context=X_test[:, G]).mean()

                rfi_gof_metrics = {
                    k: rfi_gof_metrics.get(k, []) +
                    [rfi_gof_results.get(k, np.nan)]
                    for k in set(
                        list(rfi_gof_metrics.keys()) +
                        list(rfi_gof_results.keys()))
                }

            # Feature importance
            if len(fsoi) > 0:
                var_results[f'rfi/{prefix}_cond_size'] = len(G_vars)

                for model_name, model in models.items():
                    for risk, risk_func in risks.items():

                        rfi_explainer = explainer.Explainer(
                            model.predict,
                            fsoi,
                            X_train,
                            sampler=sampler,
                            loss=risk_func,
                            fs_names=input_vars)
                        mb_explanation = rfi_explainer.rfi(
                            X_test, y_test, G, nr_runs=args.exp.rfi.nr_runs)
                        var_results[f'rfi/{prefix}_mean_rfi_{risk}_{model_name}'] = \
                            np.abs(mb_explanation.fi_vals(return_np=True)).mean()

                var_results = {
                    **var_results,
                    **{
                        k: np.nanmean(v) if len(G_vars) > 0 else np.nan
                        for (k, v) in rfi_gof_metrics.items()
                    }
                }

        # TODO  =================== Global SAGE ===================

        mlflow.log_metrics(var_results, step=var_ind)

        metrics = {
            k: metrics.get(k, []) + [var_results.get(k, np.nan)]
            for k in set(list(metrics.keys()) + list(var_results.keys()))
        }

    # Logging mean statistics
    mlflow.log_metrics({k: np.nanmean(v)
                        for (k, v) in metrics.items()},
                       step=len(dag.var_names))
    mlflow.end_run()
Example #27
def sociable_weavers_transforms(transform_config):
    """
    Create transforms for the birds classification task.

    Training:
        1. Resize to (image_size, image_size) pixels
        2. Apply random rotation of +-[rotation_angle] degrees
        3. With probability [transforms_prob], apply Gaussian blurring with
           kernel size [blur_kernel_size] and std~U([blur_min_std], [blur_max_std])
        4. Random resized crop to (net_input_size, net_input_size) pixels
           (according to the network's input size) with ratio=(1.0, 1.0)
           and scale=(0.8, 1.2)
        5. Apply random horizontal flip
        6. Convert to tensor
        7. Normalize with mu=[dataset_means], sigma=[dataset_stds]
    Validation:
        1. Resize the shorter side to image_size pixels
        2. Center crop to (net_input_size, net_input_size) pixels (according
           to the network's input size)
        3. Convert to tensor
        4. Normalize with mu=[dataset_means], sigma=[dataset_stds]
    """
    mlflow.log_params({
        'rotation_angle':
        int(transform_config['rotation_angle']),
        'blur_kernel_size':
        int(transform_config['blur_kernel_size']),
    })
    im_size = int(transform_config['image_size'])
    input_size = int(transform_config['net_input_size'])
    dataset_mean = json.loads(transform_config['dataset_means'])
    dataset_std = json.loads(transform_config['dataset_stds'])
    normalize = transforms.Normalize(dataset_mean, dataset_std)
    transforms_prob = float(transform_config['transforms_prob'])
    min_std = float(transform_config['blur_min_std'])
    max_std = float(transform_config['blur_max_std'])
    mlflow.log_param('Gaussian blur kernel min std', min_std)
    mlflow.log_param('Gaussian blur kernel max std', max_std)
    max_noise_variance = float(transform_config['max_noise_variance'])
    min_noise_variance = float(transform_config['min_noise_variance'])

    return {
        TRAIN_PHASE:
        transforms.Compose([
            transforms.Resize([im_size, im_size]),
            transforms.RandomRotation(int(transform_config['rotation_angle'])),
            transforms.RandomApply([
                transforms.GaussianBlur(int(
                    transform_config['blur_kernel_size']),
                                        sigma=(min_std, max_std))
            ],
                                   p=transforms_prob),
            transforms.RandomResizedCrop(input_size,
                                         scale=(0.8, 1.2),
                                         ratio=(1.0, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            # transforms.RandomApply([transforms.Lambda(
            #     lambda x: x + (float(torch.rand(1)) * (
            #                 max_noise_variance - min_noise_variance) + min_noise_variance ** 0.5) * torch.randn(3,
            #                                                                                                     input_size,
            #                                                                                                     input_size))],
            #     p=transforms_prob),
            normalize
        ]),
        TEST_PHASE:
        transforms.Compose([
            transforms.Resize(im_size),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(), normalize
        ])
    }
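
# Hypothetical usage of the factory above (the config values and the
# TRAIN_PHASE constant are illustrative assumptions, not taken from the
# original project); the mlflow.log_params calls inside require an active run:
#
# tfms = sociable_weavers_transforms({
#     'rotation_angle': '15', 'blur_kernel_size': '5',
#     'image_size': '256', 'net_input_size': '224',
#     'dataset_means': '[0.485, 0.456, 0.406]',
#     'dataset_stds': '[0.229, 0.224, 0.225]',
#     'transforms_prob': '0.5', 'blur_min_std': '0.1', 'blur_max_std': '2.0',
#     'min_noise_variance': '0.001', 'max_noise_variance': '0.01',
# })
# train_ds = torchvision.datasets.ImageFolder('data/train', tfms[TRAIN_PHASE])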
Example #28
def main():
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
    parser.add_argument(
        "--batch-size",
        type=int,
        default=64,
        metavar="N",
        help="input batch size for training (default: 64)",
    )
    parser.add_argument(
        "--test-batch-size",
        type=int,
        default=1000,
        metavar="N",
        help="input batch size for testing (default: 1000)",
    )
    parser.add_argument(
        "--epochs", type=int, default=5, metavar="N", help="number of epochs to train (default: 5)"
    )
    parser.add_argument(
        "--lr", type=float, default=1.0, metavar="LR", help="learning rate (default: 1.0)"
    )
    parser.add_argument(
        "--gamma",
        type=float,
        default=0.7,
        metavar="M",
        help="Learning rate step gamma (default: 0.7)",
    )
    parser.add_argument("--seed", type=int, default=1, metavar="S", help="random seed (default: 1)")

    parser.add_argument("--run-name", type=str, default=None, help="Mlflow run name.")

    parser.add_argument(
        "--save-model", action="store_true", default=False, help="For Saving the current Model"
    )
    args = parser.parse_args()

    torch.manual_seed(args.seed)

    dataset = functools.partial(
        datasets.MNIST,
        root="../data",
        download=True,
        transform=transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
        ),
    )
    train_loader = torch.utils.data.DataLoader(
        dataset(train=True), batch_size=args.batch_size, shuffle=True
    )
    test_loader = torch.utils.data.DataLoader(
        dataset(train=False), batch_size=args.test_batch_size, shuffle=True
    )

    model = modelling.Net()
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
    mlflow.set_experiment("MNIST CNN")
    with mlflow.start_run(run_name=args.run_name):
        mlflow.log_params(vars(args))
        for epoch in range(1, args.epochs + 1):
            train(args, model, train_loader, optimizer, epoch)
            test(args, model, test_loader)
            scheduler.step()

        if args.save_model:
            model_path = "mnist_cnn.pt"
            torch.save(model.state_dict(), model_path)
            mlflow.log_artifact(model_path)
Example #29
def _mlflow_log_params(params_dict):
    mlflow.log_params({
        "pytorch version": torch.__version__,
        "ignite version": ignite.__version__,
    })
    mlflow.log_params(params_dict)
Example #30
def main():

    parser = argparse.ArgumentParser(description="PyTorch CIFAR10 Training")
    parser.add_argument("--model",
                        default="resnet56",
                        type=str,
                        help="Model architecture")
    parser.add_argument("--dataset",
                        type=str,
                        default="CIFAR10",
                        help="Name of the dataset")
    parser.add_argument("--batch",
                        type=int,
                        default=128,
                        help="Test batch size")
    parser.add_argument("--perturb",
                        type=float,
                        default=10.0,
                        help="Magnitude of noise to the input")
    parser.add_argument("--lr", default=1e-4, type=float, help="learning rate")
    parser.add_argument("--epochs",
                        type=int,
                        default=20,
                        help="Number of epochs to fine tune")
    args = parser.parse_args()

    EXPERIMENT_NAME = "Entropy Minimization"

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("==> Preparing data..")
    trainloader, testloader = read_vision_dataset("./data",
                                                  batch_size=args.batch,
                                                  dataset=args.dataset)

    print("==> Building model..")
    net = resnet.__dict__[args.model]()
    net = net.to(device)
    net = torch.nn.DataParallel(net)
    # cudnn.benchmark = True

    checkpoint = torch.load("ckpt.pth", map_location=device)
    net.load_state_dict(checkpoint["net"])
    net.eval()
    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=0.9,
                          weight_decay=5e-4)

    mlflow.set_experiment(EXPERIMENT_NAME)
    with mlflow.start_run():  # the run is created in the experiment set above
        log_params(vars(args))
        for epoch in range(1, args.epochs + 1):

            train(
                net,
                args.perturb,
                optimizer,
                testloader,
                device,
                epoch,
            )
            test(net, testloader, device, epoch)

        mlflow.pytorch.log_model(net, artifact_path="tuned model")