def eval_model(val_loader, epoch):
    """Evaluate on the validation set."""
    net.eval()
    eval_loss = 0.0
    true_labels, predicted_scores = [], []
    # Disable autograd during evaluation to save memory and compute
    with torch.no_grad():
        for batch in val_loader:
            x_val_fb_content, x_val_fb_question, x_val_fb_option, \
                x_val_fb_clens, x_val_fb_qlens, x_val_fb_olens, y_val_fb = batch
            logits, scores = net(x_val_fb_content, x_val_fb_question, x_val_fb_option)
            avg_batch_loss = criterion(scores, y_val_fb)
            eval_loss += avg_batch_loss.item()
            true_labels.extend(y_val_fb[0].tolist())
            predicted_scores.extend(scores[0].tolist())

    # Calculate the metrics
    eval_rmse = mean_squared_error(true_labels, predicted_scores) ** 0.5
    eval_r2 = r2_score(true_labels, predicted_scores)
    eval_pcc, eval_doa = dh.evaluation(true_labels, predicted_scores)
    eval_loss = eval_loss / len(val_loader)
    cur_value = eval_rmse

    logger.info("All Validation set: Loss {0:g} | PCC {1:.4f} | DOA {2:.4f} | RMSE {3:.4f} | R2 {4:.4f}"
                .format(eval_loss, eval_pcc, eval_doa, eval_rmse, eval_r2))
    writer.add_scalar('validation loss', eval_loss, epoch)
    writer.add_scalar('validation PCC', eval_pcc, epoch)
    writer.add_scalar('validation DOA', eval_doa, epoch)
    writer.add_scalar('validation RMSE', eval_rmse, epoch)
    writer.add_scalar('validation R2', eval_r2, epoch)
    return cur_value
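# `dh.evaluation` is defined elsewhere in this repo's data helpers; the sketch
# below is NOT its actual implementation, only a minimal illustration of how
# PCC (Pearson correlation coefficient) and DOA (degree of agreement, i.e. the
# fraction of concordant label/score pairs) are commonly computed, assuming
# those standard definitions. The name `_evaluation_sketch` is hypothetical.
def _evaluation_sketch(true_labels, predicted_scores):
    """Hypothetical stand-in for dh.evaluation: returns (pcc, doa)."""
    from scipy.stats import pearsonr

    # Pearson correlation between true labels and predicted scores
    pcc = pearsonr(true_labels, predicted_scores)[0]

    # DOA: over all ordered pairs whose true labels differ, count how often
    # the predicted scores rank them the same way.
    concordant, total = 0, 0
    n = len(true_labels)
    for i in range(n):
        for j in range(n):
            if true_labels[i] > true_labels[j]:
                total += 1
                if predicted_scores[i] > predicted_scores[j]:
                    concordant += 1
    doa = concordant / total if total else 0.0
    return pcc, doa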
def validation_step(x_val_content, x_val_question, x_val_option, y_val, writer=None):
    """Evaluate the model on a validation set."""
    batches_validation = dh.batch_iter(
        list(zip(x_val_content, x_val_question, x_val_option, y_val)),
        args.batch_size, 1)

    eval_counter, eval_loss = 0, 0.0
    true_labels = []
    predicted_scores = []

    for batch_validation in batches_validation:
        x_batch_content, x_batch_question, x_batch_option, y_batch = zip(*batch_validation)
        feed_dict = {
            rmidp.input_x_content: x_batch_content,
            rmidp.input_x_question: x_batch_question,
            rmidp.input_x_option: x_batch_option,
            rmidp.input_y: y_batch,
            rmidp.dropout_keep_prob: 1.0,
            rmidp.is_training: False
        }
        step, summaries, scores, cur_loss = sess.run(
            [rmidp.global_step, validation_summary_op, rmidp.scores, rmidp.loss],
            feed_dict)

        # Prepare for calculating metrics
        true_labels.extend(y_batch)
        predicted_scores.extend(scores)

        eval_loss += cur_loss
        eval_counter += 1

        if writer:
            writer.add_summary(summaries, step)

    eval_loss = float(eval_loss / eval_counter)

    # Calculate PCC & DOA
    pcc, doa = dh.evaluation(true_labels, predicted_scores)
    # Calculate RMSE & R2
    rmse = mean_squared_error(true_labels, predicted_scores) ** 0.5
    r2 = r2_score(true_labels, predicted_scores)

    return eval_loss, pcc, doa, rmse, r2
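# `dh.batch_iter` is also repo code; for reference, a minimal sketch of the
# conventional mini-batch generator its call signature suggests (yield batches
# of `data` for `num_epochs` epochs, optionally shuffled). This is an
# assumption about its behavior, not the repo's actual implementation.
import numpy as np

def _batch_iter_sketch(data, batch_size, num_epochs, shuffle=True):
    data = list(data)
    data_size = len(data)
    num_batches_per_epoch = (data_size - 1) // batch_size + 1
    for _ in range(num_epochs):
        # Shuffle the item order each epoch (validation/test passes disable this)
        order = np.random.permutation(data_size) if shuffle else np.arange(data_size)
        for batch_num in range(num_batches_per_epoch):
            start = batch_num * batch_size
            end = min(start + batch_size, data_size)
            yield [data[k] for k in order[start:end]]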
def test():
    logger.info("Loading Data...")
    logger.info("Data processing...")
    test_data = dh.load_data_and_labels(args.test_file, args.word2vec_file)
    logger.info("Data padding...")
    test_dataset = dh.MyData(test_data, args.pad_seq_len, device)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)

    VOCAB_SIZE, EMBEDDING_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(args.word2vec_file)
    criterion = Loss()
    net = HMIDP(args, VOCAB_SIZE, EMBEDDING_SIZE, pretrained_word2vec_matrix).to(device)

    # Restore the best checkpoint (the tracked metric is RMSE, so lower is better)
    checkpoint_file = cm.get_best_checkpoint(CPT_DIR, select_maximum_value=False)
    checkpoint = torch.load(checkpoint_file)
    net.load_state_dict(checkpoint['model_state_dict'])
    net.eval()

    logger.info("Scoring...")
    true_labels, predicted_scores = [], []
    batches = trange(len(test_loader), desc="Batches", leave=True)
    # Disable autograd during inference
    with torch.no_grad():
        for batch_cnt, batch in zip(batches, test_loader):
            x_test_fb_content, x_test_fb_question, x_test_fb_option, \
                x_test_fb_clens, x_test_fb_qlens, x_test_fb_olens, y_test_fb = batch
            logits, scores = net(x_test_fb_content, x_test_fb_question, x_test_fb_option)
            true_labels.extend(y_test_fb[0].tolist())
            predicted_scores.extend(scores[0].tolist())

    # Calculate the metrics
    test_rmse = mean_squared_error(true_labels, predicted_scores) ** 0.5
    test_r2 = r2_score(true_labels, predicted_scores)
    test_pcc, test_doa = dh.evaluation(true_labels, predicted_scores)
    logger.info("All Test set: PCC {0:.4f} | DOA {1:.4f} | RMSE {2:.4f} | R2 {3:.4f}"
                .format(test_pcc, test_doa, test_rmse, test_r2))
    logger.info('Test Finished.')

    logger.info('Creating the prediction file...')
    dh.create_prediction_file(save_dir=SAVE_DIR, identifiers=test_data['f_id'],
                              predictions=predicted_scores)
    logger.info('All Finished.')
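# `dh.create_prediction_file` (the PyTorch-side variant called above) is repo
# code; a plausible minimal sketch, assuming it pairs each item identifier
# with its predicted score and dumps them as JSON. The "predictions.json"
# file name and the record layout are assumptions for illustration only.
import json
import os

def _create_prediction_file_sketch(save_dir, identifiers, predictions):
    os.makedirs(save_dir, exist_ok=True)
    records = [{"id": i, "predicted_score": float(s)}
               for i, s in zip(identifiers, predictions)]
    with open(os.path.join(save_dir, "predictions.json"), "w") as f:
        json.dump(records, f, indent=4)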
def test_rmidp():
    """Test RMIDP model."""
    # Print parameters used for the model
    dh.tab_printer(args, logger)

    # Load data
    logger.info("Loading data...")
    logger.info("Data processing...")
    test_data = dh.load_data_and_labels(args.test_file, args.word2vec_file, data_aug_flag=False)
    logger.info("Data padding...")
    x_test_content, x_test_question, x_test_option, y_test = dh.pad_data(test_data, args.pad_seq_len)

    # Load rmidp model
    OPTION = dh.option(pattern=1)
    if OPTION == 'B':
        logger.info("Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(BEST_CPT_DIR, select_maximum_value=True)
    else:
        logger.info("Loading latest model...")
        checkpoint_file = tf.train.latest_checkpoint(CPT_DIR)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=args.allow_soft_placement,
            log_device_placement=args.log_device_placement)
        session_conf.gpu_options.allow_growth = args.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x_content = graph.get_operation_by_name("input_x_content").outputs[0]
            input_x_question = graph.get_operation_by_name("input_x_question").outputs[0]
            input_x_option = graph.get_operation_by_name("input_x_option").outputs[0]
            input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
            is_training = graph.get_operation_by_name("is_training").outputs[0]

            # Tensors we want to evaluate
            scores = graph.get_operation_by_name("output/scores").outputs[0]
            loss = graph.get_operation_by_name("loss/loss").outputs[0]

            # Split the output node names by '|' if you have several output nodes
            output_node_names = "output/scores"

            # Save the .pb model file
            output_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_node_names.split("|"))
            tf.train.write_graph(output_graph_def, "graph",
                                 "graph-rmidp-{0}.pb".format(MODEL), as_text=False)

            # Generate batches for one epoch
            batches = dh.batch_iter(list(zip(x_test_content, x_test_question, x_test_option, y_test)),
                                    args.batch_size, 1, shuffle=False)

            test_counter, test_loss = 0, 0.0

            # Collect the predictions here
            true_labels = []
            predicted_scores = []

            for batch_test in batches:
                x_batch_content, x_batch_question, x_batch_option, y_batch = zip(*batch_test)
                feed_dict = {
                    input_x_content: x_batch_content,
                    input_x_question: x_batch_question,
                    input_x_option: x_batch_option,
                    input_y: y_batch,
                    dropout_keep_prob: 1.0,
                    is_training: False
                }
                batch_scores, cur_loss = sess.run([scores, loss], feed_dict)

                # Prepare for calculating metrics
                true_labels.extend(y_batch)
                predicted_scores.extend(batch_scores)

                test_loss += cur_loss
                test_counter += 1

            # Calculate PCC & DOA
            pcc, doa = dh.evaluation(true_labels, predicted_scores)
            # Calculate RMSE & R2
            rmse = mean_squared_error(true_labels, predicted_scores) ** 0.5
            r2 = r2_score(true_labels, predicted_scores)

            test_loss = float(test_loss / test_counter)
            logger.info("All Test Dataset: Loss {0:g} | PCC {1:g} | DOA {2:g} | RMSE {3:g} | R2 {4:g}"
                        .format(test_loss, pcc, doa, rmse, r2))

            # Save the prediction result
            if not os.path.exists(SAVE_DIR):
                os.makedirs(SAVE_DIR)
            dh.create_prediction_file(output_file=SAVE_DIR + "/predictions.json",
                                      all_id=test_data.id, all_labels=true_labels,
                                      all_predict_scores=predicted_scores)

    logger.info("All Done.")
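# Once `test_rmidp` has written the frozen graph, it can be reloaded for
# inference without the original model-building code. A minimal sketch using
# the same TF1 APIs as above; the helper name is hypothetical, and the .pb
# path is assumed to follow the "graph/graph-rmidp-{MODEL}.pb" naming used
# when the graph was written.
def _load_frozen_graph_sketch(pb_path):
    # Parse the serialized GraphDef from disk
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(pb_path, "rb") as f:
        graph_def.ParseFromString(f.read())
    graph = tf.Graph()
    with graph.as_default():
        # Import tensors under a known prefix so they can be fetched by name,
        # e.g. graph.get_tensor_by_name("frozen/output/scores:0")
        tf.import_graph_def(graph_def, name="frozen")
    return graph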
def test_hmidp():
    """Test HMIDP model."""
    # Load data
    logger.info("✔︎ Loading data...")
    logger.info("Recommended padding sequence length is: {0}".format(FLAGS.pad_seq_len))
    logger.info("✔︎ Test data processing...")
    test_data = dh.load_data_and_labels(FLAGS.test_data_file, FLAGS.embedding_dim, data_aug_flag=False)
    logger.info("✔︎ Test data padding...")
    x_test_content, x_test_question, x_test_option, y_test = dh.pad_data(test_data, FLAGS.pad_seq_len)

    # Load hmidp model
    BEST_OR_LATEST = input("☛ Load Best or Latest Model?(B/L): ")

    while not (BEST_OR_LATEST.isalpha() and BEST_OR_LATEST.upper() in ['B', 'L']):
        BEST_OR_LATEST = input("✘ Invalid input, please re-enter (B/L): ")
    if BEST_OR_LATEST.upper() == 'B':
        logger.info("✔︎ Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(FLAGS.best_checkpoint_dir, select_maximum_value=True)
    else:
        logger.info("✔︎ Loading latest model...")
        checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = FLAGS.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x_content = graph.get_operation_by_name("input_x_content").outputs[0]
            input_x_question = graph.get_operation_by_name("input_x_question").outputs[0]
            input_x_option = graph.get_operation_by_name("input_x_option").outputs[0]
            input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
            is_training = graph.get_operation_by_name("is_training").outputs[0]

            # Tensors we want to evaluate
            scores = graph.get_operation_by_name("output/scores").outputs[0]
            loss = graph.get_operation_by_name("loss/loss").outputs[0]

            # Split the output node names by '|' if you have several output nodes
            output_node_names = "output/scores"

            # Save the .pb model file
            output_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, output_node_names.split("|"))
            tf.train.write_graph(output_graph_def, "graph",
                                 "graph-hmidp-{0}.pb".format(MODEL), as_text=False)

            # Generate batches for one epoch
            batches = dh.batch_iter(list(zip(x_test_content, x_test_question, x_test_option, y_test)),
                                    FLAGS.batch_size, 1, shuffle=False)

            test_counter, test_loss = 0, 0.0

            # Collect the predictions here
            true_labels = []
            predicted_scores = []

            for batch_test in batches:
                x_batch_content, x_batch_question, x_batch_option, y_batch = zip(*batch_test)
                feed_dict = {
                    input_x_content: x_batch_content,
                    input_x_question: x_batch_question,
                    input_x_option: x_batch_option,
                    input_y: y_batch,
                    dropout_keep_prob: 1.0,
                    is_training: False
                }
                batch_scores, cur_loss = sess.run([scores, loss], feed_dict)

                # Prepare for calculating metrics
                true_labels.extend(y_batch)
                predicted_scores.extend(batch_scores)

                test_loss += cur_loss
                test_counter += 1

            # Calculate PCC & DOA
            pcc, doa = dh.evaluation(true_labels, predicted_scores)
            # Calculate RMSE
            rmse = mean_squared_error(true_labels, predicted_scores) ** 0.5

            test_loss = float(test_loss / test_counter)
            logger.info("☛ All Test Dataset: Loss {0:g} | PCC {1:g} | DOA {2:g} | RMSE {3:g}"
                        .format(test_loss, pcc, doa, rmse))

            # Save the prediction result
            if not os.path.exists(SAVE_DIR):
                os.makedirs(SAVE_DIR)
            dh.create_prediction_file(output_file=SAVE_DIR + "/predictions.json",
                                      all_id=test_data.id, all_labels=true_labels,
                                      all_predict_scores=predicted_scores)

    logger.info("✔︎ Done.")
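# `cm.get_best_checkpoint`, used by all of the test functions above, follows
# the checkmate-style convention of a JSON index in the checkpoint directory
# that maps checkpoint paths to their tracked validation metric. A sketch
# under that assumption; the "best_checkpoints" index file name and the
# helper name are hypothetical.
import json
import os

def _get_best_checkpoint_sketch(checkpoint_dir, select_maximum_value=True):
    with open(os.path.join(checkpoint_dir, "best_checkpoints")) as f:
        best_checkpoints = json.load(f)  # {checkpoint_path: metric_value}
    # Pick the max for metrics like PCC, the min for metrics like RMSE
    if select_maximum_value:
        return max(best_checkpoints, key=best_checkpoints.get)
    return min(best_checkpoints, key=best_checkpoints.get)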