Example #1
def offset_eval(runs):
    summaries = []
    for run in runs:
        run_info, model, loader = load_run(run, data=args.data, offset='all')
        params = run_info[-1]
        dataset = loader.dataset

        _, targets, confidences = predict(model, loader, cuda=params['cuda'])

        # each sample appears `skip` times (once per temporal offset);
        # average the confidences over offsets before taking the argmax
        n_samples = len(dataset) // dataset.skip
        targets = targets[:n_samples]
        confidences = np.concatenate(confidences, axis=0)
        confidences = confidences.reshape(dataset.skip, n_samples, -1).mean(axis=0)
        predictions = np.argmax(confidences, axis=1)
        multi_offset_accuracy = accuracy_score(targets, predictions)
        summary = get_run_summary(run_info,
                                  multi_offset_acc=multi_offset_accuracy)
        summaries.append(summary)

    summary = pd.concat(summaries,
                        ignore_index=True).sort_values('multi_offset_acc',
                                                       ascending=False)
    if args.output:
        summary.to_csv(args.output, index=False)
    else:
        with pd.option_context('display.width', None,
                               'display.max_columns', None):
            print(summary)
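
Example #1 reads a module-level `args` namespace that is never shown. A minimal sketch of the CLI wiring it assumes (the flag names beyond --data and --output are hypothetical):

import argparse

parser = argparse.ArgumentParser(description='Evaluate runs with offset averaging')
parser.add_argument('runs', nargs='+', help='run directories to evaluate')
parser.add_argument('--data', help='path to the dataset')
parser.add_argument('--output', help='optional CSV file for the summary table')
args = parser.parse_args()

offset_eval(args.runs)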
Example #2
def main(args):
    run_info, model, loader = load_run(args.run_dir, data=args.data)
    params = run_info[-1]

    features = []
    with torch.no_grad():  # inference only; replaces the deprecated Variable(x, volatile=True)
        for x, _ in tqdm(loader):
            if params['cuda']:
                x = x.cuda()
            f = model.extract(x)
            features.append(f.cpu().numpy().squeeze())

    if args.format == 'jan':
        with open(args.output, 'w') as f:
            for sample, feature in tqdm(zip(loader.dataset.data, features),
                                        total=len(features)):
                f.write(
                    '#objectKey messif.objects.keys.AbstractObjectKey {}\n'.
                    format(sample['id']))
                feature.tofile(f, sep=',')
                f.write('\n')
        return

    if args.format == 'numpy':
        features = np.stack(features)
        np.save(args.output, features)
        return
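
For reference, each record written in the 'jan' format above is a key line followed by the feature vector serialized as comma-separated values; an illustrative record (the id and the values are made up):

#objectKey messif.objects.keys.AbstractObjectKey 4077
0.0132,0.8841,0.0027,0.1173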
Example #3
def main_old(args):
    dataset = MotionDataset('data/split1.pkl', fps=10)
    actions = dataset.actions.keys()
    run_info, model, loader = load_run(args.run_dir, args.data, keep_actions=actions)
    params = run_info[-1]

    out = os.path.join(args.run_dir, 'time-analysis.pdf')
    with PdfPages(out) as pdf:
        for i in tqdm(range(len(loader.dataset))):
            x, annotations = loader.dataset[i]
            if params['cuda']:
                x = x.cuda()

            with torch.no_grad():  # replaces the deprecated Variable(x, volatile=True)
                outs = model.steps_forward(x)
                head = params.get('head', 'softmax')
                if head == 'softmax':
                    outs = torch.nn.functional.softmax(outs, dim=1)
                elif head == 'sigmoid':
                    outs = torch.sigmoid(outs)  # torch.nn.functional.sigmoid is deprecated

            confidences = outs.cpu().numpy()  # per-step class confidences
            n_samples, n_classes = outs.shape
            time = np.arange(n_samples)

            # multi-hot groundtruth grid on the subsampled time axis
            groundtruth = np.zeros_like(confidences, dtype=int)
            for a in annotations:
                class_id = loader.dataset.action_id_to_ix[a['action_id']]
                start = int(round(a['start_frame'] / loader.dataset.skip))
                end = int(round((a['start_frame'] + a['duration']) / loader.dataset.skip))
                groundtruth[start:end, class_id] = 1

            cmap = plt.get_cmap('jet')
            colors = cmap(np.linspace(0, 1.0, n_classes))
            fig, axes = plt.subplots(3, 1)

            for ax in axes:
                ax.set_ylim([0, 1])
                ax.set_prop_cycle('color', colors)  # set_color_cycle was removed in matplotlib 2.0

            (ax1, ax2, ax3) = axes
            ax1.plot(time, confidences)
            ax2.plot(time, groundtruth)
            ax3.plot(time, confidences * groundtruth)
            pdf.savefig()
            plt.close()
Example #4
def confusion_plot(runs):
    for run in runs:
        run_info, model, loader = load_run(run, data=args.data)
        run_dir, _, label, _, params = run_info
        loader = loader[1]
        dataset = loader.dataset

        predictions, targets, _ = predict(model, loader, cuda=params['cuda'])
        overall_accuracy = accuracy_score(targets, predictions)
        confusion = confusion_matrix(targets, predictions)
        mask = confusion == 0

        plt.figure(figsize=(30, 30))
        plt.title('{} (Overall Accuracy: {:4.2%})'.format(
            label, overall_accuracy))
        ax = sns.heatmap(confusion, annot=True, fmt='d', mask=mask, cbar=False)
        classes = dataset.action_descriptions
        tick_marks = np.arange(len(classes))
        for axis in [ax.xaxis, ax.yaxis]:
            axis.set_ticks(tick_marks + 0.5, minor=True)
            axis.set(ticks=tick_marks, ticklabels=classes)

        for tick_label in ax.get_xticklabels():  # don't shadow `label` from run_info
            tick_label.set_rotation(90)
        plt.tight_layout()
        ax.set_ylabel('True label')
        ax.set_xlabel('Predicted label')
        ax.grid(True, which='minor')

        plot_fname = os.path.join(run_dir, 'confusion.pdf')
        plt.savefig(plot_fname, bbox_inches='tight')
        plt.close()

        del model, loader, predictions, targets
Example #5
for world_size in ((5, 5), ):
    dataset_name = '{}x{}_greedy_random'.format(*world_size)
    results_folder = base_path / '{}x{}_greedy_test'.format(*world_size)
    if results_folder.exists():
        results_folder.rmdir()  # will throw error if not empty
    os.makedirs(str(results_folder))
    for episodes in (1, 5, 20, 50, 200):
        adhoc = AdhocAgent(3,
                           mcts_c=mcts_c,
                           mcts_k=mcts_k,
                           mcts_n=mcts_n,
                           behavior_model_size=bsize,
                           environment_model_size=esize)
        load_run(dataset_folder / dataset_name,
                 adhoc,
                 episodes,
                 fit=False,
                 compute_metrics=False)
        adhoc_filename = 'adhoc_' + str(episodes)
        adhoc.save(adhoc_filename)
        for j in range(n_threads):
            threads.append(
                pool.apply_async(run,
                                 args=(progress_q, results_q, j,
                                       adhoc_filename, episodes,
                                       results_folder, world_size)))

for thread in threads:
    thread.get()

progress_q.put(None)
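
Example #5 uses several names defined elsewhere in its script (`base_path`, `dataset_folder`, the MCTS hyperparameters, the `run` worker, the pool and queues). A minimal sketch of the scaffolding it assumes, with illustrative values:

import multiprocessing as mp
from pathlib import Path

base_path = Path('results')              # hypothetical locations
dataset_folder = Path('datasets')
mcts_c, mcts_k, mcts_n = 1.41, 10, 100   # illustrative MCTS hyperparameters
bsize = esize = 64                       # illustrative model sizes
n_threads = 4

pool = mp.Pool(n_threads)
manager = mp.Manager()
progress_q = manager.Queue()             # workers report progress here
results_q = manager.Queue()              # workers push episode results here
threads = []                             # AsyncResult handles, joined with .get()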
Example #6
def main(args):

    if args.delay:
        delay_plot(args)
        return

    if args.pr:
        targets, predictions, _ = get_predictions(args.run_dir, stream=False)
        pr_fname = os.path.join(args.run_dir, 'pr.npz')
        _ = find_thresholds(targets, predictions, stream=False, pr=pr_fname)
        return

    if args.compute_metrics or args.plot_predictions:
        run_info, model, loader = load_run(args.run_dir, data=args.data)
        params = run_info[-1]
        labels = np.array([
            a.replace('hdm05_', '')
            for a in loader[1].dataset.action_descriptions
        ])

    if args.compute_metrics:
        rows = []
        thr_tab = pd.DataFrame(
            index=labels,
            columns=pd.MultiIndex.from_product([['fair', 'unfair'],
                                                ['stream', 'sequences']]))
        for stream, fair in itertools.product((False, True), repeat=2):

            targets, predictions, annot_time = get_predictions(
                args.run_dir, stream=stream, force=args.force)
            thr_targets, thr_predictions = targets, predictions
            if fair:
                thr_targets, thr_predictions, _ = get_predictions(
                    args.run_dir, train=True, stream=stream, force=args.force)
                train_targets = thr_targets

            print('Stream: {} Fair: {}'.format(stream, fair))
            thrs = find_thresholds(thr_targets, thr_predictions, stream=stream)

            thr_tab[(('fair' if fair else 'unfair'),
                     ('stream' if stream else 'sequences'))] = thrs[1]
            metrics = compute_metrics(targets,
                                      predictions,
                                      thrs,
                                      stream=stream)
            row = (stream, fair) + metrics + (annot_time, )
            rows.append(row)

        thr_tab['train_support'] = train_targets.sum(axis=0)
        thr_tab['test_support'] = targets.sum(axis=0)
        thresholds_file = os.path.join(args.run_dir, 'thresholds.csv')
        thr_tab.to_csv(thresholds_file)

        columns = ('Stream', 'Fair', 'microAP', 'macroAP', 'microF1',
                   'macroF1', 'catMicroF1', 'catMacroF1', 'AnnotTime')

        metrics = pd.DataFrame.from_records(rows, columns=columns)
        metrics_file = os.path.join(args.run_dir, 'metrics.csv')
        metrics.to_csv(metrics_file)
        print(metrics)

    if args.plot_predictions:
        stream, fair = False, False
        targets, predictions, annot_time = get_predictions(args.run_dir,
                                                           stream=stream,
                                                           force=args.force)
        thr_targets, thr_predictions = targets, predictions
        if fair:
            thr_targets, thr_predictions, _ = get_predictions(args.run_dir,
                                                              train=True,
                                                              stream=stream,
                                                              force=args.force)
            train_targets = thr_targets

        thrs = find_thresholds(thr_targets, thr_predictions, stream=stream)

        global_thr, multiple_thrs = thrs
        out = os.path.join(args.run_dir, 'time-analysis.pdf')
        seq_ids = [
            int(loader[1].dataset.data[i]['seq_id'])
            for i in range(len(targets))
        ]

        plot_preditctions(targets, predictions, seq_ids, labels, global_thr,
                          out)
        return
Example #7
parser.add_argument('--queries', required=True, help='Queries file')
parser.add_argument('--run', required=True, help='Run file')
parser.add_argument('--metadata', required=True, help='Metadata file')
parser.add_argument('--k',
                    type=int,
                    default=10,
                    help='number of documents per query to print.')
parser.add_argument('--abstract',
                    action='store_true',
                    default=False,
                    help='Print abstract.')

args = parser.parse_args()

queries = utils.load_queries(args.queries)
run = utils.load_run(args.run)
metadata = load_metadata(args.metadata)

for query_id, (query, question) in queries.items():
    if query_id not in run:
        print(f'>> Missing query_id: {query_id}')
        continue

    print(f'query id: {query_id} | query: {query} | question: {question}')
    output = 'rank | doc_id | title'
    if args.abstract:
        output += ' | abstract'
    print(output)
    for rank, doc_id in enumerate(run[query_id][:args.k]):
        title, abstract = metadata[doc_id]
        output = [str(rank + 1), doc_id, title]
        if args.abstract:
            output.append(abstract)
        print(' | '.join(output))
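
The format `utils.load_run` expects is not shown; judging by its use here (a dict mapping query_id to a ranked list of doc_ids) and by the TREC lines Example #9 writes, a minimal loader might look like this sketch:

import collections

def load_run(path):
    """Parse a TREC run file ('qid Q0 docid rank score tag') into
    {qid: [docid, ...]} ordered by rank."""
    run = collections.defaultdict(list)
    with open(path) as f:
        for line in f:
            query_id, _, doc_id, rank, _score, _tag = line.split()
            run[query_id].append((int(rank), doc_id))
    return {qid: [doc for _, doc in sorted(docs)] for qid, docs in run.items()}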
Example #8
def main(args):
    run_info, model, loader = load_run(args.run_dir, data=args.data)
    params = run_info[-1]

    out = os.path.join(args.run_dir, 'time-analysis.pdf')
    labels = np.array([a.replace('hdm05_', '') for a in loader.dataset.action_descriptions])

    best_f1s = []
    targets = []
    predictions = []
    # pages go to a temporary PDF first; they are reordered by best F1
    # (descending) into `out` with pdftk at the end
    with PdfPages('/tmp/app.pdf') as pdf:
        for i, (x, y) in enumerate(tqdm(loader)):
            y = y.numpy().squeeze()
            targets.append(y)
            if params['cuda']:
                x = x.cuda()

            with torch.no_grad():  # replaces the deprecated Variable(x, volatile=True)
                logits = model.segment(x)
                y_hat = torch.sigmoid(logits)  # torch.nn.functional.sigmoid is deprecated

            y_hat = y_hat.cpu().numpy().squeeze()
            predictions.append(y_hat)

            n_samples, n_classes = y_hat.shape
            time = np.arange(n_samples)


            ap = average_precision_score(y, y_hat, average='micro')
            p, r, t = precision_recall_curve(y.ravel(), y_hat.ravel())
            t = np.insert(t, 0, 0)  # pad thresholds to match the length of p and r

            f1 = 2 * (p * r) / (p + r)
            best_f1, best_thr = max(zip(f1, t))

            best_f1s.append(best_f1)
            cmap = plt.get_cmap('jet')
            colors = cmap(np.linspace(0, 1.0, n_classes))
            fig, axes = plt.subplots(3, 1)

            for ax in axes:
                ax.set_ylim([0, 1.1])
                ax.set_prop_cycle('color', colors)

            (ax1, ax2, ax3) = axes
            ax1.set_title('Prediction [AP={:.1%}, F1={:.1%} (thr={})]'.format(ap, best_f1, best_thr))
            ax1.plot(time, y_hat, label=labels)
            ax2.set_title('Groundtruth ({})'.format(loader.dataset.data[i]['seq_id']))
            lines = ax2.plot(time, y)
            ax3.set_title('Masked Prediction')
            lines = ax3.plot(time, y_hat * y)

            legends_ix = set(y.sum(axis=0).nonzero()[0].tolist() +
                             (y_hat > 0.2).sum(axis=0).nonzero()[0].tolist())

            legends_ix = np.array(list(legends_ix))
            lines = np.array(lines)

            lines = lines[legends_ix]
            legends = labels[legends_ix]

            sns.despine()
            lgd = ax2.legend(lines, legends, loc='center', ncol=6, bbox_to_anchor=(0.5, -0.42))
            plt.tight_layout()
            pdf.savefig(bbox_extra_artists=(lgd,), bbox_inches='tight')
            plt.close()

    best_f1s = np.array(best_f1s)
    order = np.argsort(best_f1s)[::-1] + 1  # pdftk page numbers are 1-based
    order = " ".join(map(str, order))
    os.system('pdftk /tmp/app.pdf cat {} output {}'.format(order, out))

    targets = np.concatenate(targets, axis=0)
    predictions = np.concatenate(predictions, axis=0)

    p, r, t = precision_recall_curve(targets.ravel(), predictions.ravel())
    t = np.insert(t, 0, 0)

    f1 = 2 * (p * r) / (p + r)
    best_f1, best_thr = max(zip(f1, t))

    print('Single Thr F1: {} {}'.format(best_f1, best_thr))

    cat_f1s = []
    cat_thr = []
    for i in range(n_classes):
        p, r, t = precision_recall_curve(targets[:, i], predictions[:, i])
        f1 = 2 * (p * r) / (p + r)
        t = np.insert(t, 0, 0)
        b_f1, b_thr = max(zip(f1, t))
        cat_f1s.append(b_f1)
        cat_thr.append(b_thr)

    data = pd.DataFrame(dict(BestF1=cat_f1s, Threshold=cat_thr), index=labels)
    print(data)
    support = targets.sum(axis=0)
    avgF1 = (data['BestF1'].values * support).sum() / support.sum()
    print(avgF1)
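
The best-F1 search along the precision-recall curve appears three times above; factored into a helper it reads as follows (a sketch mirroring the snippet's logic, with a guard for the p + r == 0 case the original leaves unhandled):

import numpy as np
from sklearn.metrics import precision_recall_curve

def best_f1_threshold(targets, scores):
    """Return (best_f1, threshold) maximizing F1 along the PR curve."""
    p, r, t = precision_recall_curve(targets.ravel(), scores.ravel())
    t = np.insert(t, 0, 0)  # pad thresholds to match p and r, as in the snippet
    with np.errstate(divide='ignore', invalid='ignore'):
        f1 = 2 * (p * r) / (p + r)
    f1 = np.nan_to_num(f1)
    return max(zip(f1, t))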
Example #9
def main(_):
    bert_config = modeling.BertConfig.from_json_file(config_dict[FLAGS.model_size])

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tpu_cluster_resolver = None
    if use_tpu:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2

    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        keep_checkpoint_max=1,
        model_dir=FLAGS.output_path,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=iterations_per_loop,
            num_shards=num_tpu_cores,
            per_host_input_for_training=is_per_host))

    model_fn = model_fn_builder(
        bert_config=bert_config,
        num_labels=2,
        init_checkpoint=init_checkpoint,
        use_tpu=use_tpu,
        use_one_hot_embeddings=use_tpu)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.batch_size,
        eval_batch_size=FLAGS.batch_size,
        predict_batch_size=FLAGS.batch_size,
        params={"qc_scores": "qc_scores"})

    tf.logging.info("***** Running evaluation *****")
    tf.logging.info("  Batch size = %d", FLAGS.batch_size)

    for split in ["valid", "test"]:

        maxp_run = load_run(os.path.join(FLAGS.first_model_path, "{}_{}_result.trec".format(FLAGS.dataset, split)))

        query_docids_map = []
        data_path = os.path.join(FLAGS.output_path, "rerank-{0}_kc-{1}".format(FLAGS.rerank_num, FLAGS.kc), "data")
        result_path = os.path.join(FLAGS.output_path, "rerank-{0}_kc-{1}".format(FLAGS.rerank_num, FLAGS.kc), "result")
        if not tf.gfile.Exists(result_path):
            tf.gfile.MakeDirs(result_path)

        with tf.gfile.Open(os.path.join(data_path, "chunk_passage_ids_{0}.txt".format(split))) as ref_file:
            for line in ref_file:
                query_docids_map.append(line.strip().split("\t"))

        predict_input_fn = input_fn_builder(
            dataset_path=os.path.join(data_path, "chunk_passage_{0}.tf".format(split)),
            is_training=False,
            seq_length=FLAGS.max_seq_length,
            drop_remainder=False)

        total_count = 0

        result_file = tf.gfile.Open(os.path.join(result_path, "{0}_{1}_result.trec".format(FLAGS.dataset, split)), 'w')

        ckpt = tf.train.latest_checkpoint(checkpoint_dir=FLAGS.third_model_path)
        print("use latest ckpt: {0}".format(ckpt))

        result = estimator.predict(input_fn=predict_input_fn,
                                   yield_single_examples=True,
                                   checkpoint_path=ckpt)

        start_time = time.time()
        results = []
        result_dict = collections.OrderedDict()
        for item in result:

            results.append((item["qc_scores"], item["probs"]))
            total_count += 1

            if total_count == len(query_docids_map) or query_docids_map[total_count][0] != \
                    query_docids_map[total_count - 1][0]:

                chunk_num = len(results) // FLAGS.rerank_num
                assert chunk_num <= FLAGS.kc

                qc_scores, probs = list(zip(*results))
                qc_scores = np.stack(qc_scores)
                cp_scores = np.stack(probs)[:, 1]

                qc_scores = np.reshape(qc_scores, [FLAGS.rerank_num, chunk_num])
                cp_scores = np.reshape(cp_scores, [FLAGS.rerank_num, chunk_num])

                # softmax normalization
                qc_scores = softmax(qc_scores, axis=-1)

                scores = np.sum(np.multiply(qc_scores, cp_scores), axis=-1, keepdims=False)

                start_idx = total_count - FLAGS.rerank_num * chunk_num
                end_idx = total_count
                query_ids, chunk_ids, passage_ids, labels, qc_scores = zip(*query_docids_map[start_idx:end_idx])
                assert len(set(query_ids)) == 1, "Query ids must be all the same."
                query_id = query_ids[0]

                candidate_docs = list()
                for pid in passage_ids:
                    doc_id = pid.split("_")[0]
                    if doc_id not in candidate_docs:
                        candidate_docs.append(doc_id)

                result_dict[query_id] = dict()

                for i, doc in enumerate(candidate_docs):
                    result_dict[query_id][doc] = scores[i]

                rerank_list = sorted(result_dict[query_id].items(), key=lambda x: x[1], reverse=True)

                last_score = rerank_list[-1][1]
                for doc in maxp_run[query_id][FLAGS.rerank_num:]:
                    current_score = last_score - 0.01
                    result_dict[query_id][doc] = current_score
                    last_score = current_score

                ranking_list = sorted(result_dict[query_id].items(), key=lambda x: x[1], reverse=True)

                for rank, (doc_id, score) in enumerate(ranking_list):
                    result_file.write(
                        "\t".join([query_id, "Q0", doc_id, str(rank + 1), str(score), "chunk_passage_PRF"]) + "\n")

                results = []

            if total_count % 1000 == 0:
                tf.logging.warn("Read {} examples in {} secs".format(
                    total_count, int(time.time() - start_time)))

        result_file.close()
        tf.logging.info("Done Evaluating!")
Example #10
def main(_):
    print('Loading Tokenizer...')
    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab,
                                           do_lower_case=True)

    if not tf.gfile.Exists(FLAGS.output_path):
        tf.gfile.MakeDirs(FLAGS.output_path)

    qrels = None
    if FLAGS.qrels:
        qrels = load_qrels(path=FLAGS.qrels)

    if FLAGS.task == "passage":
        run = load_run(path=FLAGS.run_file)

        useful_docids = {docid for ids in run.values() for docid in ids}

        queries = load_queries(path=FLAGS.queries,
                               type="title",
                               dataset=FLAGS.dataset)
        data = merge(qrels=qrels, run=run, queries=queries)

        print('Loading Collection...')
        collection = load_collection(FLAGS.collection_file, FLAGS.dataset,
                                     useful_docids)

        print("queries_num:{}".format(len(queries)))

        print('Converting to TFRecord...')
        convert_dataset(main_path=FLAGS.output_path,
                        data=data,
                        collection=collection,
                        tokenizer=tokenizer)
    else:
        for split in ["valid", "test"]:
            run_file = os.path.join(
                FLAGS.first_model_path,
                "{0}_{1}_result.tsv".format(FLAGS.dataset, split))

            run = load_run(path=run_file, has_pid=True, return_pid=True)
            queries = load_queries(path=FLAGS.queries,
                                   type="title",
                                   dataset=FLAGS.dataset,
                                   fold=FLAGS.fold,
                                   split=split)
            data = merge(qrels=qrels, run=run, queries=queries)

            print('Loading Collection...')
            collection = load_two_columns_file(FLAGS.collection_file)

            print("queries_num:{}".format(len(queries)))

            print('Converting to TFRecord...')
            convert_dataset(main_path=os.path.join(FLAGS.output_path,
                                                   "fold-" + str(FLAGS.fold)),
                            data=data,
                            collection=collection,
                            tokenizer=tokenizer,
                            split=split)

    print('done!')
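
load_qrels is likewise not shown; if FLAGS.qrels points at a standard TREC qrels file ('qid iteration docid relevance'), a minimal loader might look like this sketch (not the project's implementation):

import collections

def load_qrels(path):
    """Parse TREC qrels lines into {qid: {docid: relevance}}."""
    qrels = collections.defaultdict(dict)
    with open(path) as f:
        for line in f:
            query_id, _, doc_id, rel = line.split()
            qrels[query_id][doc_id] = int(rel)
    return dict(qrels)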