Example #1
def test_unzip():
    def _to_lists(seq, n=10):
        """iter of iters -> finite list of finite lists
        """
        def initial(s):
            return list(take(n, s))

        return initial(map(initial, seq))

    def _assert_initial_matches(a, b, n=10):
        assert list(take(n, a)) == list(take(n, b))

    # Unzips a simple list correctly
    assert _to_lists(unzip([('a', 1), ('b', 2), ('c', 3)])) \
        == [['a', 'b', 'c'], [1, 2, 3]]

    # Can handle a finite number of infinite iterators (the naive
    # `zip(*args)` implementation fails on this example).
    a, b, c = unzip(zip(count(1), repeat(0), repeat(1)))
    _assert_initial_matches(a, count(1))
    _assert_initial_matches(b, repeat(0))
    _assert_initial_matches(c, repeat(1))

    # Sensibly handles empty input
    assert list(unzip(zip([]))) == []
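
The `unzip` under test is not shown on this page. As a rough sketch only (standard library, assuming nothing about the real implementation beyond this test), a lazy transpose along these lines would satisfy all three assertions, including the infinite-iterator case that defeats `zip(*args)`:

from itertools import chain, tee
from operator import itemgetter

def unzip_sketch(seq):
    """Lazily transpose an iterable of n-tuples into n iterators."""
    seq = iter(seq)
    try:
        first = next(seq)  # peek at one row to learn the tuple width
    except StopIteration:
        return ()          # empty input -> no columns, so list(...) == []
    # put the peeked row back, then fan the stream out once per column
    copies = tee(chain([first], seq), len(first))
    return tuple(map(itemgetter(i), column) for i, column in enumerate(copies))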
Example #3
    def coll_stride(tokenizer, batch, max_len=1024, stride=256):
        def is_good_data(d):
            """ make sure data is not empty"""
            source_sents, extracts = d
            return source_sents and extracts

        @curry
        def prepro(tokenizer, d, max_len=1024, stride=256):
            """ make sure data is not empty"""
            source_sents, extracts = d
            tokenized_sents = [
                tokenizer.tokenize(source_sent.lower())
                for source_sent in source_sents
            ]
            tokenized_sents = [['[CLS]'] + tokenized_sent
                               for tokenized_sent in tokenized_sents]
            tokenized_sents = [
                tokenizer.convert_tokens_to_ids(tokenized_sent)
                for tokenized_sent in tokenized_sents
            ]
            word_num = [
                len(tokenized_sent) for tokenized_sent in tokenized_sents
            ]
            truncated_word_num = []
            total_count = 0
            for num in word_num:
                if total_count + num < max_len:
                    truncated_word_num.append(num)
                else:
                    truncated_word_num.append(max_len - total_count)
                    break
                total_count += num
            tokenized_sents = list(concat(tokenized_sents))[:max_len]
            tokenized_sents_lists = [tokenized_sents[:BERT_MAX_LEN]]
            length = len(tokenized_sents) - BERT_MAX_LEN
            i = 1
            while length > 0:
                tokenized_sents_lists.append(
                    tokenized_sents[(i * BERT_MAX_LEN -
                                     stride):((i + 1) * BERT_MAX_LEN -
                                              stride)])
                i += 1
                length -= (BERT_MAX_LEN - stride)
            abs_sents = tokenize(None, extracts)
            art_sents = tokenize(None, source_sents)

            return (art_sents, tokenized_sents_lists,
                    truncated_word_num), abs_sents

        art_batch, abs_batch = unzip(batch)
        art_batch, abs_batch = list(
            zip(*list(filter(is_good_data, zip(art_batch, abs_batch)))))
        art_sents, abs_sents = list(
            zip(*list(
                map(prepro(tokenizer, max_len=max_len, stride=stride),
                    zip(art_batch, abs_batch)))))
        return art_sents, abs_sents
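
To make the slicing above concrete, the same index arithmetic can be evaluated on assumed values of the constants (the real BERT_MAX_LEN is defined elsewhere in this module):

# Hypothetical values, only to show which token windows the loop produces.
BERT_MAX_LEN, stride = 512, 256
bounds = [(0, BERT_MAX_LEN)] + [
    (i * BERT_MAX_LEN - stride, (i + 1) * BERT_MAX_LEN - stride)
    for i in range(1, 4)
]
print(bounds)  # [(0, 512), (256, 768), (768, 1280), (1280, 1792)]
# Only the first two windows overlap (by 256 tokens here); later windows are
# adjacent, since the -stride offset is constant rather than cumulative.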
Example #4
 def coll(batch):
     art_batch, abs_batch, query_batch = unzip(batch)
     query_batch_list = list(query_batch)
     art_sents = list(filter(bool, map(tokenize(None), art_batch)))
     abs_sents = list(filter(bool, map(tokenize(None), abs_batch)))
     queries = list(
         filter(
             bool,
             map(tokenize(None), [[query] for query in query_batch_list])))
     return art_sents, abs_sents, queries
Example #5
    def coll(batch):
        def is_good_data(d):
            """ make sure data is not empty"""
            source_sents, extracts = d
            return source_sents and extracts

        art_batch, abs_batch = unzip(batch)
        art_batch, abs_batch = list(
            zip(*list(filter(is_good_data, zip(art_batch, abs_batch)))))
        art_sents = list(filter(bool, map(tokenize(None), art_batch)))
        abs_sents = list(filter(bool, map(tokenize(None), abs_batch)))
        return art_sents, abs_sents
Example #6
def position_random_with_filter(tokens, filer_fn, window_size=1):
    windowed_tokens = more_itertools.windowed(tokens, window_size)
    # for t in tokens:
    #     print("tokens:{}, {}".format(t.value, filer_fn([t])))
    random_pos_list = unzip(
        filter(lambda x: filer_fn(x[1]), enumerate(windowed_tokens)))
    if random_pos_list:
        random_pos_list = list(random_pos_list[0])
    else:
        raise NotFoundChangePositionException
    pos = random.sample(random_pos_list, k=1)[0]
    token = tokens[pos]
    return token.lexpos, pos
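
A small hypothetical usage example: the function only touches `.value` and `.lexpos` on the tokens, so a namedtuple stand-in is enough (more_itertools, random and NotFoundChangePositionException are assumed to be available in the function's module, as its body implies):

from collections import namedtuple

Tok = namedtuple('Tok', ['value', 'lexpos'])
toks = [Tok('if', 0), Tok('x', 3), Tok('==', 5), Tok('1', 8)]

# Pick a position whose 1-token window contains '=='; only index 2 qualifies,
# so the "random" sample is deterministic here.
lexpos, pos = position_random_with_filter(
    toks, lambda window: any(t.value == '==' for t in window))
print(lexpos, pos)  # 5 2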
Example #7
 def pad_collate(features):
     """ pad the input features to same length"""
     input_ids, input_masks, segment_ids, lm_label_ids = map(
         list, unzip(features))
     max_len = max(map(len, input_ids))
     for ids, masks, segs, labels in zip(input_ids, input_masks,
                                         segment_ids, lm_label_ids):
         while len(ids) < max_len:
             ids.append(0)
             masks.append(0)
             segs.append(0)
             labels.append(-1)
     input_ids = torch.tensor(input_ids)
     input_mask = torch.tensor(input_masks)
     segment_ids = torch.tensor(segment_ids)
     lm_label_ids = torch.tensor(lm_label_ids)
     return input_ids, input_mask, segment_ids, lm_label_ids
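
A quick usage check with hypothetical toy features, assuming `pad_collate` is in scope and each feature is an (input_ids, input_mask, segment_ids, lm_label_ids) tuple of plain lists, as the loop above expects:

features = [
    ([101, 7592, 102], [1, 1, 1], [0, 0, 0], [-1, -1, -1]),
    ([101, 7592, 2088, 102], [1, 1, 1, 1], [0, 0, 0, 0], [-1, 2088, -1, -1]),
]
input_ids, input_mask, segment_ids, lm_label_ids = pad_collate(features)
print(input_ids.shape)   # torch.Size([2, 4]) -- the shorter row is padded with 0
print(lm_label_ids[0])   # tensor([-1, -1, -1, -1]) -- its labels are padded with -1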
Example #8
    def coll(tokenizer, batch):
        def is_good_data(d):
            """ make sure data is not empty"""
            source_sents, extracts = d
            return source_sents and extracts

        @curry
        def prepro(tokenizer, d, max_len=512):
            """ make sure data is not empty"""
            source_sents, extracts = d
            tokenized_sents = [
                tokenizer.tokenize(source_sent.lower())
                for source_sent in source_sents
            ]
            tokenized_sents = [
                tokenized_sent + ['[SEP]']
                for tokenized_sent in tokenized_sents
            ]
            tokenized_sents[0] = ['[CLS]'] + tokenized_sents[0]
            word_num = [
                len(tokenized_sent) for tokenized_sent in tokenized_sents
            ]
            truncated_word_num = []
            total_count = 0
            for num in word_num:
                if total_count + num < max_len:
                    truncated_word_num.append(num)
                else:
                    truncated_word_num.append(max_len - total_count)
                    break
                total_count += num
            tokenized_sents = list(concat(tokenized_sents))[:max_len]
            tokenized_sents = tokenizer.convert_tokens_to_ids(tokenized_sents)
            abs_sents = tokenize(None, extracts)
            art_sents = tokenize(None, source_sents)

            return (art_sents, tokenized_sents, truncated_word_num), abs_sents

        art_batch, abs_batch = unzip(batch)
        art_batch, abs_batch = list(
            zip(*list(filter(is_good_data, zip(art_batch, abs_batch)))))
        art_sents, abs_sents = list(
            zip(*list(map(prepro(tokenizer), zip(art_batch, abs_batch)))))
        return art_sents, abs_sents
Example #9
def fetch(pdu_type: PduType,
          df: Any,
          obj_type: Type[ObjectType],
          parameter: Optional[Text] = None,
          config: Optional[SnmpConfig] = None,
          **kwargs: Text) -> Tuple[Any, Sequence[SnmpError]]:
    """Fetch SNMP results and map to a DataFrame."""
    def _fetch() -> Iterator[Tuple[Any, Sequence[SnmpError]]]:
        for hosts, data, index in distribute(df, None, **kwargs):
            results, errors = distributed_fetch(pdu_type,
                                                hosts,
                                                obj_type,
                                                parameter,
                                                config=config)
            yield obj_type.to_pandas(results, data, index), errors

    result_dfs, errors_lists = unzip(list(_fetch()))

    return (pd.concat(result_dfs),
            [error for errors in errors_lists for error in errors])
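
The unzip-then-recombine step at the end of `fetch` can be exercised on toy data; the `unzip` import below is an assumption about where the helper comes from (toolz.sandbox ships one with this behaviour):

import pandas as pd
from toolz.sandbox import unzip  # assumption: the same kind of unzip used above

pairs = [(pd.DataFrame({'x': [1]}), ['timeout on host-a']),
         (pd.DataFrame({'x': [2]}), [])]
result_dfs, errors_lists = unzip(pairs)
combined = pd.concat(result_dfs)
flat_errors = [error for errors in errors_lists for error in errors]
print(len(combined), flat_errors)  # 2 ['timeout on host-a']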
Example #10
    def coll_sent(tokenizer, batch):
        def is_good_data(d):
            """ make sure data is not empty"""
            source_sents, extracts = d
            return source_sents and extracts

        @curry
        def prepro(tokenizer, d, max_len=150, max_sent_len=60):
            """ make sure data is not empty"""
            source_sents, extracts = d
            tokenized_sents = [
                tokenizer.tokenize(source_sent.lower())
                for source_sent in source_sents
            ]
            tokenized_sents = tokenized_sents[:max_sent_len]
            tokenized_sents = [['[CLS]'] + tokenized_sent[:max_len - 1]
                               for tokenized_sent in tokenized_sents]
            tokenized_sents = [
                tokenizer.convert_tokens_to_ids(tokenized_sent)
                for tokenized_sent in tokenized_sents
            ]
            word_num = [
                len(tokenized_sent) for tokenized_sent in tokenized_sents
            ]
            abs_sents = tokenize(None, extracts)
            art_sents = tokenize(None, source_sents)

            return (art_sents, tokenized_sents, word_num), abs_sents

        art_batch, abs_batch = unzip(batch)
        art_batch, abs_batch = list(
            zip(*list(filter(is_good_data, zip(art_batch, abs_batch)))))
        art_sents, abs_sents = list(
            zip(*list(map(prepro(tokenizer), zip(art_batch, abs_batch)))))
        return art_sents, abs_sents
Example #11
    def pad_collate(features):
        """ pad the input features to same length"""
        # need to sort by src lens (support RNN encoder)
        features = sorted(features, key=_feature_sort_key, reverse=True)
        (ids, src_ids, src_lens, tgt_ids, topk_logits,
         topk_inds) = map(list, unzip(features))
        src_ids = pad_sequence(src_ids, batch_first=False,
                               padding_value=PAD).unsqueeze(2)
        src_len = torch.tensor(src_lens)
        tgt_ids = pad_sequence(tgt_ids, batch_first=False,
                               padding_value=PAD).unsqueeze(2)
        ids = torch.tensor(ids)
        # pad bert hiddens
        len_, batch, _ = tgt_ids.size()
        k = topk_logits[0].size(-1)
        topk_logit = torch.zeros(
            len_ - 1,  # minus BOS
            batch,
            k,
            dtype=topk_logits[0].dtype)
        topk_index = torch.zeros(
            len_ - 1,  # minus BOS
            batch,
            k,
            dtype=topk_inds[0].dtype)
        for i, (logit, index) in enumerate(zip(topk_logits, topk_inds)):
            topk_logit.data[:logit.size(0), i, :] = logit.data
            topk_index.data[:index.size(0), i, :] = index.data

        batch = InputFeatures(src=(src_ids, src_len),
                              tgt=tgt_ids,
                              indices=ids,
                              batch_size=len(ids),
                              topk_logit=topk_logit,
                              topk_indices=topk_index)
        return batch
Example #12
def paired_shuffle(iterable1, iterable2):
    """paired_shuffle."""
    i1i2 = list(zip(iterable1, iterable2))
    random.shuffle(i1i2)
    i1, i2 = unzip(i1i2)
    return list(i1), list(i2)
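
A quick sanity check on hypothetical data: the pairing is preserved, only the order changes.

names = ['a', 'b', 'c', 'd']
scores = [1, 2, 3, 4]
shuffled_names, shuffled_scores = paired_shuffle(names, scores)
# Re-sorting the shuffled pairs recovers the original (already sorted) pairing.
assert sorted(zip(shuffled_names, shuffled_scores)) == list(zip(names, scores))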
Example #14
 def coll(batch):
     art_batch, abs_batch, extracted = unzip(batch)
     art_sents = list(filter(bool, map(tokenize(None), art_batch)))
     abs_sents = list(filter(bool, map(tokenize(None), abs_batch)))
     extracted = list(filter(bool, extracted))
     return art_sents, abs_sents, extracted
Example #15
 def coll(batch):
     art_batch, abs_batch, i_batch = unzip(batch)
     art_sents = list(filter(bool, map(tokenize(None), art_batch)))
     abs_sents = list(filter(bool, map(tokenize(None), abs_batch)))
     return art_sents, abs_sents, list(i_batch)
Example #16
    def coll(batch):
        split_token = '<split>'
        pad = 0
        art_batch, abs_batch, all_clusters = unzip(batch)
        art_sents = []
        abs_sents = []

        def is_good_data(d):
            """ make sure data is not empty"""
            source_sents, extracts = d
            return source_sents and extracts

        art_batch, abs_batch = list(
            zip(*list(filter(is_good_data, zip(art_batch, abs_batch)))))
        art_sents = list(filter(bool, map(tokenize(None), art_batch)))
        abs_sents = list(filter(bool, map(tokenize(None), abs_batch)))
        inputs = []
        # merge cluster
        for art_sent, clusters in zip(art_sents, all_clusters):
            cluster_words = []
            cluster_wpos = []
            cluster_spos = []
            for cluster in clusters:
                scluster_word = []
                scluster_wpos = []
                scluster_spos = []

                for mention in cluster:
                    if len(mention['text'].strip().split(' ')) == len(
                            list(
                                range(mention['position'][3] + 1,
                                      mention['position'][4] + 1))):
                        scluster_word += mention['text'].lower().strip().split(
                            ' ')
                        scluster_wpos += list(
                            range(mention['position'][3] + 1,
                                  mention['position'][4] + 1))
                        scluster_spos += [
                            mention['position'][0] + 1 for _ in range(
                                len(mention['text'].strip().split(' ')))
                        ]
                        scluster_word.append(split_token)
                        scluster_wpos.append(pad)
                        scluster_spos.append(pad)
                    else:
                        sent_num = mention['position'][0]
                        word_start = mention['position'][3]
                        word_end = mention['position'][4]
                        # if word_end > 99:
                        #     word_end = 99
                        if sent_num > len(art_sent) - 1:
                            print('bad cluster')
                            continue
                        scluster_word += art_sent[sent_num][
                            word_start:word_end]
                        scluster_wpos += list(range(word_start, word_end))
                        scluster_spos += [
                            mention['position'][0] + 1
                            for _ in range(word_start + 1, word_end + 1)
                        ]
                        scluster_word.append(split_token)
                        scluster_wpos.append(pad)
                        scluster_spos.append(pad)
                if scluster_word != []:
                    scluster_word.pop()
                    scluster_wpos.pop()
                    scluster_spos.pop()
                    cluster_words.append(scluster_word)
                    cluster_wpos.append(scluster_wpos)
                    cluster_spos.append(scluster_spos)
                    if len(scluster_word) != len(scluster_wpos):
                        print(scluster_word)
                        print(scluster_wpos)
                        print('cluster:', cluster)
                    if len(scluster_word) != len(scluster_spos):
                        print(scluster_word)
                        print(scluster_spos)
                        print('cluster:', cluster)
                    assert len(scluster_word) == len(scluster_spos) and len(
                        scluster_spos) == len(scluster_wpos)

            new_clusters = (cluster_words, cluster_wpos, cluster_spos)
            inputs.append((art_sent, new_clusters))
        assert len(inputs) == len(abs_sents)
        return inputs, abs_sents
Example #17
 def coll(batch):
     art_batch, topics, abs_batch = unzip(batch)
     #art_batch, topics, abs_batch, topic_label = unzip(batch)
     art_sents = list(filter(bool, map(tokenize(None), art_batch)))
     abs_sents = list(filter(bool, map(tokenize(None), abs_batch)))
     return art_sents, topics, abs_sents
Example #18
 def coll(batch):
     articles, abstract, extracted = unzip(batch)
     articles = list(filter(bool, articles))
     abstract = list(filter(bool, abstract))
     extracted = list(filter(bool, extracted))
     return articles, abstract, extracted
Example #19
def run(*options, cfg=None, debug=False):
    """Run testing of model

    Notes:
        Options can be passed in via the options argument and loaded from the cfg file
        Options from default.py will be overridden by options loaded from cfg file
        Options passed in via options argument will override option loaded from cfg file
    
    Args:
        *options (str,int ,optional): Options used to overide what is loaded from the
                                      config. To see what options are available consult
                                      default.py
        cfg (str, optional): Location of config file to load. Defaults to None.
    """

    update_config(config, options=options, config_file=cfg)

    # Start logging
    load_log_configuration(config.LOG_CONFIG)
    logger = logging.getLogger(__name__)
    logger.debug(config.WORKERS)
    torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK

    torch.manual_seed(config.SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(config.SEED)
    np.random.seed(seed=config.SEED)

    # Setup Augmentations
    test_aug = Compose(
        [
            Normalize(mean=(config.TRAIN.MEAN,), std=(config.TRAIN.STD,), max_pixel_value=config.TRAIN.MAX,),
            PadIfNeeded(
                min_height=config.TRAIN.PATCH_SIZE,
                min_width=config.TRAIN.PATCH_SIZE,
                border_mode=cv2.BORDER_CONSTANT,
                always_apply=True,
                mask_value=mask_value,
                value=0,
            ),
            Resize(
                config.TRAIN.AUGMENTATIONS.RESIZE.HEIGHT, config.TRAIN.AUGMENTATIONS.RESIZE.WIDTH, always_apply=True,
            ),
            PadIfNeeded(
                min_height=config.TRAIN.AUGMENTATIONS.PAD.HEIGHT,
                min_width=config.TRAIN.AUGMENTATIONS.PAD.WIDTH,
                border_mode=cv2.BORDER_CONSTANT,
                always_apply=True,
                mask_value=mask_value,
                value=0,
            ),
        ]
    )

    PenobscotDataset = get_patch_dataset(config)

    test_set = PenobscotDataset(
        config.DATASET.ROOT,
        config.TRAIN.PATCH_SIZE,
        config.TRAIN.STRIDE,
        split="test",
        transforms=test_aug,
        n_channels=config.MODEL.IN_CHANNELS,
        complete_patches_only=config.TEST.COMPLETE_PATCHES_ONLY,
    )

    logger.info(str(test_set))
    n_classes = test_set.n_classes

    test_loader = data.DataLoader(
        test_set, batch_size=config.VALIDATION.BATCH_SIZE_PER_GPU, num_workers=config.WORKERS,
    )

    model = getattr(models, config.MODEL.NAME).get_seg_model(config)
    logger.info(f"Loading model {config.TEST.MODEL_PATH}")
    model.load_state_dict(torch.load(config.TEST.MODEL_PATH), strict=False)

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda"
    model = model.to(device)  # Send to GPU

    try:
        output_dir = generate_path(config.OUTPUT_DIR, git_branch(), git_hash(), config.MODEL.NAME, current_datetime(),)
    except TypeError:
        output_dir = generate_path(config.OUTPUT_DIR, config.MODEL.NAME, current_datetime(),)

    summary_writer = create_summary_writer(log_dir=path.join(output_dir, config.LOG_DIR))

    # weights are inversely proportional to the frequency of the classes in
    # the training set
    class_weights = torch.tensor(config.DATASET.CLASS_WEIGHTS, device=device, requires_grad=False)

    criterion = torch.nn.CrossEntropyLoss(weight=class_weights, ignore_index=mask_value, reduction="mean")

    def _select_pred_and_mask(model_out_dict):
        return (model_out_dict["y_pred"].squeeze(), model_out_dict["mask"].squeeze())

    def _select_all(model_out_dict):
        return (
            model_out_dict["y_pred"].squeeze(),
            model_out_dict["mask"].squeeze(),
            model_out_dict["ids"],
            model_out_dict["patch_locations"],
        )

    inline_mean_iou = InlineMeanIoU(
        config.DATASET.INLINE_HEIGHT,
        config.DATASET.INLINE_WIDTH,
        config.TRAIN.PATCH_SIZE,
        n_classes,
        padding=_padding_from(config),
        scale=_scale_from(config),
        output_transform=_select_all,
    )

    evaluator = create_supervised_evaluator(
        model,
        _prepare_batch,
        metrics={
            "nll": Loss(criterion, output_transform=_select_pred_and_mask, device=device),
            "inIoU": inline_mean_iou,
            "pixa": pixelwise_accuracy(n_classes, output_transform=_select_pred_and_mask, device=device),
            "cacc": class_accuracy(n_classes, output_transform=_select_pred_and_mask, device=device),
            "mca": mean_class_accuracy(n_classes, output_transform=_select_pred_and_mask, device=device),
            "ciou": class_iou(n_classes, output_transform=_select_pred_and_mask, device=device),
            "mIoU": mean_iou(n_classes, output_transform=_select_pred_and_mask, device=device),
        },
        device=device,
    )

    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED,
        logging_handlers.log_metrics(
            "Test results",
            metrics_dict={
                "nll": "Avg loss :",
                "mIoU": "Avg IoU :",
                "pixa": "Pixelwise Accuracy :",
                "mca": "Mean Class Accuracy :",
                "inIoU": "Mean Inline IoU :",
            },
        ),
    )
    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED,
        tensorboard_handlers.log_metrics(
            summary_writer,
            evaluator,
            "epoch",
            metrics_dict={"mIoU": "Test/IoU", "nll": "Test/Loss", "mca": "Test/MCA", "inIoU": "Test/MeanInlineIoU",},
        ),
    )

    def _select_max(pred_tensor):
        return pred_tensor.max(1)[1]

    def _tensor_to_numpy(pred_tensor):
        return pred_tensor.squeeze().cpu().numpy()

    transform_func = compose(
        np_to_tb, decode_segmap(n_classes=n_classes, label_colours=_SEG_COLOURS), _tensor_to_numpy,
    )

    transform_pred = compose(transform_func, _select_max)

    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED, create_image_writer(summary_writer, "Test/Image", "image"),
    )
    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED, create_image_writer(summary_writer, "Test/Mask", "mask", transform_func=transform_func),
    )
    evaluator.add_event_handler(
        Events.EPOCH_COMPLETED,
        create_image_writer(summary_writer, "Test/Pred", "y_pred", transform_func=transform_pred),
    )

    logger.info("Starting training")
    if debug:
        logger.info("Running in Debug/Test mode")
        test_loader = take(3, test_loader)

    evaluator.run(test_loader, max_epochs=1)

    # Log top N and bottom N inlines in terms of IoU to tensorboard
    inline_ious = inline_mean_iou.iou_per_inline()
    sorted_ious = sorted(inline_ious.items(), key=lambda x: x[1], reverse=True)
    topk = ((inline_mean_iou.predictions[key], inline_mean_iou.masks[key]) for key, iou in take(_TOP_K, sorted_ious))
    bottomk = (
        (inline_mean_iou.predictions[key], inline_mean_iou.masks[key]) for key, iou in tail(_BOTTOM_K, sorted_ious)
    )
    stack_and_decode = compose(transform_func, torch.stack)
    predictions, masks = unzip(chain(topk, bottomk))
    predictions_tensor = stack_and_decode(list(predictions))
    masks_tensor = stack_and_decode(list(masks))
    _log_tensor_to_tensorboard(predictions_tensor, "Test/InlinePredictions", summary_writer, evaluator)
    _log_tensor_to_tensorboard(masks_tensor, "Test/InlineMasks", summary_writer, evaluator)

    summary_writer.close()
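
The top-/bottom-K selection feeding `unzip` above can be illustrated on toy inline-to-IoU scores; `take`, `tail` and `unzip` are assumed to be the toolz helpers:

from itertools import chain
from toolz import take, tail
from toolz.sandbox import unzip  # assumption about where unzip lives

ious = {'inline_1': 0.9, 'inline_2': 0.4, 'inline_3': 0.7, 'inline_4': 0.2}
ranked = sorted(ious.items(), key=lambda kv: kv[1], reverse=True)
best = ((key, iou) for key, iou in take(2, ranked))
worst = ((key, iou) for key, iou in tail(2, ranked))
keys, scores = unzip(chain(best, worst))
print(list(keys))  # ['inline_1', 'inline_3', 'inline_2', 'inline_4']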
Example #20
    data_p0_6 = map(lambda n: (n, precision_at(take(n, dataset), 0.6)),
                    range(100,
                          len(dataset) + 1, 100))

    data_p0_7 = map(lambda n: (n, precision_at(take(n, dataset), 0.7)),
                    range(100,
                          len(dataset) + 1, 100))

    data_p0_8 = map(lambda n: (n, precision_at(take(n, dataset), 0.8)),
                    range(100,
                          len(dataset) + 1, 100))

    data_p0_85 = map(lambda n: (n, precision_at(take(n, dataset), 0.85)),
                     range(100,
                           len(dataset) + 1, 100))

    # Data for plotting
    n_p0_6, p_p0_6 = unzip(data_p0_6)
    n_p0_7, p_p0_7 = unzip(data_p0_7)
    n_p0_8, p_p0_8 = unzip(data_p0_8)
    n_p0_85, p_p0_85 = unzip(data_p0_85)

    # Note that using plt.subplots below is equivalent to using
    # fig = plt.figure and then ax = fig.add_subplot(111)
    fig, ax = plt.subplots()
    ax.plot(list(n_p0_6), list(p_p0_6), label='p >= 0.6')
    ax.plot(list(n_p0_7), list(p_p0_7), label='p >= 0.7')
    ax.plot(list(n_p0_8), list(p_p0_8), label='p >= 0.8')
    ax.plot(list(n_p0_85), list(p_p0_85), label='p >= 0.85')

    ax.set(xlabel='population size',
           ylabel='precision',
           title='precision / population size')