Exemplo n.º 1
0
def _remove_non_models(elements):
    """Return the subset of ``elements`` accepted by ``model_resolver``.

    An element is kept when ``model_resolver.lookup`` succeeds for it and
    dropped when that lookup raises ``ValueError``. The surviving elements
    themselves (not the lookup results) are returned in a new set.
    """

    def _is_model(candidate):
        # A ValueError from the resolver marks an invalid model name,
        # i.e. something that is not actually a model.
        try:
            model_resolver.lookup(candidate)
        except ValueError:
            return False
        return True

    return {candidate for candidate in elements if _is_model(candidate)}
Exemplo n.º 2
0
def _remove_non_models(
        elements: Iterable[Union[str, Type[Model]]]) -> Set[Type[Model]]:
    """Resolve ``elements`` via ``model_resolver`` and keep the successes.

    Every element is passed to ``model_resolver.lookup``. Elements for which
    the lookup raises ``KeyError`` are skipped; for all others the value
    returned by the lookup is collected.

    :param elements: model names or model classes to resolve
    :return: a new set holding the lookup results
    """

    def _resolved_models():
        for candidate in elements:
            try:
                found = model_resolver.lookup(candidate)
            except KeyError:
                # Invalid model name, i.e. not actually a model: skip it.
                continue
            yield found

    return set(_resolved_models())
Exemplo n.º 3
0
def run_inverse_stability_workflow(dataset: str,
                                   model: str,
                                   training_loop: str,
                                   random_seed=0,
                                   device="cpu"):
    """Run an inverse stability experiment.

    Loads ``dataset`` with ``create_inverse_triples=True``, trains ``model``
    through ``pipeline`` with early stopping, then scores the testing triples
    with both ``score_hrt`` and ``score_hrt_inverse``.

    The following files are written to ``INVERSE_STABILITY / <dataset name>``
    (the directory is created if missing), where ``<prefix>`` is
    ``<lower-cased model class name>_<training_loop>``:

    - ``<prefix>_scores.tsv``: the returned data frame, tab-separated
    - ``<prefix>_overlay.png``: density histograms of forward and inverse
      scores on shared axes
    - ``<prefix>_residuals.png``: density histogram of forward minus inverse
      scores

    :param dataset: dataset specification passed to ``get_dataset``
    :param model: model specification passed to ``model_resolver.lookup`` and
        to ``pipeline``
    :param training_loop: training loop specification passed to ``pipeline``;
        also used verbatim in file names and plot titles
    :param random_seed: forwarded to ``pipeline``
    :param device: forwarded to ``pipeline``
    :return: a data frame with the columns ``training_loop``, ``dataset``,
        ``model``, ``forward`` and ``inverse``, one row per scored triple
    """
    dataset_instance: Dataset = get_dataset(
        dataset=dataset,
        dataset_kwargs=dict(create_inverse_triples=True, ),
    )
    dataset_name = dataset_instance.get_normalized_name()
    model_cls: Type[Model] = model_resolver.lookup(model)
    model_name = model_cls.__name__.lower()

    dataset_dir = INVERSE_STABILITY / dataset_name
    dataset_dir.mkdir(exist_ok=True, parents=True)

    pipeline_result = pipeline(
        dataset=dataset_instance,
        model=model,
        training_loop=training_loop,
        training_kwargs=dict(
            num_epochs=1000,
            use_tqdm_batch=False,
        ),
        stopper="early",
        stopper_kwargs=dict(patience=5, frequency=5),
        random_seed=random_seed,
        device=device,
    )
    test_tf = dataset_instance.testing
    # Keep the trained model under its own name instead of rebinding the
    # ``model`` argument, which holds the caller's model specification.
    trained_model = pipeline_result.model
    # NOTE(review): the testing triples are passed as-is; if the scoring
    # methods do not move their input to the model's device, a non-CPU
    # ``device`` would additionally require moving them - confirm.

    # Score with original triples. ``.cpu()`` is a no-op for CPU tensors and
    # is required before ``.numpy()`` when training ran on another device.
    scores_forward = trained_model.score_hrt(test_tf.mapped_triples)
    scores_forward_np = scores_forward.detach().cpu().numpy()[:, 0]

    # Score with inverse triples
    scores_inverse = trained_model.score_hrt_inverse(test_tf.mapped_triples)
    scores_inverse_np = scores_inverse.detach().cpu().numpy()[:, 0]

    file_prefix = f"{model_name}_{training_loop}"
    plot_title = f"{dataset_name} - {model_name} - {training_loop}"

    scores_path = dataset_dir / f"{file_prefix}_scores.tsv"
    df = pd.DataFrame(
        list(
            zip(
                itt.repeat(training_loop),
                itt.repeat(dataset_name),
                itt.repeat(model_name),
                scores_forward_np,
                scores_inverse_np,
            )),
        columns=["training_loop", "dataset", "model", "forward", "inverse"],
    )
    df.to_csv(scores_path, sep="\t", index=False)

    # Overlay of both score distributions; use the explicit figure/axes
    # objects rather than pyplot's implicit "current figure" state.
    fig, ax = plt.subplots(1, 1)
    sns.histplot(data=df,
                 x="forward",
                 label="Forward",
                 ax=ax,
                 color="blue",
                 stat="density")
    sns.histplot(data=df,
                 x="inverse",
                 label="Inverse",
                 ax=ax,
                 color="orange",
                 stat="density")
    ax.set_title(plot_title)
    ax.set_xlabel("Score")
    ax.legend()
    fig.savefig(dataset_dir / f"{file_prefix}_overlay.png", dpi=300)
    plt.close(fig)

    # Distribution of the per-triple difference between both scores.
    fig, ax = plt.subplots(1, 1)
    sns.histplot(scores_forward_np - scores_inverse_np, ax=ax, stat="density")
    ax.set_title(plot_title)
    ax.set_xlabel("Forward - Inverse Score Difference")
    fig.savefig(dataset_dir / f"{file_prefix}_residuals.png", dpi=300)
    plt.close(fig)

    return df