Exemplo n.º 1
0
def main():
    """Load TransE embeddings for FB15k in two ways and print a few lookups.

    One ``Embedding`` is restored from a checkpoint prefix, the other is
    filled from a ``.npy`` matrix. Both are then indexed with the same two
    keys and the results are printed.
    """
    kb = Dataset("../benchmarks/fb15k.nt")

    # Variant 1: restore from a training checkpoint.
    from_checkpoint = Embedding(kb, TransE)
    from_checkpoint.restore(prefix='../checkpoints/TransE/TransE')

    # Variant 2: read the embedding matrix from a numpy file.
    from_matrix = Embedding(kb, TransE)
    from_matrix.load_embeddings_from_npy('../embeddings/TransE/TransE.npy')

    sources = (from_checkpoint, from_matrix)

    # Look up a Freebase-style identifier in both variants.
    for source in sources:
        print(source['/m/02f75t'])
    print()
    # Look up an arbitrary key ('foobar') — presumably one that is not in
    # the dataset, to show how unknown keys behave; verify against Embedding.
    for source in sources:
        print(source['foobar'])
Exemplo n.º 2
0
def compute(model,
            n_batches,
            epochs,
            neg_ent,
            neg_rel,
            bern,
            workers,
            optimizer,
            dims,
            margin,
            eval,
            out,
            json,
            valid_file,
            test_file,
            file_path):
    """Train an embedding model on a knowledge base, export it, optionally evaluate it.

    The function loads the dataset, echoes the run configuration, trains the
    model (checkpoints go to ``./checkpoints/<model>``), writes the embedding
    as ``.npz`` (and as JSON when requested) below ``<out>/<model>/<dataset
    name>`` and, when ``eval`` is truthy, computes metrics from the rank
    predictions and appends them to ``<dataset name>_metrics.csv``.

    Args:
        model: Model name; resolved through ``get_model`` and used in paths
            and file names.
        n_batches: Passed to ``Embedding`` as ``folds``; also used to report
            the resulting batch size.
        epochs: Number of epochs, passed to ``Embedding``. When a metrics file
            already exists it is added to the last recorded ``epochs`` value.
        neg_ent: Passed through to ``Embedding``.
        neg_rel: Passed through to ``Embedding``.
        bern: Passed through to ``Embedding``.
        workers: Passed through to ``Embedding``.
        optimizer: Optimizer name; passed to ``Embedding`` and lower-cased in
            output file names.
        dims: Passed to ``Embedding`` as ``dimension``.
        margin: Passed to ``Embedding`` as ``margin``.
        eval: When truthy, rank predictions and metrics are computed and
            written. (Name shadows the builtin; kept for interface
            compatibility.)
        out: Base output directory.
        json: When truthy, the embedding is additionally saved as JSON. (Name
            shadows the stdlib module; kept for interface compatibility.)
        valid_file: Optional validation file handed to ``Dataset``.
        test_file: Optional test file handed to ``Dataset``.
        file_path: Training file, or a ``.npz`` archive that is loaded with
            ``Dataset.from_npz``.
    """
    file_path = Path(file_path)

    # Pick the Dataset constructor variant that matches the supplied files.
    if file_path.suffix == '.npz':
        dataset = Dataset.from_npz(file_path)
    elif valid_file is not None and test_file is not None:
        dataset = Dataset(train_file=str(file_path), valid_file=valid_file, test_file=test_file, generate_valid_test=True)
    elif valid_file is None and test_file is None and eval:
        dataset = Dataset(train_file=str(file_path), generate_valid_test=True)
    else:
        dataset = Dataset(train_file=str(file_path))

    click.echo("Start training using the following parameters: ")
    click.echo("-----------------------------------------------")
    click.echo(f"Knowledge Base: {file_path}")
    click.echo(f"Batch number: {n_batches} => {int(dataset.size / n_batches)} total batch size")
    click.echo(f"Epochs: {epochs}")
    click.echo(f"Neg_Ent: {neg_ent}")
    click.echo(f"Neg_Rel: {neg_rel}")
    click.echo(f"bern: {bern}")
    click.echo(f"Workers: {workers}")
    click.echo(f"Optimizer: {optimizer}")
    click.echo(f"Dimensionality: {dims}")
    click.echo(f"Margin: {margin}")
    click.echo(f"Output directory: {out}")
    click.echo("-----------------------------------------------")

    embedding = Embedding(
        dataset,
        get_model(model),
        folds=n_batches,
        epochs=epochs,
        neg_ent=neg_ent,
        neg_rel=neg_rel,
        bern=bern,
        workers=workers,
        optimizer=optimizer,
        dimension=dims,  # TransE-specific
        margin=margin,  # TransE-specific
        out_path=out
    )

    checkpoint_path = Path(f'./checkpoints/{model}')
    out_path = Path(f'{out}/{model}/{dataset.name}')

    # exist_ok=True keeps the creation safe if the directory appears between
    # the existence check and the mkdir call.
    if not out_path.exists():
        click.echo(f'Creating output path: {out_path}')
        out_path.mkdir(parents=True, exist_ok=True)

    if not checkpoint_path.exists():
        click.echo(f'Creating checkpoint directory: {checkpoint_path}')
        checkpoint_path.mkdir(parents=True, exist_ok=True)

    # if dataset is not written out, do so
    # if not (out_path / f'{dataset.name}_dataset.npz').exists():
    #    dataset.to_npz(out_path / f'{dataset.name}_dataset.npz')

    # Train the model. It is saved in the process.
    embedding.train(prefix=str(checkpoint_path / dataset.name))

    # Save the embedding to a JSON file
    if json:
        embedding.save_to_json(f'{out_path}/{dataset.name}_{model.lower()}_{optimizer.lower()}_{dims}_embs.json')
    # Save the embedding as numpy (.npz) file
    archive_name = f'{out_path}/{dataset.name}_{model.lower()}_{optimizer.lower()}_{dims}_embs.npz'
    embedding.save_to_npz(archive_name)

    if eval:
        rank_predictions = embedding.get_predictions()
        # rank_predictions.to_csv(f'{out_path}/{dataset.name}_rank_predictions.csv')

        results = calc_metrics(rank_predictions=rank_predictions)
        metrics_file = out_path / f'{dataset.name}_metrics.csv'
        if metrics_file.exists():
            # Continue an earlier run: accumulate the epoch count and append
            # the new metrics to the existing history.
            df = pd.read_csv(str(metrics_file), index_col=0)
            total_epochs = int(df.iloc[-1]['epochs'] + epochs)
            results['epochs'] = total_epochs
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # supported replacement. Non-DataFrame results (Series / dict)
            # are wrapped into a one-row frame first, as append used to do.
            if isinstance(results, pd.DataFrame):
                new_rows = results
            else:
                new_rows = pd.DataFrame([results])
            df = pd.concat([df, new_rows], ignore_index=True)
            df.to_csv(str(metrics_file))
            rank_predictions.to_csv(f'{out_path}/{dataset.name}_rank_predictions_{total_epochs}.csv')
            print(df)
        else:
            # First evaluation for this dataset/model: start a new history.
            results['epochs'] = epochs
            results.to_csv(str(metrics_file))
            rank_predictions.to_csv(f'{out_path}/{dataset.name}_rank_predictions_{epochs}.csv')
            print(results)
Exemplo n.º 3
0
from pyke.dataset import Dataset
from pyke.embedding import Embedding
from pyke.models import DistMult

# Load the FB15k knowledge base used for this example.
dataset = Dataset("./benchmarks/fb15k.nt")

# Keyword arguments forwarded unchanged to Embedding.
distmult_options = {
    "folds": 100,
    "epochs": 20,
    "neg_ent": 1,
    "neg_rel": 0,
    "bern": False,
    "workers": 4,
    "dimension": 50,  # DistMult-specific
    "weight": 0.0001,  # DistMult-specific
    "learning_rate": 0.1,
    "optimizer": "Adagrad",
}
embedding = Embedding(dataset, DistMult, **distmult_options)

# Run training; the model is saved in the process under this prefix.
embedding.train(prefix="./DistMult")

# Export the trained embedding as JSON.
embedding.save_to_json("DistMult.json")
Exemplo n.º 4
0
from pyke.dataset import Dataset
from pyke.embedding import Embedding
from pyke.models import TransR

# Load the FB15k knowledge base used for this example.
dataset = Dataset("../benchmarks/fb15k.nt")

# Keyword arguments forwarded unchanged to Embedding.
transr_options = {
    "folds": 20,
    "epochs": 20,
    "neg_ent": 1,
    "neg_rel": 0,
    "bern": False,
    "workers": 4,
    "ent_dim": 50,  # TransR-specific
    "rel_dim": 10,  # TransR-specific
    "margin": 1.0,  # TransR-specific
}
embedding = Embedding(dataset, TransR, **transr_options)

# Run training; the model is saved in the process under this prefix.
# TODO: Currently not working
embedding.train(prefix="./TransR")

# Export the trained embedding as JSON.
embedding.save_to_json("TransR.json")
Exemplo n.º 5
0
from pyke.dataset import Dataset
from pyke.embedding import Embedding
from pyke.models import TransH

# Load the FB15k knowledge base used for this example.
dataset = Dataset("./benchmarks/fb15k.nt")

# Keyword arguments forwarded unchanged to Embedding.
transh_options = {
    "folds": 20,
    "epochs": 20,
    "neg_ent": 1,
    "neg_rel": 0,
    "bern": False,
    "workers": 4,
    "dimension": 50,  # TransH-specific
    "margin": 1.0,  # TransH-specific
}
embedding = Embedding(dataset, TransH, **transh_options)

# Run training; the model is saved in the process under this prefix.
embedding.train(prefix="./TransH")

# Export the trained embedding as JSON.
embedding.save_to_json("TransH.json")
Exemplo n.º 6
0
 def test_get_ent_embeddings(self):
     """Entity embeddings of a trained TransE model are an ndarray of shape (11, 50)."""
     trainer = Embedding(self.dataset, TransE)
     # Train from scratch so the result does not depend on earlier checkpoints.
     trainer.train("./tests/tmp/TransE", continue_training=False)

     entity_matrix = trainer.get_ent_embeddings()
     expected_shape = (11, 50)

     self.assertIsInstance(entity_matrix, np.ndarray)
     self.assertEqual(entity_matrix.shape, expected_shape)
Exemplo n.º 7
0
 def test_train_transe(self):
     """After five epochs of TransE training the reported loss lies within [0, 4]."""
     trainer = Embedding(self.dataset, TransE, epochs=5)
     # Train from scratch rather than continuing from an earlier checkpoint.
     trainer.train("./tests/tmp/TransE", continue_training=False)

     # Lower bound, then upper bound; the loss is queried once per check.
     self.assertLessEqual(0, trainer.get_loss())
     self.assertGreaterEqual(4, trainer.get_loss())
Exemplo n.º 8
0
 def test_init_transe(self):
     """Smoke test: constructing an Embedding with TransE must not raise."""
     # No assertion needed; the test fails if the constructor throws.
     Embedding(self.dataset, TransE)
Exemplo n.º 9
0
from pyke.dataset import Dataset
from pyke.embedding import Embedding
from pyke.models import HolE

# Read the dataset
dataset = Dataset("./benchmarks/fb15k.nt")
embedding = Embedding(
    dataset,
    HolE,
    folds=20,
    epochs=20,
    neg_ent=1,
    neg_rel=0,
    bern=False,
    workers=4,
    dimension=50,  # HolE-specific
    margin=1.0,  # HolE-specific
)

# Train the model. It is saved in the process.
embedding.train(prefix="./HolE")

# Save the embedding to a JSON file
embedding.save_to_json("HolE.json")
Exemplo n.º 10
0
from pyke.dataset import Dataset
from pyke.embedding import Embedding
from pyke.models import ComplEx

# Read the dataset
dataset = Dataset("./benchmarks/fb15k.nt")
embedding = Embedding(
    dataset,
    ComplEx,
    folds=100,
    epochs=20,
    neg_ent=1,
    neg_rel=0,
    bern=False,
    workers=4,
    dimension=100,  # ComplEx-specific
    weight=0.0001,  # ComplEx-specific
    learning_rate=0.1,
    optimizer="Adagrad",
)

# Train the model. It is saved in the process.
embedding.train(prefix="./ComplEx")

# Save the embedding to a JSON file
embedding.save_to_json("ComplEx.json")
Exemplo n.º 11
0
from pyke.dataset import Dataset
from pyke.embedding import Embedding
from pyke.models import RESCAL

# Read the dataset
dataset = Dataset("./benchmarks/fb15k.nt")
embedding = Embedding(
    dataset,
    RESCAL,
    folds=20,
    epochs=20,
    neg_ent=1,
    neg_rel=0,
    bern=False,
    workers=4,
    dimension=50,  # RESCAL-specific
    margin=1.0,  # RESCAL-specific
)

# Train the model. It is saved in the process.
embedding.train(prefix="./RESCAL")

# Save the embedding to a JSON file
embedding.save_to_json("RESCAL.json")