Example #1
def makeranks(df, model, entidxs, relidxs):
    tt = TT("Ranker")
    tt.tick("making ranks")
    rankings = []
    for _, row in df.iterrows():
        rankings.append(makerank(row, model, entidxs))
    tt.tock("made ranks")
    return rankings
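All of these examples rely on a small TT timing/progress helper with tick, tock, and progress methods. The class below is only a minimal stand-in, assuming that interface from how the examples call it (including the chained tt.tock(...).tick() in Example #5), so the snippets can run outside the original project.

import sys
import time

class TT(object):
    # Minimal stand-in for the timing helper these examples assume.
    def __init__(self, prefix=""):
        self.prefix = prefix
        self.start = time.time()

    def tick(self, msg=None):
        # restart the timer, optionally announcing what is being timed
        self.start = time.time()
        if msg:
            print("%s: %s" % (self.prefix, msg))
        return self

    def tock(self, msg=""):
        # report the time elapsed since the last tick; chainable
        print("%s: %s in %.3fs" % (self.prefix, msg, time.time() - self.start))
        return self

    def progress(self, n, total):
        # lightweight in-place progress indicator for long loops
        sys.stdout.write("\r%s: %d/%d" % (self.prefix, n + 1, total))
        sys.stdout.flush()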
Example #2
def evaluate(self, model, data, labels):
    tt = TT("Evaluator")
    preds = model.predict(data)
    for n in range(data.shape[0]):  # iterate over all examples
        #tt.progress(n, data.shape[0])
        for metric in self.metrics:
            metric(labels[n], preds[n])
    results = {}
    for metric in self.metrics:
        results[metric.name] = metric()
    tt.tock("computed")
    return results
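The metric objects in this example are called with (gold, prediction) to accumulate one example and with no arguments to report the aggregate, and they expose a name attribute. The actual metric classes are not shown; the sketch below is a hypothetical metric following that assumed convention.

class Accuracy(object):
    # Hypothetical metric: metric(gold, pred) accumulates, metric() reports.
    name = "accuracy"

    def __init__(self):
        self.correct = 0
        self.total = 0

    def __call__(self, gold=None, pred=None):
        if gold is None and pred is None:
            # reporting call: return the aggregated result
            return self.correct / float(self.total) if self.total else 0.0
        # accumulation call: count one (gold, prediction) pair
        self.total += 1
        if gold == pred:
            self.correct += 1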
Example #3
def evaluate(self, model, data, labels):
    tt = TT("Evaluator")
    entidxs = set(data[:, 0]).union(set(labels))
    for n in range(data.shape[0]):
        ranking = self.makerank(data[n], model, entidxs)
        tt.progress(n, data.shape[0])
        for metric in self.metrics:
            metric([labels[n]], ranking)
    results = {}
    for metric in self.metrics:
        results[metric.name] = metric()
    tt.tock("computed")
    return results
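Neither makerank nor self.makerank is shown in these snippets. A plausible reconstruction, assuming the model scores (subject, relation, object) triples through a predict call (the exact predict signature varies between the models in these examples), is to score every candidate object entity for the query and return the entities sorted best-first:

import numpy as np

def makerank(query, model, entidxs):
    # Hypothetical sketch: query holds the (subject, relation) pair, entidxs the
    # candidate object entities.  Score every candidate and rank descending.
    cands = np.array(sorted(entidxs))
    subj = np.full(cands.shape, query[0], dtype=cands.dtype)
    rel = np.full(cands.shape, query[1], dtype=cands.dtype)
    scores = np.asarray(model.predict(subj, rel, cands)).ravel()  # assumed signature
    order = np.argsort(-scores)  # highest score first
    return cands[order].tolist()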
Example #4
def traincompat(entemb, erfile): # entemb: (vocabsize, dim) matrix of entity embeddings
                                 # erfile: path to file containing which entity has which relation
    # params
    negrate = 3
    numbats = 100 # 100
    epochs = 200 #20
    wreg = 0.0000001
    evalinter = 1
    lr = 0.001/numbats # 8

    toy = False

    tt = TT("data")

    if toy:
        dims = 10
        numbats = 10
        wreg = 0.0
        lr = 0.1/numbats
        datafileprefix = "../../data/"
        tensorfile = "toy.ssd"
        vocabsize = 11
        epochs = 100
    else:
        # get the data and split
        datafileprefix = "../../data/nycfilms/triples.flat/"
        fulldic = loaddic(datafileprefix+"compatreldic.flatidx.pkl")
        vocabsize = len(fulldic)

    data = loaddata(datafileprefix+erfile)
    data = data.keys.lok
    trainX = data[:, :1]
    labels = data[:, 1]
    tt.tock("loaded")

    # train model
    model = FixedEntCompat(entembs=entemb, vocabsize=vocabsize, maxiter=epochs, wreg=wreg, numbats=numbats, negrate=negrate)\
                .autosave.normalize \
            + SGD(lr=lr)
    err = model.train(trainX, labels, evalinter=evalinter)
    plt.plot(err, "r")
    plt.show(block=True)
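A possible way to call traincompat, with a random embedding matrix standing in for pretrained entity embeddings; the sizes and the file name below are purely illustrative, not taken from the original code.

import numpy as np

vocabsize, dim = 11000, 50                               # illustrative sizes
entemb = np.random.randn(vocabsize, dim).astype("float32")
traincompat(entemb, "compatrel.train.flat.ssd")          # hypothetical file name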
Example #5
def evaluate(data, model, *metrics):
    tt = TT("Evaluator")
    datadf = pd.DataFrame(data)
    datadf.columns = ["s", "r", "o"]
    datadfgb = datadf.groupby(by=["s", "r"]).apply(lambda x: set(x["o"].unique())).reset_index()
    datadf.describe()
    entidxs = set(datadf["s"].unique()).union(set(datadf["o"].unique()))
    results = {}
    tt.tock("initialized").tick()
    for n, row in datadfgb.iterrows():
        ranking = makerank(row, model, entidxs)
        tt.progress(n, datadfgb.shape[0])
        for metric in metrics:
            metric(row, ranking)
    for metric in metrics:
        results[metric.name] = metric()
    tt.tock("computed")
    return results
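A possible invocation of this standalone evaluate, reusing the RecallAt and MeanQuantile metric classes that appear in Example #6 and loading test triples the same way run() does there; the data handles and file variables are assumptions.

testdata = loaddata(datafileprefix + testtensorfile).keys.lok  # (n, 3) triples, as in run() below
results = evaluate(testdata, model, RecallAt(10), RecallAt(30), MeanQuantile())
print(results)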
Example #6
def run():
    # params
    dims = 100 # 100
    innerdims = dims
    negrate = 10
    numbats = 100 # 100
    epochs = 100
    wreg = 0.0000001
    lr = 0.01/numbats #0.0001 # for SGD
    lr2 = 1.
    evalinter = 1
    rho = 0.95


    ############"
    dims = 20
    innerdims = dims#50
    lr = 0.001/numbats # 8

    toy = False

    threshold = 0.5
    #paths
    datatt = TT("data")

    # get the data and split
    datafileprefix = "../../data/nycfilms/triples.flat/"
    tensorfile = "alltripletensor.train.flat.ssd"
    testtensorfile = "alltripletensor.test.flat.ssd"
    fulldic = loaddic(datafileprefix+"tripletensor.flatidx.pkl")
    vocabsize = len(fulldic)
    numrels = 20

    innerdim2 = 20

    data = loaddata(datafileprefix+tensorfile)
    data = data.keys.lok
    trainX = data[:, :2]
    labels = data[:, -1]
    # labels = data[:, 1:]
    datatt.tock("loaded")

    validsplit = 10 # 50

    # train model
    trainer = Trainer(lambda:
            AddEKMM(numrels=numrels, dim=dims, vocabsize=vocabsize,
                       maxiter=epochs, wreg=wreg, numbats=numbats, negrate=negrate).normalize#.autosave
            + SGD(lr=lr)
            #+ FullEGRU(dim=numrels, innerdim=innerdims, wreg=wreg, nobias=True)# nobias=True,, outpactivation=lambda x: x)
    )
    models, err, verr, _, _, _ = trainer.train(trainX, labels, validinter=evalinter, validsplit=validsplit, validrandom=123, folds=1)

    for model in models:
        evaluation = KBCEvaluator(RecallAt(10), RecallAt(15), RecallAt(30), MeanQuantile())
        d = loaddata(datafileprefix+testtensorfile).keys.lok
        res = evaluation.run(model, d[:, :2], d[:, 2])
        print(res)
        # evaluation.save(res) TODO
        print(model.predict([417], [[11307]], [9145]))
        print(model.predict([417], [[11307]], [9156]))

    plt.plot(err, "r")
    if len(verr) > 0:
        plt.plot(verr, "g")
    plt.show(block=True)
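The ranking-based metrics (RecallAt, MeanQuantile) are not shown either. Under the convention used by the evaluators above, where a metric is fed the gold objects plus a ranking per query and then called with no arguments to report, a RecallAt(k) could look roughly like this; it is a sketch of the assumed interface, not the project's implementation.

class RecallAt(object):
    # Hypothetical recall@k metric: metric(gold, ranking) accumulates one query,
    # metric() reports the average over queries.
    def __init__(self, k):
        self.k = k
        self.name = "recall@%d" % k
        self.hits = 0.0
        self.total = 0

    def __call__(self, gold=None, ranking=None):
        if gold is None and ranking is None:
            # reporting call
            return self.hits / self.total if self.total else 0.0
        # accumulation call: fraction of gold objects found in the top-k
        goldset = set(gold)
        topk = set(ranking[:self.k])
        self.hits += len(goldset & topk) / float(len(goldset))
        self.total += 1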