# java -jar RankLib-2.6.jar -ranker 6 -train sample_judgements_wfeatures.txt -save model.txt
    cmd = "java -jar RankLib-2.6.jar -ranker 6 -train %s -save %s" % (
        judgmentsWithFeaturesFile, modelOutput)
    print("Running %s" % cmd)
    os.system(cmd)
    pass


def saveModel(es, scriptName, modelFname):
    """Store the contents of a model file in Elasticsearch as a script.

    The file at ``modelFname`` is read in full (text mode) and handed to
    ``es.put_script`` with ``lang='ranklib'``, ``id=scriptName`` and a body
    of the form ``{"script": <file contents>}``.

    Args:
        es: Client object exposing a ``put_script`` method.
        scriptName: Identifier the script is stored under.
        modelFname: Path of the model file to upload.

    Returns:
        None. The response of ``put_script`` is not returned.
    """
    with open(modelFname) as source:
        payload = {"script": source.read()}
        # The upload happens while the file is still open, as in the
        # surrounding ``with`` block; it is closed right afterwards.
        es.put_script(id=scriptName, lang='ranklib', body=payload)


if __name__ == "__main__":
    # Script entry point: load the sample judgments, run them through
    # kwDocFeatures, write the judgments-with-features file, train a model
    # from that file and store the resulting model file in Elasticsearch
    # under the script name 'test'.
    from elasticsearch import Elasticsearch
    from judgments import judgmentsFromFile, judgmentsByQid

    esUrl = "http://localhost:9200"
    # Pass the URL explicitly so the address configured above is the one the
    # client actually connects to, rather than an implicit client default.
    es = Elasticsearch(esUrl, timeout=1000)

    # Name each intermediate file once so the step that writes a file and the
    # step that reads it cannot drift apart.
    featuresFname = 'sample_judgements_wfeatures.txt'
    modelFname = 'model.txt'

    judgements = judgmentsByQid(
        judgmentsFromFile(filename='sample_judgements.txt'))
    # NOTE(review): the return value is unused, so this presumably attaches
    # features to the judgments in place -- confirm against kwDocFeatures.
    kwDocFeatures(es, index='tmdb', searchType='movie', judgements=judgements)
    buildFeaturesJudgmentsFile(judgements, filename=featuresFname)
    trainModel(judgmentsWithFeaturesFile=featuresFname,
               modelOutput=modelFname)
    saveModel(es, scriptName='test', modelFname=modelFname)
# Example #2
0
if __name__ == "__main__":
    from elasticsearch import Elasticsearch
    from judgments import judgmentsFromFile, judgmentsByQid, duplicateJudgmentsByWeight
    esUrl = "http://ec2-54-234-184-186.compute-1.amazonaws.com:9616/supersecretsquirrel/"
    es = Elasticsearch(esUrl, timeout=1000)
    # Parse the judgments file
    judgments = judgmentsByQid(judgmentsFromFile(filename='osc_judgments.txt'))
    judgments = duplicateJudgmentsByWeight(judgments)
    trainJudgments, testJudgments = partitionJudgments(judgments,
                                                       testProportion=0.00)
    # Use proposed Elasticsearch queries (1.json.jinja ... N.json.jinja) to generate a training set
    # output as "osc_judgments_wfeatures.txt"
    kwDocFeatures(es, index='o19s', searchType='post', judgements=judgments)
    numFeatures = len(judgments[1][0].features)
    print("Training on %s features" % numFeatures)
    buildFeaturesJudgmentsFile(trainJudgments,
                               filename='osc_judgments_wfeatures_train.txt')
    buildFeaturesJudgmentsFile(testJudgments,
                               filename='osc_judgments_wfeatures_test.txt')
    # Train each ranklib model type
    for modelType in [0, 6, 9]:
        # 0, MART
        # 1, RankNet
        # 2, RankBoost
        # 3, AdaRank
        # 4, coord Ascent
        # 6, LambdaMART
        # 7, ListNET
        # 8, Random Forests
        # 9, Linear Regression
        print("*** Training %s " % modelType)
        trainModel(trainingData='osc_judgments_wfeatures_train.txt',