# java -jar RankLib-2.6.jar -ranker 6 -train sample_judgements_wfeatures.txt -save model.txt cmd = "java -jar RankLib-2.6.jar -ranker 6 -train %s -save %s" % ( judgmentsWithFeaturesFile, modelOutput) print("Running %s" % cmd) os.system(cmd) pass
# NOTE(review): the comment line above is the whitespace-collapsed tail of a
# function whose header lies outside the reviewed span; it is kept exactly as
# found. Only the complete definitions below have had their layout restored.


def saveModel(es, scriptName, modelFname):
    """Save the ranklib model in Elasticsearch.

    Reads the whole file at ``modelFname`` and hands its text to
    ``es.put_script`` with ``lang='ranklib'``, ``id=scriptName`` and a body
    of ``{"script": <file contents>}``.

    Args:
        es: Client object exposing ``put_script(lang=..., id=..., body=...)``.
        scriptName: Identifier the model is stored under.
        modelFname: Path of the model file to upload.

    Returns:
        None. Whatever ``put_script`` returns is discarded.

    Raises:
        IOError/OSError: If ``modelFname`` cannot be opened or read.
    """
    with open(modelFname) as modelFile:
        modelContent = modelFile.read()
    # The file is closed before the upload; only its text is needed from here.
    es.put_script(lang='ranklib', id=scriptName,
                  body={"script": modelContent})


if __name__ == "__main__":
    from elasticsearch import Elasticsearch
    from judgments import judgmentsFromFile, judgmentsByQid

    # Each intermediate file is written by one step and read by the next, so
    # name it once instead of repeating the literal.
    judgmentsWithFeaturesFile = 'sample_judgements_wfeatures.txt'
    modelFile = 'model.txt'

    esUrl = "http://localhost:9200"
    # Pass the URL explicitly; previously esUrl was assigned but never used.
    es = Elasticsearch(esUrl, timeout=1000)

    judgements = judgmentsByQid(
        judgmentsFromFile(filename='sample_judgements.txt'))
    # NOTE(review): presumably attaches feature values to each judgment in
    # place (its return value is not used here) -- confirm against its
    # definition.
    kwDocFeatures(es, index='tmdb', searchType='movie', judgements=judgements)
    buildFeaturesJudgmentsFile(judgements, filename=judgmentsWithFeaturesFile)
    trainModel(judgmentsWithFeaturesFile=judgmentsWithFeaturesFile,
               modelOutput=modelFile)
    saveModel(es, scriptName='test', modelFname=modelFile)
return (trainJudgments, testJudgments) if __name__ == "__main__": from elasticsearch import Elasticsearch from judgments import judgmentsFromFile, judgmentsByQid, duplicateJudgmentsByWeight esUrl = "http://ec2-54-234-184-186.compute-1.amazonaws.com:9616/supersecretsquirrel/" es = Elasticsearch(esUrl, timeout=1000) # Parse a judgments judgments = judgmentsByQid(judgmentsFromFile(filename='osc_judgments.txt')) judgments = duplicateJudgmentsByWeight(judgments) trainJudgments, testJudgments = partitionJudgments(judgments, testProportion=0.00) # Use proposed Elasticsearch queries (1.json.jinja ... N.json.jinja) to generate a training set # output as "osc_judgments_wfeatures.txt" kwDocFeatures(es, index='o19s', searchType='post', judgements=judgments) numFeatures = len(judgments[1][0].features) print("Training on %s features" % numFeatures) buildFeaturesJudgmentsFile(trainJudgments, filename='osc_judgments_wfeatures_train.txt') buildFeaturesJudgmentsFile(testJudgments, filename='osc_judgments_wfeatures_test.txt') # Train each ranklib model type for modelType in [0, 6, 9]: # 0, MART # 1, RankNet # 2, RankBoost # 3, AdaRank # 4, coord Ascent # 6, LambdaMART # 7, ListNET