Пример #1
0
 def test_gensim_lsi(self):
     # Build the corpus from the fixture documents, then fit an LSI model.
     fixture_corpus = DefaultJsonCorpus(self.get_docs())
     lsi_model = DocumentSimilarity(model_type="gensim_lsi")
     lsi_model.fit(fixture_corpus)
     # The fitted model must score exactly 1.0 at k=1.
     # NOTE(review): what score() measures is defined in seldon.text, not
     # here -- confirm before reading more into this assertion.
     self.assertEqual(lsi_model.score(k=1), 1.0)
Пример #2
0
 def test_gensim_rp(self):
     # Build the corpus from the fixture documents, then fit the
     # "gensim_rp" model variant.
     fixture_corpus = DefaultJsonCorpus(self.get_docs())
     rp_model = DocumentSimilarity(model_type="gensim_rp")
     rp_model.fit(fixture_corpus)
     # Query with 0 and k=1; the first field of the top result must be 1.
     # NOTE(review): presumably nn() yields (document id, score) pairs for
     # the neighbours of document 0 -- verify against seldon.text.
     top_hit = rp_model.nn(0, k=1)[0]
     self.assertEqual(top_hit[0], 1)
Пример #3
0
# Build a gensim LSI document-similarity model from the Reuters-21578 JSON
# files and save it as a Seldon recommender.
import json
import logging
import os

import seldon
from seldon.text import DocumentSimilarity, DefaultJsonCorpus

DATA_DIR = "reuters-21578-json/data/full"

# Start from an empty list so the script does not rely on `docs` having been
# created somewhere else.
docs = []
for filename in os.listdir(DATA_DIR):
    # The context manager closes each file as soon as it has been parsed,
    # instead of leaving one open handle per input file.
    with open(os.path.join(DATA_DIR, filename)) as f:
        js = json.load(f)
    for j in js:
        # Only entries that carry both 'topics' and 'body' are kept.
        if 'topics' not in j or 'body' not in j:
            continue
        docs.append({
            "id": j['id'],
            "text": j['body'].replace("\n", ""),
            "title": j['title'],
            "tags": ",".join(j['topics']),
        })

# The Python 2 print statement separates its items with a single space;
# joining on " " reproduces that exact output while also being valid
# Python 3 syntax.
print(" ".join(["loaded ", str(len(docs)), " documents"]))

# Lower the root logger threshold to INFO.
# NOTE(review): no handler is attached here, so whether INFO records are
# actually displayed depends on configuration done elsewhere -- confirm.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

corpus = DefaultJsonCorpus(docs)
ds = DocumentSimilarity(model_type='gensim_lsi')
ds.fit(corpus)
print("built model")

rw = seldon.Recommender_wrapper()
rw.save_recommender(ds, "reuters_recommender")
print("saved recommender")


Пример #4
0
 def test_sklearn_nmf(self):
     # Build the corpus from the fixture documents, then fit the
     # "sklearn_nmf" model variant.
     fixture_corpus = DefaultJsonCorpus(self.get_docs())
     nmf_model = DocumentSimilarity(model_type="sklearn_nmf")
     nmf_model.fit(fixture_corpus)
     # Query with 0 and k=1; the first field of the top result must be 1.
     # NOTE(review): presumably nn() yields (document id, score) pairs for
     # the neighbours of document 0 -- verify against seldon.text.
     top_hit = nmf_model.nn(0, k=1)[0]
     self.assertEqual(top_hit[0], 1)
Пример #5
0
# Build a gensim LSI document-similarity model from the Reuters-21578 JSON
# files and save it as a Seldon recommender.
import json
import logging
import os

import seldon
from seldon.text import DocumentSimilarity, DefaultJsonCorpus

DATA_DIR = "reuters-21578-json/data/full"

docs = []
for filename in os.listdir(DATA_DIR):
    # The context manager closes each file as soon as it has been parsed,
    # instead of leaving one open handle per input file.
    with open(os.path.join(DATA_DIR, filename)) as f:
        js = json.load(f)
    for j in js:
        # Only entries that carry both 'topics' and 'body' are kept.
        if 'topics' not in j or 'body' not in j:
            continue
        docs.append({
            "id": j['id'],
            "text": j['body'].replace("\n", ""),
            "title": j['title'],
            "tags": ",".join(j['topics']),
        })

# The Python 2 print statement separates its items with a single space;
# joining on " " reproduces that exact output while also being valid
# Python 3 syntax.
print(" ".join(["loaded ", str(len(docs)), " documents"]))

# Lower the root logger threshold to INFO.
# NOTE(review): no handler is attached here, so whether INFO records are
# actually displayed depends on configuration done elsewhere -- confirm.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

corpus = DefaultJsonCorpus(docs)
ds = DocumentSimilarity(model_type='gensim_lsi')
ds.fit(corpus)
print("built model")

rw = seldon.Recommender_wrapper()
rw.save_recommender(ds, "reuters_recommender")
print("saved recommender")