Пример #1
0
    def __init__(self, filename):
        """Set up the analyzer, model, and searcher over a SQLite store.

        filename is handed unchanged to park.SQLiteStore; the resulting
        store backs the Model, and the Model backs the searcher.
        """
        self.analyzer = analyzer = StandardAnalyzer()
        backing_store = park.SQLiteStore(filename)

        self.model = built_model = Model(analyzer, backing_store)
        self.searcher = RandomWalkSearcher(built_model)
Пример #2
0
    def setUp(self):
        """Open a SQLite store at self.DB and schedule removal of that file."""
        def remove_db_file():
            # Nothing to delete if the file is not there.
            path = self.DB
            if not os.path.exists(path):
                return
            os.unlink(path)

        self.store = park.SQLiteStore(self.DB)
        self.addCleanup(remove_db_file)
Пример #3
0
    def test_conflated_query(self):
        """Check the query terms produced after training on case variants.

        The model is trained on two sentences that differ only in the case
        of their first word, using a whitespace analyzer with a lowercase
        normalizer. The query is expected to list both "this" and "This"
        at position 0, followed by the remaining query terms.
        """
        analyzer = analysis.WhitespaceAnalyzer()
        analyzer.add_token_normalizer(analysis.LowercaseNormalizer())

        trained = model.Model(analyzer, park.SQLiteStore(":memory:"))
        for sentence in (u"This is a test", u"this is a test"):
            trained.train(sentence)

        result = analyzer.query(u"this is a query", trained)

        self.assertListEqual(
            [
                {"term": "this", "pos": 0},
                {"term": "This", "pos": 0},
                {"term": "is", "pos": 1},
                {"term": "a", "pos": 2},
                {"term": "query", "pos": 3},
            ],
            result.terms,
        )
Пример #4
0
    def run(args):
        store = park.SQLiteStore("cobe.store")
        analyzer = analysis.WhitespaceAnalyzer()
        model = Model(analyzer, store)

        print "Tokens:"
        for token, token_id in model.tokens.token_ids.iteritems():
            print token, decode_one(token_id)

        print "Normalized tokens:"
        for key in model._prefix_keys("n"):
            print key

        print "3-gram counts:"
        get_token = model.tokens.get_token
        for ngram, count in model._prefix_items("3", strip_prefix=True):
            # This needs a more efficient way to get the token ids,
            # maybe a simple varint-aware string split.
            grams = [get_token(encode_one(i)) for i in decode(ngram)]
            print grams, decode_one(count)
Пример #5
0
 def __init__(self):
     """Open the "orbitd.db" store and ensure a device UUID is recorded.

     If nothing is stored under "local/device_uuid", a fresh random UUID
     (uuid4, as a string) is written there; an existing value is left
     untouched.
     """
     self.kv = park.SQLiteStore("orbitd.db")
     # Identity comparison: `== None` can be fooled by a custom __eq__.
     if self.kv.get("local/device_uuid") is None:
         self.kv.put("local/device_uuid", str(uuid.uuid4()))
Пример #6
0
 def setUp(self):
     """Create a whitespace analyzer, an in-memory store, and a Model."""
     self.analyzer = analyzer = WhitespaceAnalyzer()
     self.store = memory_store = park.SQLiteStore(":memory:")
     self.model = Model(analyzer, memory_store)