示例#1
0
 def test_extract(self):
     """Run the bundled ``lipsum.pdf`` through ``extract`` and check the reply.

     Asserts a zero status, that the extracted text contains
     ``'Lorem ipsum'`` and that the ``producer`` metadata matches.
     """
     dsn = os.environ.get("SOLR_URL", "http://localhost:8983/solr")
     si = SolrInterface(dsn)
     pdf = os.path.join(os.path.dirname(__file__), "data", "lipsum.pdf")
     with open(pdf, 'rb') as f:
         data = si.extract(f)
     self.assertEqual(0, data.status)
     # assertIn reports both operands on failure, unlike assertTrue(a in b).
     self.assertIn('Lorem ipsum', data.text)
     self.assertEqual(['pdfTeX-1.40.13'], data.metadata['producer'])
示例#2
0
 def test_extract(self):
     """Send ``data/lipsum.pdf`` to ``extract`` and verify status, text and metadata."""
     dsn = os.environ.get("SOLR_URL", "http://localhost:8983/solr")
     si = SolrInterface(dsn)
     pdf = os.path.join(os.path.dirname(__file__), "data", "lipsum.pdf")
     # The file is opened in binary mode and handed over as a file object.
     with open(pdf, 'rb') as f:
         data = si.extract(f)
     self.assertEqual(0, data.status)
     # Membership assertion gives a readable failure message with both values.
     self.assertIn('Lorem ipsum', data.text)
     self.assertEqual(['pdfTeX-1.40.13'], data.metadata['producer'])
示例#3
0
 def test_debug(self):
     """Index one document and check that ``debug()`` toggles ``explain``.

     The document is queried back by author with debugging on, every field
     that was sent is compared with the returned document, and a second
     query without ``debug()`` must not carry ``explain``.
     """
     dsn = os.environ.get("SOLR_URL", "http://localhost:8983/solr")
     si = SolrInterface(dsn)
     docs = {
         "id": "978-0641723445",
         "cat": ["book", "hardcover"],
         "name": u"The Höhlentripp Strauß",
         "author": u"Röüß Itoa",
         "series_t": u"Percy Jackson and \N{UMBRELLA}nicode",
         "sequence_i": 1,
         "genre_s": "fantasy",
         "inStock": True,
         "price": 12.50,
         "pages_i": 384
     }
     si.add(docs)
     si.commit()
     res = si.query(author=u"Röüß").debug().execute()
     self.assertEqual(res.result.numFound, 1)
     for k, v in docs.items():
         self.assertEqual(res.result.docs[0][k], v)
     # assertIn/assertNotIn show the container contents when they fail.
     self.assertIn('explain', res.debug)
     # deactivate
     res = si.query(author=u"Röüß").execute()
     self.assertNotIn('explain', res.debug)
示例#4
0
 def test_edismax_query(self):
     """Run a match-all query with two filters through the ``edismax`` parser."""
     solr = SolrInterface(
         os.environ.get("SOLR_URL", "http://localhost:8983/solr"))
     solr.add(self.docs)
     solr.commit()
     match_all = solr.Q(**{"*": "*"})
     search = solr.query(match_all).filter(cat="hardcover")
     search = search.filter(genre_s="fantasy").alt_parser('edismax')
     response = search.execute()
     self.assertEqual(response.result.numFound, 1)
     names = [doc['name'] for doc in response.result.docs]
     self.assertEqual(names, [u'The Lightning Thief'])
示例#5
0
    def test_highlighting(self):
        """Query by author with highlighting on and check both places it shows up."""
        solr = SolrInterface(
            os.environ.get("SOLR_URL", 'http://localhost:8983/solr'))
        book = {
            "id": "978-0641723445",
            "cat": ["book", "hardcover"],
            "name": u"The Höhlentripp Strauß",
            "author": u"Röüß Itoa",
            "series_t": u"Percy Jackson and \N{UMBRELLA}nicode",
            "sequence_i": 1,
            "genre_s": "fantasy",
            "inStock": True,
            "price": 12.50,
            "pages_i": 384,
        }
        solr.add(book)
        solr.commit()
        response = solr.query(author=u"Röüß").highlight('author').execute()
        expected = u'<em>Röüß</em> Itoa'

        # Response-level highlighting, looked up by document id.
        by_id = response.highlighting['978-0641723445']
        self.assertEqual(by_id['author'][0], expected)

        # Highlighting attached to the individual result document.
        first_doc = response.result.docs[0]
        self.assertEqual(first_doc['solr_highlights']['author'][0], expected)
示例#6
0
 def test_debug(self):
     """Check that ``explain`` is present only when ``debug()`` is requested.

     A single document is added and committed, fetched by author with
     ``debug()``, compared field by field against what was sent, and then
     fetched again without ``debug()``.
     """
     dsn = os.environ.get("SOLR_URL",
                          "http://localhost:8983/solr")
     si = SolrInterface(dsn)
     docs = {
         "id": "978-0641723445",
         "cat": ["book", "hardcover"],
         "name": u"The Höhlentripp Strauß",
         "author": u"Röüß Itoa",
         "series_t": u"Percy Jackson and \N{UMBRELLA}nicode",
         "sequence_i": 1,
         "genre_s": "fantasy",
         "inStock": True,
         "price": 12.50,
         "pages_i": 384
         }
     si.add(docs)
     si.commit()
     res = si.query(author=u"Röüß").debug().execute()
     self.assertEqual(res.result.numFound, 1)
     for k, v in docs.items():
         self.assertEqual(res.result.docs[0][k], v)
     # Membership assertions print the inspected container on failure.
     self.assertIn('explain', res.debug)
     # deactivate
     res = si.query(author=u"Röüß").execute()
     self.assertNotIn('explain', res.debug)
示例#7
0
    def test_get(self):
        """Exercise ``get`` before adding, after ``add``, with several ids and with ``fields``."""
        solr = SolrInterface(
            os.environ.get("SOLR_URL", "http://localhost:8983/solr"))
        target_id = "978-1423103349"

        # Before anything is added in this test the lookup is expected to be empty.
        found = solr.get(target_id)
        self.assertEqual(len(found), 0)

        # This lookup runs after add() but before any commit().
        solr.add(self.docs)
        found = solr.get(target_id)
        self.assertEqual(len(found), 1)
        self.assertEqual(found[0]["name"], "The Sea of Monsters")

        # Three ids are requested; two documents are expected back.
        found = solr.get(["978-0641723445", target_id, "nonexist"])
        self.assertEqual(len(found), 2)
        names = [doc["name"] for doc in found]
        self.assertEqual(names,
                         [u"The Lightning Thief", u"The Sea of Monsters"])

        solr.commit()
        found = solr.get(ids=target_id, fields=["author"])
        self.assertEqual(len(found), 1)
        returned_fields = list(found[0].keys())
        self.assertEqual(returned_fields, ["author"])
示例#8
0
 def test_facet_query(self):
     """Facet a ``genre_s`` query by ``cat`` and compare the complete facet counts."""
     solr = SolrInterface(
         os.environ.get("SOLR_URL", "http://localhost:8983/solr"))
     add_responses = solr.add(self.docs)
     self.assertEqual(add_responses[0].status, 0)
     solr.commit()
     response = solr.query(genre_s="fantasy").facet_by("cat").execute()
     self.assertEqual(response.result.numFound, 3)

     expected_names = [
         u'The Lightning Thief',
         u'The Sea of Monsters',
         u"Sophie's World : The Greek Philosophers",
     ]
     names = [doc['name'] for doc in response.result.docs]
     self.assertEqual(names, expected_names)

     expected_facets = {
         'facet_fields': {
             u'cat': [(u'book', 3), (u'paperback', 2), (u'hardcover', 1)],
         },
         'facet_dates': {},
         'facet_queries': {},
         'facet_ranges': {},
         'facet_pivot': (),
     }
     self.assertEqual(response.facet_counts.__dict__, expected_facets)
示例#9
0
    def test_highlighting(self):
        """Index one book, highlight ``author`` and check the marked-up value twice.

        The highlighted string is read once from ``res.highlighting`` and once
        from the ``solr_highlights`` entry of the first result document.
        """
        url = os.environ.get("SOLR_URL", 'http://localhost:8983/solr')
        interface = SolrInterface(url)
        doc_id = "978-0641723445"
        document = {
            "id": doc_id,
            "cat": ["book", "hardcover"],
            "name": u"The Höhlentripp Strauß",
            "author": u"Röüß Itoa",
            "series_t": u"Percy Jackson and \N{UMBRELLA}nicode",
            "sequence_i": 1,
            "genre_s": "fantasy",
            "inStock": True,
            "price": 12.50,
            "pages_i": 384,
        }
        interface.add(document)
        interface.commit()
        search = interface.query(author=u"Röüß").highlight('author')
        result = search.execute()
        wanted = u'<em>Röüß</em> Itoa'

        # Does the highlighting attribute work?
        from_response = result.highlighting[doc_id]['author'][0]
        self.assertEqual(from_response, wanted)

        # Does each item have highlighting attributes?
        from_item = result.result.docs[0]['solr_highlights']['author'][0]
        self.assertEqual(from_item, wanted)
示例#10
0
 def test_filter_query(self):
     """Apply ``cat`` and ``genre_s`` filters to a match-all query and expect one hit."""
     solr = SolrInterface(
         os.environ.get("SOLR_URL", "http://localhost:8983/solr"))
     solr.add(self.docs)
     solr.commit()
     match_all = solr.Q(**{"*": "*"})
     filtered = solr.query(match_all).filter(cat="hardcover")
     filtered = filtered.filter(genre_s="fantasy")
     response = filtered.execute()
     self.assertEqual(response.result.numFound, 1)
     names = [doc["name"] for doc in response.result.docs]
     self.assertEqual(names, ["The Lightning Thief"])
示例#11
0
    def test_get(self):
        """Fetch documents by id through ``get`` at several points in the test.

        Covers a lookup before ``add``, a lookup after ``add``, a list of ids
        and a lookup restricted with ``fields`` after ``commit``.
        """
        url = os.environ.get("SOLR_URL", "http://localhost:8983/solr")
        interface = SolrInterface(url)
        monsters_id = "978-1423103349"
        thief_id = "978-0641723445"

        docs = interface.get(monsters_id)
        self.assertEqual(len(docs), 0)

        interface.add(self.docs)
        docs = interface.get(monsters_id)
        self.assertEqual(len(docs), 1)
        self.assertEqual(docs[0]["name"], "The Sea of Monsters")

        docs = interface.get([thief_id, monsters_id, "nonexist"])
        self.assertEqual(len(docs), 2)
        expected_names = ["The Lightning Thief", "The Sea of Monsters"]
        self.assertEqual([doc["name"] for doc in docs], expected_names)

        interface.commit()
        docs = interface.get(ids=monsters_id, fields=["author"])
        self.assertEqual(len(docs), 1)
        # Only the requested field is expected on the returned document.
        self.assertEqual(list(docs[0].keys()), ["author"])
示例#12
0
 def test_mlt(self):
     """Run a more-like-this query on ``genre_s`` seeded by one book id."""
     solr = SolrInterface(
         os.environ.get("SOLR_URL", "http://localhost:8983/solr"))
     solr.add(self.docs)
     solr.commit()
     mlt = solr.mlt_query(
         "genre_s", interestingTerms="details", mintf=1, mindf=1)
     response = mlt.query(id="978-0641723445").execute()
     self.assertEqual(response.result.numFound, 2)
     self.assertEqual(response.interesting_terms, ["genre_s:fantasy", 1.0])
     authors = [doc["author"] for doc in response.result.docs]
     self.assertEqual(authors, ["Rick Riordan", "Jostein Gaarder"])
示例#13
0
 def test_multi_value_dates(self):
     """Index a document whose ``important_dts`` holds two dates and query it back.

     There are no assertions; the test passes as long as nothing raises.
     """
     solr = SolrInterface(
         os.environ.get("SOLR_URL", "http://localhost:8983/solr"))
     dated_doc = {
         "id": "978",
         "important_dts": ["1969-01-01", "1969-01-02"],
     }
     solr.add(dated_doc)
     solr.commit()
     # The response itself is not inspected.
     solr.query(id=u"978").execute()
示例#14
0
 def test_mlt(self):
     """Check hit count, interesting terms and authors of an ``mlt_query`` on ``genre_s``."""
     url = os.environ.get("SOLR_URL", "http://localhost:8983/solr")
     interface = SolrInterface(url)
     interface.add(self.docs)
     interface.commit()
     mlt_options = {
         "interestingTerms": "details",
         "mintf": 1,
         "mindf": 1,
     }
     search = interface.mlt_query("genre_s", **mlt_options)
     result = search.query(id="978-0641723445").execute()
     self.assertEqual(result.result.numFound, 2)
     self.assertEqual(result.interesting_terms, [u'genre_s:fantasy', 1.0])
     found_authors = [doc['author'] for doc in result.result.docs]
     self.assertEqual(found_authors, [u'Rick Riordan', u'Jostein Gaarder'])
示例#15
0
 def test_mlt_component_query(self):
     """Attach ``mlt`` to an id query and compare main hits with ``more_like_these``."""
     solr = SolrInterface(
         os.environ.get("SOLR_URL", "http://localhost:8983/solr"))
     solr.add(self.docs)
     solr.commit()
     seed_id = "978-0641723445"
     search = solr.query(id=seed_id).mlt("genre_s", mintf=1, mindf=1)
     response = search.execute()
     # query shows only one
     self.assertEqual(response.result.numFound, 1)
     # but in more like this we get two
     self.assertEqual(len(response.more_like_these[seed_id].docs), 2)
     authors = [doc['author']
                for doc in response.more_like_these[seed_id].docs]
     self.assertEqual(authors, [u'Rick Riordan', u'Jostein Gaarder'])
示例#16
0
 def test_count(self):
     """Compare ``count()`` of a plain query with the same query grouped by ``genre_s``."""
     solr = SolrInterface(
         os.environ.get("SOLR_URL", "http://localhost:8983/solr"))
     # Two documents that share one genre.
     fantasy_docs = [
         {"id": "1", "genre_s": "fantasy"},
         {"id": "2", "genre_s": "fantasy"},
     ]
     solr.add(fantasy_docs)
     solr.commit()

     plain_total = solr.query(genre_s="fantasy").count()
     self.assertEqual(plain_total, 2)

     grouped_total = solr.query(genre_s="fantasy").group_by("genre_s").count()
     self.assertEqual(grouped_total, 1)
示例#17
0
 def test_mlt_component_query(self):
     """Query one id with the ``mlt`` component enabled on ``genre_s``.

     The main result is expected to hold one document while the
     ``more_like_these`` entry for that id holds two.
     """
     url = os.environ.get("SOLR_URL", "http://localhost:8983/solr")
     interface = SolrInterface(url)
     interface.add(self.docs)
     interface.commit()
     book_id = "978-0641723445"
     result = interface.query(id=book_id).mlt("genre_s", mintf=1, mindf=1).execute()
     # Main query: a single hit.
     self.assertEqual(result.result.numFound, 1)
     # More-like-this section: two documents.
     self.assertEqual(len(result.more_like_these[book_id].docs), 2)
     similar_authors = [doc["author"] for doc in result.more_like_these[book_id].docs]
     self.assertEqual(similar_authors, ["Rick Riordan", "Jostein Gaarder"])
示例#18
0
 def test_multi_value_dates(self):
     """Add, commit and query a document carrying a list of two date strings.

     Nothing is asserted; the calls only have to complete without raising.
     """
     url = os.environ.get("SOLR_URL", "http://localhost:8983/solr")
     interface = SolrInterface(url)
     dates = ["1969-01-01", "1969-01-02"]
     document = {"id": "978", "important_dts": dates}
     interface.add(document)
     interface.commit()
     # Result deliberately unused.
     interface.query(id=u"978").execute()
示例#19
0
    def test_cursor(self):
        """Iterate results with ``cursor()`` and count the searches it issues.

        ``interface.search`` is wrapped with a counting proxy; three cursors
        are then consumed (``rows=1``, ``rows=2`` with a constructor, and a
        query expected to return nothing) and the call count is checked.
        """
        solr = SolrInterface(
            os.environ.get("SOLR_URL", "http://localhost:8983/solr"))
        solr.add(self.docs)
        solr.commit()
        cursor = solr.query(genre_s="fantasy").sort_by('id').cursor(rows=1)

        # One-element list so the nested function can bump the counter.
        hits = [0]
        real_search = cursor.search.interface.search

        def counting_search(*args, **kwargs):
            hits[0] += 1
            return real_search(*args, **kwargs)

        cursor.search.interface.search = counting_search

        list(cursor)
        self.assertEqual(hits[0], 4)  # 3 + 1 to realize we are done

        hits[0] = 0
        cursor = (solr.query(genre_s="fantasy").sort_by('id')
                  .cursor(constructor=Book, rows=2))
        # test constructor
        titles = [book.title for book in cursor]
        self.assertEqual(titles, [
            u'The Lightning Thief',
            u'The Sea of Monsters',
            u"Sophie's World : The Greek Philosophers",
        ])
        self.assertEqual(hits[0], 3)

        # empty results
        hits[0] = 0
        cursor = (solr.query(genre_s="nonexist").sort_by('id')
                  .cursor(constructor=Book))
        self.assertEqual(list(cursor), [])
        self.assertEqual(hits[0], 1)
示例#20
0
 def test_count(self):
     """Check ``count()`` with and without ``group_by`` on two same-genre documents."""
     url = os.environ.get("SOLR_URL", "http://localhost:8983/solr")
     interface = SolrInterface(url)
     documents = [{"id": doc_id, "genre_s": "fantasy"} for doc_id in ("1", "2")]
     interface.add(documents)
     interface.commit()

     expected_ungrouped = 2
     self.assertEqual(interface.query(genre_s="fantasy").count(),
                      expected_ungrouped)

     expected_grouped = 1
     grouped_query = interface.query(genre_s="fantasy").group_by("genre_s")
     self.assertEqual(grouped_query.count(), expected_grouped)
示例#21
0
    def test_cursor(self):
        """Consume three cursors and verify how many searches each one triggers."""
        url = os.environ.get("SOLR_URL", "http://localhost:8983/solr")
        interface = SolrInterface(url)
        interface.add(self.docs)
        interface.commit()
        fantasy_by_id = interface.query(genre_s="fantasy").sort_by('id')
        cursor = fantasy_by_id.cursor(rows=1)

        # Count how often we hit solr
        calls = [0]
        original_search = cursor.search.interface.search

        def counting_proxy(*args, **kwargs):
            calls[0] = calls[0] + 1
            return original_search(*args, **kwargs)

        cursor.search.interface.search = counting_proxy

        list(cursor)
        self.assertEqual(calls[0], 4)  # 3 + 1 to realize we are done

        # Reset the counter in place; the proxy keeps using the same list.
        calls[0] = 0
        cursor = interface.query(genre_s="fantasy").sort_by('id').cursor(
            constructor=Book, rows=2)
        # test constructor
        expected_titles = [
            u'The Lightning Thief',
            u'The Sea of Monsters',
            u"Sophie's World : The Greek Philosophers",
        ]
        self.assertEqual([item.title for item in cursor], expected_titles)
        self.assertEqual(calls[0], 3)

        # empty results
        calls[0] = 0
        cursor = interface.query(genre_s="nonexist").sort_by('id').cursor(
            constructor=Book)
        self.assertEqual(list(cursor), [])
        self.assertEqual(calls[0], 1)
示例#22
0
 def test_facet_query(self):
     """Query ``genre_s`` with ``facet_by("cat")`` and check names and facet counts."""
     solr = SolrInterface(
         os.environ.get("SOLR_URL", "http://localhost:8983/solr"))
     solr.add(self.docs)
     solr.commit()
     response = solr.query(genre_s="fantasy").facet_by("cat").execute()
     self.assertEqual(response.result.numFound, 3)

     names = [doc["name"] for doc in response.result.docs]
     self.assertEqual(names, [
         "The Lightning Thief",
         "The Sea of Monsters",
         "Sophie's World : The Greek Philosophers",
     ])

     cat_counts = [("book", 3), ("paperback", 2), ("hardcover", 1)]
     expected_facets = {
         "facet_fields": {"cat": cat_counts},
         "facet_dates": {},
         "facet_queries": {},
         "facet_ranges": {},
         "facet_pivot": (),
     }
     self.assertEqual(response.facet_counts.__dict__, expected_facets)
示例#23
0
 def test_facet_query(self):
     """Add ``self.docs``, facet a ``genre_s`` query by ``cat`` and verify the outcome.

     Checks the status of the first add response, the hit count, the
     returned names and the whole ``facet_counts`` attribute dictionary.
     """
     url = os.environ.get("SOLR_URL", "http://localhost:8983/solr")
     interface = SolrInterface(url)
     added = interface.add(self.docs)
     self.assertEqual(added[0].status, 0)
     interface.commit()
     result = interface.query(genre_s="fantasy").facet_by("cat").execute()
     self.assertEqual(result.result.numFound, 3)

     returned_names = [doc['name'] for doc in result.result.docs]
     self.assertEqual(returned_names, [
         u'The Lightning Thief',
         u'The Sea of Monsters',
         u"Sophie's World : The Greek Philosophers",
     ])

     cat_facet = [(u'book', 3), (u'paperback', 2), (u'hardcover', 1)]
     self.assertEqual(result.facet_counts.__dict__, {
         'facet_fields': {u'cat': cat_facet},
         'facet_dates': {},
         'facet_queries': {},
         'facet_ranges': {},
         'facet_pivot': (),
     })
示例#24
0
 def test_spellcheck(self):
     """Check the options produced by a query with ``spellcheck()`` enabled."""
     solr = SolrInterface(
         os.environ.get("SOLR_URL", "http://localhost:8983/solr"))
     search = solr.query(name=u"Monstes").spellcheck()
     built_options = search.options()
     expected = {u'q': u'name:Monstes', u'spellcheck': True}
     self.assertEqual(expected, built_options)
示例#25
0
 def test_query(self):
     """Query by genre, delete the first hit, and re-query with a constructor."""
     solr = SolrInterface(
         os.environ.get("SOLR_URL", "http://localhost:8983/solr"))
     solr.add(self.docs)
     solr.commit()
     response = solr.query(genre_s="fantasy").execute()
     self.assertEqual(response.result.numFound, 3)

     # delete
     first_id = response.result.docs[0]['id']
     deletion = solr.delete_by_ids(first_id)
     self.assertEqual(deletion.status, 0)
     # One more query is sent before the commit; its result is not inspected.
     solr.query(genre_s="fantasy").execute()
     solr.commit()
     response = solr.query(genre_s="fantasy").execute()
     self.assertEqual(response.result.numFound, 2)

     # test constructor
     as_books = solr.query(genre_s="fantasy").execute(constructor=Book)
     titles = [book.title for book in as_books.result.docs]
     self.assertEqual(titles, [
         u'The Sea of Monsters',
         u"Sophie's World : The Greek Philosophers",
     ])
示例#26
0
 def tearDown(self):
     """Call ``delete_all`` and ``commit`` on a fresh interface after each test."""
     solr = SolrInterface(
         os.environ.get("SOLR_URL", "http://localhost:8983/solr"))
     solr.delete_all()
     solr.commit()
示例#27
0
 def test_spellcheck(self):
     """Build a ``name`` query with ``spellcheck()`` and compare its ``options()``."""
     url = os.environ.get("SOLR_URL", "http://localhost:8983/solr")
     interface = SolrInterface(url)
     query = interface.query(name=u"Monstes")
     options = query.spellcheck().options()
     self.assertEqual({u'q': u'name:Monstes', u'spellcheck': True}, options)
示例#28
0
               passes=10)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print('model trained')
# Show the five strongest words of every topic.
for topics in ldamodel.show_topics(num_topics=num_topics, num_words=5):
    print(topics)
lda_model_path = 'lda_model/model'
ldamodel.save(lda_model_path)
# Sample tweet pushed through the same clean -> bag-of-words -> model pipeline.
text = "Just played: A Ceremony of Carols, Op. 28: Procession - Benjamin Britten, Frances Kelly, Marie-Claire Brookshaw, Clare Wilkinson - Brit..."
text = clean(text).split()
temp = ldamodel[dictionary.doc2bow(text)]
#print temp
#terms = ldamodel.get_topic_terms(4,topn=2)
#print [dictionary[terms[i][0]] for i in range(2)]

#Posting to Solr
# NOTE(review): the Solr endpoint is hard-coded; consider reading it from configuration.
si = SolrInterface("http://54.202.209.219:8983/solr/IRF16P4")

for i, doc in enumerate(doc_complete):
    # Entries are used below as (topic id, probability) pairs.
    probs = ldamodel[dictionary.doc2bow(clean(doc).split())]
    max_prob = 0
    max_index = 0
    label = 0
    # Keep the first entry with the strictly highest probability.
    for j, (topic_id, prob) in enumerate(probs):
        if prob > max_prob:
            max_prob = prob
            max_index = j
            label = topic_id
    # Look the terms up by topic id (label). max_index is only the position
    # inside probs, which need not equal the topic id when the model returns
    # a subset of the topics for a document.
    terms = ldamodel.get_topic_terms(label, topn=2)
    topics = [dictionary[term_id] for term_id, _ in terms]
    #print topics,"<======>",
    #print tweets[i]["text"]
示例#29
0
def connection(config):
    """Return a ``SolrInterface`` for ``config.url``, logging the target first."""
    solr_url = config.url
    logger.info('Connecting to Solr on %s', solr_url)
    interface = SolrInterface(solr_url)
    return interface
示例#30
0
 def _setup(self):
     """Store a ``SolrInterface`` built from ``settings.SOLR_URI`` in ``_wrapped``."""
     solr = SolrInterface(settings.SOLR_URI)
     self._wrapped = solr
示例#31
0
 def test_rollback(self):
     """Delete a document without committing, roll back, and expect all three hits again."""
     solr = SolrInterface(
         os.environ.get("SOLR_URL", "http://localhost:8983/solr"))
     solr.delete_all()
     solr.add(self.docs)
     solr.commit()
     before = solr.query(genre_s="fantasy").execute()
     self.assertEqual(before.result.numFound, 3)

     # delete
     victim_id = before.result.docs[0]['id']
     deletion = solr.delete_by_ids(victim_id)
     self.assertEqual(deletion.status, 0)

     # rollback
     rolled_back = solr.rollback()
     self.assertEqual(rolled_back.status, 0)

     after = solr.query(genre_s="fantasy").execute()
     self.assertEqual(after.result.numFound, 3)
示例#32
0
 def test_chunked_add(self):
     """Add ``self.docs`` in chunks of 1 and then 2, expecting one response per chunk."""
     solr = SolrInterface(
         os.environ.get("SOLR_URL", "http://localhost:8983/solr"))
     self.assertEqual(len(self.docs), 4)
     # (chunk size, expected number of responses)
     for chunk_size, expected_chunks in ((1, 4), (2, 2)):
         solr.delete_all()
         responses = solr.add(self.docs, chunk=chunk_size)
         self.assertEqual(len(responses), expected_chunks)
         statuses = [response.status for response in responses]
         self.assertEqual(statuses, [0] * expected_chunks)
         solr.commit()
         found = solr.query(genre_s="fantasy").execute()
         self.assertEqual(found.result.numFound, 3)
示例#33
0
 def test_chunked_add(self):
     """Check ``add(..., chunk=n)`` for n=1 and n=2 on the four fixture documents.

     Each round clears the index, adds the documents, verifies the number
     and status of the responses, commits and re-checks the hit count.
     """
     url = os.environ.get("SOLR_URL", "http://localhost:8983/solr")
     interface = SolrInterface(url)
     self.assertEqual(len(self.docs), 4)

     def add_in_chunks(size, response_count):
         # One round: wipe, add with the given chunk size, verify, commit, query.
         interface.delete_all()
         replies = interface.add(self.docs, chunk=size)
         self.assertEqual(len(replies), response_count)
         self.assertEqual([reply.status for reply in replies],
                          [0] * response_count)
         interface.commit()
         result = interface.query(genre_s="fantasy").execute()
         self.assertEqual(result.result.numFound, 3)

     # chunk size = 1, chunks = 4
     add_in_chunks(1, 4)
     # chunk size = 2, chunks = 2
     add_in_chunks(2, 2)
示例#34
0
 def test_rollback(self):
     """Verify that ``rollback()`` after an uncommitted delete leaves three hits."""
     url = os.environ.get("SOLR_URL", "http://localhost:8983/solr")
     interface = SolrInterface(url)
     interface.delete_all()
     interface.add(self.docs)
     interface.commit()

     def fantasy_hits():
         # Number of documents currently matching genre_s="fantasy".
         return interface.query(genre_s="fantasy").execute()

     initial = fantasy_hits()
     self.assertEqual(initial.result.numFound, 3)
     # delete
     delete_reply = interface.delete_by_ids(initial.result.docs[0]['id'])
     self.assertEqual(delete_reply.status, 0)
     # rollback
     rollback_reply = interface.rollback()
     self.assertEqual(rollback_reply.status, 0)
     self.assertEqual(fantasy_hits().result.numFound, 3)
示例#35
0
 def test_query(self):
     """Run a genre query, delete the first result, commit and query again.

     The final query passes ``constructor=Book`` and the ``title`` of each
     returned object is compared.
     """
     url = os.environ.get("SOLR_URL", "http://localhost:8983/solr")
     interface = SolrInterface(url)
     interface.add(self.docs)
     interface.commit()
     result = interface.query(genre_s="fantasy").execute()
     self.assertEqual(result.result.numFound, 3)
     # delete
     doomed_id = result.result.docs[0]['id']
     delete_reply = interface.delete_by_ids(doomed_id)
     self.assertEqual(delete_reply.status, 0)
     # A query is issued before the commit; nothing is asserted on it.
     interface.query(genre_s="fantasy").execute()
     interface.commit()
     result = interface.query(genre_s="fantasy").execute()
     self.assertEqual(result.result.numFound, 2)
     # test constructor
     book_result = interface.query(genre_s="fantasy").execute(constructor=Book)
     expected_titles = [
         u'The Sea of Monsters',
         u"Sophie's World : The Greek Philosophers",
     ]
     self.assertEqual([item.title for item in book_result.result.docs],
                      expected_titles)
示例#36
0
 def tearDown(self):
     """Run ``delete_all`` followed by ``commit`` once a test has finished."""
     url = os.environ.get("SOLR_URL", "http://localhost:8983/solr")
     interface = SolrInterface(url)
     interface.delete_all()
     interface.commit()