def init():
    """Rebuild the "zhihu" index from scratch and redirect to the list page.

    Steps, in order:
      1. drop the existing "zhihu" index (best-effort),
      2. recreate it and install the mapping for the "answer" type,
      3. index every item produced by ``Data().getData()`` under its ``id``,
      4. refresh the index.

    Returns:
        Whatever ``redirect('/list')`` returns.
    """
    conn = ES('127.0.0.1:9200')

    try:
        conn.delete_index("zhihu")
    except Exception:
        # Best-effort cleanup: presumably the index just does not exist yet.
        # ``Exception`` (rather than a bare ``except``) keeps Ctrl-C and
        # SystemExit working while still ignoring any client-side error.
        pass

    conn.create_index("zhihu")

    mapping = {
        u'id': {
            'store': 'yes',
            'type': u'integer',
        },
        u'link': {
            'store': 'yes',
            'type': u'string',
        },
        u'title': {
            'boost': 1.0,
            'index': 'analyzed',
            'store': 'yes',
            'type': u'string',
        },
    }
    conn.put_mapping("answer", {'properties': mapping}, ["zhihu"])

    # Each document is stored under its own 'id' field.
    for item in Data().getData():
        conn.index(item, "zhihu", "answer", item['id'])

    conn.refresh(["zhihu"])
    return redirect('/list')
def init():
    """Recreate and repopulate the "zhihu" index, then redirect to '/list'.

    Any existing "zhihu" index is dropped first (failures are ignored), the
    index is created again with the "answer" mapping, every item returned by
    ``Data().getData()`` is indexed using its ``id`` as the document id, and
    the index is refreshed before redirecting.

    Returns:
        The value of ``redirect('/list')``.
    """
    conn = ES('127.0.0.1:9200')

    try:
        conn.delete_index("zhihu")
    except Exception:
        # Deliberately ignored (most likely the index is simply missing).
        # Catching ``Exception`` instead of using a bare ``except`` avoids
        # swallowing KeyboardInterrupt / SystemExit.
        pass

    conn.create_index("zhihu")

    mapping = {
        u'id': {'store': 'yes', 'type': u'integer'},
        u'link': {'store': 'yes', 'type': u'string'},
        u'title': {'boost': 1.0,
                   'index': 'analyzed',
                   'store': 'yes',
                   'type': u'string'},
    }
    conn.put_mapping("answer", {'properties': mapping}, ["zhihu"])

    for item in Data().getData():
        # The item's own 'id' doubles as the document id.
        conn.index(item, "zhihu", "answer", item['id'])

    conn.refresh(["zhihu"])
    return redirect('/list')
class ESTestCase(unittest.TestCase):
    """Base test case that talks to an ES server at 127.0.0.1:9200.

    The "test-index" index is removed before and after every test so each
    test starts from, and leaves behind, a clean state.
    """

    def setUp(self):
        """Open the connection and make sure "test-index" does not exist."""
        self.conn = ES('127.0.0.1:9200')
        self._delete_test_index()

    def tearDown(self):
        """Remove "test-index" again once the test has run."""
        self._delete_test_index()

    def _delete_test_index(self):
        """Delete "test-index", ignoring the case where it is not there."""
        try:
            self.conn.delete_index("test-index")
        except NotFoundException:
            pass

    def assertResultContains(self, result, expected):
        """Assert that every key/value pair of *expected* is found in *result*.

        *result* may contain additional keys; a key of *expected* missing
        from *result* raises ``KeyError`` from the lookup itself.
        """
        for (key, value) in expected.items():
            # assertEqual: the assertEquals alias was removed in Python 3.12.
            self.assertEqual(value, result[key])

    def dump(self, result):
        """
        dump to stdout the result
        """
        pprint(result)
from pyes import ES from datetime import datetime import shelve conn = ES('127.0.0.1:9200') try: conn.delete_index("test-index") except: pass dataset = shelve.open("samples.shelve") mapping = { u'description': {'boost': 1.0, 'index': 'analyzed', 'store': 'yes', 'type': u'string', "term_vector" : "with_positions_offsets" }, u'name': {'boost': 1.0, 'index': 'analyzed', 'store': 'yes', 'type': u'string', "term_vector" : "with_positions_offsets" }, u'age': {'store': 'yes', 'type': u'integer'}, } conn.create_index("test-index") conn.put_mapping("test-type", {'properties':mapping}, ["test-index"]) start = datetime.now() for k, userdata in dataset.items():
class BaseElasticSearchClient(BaseClient):
    """Convenience wrapper around an ``ES`` connection object.

    Offers helpers for connecting, checking/refreshing/deleting indices and
    running simple term-based searches.
    """

    def __init__(self, servers, index=None):
        """
        @param servers: Make sure to include the port with the server address
        @param index: Document index; a single name or a list of names. It is
            stored as a list and used by the find* methods whenever they are
            called without an explicit index.
        @return:
        """
        super(BaseElasticSearchClient, self).__init__()
        self.connection = None
        self.servers = servers
        if index is not None:
            self.index = index if isinstance(index, list) else [index]
        else:
            # Always define the attribute so that ``index or self.index`` in
            # the find* methods cannot raise AttributeError. Any default
            # already provided by the base class is kept.
            self.index = getattr(self, 'index', None)

    def connect(self, connection_pool=1, bulk_size=10):
        """
        Create the ES connection for the configured servers.

        @param connection_pool: Value handed to update_connection_pool()
        @param bulk_size: bulk_size handed to ES()
        @return: True on success, False if NoServerAvailable was raised
        """
        update_connection_pool(connection_pool)
        try:
            self.connection = ES(self.servers, bulk_size=bulk_size)
        except NoServerAvailable:
            self._log.error('Failed to connect to elastic search server')
            return False
        return True

    def close(self):
        """Forget the current connection object."""
        self.connection = None

    def _create_term_query(self, must_list):
        """
        Combine the given terms into a BoolQuery of "must" clauses.

        @param must_list: Iterable of query terms that must all match
        @return: The assembled BoolQuery
        """
        # TODO: add remaining conditional list functionality.
        query = BoolQuery()
        for term in must_list:
            query.add_must(term)
        # The query has to be returned, otherwise find() searches with None.
        return query

    def refresh_index(self, index_name, wait=1):
        """
        Refresh the given index.

        @param index_name: Name of the index to refresh
        @param wait: Passed to the refresh call as ``timesleep``
        """
        self._log.info('ES: Refreshing index {0}'.format(index_name))
        self.connection.indices.refresh(index_name, timesleep=wait)

    def has_index(self, index_name):
        """
        @param index_name: Name of the index to look for
        @return: False if the status call raises IndexMissingException,
            True otherwise
        """
        self._log.info('ES: Checking for index {0}'.format(index_name))
        try:
            self.connection.status(index_name)
        except IndexMissingException:
            return False
        return True

    def wait_for_index(self, index_name, wait=30):
        """
        Checks to see if an index exists.
        Checks every second for int(X) seconds and returns True if successful
        """
        for _ in range(int(wait)):
            if self.has_index(index_name):
                return True
            sleep(1)
        return False

    def wait_for_messages(self, name, value, num=1, index=None, max_wait=30):
        """
        Wait for a specific number of messages to be returned within a
        specified amount of time.
        Checks every second for {max_wait} seconds and returns a list of msgs
        (an empty list on timeout or when there is no connection).
        """
        if not self.connection:
            # find_term() returns None without a connection; polling would
            # only fail on len(None), so report "no messages" right away.
            return []
        for _ in range(int(max_wait)):
            # NOTE(review): only one hit is requested (size=1) while the
            # length is compared to ``num``; this relies on len() of the
            # search result reporting the total hit count - confirm against
            # the ES client in use.
            msgs = self.find_term(name=name, value=value, size=1, index=index)
            if len(msgs) == num:
                return msgs
            sleep(1)
        return []

    def delete_index(self, index_name):
        """
        @param index_name: Name of the index to delete
        """
        self._log.info('ES: Deleting index {0}'.format(index_name))
        self.connection.delete_index(index_name)

    def find_term(self, name, value, size=10, index=None):
        """
        Search for documents whose field ``name`` matches the term ``value``.

        @param size: Size handed to the Search object
        @param index: Index/indices to search; defaults to self.index
        @return: The search result, or None when there is no connection
        """
        if not self.connection:
            return
        query = TermQuery(name, value)
        return self.connection.search(query=Search(query, size=size),
                                      indices=index or self.index)

    def find(self, filter_terms, size=10, doc_types=None, index=None):
        """
        Search for documents matching all of ``filter_terms``.

        @param filter_terms: Terms combined via _create_term_query()
        @param size: Size handed to the Search object
        @param doc_types: Passed through to the search call
        @param index: Index/indices to search; defaults to self.index
        @return: The search result, or None when there is no connection
        """
        if not self.connection:
            return
        query = self._create_term_query(must_list=filter_terms)
        return self.connection.search(query=Search(query, size=size),
                                      indices=index or self.index,
                                      doc_types=doc_types)

    def find_one(self, filter_terms, doc_types=None, index=None):
        """
        Like find(), but limited to a single document.

        @return: The first result, or None when nothing matched or there is
            no connection
        """
        if not self.connection:
            return
        results = self.find(filter_terms=filter_terms, size=1,
                            doc_types=doc_types, index=index)
        return results[0] if len(results) > 0 else None
# Prepares "test-index" for loading the sample data set: drops any previous
# index, opens the shelve file with the samples, recreates the index with the
# mapping for "test-type" and records the start time.
from datetime import datetime
import shelve

from pyes import ES

# Alternative (local) server:
# conn = ES('127.0.0.1:9200')
conn = ES('192.168.1.51:9200')

try:
    conn.delete_index("test-index")
except Exception:
    # Best-effort: presumably the index does not exist yet. ``Exception``
    # instead of a bare ``except`` keeps Ctrl-C / SystemExit working.
    pass

# Left open on purpose: the data set is still needed after this point.
dataset = shelve.open("samples.shelve")

mapping = {
    u'description': {
        'boost': 1.0,
        'index': 'analyzed',
        'store': 'yes',
        'type': u'string',
        "term_vector": "with_positions_offsets",
    },
    u'name': {
        'boost': 1.0,
        'index': 'analyzed',
        'store': 'yes',
        'type': u'string',
        "term_vector": "with_positions_offsets",
    },
    u'age': {
        'store': 'yes',
        'type': u'integer',
    },
}

conn.create_index("test-index")
conn.put_mapping("test-type", {'properties': mapping}, ["test-index"])

# Timestamp taken right before the (subsequent) load starts.
start = datetime.now()