def ext_process(listname, hostname, url, filepath, msg):
    """Here's where you put your code to deal with the just archived message.

    Arguments here are the list name, the host name, the URL to the just
    archived message, the file system path to the just archived message and
    the message object.

    These can be replaced or augmented as needed.

    This implementation indexes the message into the ElasticSearch index
    ``mailman`` under the document type ``mail``.  If the index status
    lookup fails, the index is created and the field mappings are installed
    before the message is indexed.  Errors derived from ``Exception`` are
    reported through ``syslog`` instead of being raised to the caller.
    """
    import datetime
    import time
    import traceback

    from pyes import ES
    from pyes.exceptions import ClusterBlockException, NoServerAvailable

    # CHANGE these settings to reflect your configuration
    _ES_SERVERS = ['127.0.0.1:9500']  # I prefer thrift
    _indexname = "mailman"
    _doctype = "mail"
    date = datetime.datetime.today()

    try:
        iconn = ES(_ES_SERVERS)
        try:
            status = iconn.status(_indexname)
            logger.debug("Indexer status:%s" % status)
        except Exception:
            # Any failure while reading (or logging) the status is treated
            # as "index not there yet": create it and install the mappings.
            # ``except Exception`` rather than a bare ``except`` so that
            # SystemExit / KeyboardInterrupt are not swallowed here.
            iconn.create_index(_indexname)
            # NOTE(review): the pauses presumably give the cluster time to
            # settle after index creation -- confirm they are still needed.
            time.sleep(1)
            iconn.status(_indexname)
            mappings = {
                u'text': {
                    'store': 'true',
                    'type': u'text',
                    "term_vector": "with_positions_offsets",
                },
                u'url': {
                    'store': 'true',
                    'type': u'keyword',
                },
                u'title': {
                    'store': 'true',
                    'type': u'text',
                    "term_vector": "with_positions_offsets",
                },
                u'date': {
                    'store': 'true',
                    'type': u'date',
                },
            }
            time.sleep(1)
            iconn.put_mapping(_doctype, mappings, _indexname)

        data = dict(url=url,
                    title=msg.get('subject'),
                    date=date,
                    text=str(msg))
        iconn.index(data, _indexname, _doctype)

        syslog('debug',
               'listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
               listname, hostname, url, filepath, msg)
    except ClusterBlockException:
        syslog('error',
               'Cluster in revocery state: listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
               listname, hostname, url, filepath, msg)
    except NoServerAvailable:
        syslog('error',
               'No server available: listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
               listname, hostname, url, filepath, msg)
    except Exception:
        # Last-resort handler: record the traceback instead of propagating
        # the failure to the caller.
        syslog('error',
               'Unknown: listname: %s, hostname: %s, url: %s, path: %s, msg: %s\nstacktrace: %s',
               listname, hostname, url, filepath, msg,
               repr(traceback.format_exc()))

    return
def ext_process(listname, hostname, url, filepath, msg):
    """Here's where you put your code to deal with the just archived message.

    Arguments here are the list name, the host name, the URL to the just
    archived message, the file system path to the just archived message and
    the message object.

    These can be replaced or augmented as needed.

    This implementation indexes the message into the ElasticSearch index
    ``mailman`` under the document type ``mail``.  If the index status
    lookup fails, the index is created and the field mappings are installed
    before the message is indexed.  Errors derived from ``Exception`` are
    reported through ``syslog`` instead of being raised to the caller.
    """
    import datetime
    import time
    import traceback

    from pyes import ES
    from pyes.exceptions import ClusterBlockException, NoServerAvailable

    # CHANGE these settings to reflect your configuration
    _ES_SERVERS = ['127.0.0.1:9500']  # I prefer thrift
    _indexname = "mailman"
    _doctype = "mail"
    date = datetime.datetime.today()

    try:
        iconn = ES(_ES_SERVERS)
        try:
            status = iconn.status(_indexname)
            logger.debug("Indexer status:%s" % status)
        except Exception:
            # Any failure while reading (or logging) the status is treated
            # as "index not there yet": create it and install the mappings.
            # ``except Exception`` rather than a bare ``except`` so that
            # SystemExit / KeyboardInterrupt are not swallowed here.
            iconn.create_index(_indexname)
            # NOTE(review): the pauses presumably give the cluster time to
            # settle after index creation -- confirm they are still needed.
            time.sleep(1)
            iconn.status(_indexname)
            mappings = {
                u'text': {
                    'boost': 1.0,
                    'index': 'analyzed',
                    'store': 'yes',
                    'type': u'string',
                    "term_vector": "with_positions_offsets",
                },
                u'url': {
                    'boost': 1.0,
                    'index': 'not_analyzed',
                    'store': 'yes',
                    'type': u'string',
                    "term_vector": "no",
                },
                u'title': {
                    'boost': 1.0,
                    'index': 'analyzed',
                    'store': 'yes',
                    'type': u'string',
                    "term_vector": "with_positions_offsets",
                },
                u'date': {
                    'store': 'yes',
                    'type': u'date',
                },
            }
            time.sleep(1)
            iconn.put_mapping(_doctype, mappings, _indexname)

        data = dict(url=url,
                    title=msg.get('subject'),
                    date=date,
                    text=str(msg))
        iconn.index(data, _indexname, _doctype)

        syslog('debug',
               'listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
               listname, hostname, url, filepath, msg)
    except ClusterBlockException:
        syslog('error',
               'Cluster in revocery state: listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
               listname, hostname, url, filepath, msg)
    except NoServerAvailable:
        syslog('error',
               'No server available: listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
               listname, hostname, url, filepath, msg)
    except Exception:
        # Last-resort handler: record the traceback instead of propagating
        # the failure to the caller.  The format string is kept as a single
        # literal; a raw line break inside it would not parse.
        syslog('error',
               'Unknown: listname: %s, hostname: %s, url: %s, path: %s, msg: %s\nstacktrace: %s',
               listname, hostname, url, filepath, msg,
               repr(traceback.format_exc()))

    return
class BaseElasticSearchClient(BaseClient):
    """Convenience wrapper around an ``ES`` connection.

    Holds the server list and an optional default list of indices, and
    offers helpers for connecting, checking/refreshing/deleting indices and
    running simple term queries.
    """

    def __init__(self, servers, index=None):
        """
        @param servers: Make sure to include the port with the server address
        @param index: Document index; a single name is wrapped in a list.
            When omitted, ``self.index`` is not assigned by this constructor.
        @return:
        """
        super(BaseElasticSearchClient, self).__init__()
        self.connection = None
        self.servers = servers

        if index is not None:
            # isinstance (not an exact type check) so list subclasses are
            # used as-is instead of being nested inside another list.
            self.index = index if isinstance(index, list) else [index]

    def connect(self, connection_pool=1, bulk_size=10):
        """
        Open the connection to the configured servers.

        @param connection_pool: Value handed to update_connection_pool()
        @param bulk_size: Passed through to the ES constructor
        @return: True on success, False when no server is available
        """
        update_connection_pool(connection_pool)

        try:
            self.connection = ES(self.servers, bulk_size=bulk_size)
        except NoServerAvailable:
            self._log.error('Failed to connect to elastic search server')
            return False
        return True

    def close(self):
        """Drop the reference to the current connection."""
        self.connection = None

    def _create_term_query(self, must_list):
        """
        Build a BoolQuery in which every term of must_list is a "must"
        clause.

        @param must_list: Iterable of query terms
        @return: The assembled BoolQuery
        """
        # TODO: add remaining conditional list functionality.
        query = BoolQuery()
        for term in must_list:
            query.add_must(term)
        # Return the query: without this, find() searched with query=None.
        return query

    def refresh_index(self, index_name, wait=1):
        """
        Refresh the given index.

        @param index_name: Name of the index to refresh
        @param wait: Forwarded to the refresh call as ``timesleep``
        """
        self._log.info('ES: Refreshing index {0}'.format(index_name))
        self.connection.indices.refresh(index_name, timesleep=wait)

    def has_index(self, index_name):
        """
        @param index_name: Name of the index to look for
        @return: False if the status lookup raises IndexMissingException,
            True otherwise
        """
        self._log.info('ES: Checking for index {0}'.format(index_name))
        try:
            self.connection.status(index_name)
        except IndexMissingException:
            return False
        return True

    def wait_for_index(self, index_name, wait=30):
        """
        Checks to see if an index exists.
        Checks every second for int(X) seconds and returns True if successful
        """
        for _ in range(0, int(wait)):
            if self.has_index(index_name):
                return True

            sleep(1)
        return False

    def wait_for_messages(self, name, value, num=1, index=None, max_wait=30):
        """
        Wait for a specific number of messages to be returned within a
        specified amount of time.
        Checks every second for {max_wait} seconds and returns a list of msgs

        @return: The search result once its length equals num, otherwise an
            empty list after max_wait attempts
        """
        for _ in range(0, int(max_wait)):
            msgs = self.find_term(name=name, value=value, size=1, index=index)
            if len(msgs) == num:
                return msgs
            sleep(1)
        return []

    def delete_index(self, index_name):
        """Delete the index called index_name."""
        self._log.info('ES: Deleting index {0}'.format(index_name))
        self.connection.delete_index(index_name)

    def find_term(self, name, value, size=10, index=None):
        """
        Search for documents whose field ``name`` matches ``value``.

        @param size: Size passed to the Search object
        @param index: Indices to search; falls back to self.index
        @return: The search result, or None when not connected
        """
        if not self.connection:
            return

        query = TermQuery(name, value)
        return self.connection.search(query=Search(query, size=size),
                                      indices=index or self.index)

    def find(self, filter_terms, size=10, doc_types=None, index=None):
        """
        Search for documents that match every term in filter_terms.

        @param filter_terms: Terms combined as "must" clauses
        @param size: Size passed to the Search object
        @param doc_types: Forwarded to the search call
        @param index: Indices to search; falls back to self.index
        @return: The search result, or None when not connected
        """
        if not self.connection:
            return

        query = self._create_term_query(must_list=filter_terms)
        return self.connection.search(query=Search(query, size=size),
                                      indices=index or self.index,
                                      doc_types=doc_types)

    def find_one(self, filter_terms, doc_types=None, index=None):
        """
        Like find(), limited to a single result.

        @return: The first result, or None when there is no match or no
            connection
        """
        if not self.connection:
            return

        results = self.find(filter_terms=filter_terms, size=1,
                            doc_types=doc_types, index=index)
        return results[0] if len(results) > 0 else None