示例#1
0
def ext_process(listname, hostname, url, filepath, msg):
    """Index the just archived message into ElasticSearch.

    Arguments here are the list name, the host name, the URL to the just
    archived message, the file system path to the just archived message and
    the message object.

    The message is indexed into the ``mailman`` index as a ``mail`` document
    holding its URL, subject, the current timestamp and the full message
    text.  If the index status cannot be read, the index is created and the
    field mappings are installed before the message is indexed.

    Errors raised while talking to ElasticSearch are reported through
    ``syslog`` instead of being propagated; the function returns ``None``.

    These can be replaced or augmented as needed.
    """
    from pyes import ES
    from pyes.exceptions import ClusterBlockException, NoServerAvailable
    import datetime
    import time  # time.sleep() is used while waiting for a new index

    # CHANGE these settings to reflect your configuration
    _ES_SERVERS = ['127.0.0.1:9500']  # I prefer thrift
    _indexname = "mailman"
    _doctype = "mail"
    date = datetime.datetime.today()

    try:
        iconn = ES(_ES_SERVERS)
        try:
            status = iconn.status(_indexname)
        except Exception:
            # The status lookup failed: assume the index does not exist yet,
            # create it and install the mappings.  The sleeps give the
            # cluster a moment to settle between the steps.
            iconn.create_index(_indexname)
            time.sleep(1)
            iconn.status(_indexname)
            mappings = {
                u'text': {
                    'store': 'true',
                    'type': u'text',
                    "term_vector": "with_positions_offsets"
                },
                u'url': {
                    'store': 'true',
                    'type': u'keyword'
                },
                u'title': {
                    'store': 'true',
                    'type': u'text',
                    "term_vector": "with_positions_offsets"
                },
                u'date': {
                    'store': 'true',
                    'type': u'date'
                }
            }
            time.sleep(1)
            iconn.put_mapping(_doctype, mappings, _indexname)
        else:
            # Logged outside the guarded call so that a logging problem is
            # never mistaken for a missing index.
            logger.debug("Indexer status:%s" % status)

        data = dict(url=url,
                    title=msg.get('subject'),
                    date=date,
                    text=str(msg))
        iconn.index(data, _indexname, _doctype)

        syslog('debug',
               'listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
               listname, hostname, url, filepath, msg)
    except ClusterBlockException:
        syslog(
            'error',
            'Cluster in recovery state: listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
            listname, hostname, url, filepath, msg)
    except NoServerAvailable:
        syslog(
            'error',
            'No server available: listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
            listname, hostname, url, filepath, msg)
    except Exception:
        # Best effort: any other error is logged with its traceback rather
        # than raised.  KeyboardInterrupt/SystemExit are left to propagate.
        import traceback
        syslog(
            'error',
            'Unknown: listname: %s, hostname: %s, url: %s, path: %s, msg: %s\nstacktrace: %s',
            listname, hostname, url, filepath, msg,
            repr(traceback.format_exc()))

    return
示例#2
0
def ext_process(listname, hostname, url, filepath, msg):
    """Index the just archived message into ElasticSearch.

    Arguments here are the list name, the host name, the URL to the just
    archived message, the file system path to the just archived message and
    the message object.

    The message is indexed into the ``mailman`` index as a ``mail`` document
    holding its URL, subject, the current timestamp and the full message
    text.  If the index status cannot be read, the index is created and the
    field mappings are installed before the message is indexed.

    Errors raised while talking to ElasticSearch are reported through
    ``syslog`` instead of being propagated; the function returns ``None``.

    These can be replaced or augmented as needed.
    """
    from pyes import ES
    from pyes.exceptions import ClusterBlockException, NoServerAvailable
    import datetime
    import time  # time.sleep() is used while waiting for a new index

    # CHANGE these settings to reflect your configuration
    _ES_SERVERS = ['127.0.0.1:9500']  # I prefer thrift
    _indexname = "mailman"
    _doctype = "mail"
    date = datetime.datetime.today()

    try:
        iconn = ES(_ES_SERVERS)
        try:
            status = iconn.status(_indexname)
        except Exception:
            # The status lookup failed: assume the index does not exist yet,
            # create it and install the mappings.  The sleeps give the
            # cluster a moment to settle between the steps.
            iconn.create_index(_indexname)
            time.sleep(1)
            iconn.status(_indexname)
            mappings = {
                u'text': {
                    'boost': 1.0,
                    'index': 'analyzed',
                    'store': 'yes',
                    'type': u'string',
                    "term_vector": "with_positions_offsets",
                },
                u'url': {
                    'boost': 1.0,
                    'index': 'not_analyzed',
                    'store': 'yes',
                    'type': u'string',
                    "term_vector": "no",
                },
                u'title': {
                    'boost': 1.0,
                    'index': 'analyzed',
                    'store': 'yes',
                    'type': u'string',
                    "term_vector": "with_positions_offsets",
                },
                u'date': {
                    'store': 'yes',
                    'type': u'date',
                },
            }
            time.sleep(1)
            iconn.put_mapping(_doctype, mappings, _indexname)
        else:
            # Logged outside the guarded call so that a logging problem is
            # never mistaken for a missing index.
            logger.debug("Indexer status:%s" % status)

        data = dict(url=url,
                    title=msg.get('subject'),
                    date=date,
                    text=str(msg))
        iconn.index(data, _indexname, _doctype)

        syslog('debug', 'listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
               listname, hostname, url, filepath, msg)
    except ClusterBlockException:
        syslog('error', 'Cluster in recovery state: listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
               listname, hostname, url, filepath, msg)
    except NoServerAvailable:
        syslog('error', 'No server available: listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
               listname, hostname, url, filepath, msg)
    except Exception:
        # Best effort: any other error is logged with its traceback rather
        # than raised.  KeyboardInterrupt/SystemExit are left to propagate.
        import traceback
        syslog('error', 'Unknown: listname: %s, hostname: %s, url: %s, path: %s, msg: %s\nstacktrace: %s',
               listname, hostname, url, filepath, msg, repr(traceback.format_exc()))

    return
示例#3
0
class BaseElasticSearchClient(BaseClient):
    """Convenience client around an ``ES`` connection.

    Offers helpers to connect, to check/wait for/refresh/delete indices and
    to run term based searches.  Logging goes through ``self._log``
    (presumably provided by ``BaseClient`` -- confirm against its
    definition).
    """

    def __init__(self, servers, index=None):
        """
        @param servers: Make sure to include the port with the server address
        @param index: Document index; a single value is wrapped in a list.
            When omitted, ``self.index`` is not assigned here.
        """
        super(BaseElasticSearchClient, self).__init__()
        self.connection = None
        self.servers = servers

        if index is not None:
            self.index = index if isinstance(index, list) else [index]

    def connect(self, connection_pool=1, bulk_size=10):
        """Open the connection to the configured servers.

        @param connection_pool: Value handed to ``update_connection_pool``
        @param bulk_size: Bulk size handed to the ``ES`` constructor
        @return: True on success, False if no server was available
        """
        update_connection_pool(connection_pool)

        try:
            self.connection = ES(self.servers, bulk_size=bulk_size)
        except NoServerAvailable:
            self._log.error('Failed to connect to elastic search server')
            return False
        return True

    def close(self):
        """Drop the reference to the current connection."""
        self.connection = None

    def _create_term_query(self, must_list):
        """Build a ``BoolQuery`` in which every term of must_list is required.

        @param must_list: Iterable of query terms added as "must" clauses
        @return: The assembled ``BoolQuery``
        """
        # TODO: add remaining conditional list functionality.
        query = BoolQuery()
        for term in must_list:
            query.add_must(term)
        # The query has to be handed back; without this find() would search
        # with ``None`` as its query.
        return query

    def refresh_index(self, index_name, wait=1):
        """Refresh index_name, passing wait as the ``timesleep`` argument."""
        self._log.info('ES: Refreshing index {0}'.format(index_name))
        self.connection.indices.refresh(index_name, timesleep=wait)

    def has_index(self, index_name):
        """Return True if the status of index_name can be read.

        @return: False when the lookup raises ``IndexMissingException``
        """
        self._log.info('ES: Checking for index {0}'.format(index_name))
        try:
            self.connection.status(index_name)
        except IndexMissingException:
            return False
        return True

    def wait_for_index(self, index_name, wait=30):
        """ Checks to see if an index exists.
        Checks every second for int(wait) seconds and returns True as soon
        as the index is found, False if it never shows up.
        """
        for _ in range(0, int(wait)):
            if self.has_index(index_name):
                return True

            sleep(1)
        return False

    def wait_for_messages(self, name, value, num=1, index=None, max_wait=30):
        """ Wait for a specific number of messages to be returned within a
        specified amount of time.
        Checks every second for {max_wait} seconds and returns the search
        result once its length equals num; returns an empty list otherwise.
        """
        for _ in range(0, int(max_wait)):
            msgs = self.find_term(name=name, value=value, size=1, index=index)
            if len(msgs) == num:
                return msgs
            sleep(1)
        return []

    def delete_index(self, index_name):
        """Delete index_name through the current connection."""
        self._log.info('ES: Deleting index {0}'.format(index_name))
        self.connection.delete_index(index_name)

    def find_term(self, name, value, size=10, index=None):
        """Search for documents whose field name matches value.

        @param size: Size passed to ``Search``
        @param index: Indices to search; falls back to ``self.index``
        @return: The search result, or None when not connected
        """
        if not self.connection:
            return

        query = TermQuery(name, value)
        return self.connection.search(query=Search(query, size=size),
                                      indices=index or self.index)

    def find(self, filter_terms, size=10, doc_types=None, index=None):
        """Search for documents matching all of filter_terms.

        @param filter_terms: Terms that must all match
        @param size: Size passed to ``Search``
        @param doc_types: Document types forwarded to the search call
        @param index: Indices to search; falls back to ``self.index``
        @return: The search result, or None when not connected
        """
        if not self.connection:
            return

        query = self._create_term_query(must_list=filter_terms)
        return self.connection.search(query=Search(query, size=size),
                                      indices=index or self.index,
                                      doc_types=doc_types)

    def find_one(self, filter_terms, doc_types=None, index=None):
        """Return the first document matching all of filter_terms.

        @return: The first hit, or None when there is no hit or no
            connection
        """
        if not self.connection:
            return

        results = self.find(filter_terms=filter_terms, size=1,
                            doc_types=doc_types, index=index)
        return results[0] if len(results) > 0 else None