示例#1
0
class ESDataTarget(base.DataTarget):
    """Data target that writes records into an ElasticSearch index via ``pyes``.

    Records passed to :meth:`append` are indexed with ``bulk=True``;
    :meth:`finalize` forces the pending bulk queue to be flushed.
    """
    def __init__(self, document_type, database="test", host="127.0.0.1", port="9200",
                 truncate=False, expand=False, **elasticsearch_args):
        """Creates a ElasticSearch data target stream.

        :Attributes:
            * document_type: ElasticSearch document type name
            * database: database (index) name, default is ``test``
            * host: ElasticSearch database server host, default is ``127.0.0.1``
            * port: ElasticSearch port (string or integer), default is ``9200``
            * expand: expand dictionary values and treat children as top-level keys with dot '.'
                separated key path to the child.
            * truncate: delete the mapping of the document_type (and with it the
                existing data) during initialization. Default: False
            * elasticsearch_args: the ``create`` and ``replace`` flags are consumed by
                ``initialize()``; every other keyword is passed to the ``pyes.es.ES``
                constructor.
        """
        self.document_type = document_type
        self.database_name = database
        self.host = host
        self.port = port
        self.elasticsearch_args = elasticsearch_args
        self.expand = expand
        self.truncate = truncate
        self._fields = None

    def initialize(self):
        """Connect to ElasticSearch and prepare the target index.

        Steps, in order:

        * open a ``pyes`` connection to ``host:port`` and make the index and
          document type its defaults;
        * if ``create`` was requested, create the index; an already existing
          index is left untouched;
        * if ``replace`` was requested and the index was not just created,
          delete the index and create it again;
        * if ``truncate`` is set, delete the mapping of the document type.
        """
        # Imported locally so the block does not depend on a module-level import.
        import time

        from pyes.es import ES
        from pyes.exceptions import IndexAlreadyExistsException

        args = self.elasticsearch_args.copy()
        # ``create`` and ``replace`` are options of this class, not of pyes,
        # so they must be removed before the rest is handed to ES().
        create = args.pop("create", False)
        replace = args.pop("replace", False)

        server = self.host or ""
        if self.port:
            # str() keeps an integer port from breaking the concatenation.
            server += ":" + str(self.port)

        self.connection = ES(server, **args)
        self.connection.default_indices = self.database_name
        self.connection.default_types = self.document_type

        created = False
        if create:
            try:
                self.connection.create_index(self.database_name)
                self.connection.refresh(self.database_name)
                created = True
            except IndexAlreadyExistsException:
                # The index is already there: nothing to create.
                pass

        # A freshly created index is empty, so replacing it would be pointless.
        if replace and not created:
            self.connection.delete_index_if_exists(self.database_name)
            # NOTE(review): presumably gives the cluster time to finish the
            # deletion before the index is re-created - confirm it is needed.
            time.sleep(2)
            self.connection.create_index(self.database_name)
            self.connection.refresh(self.database_name)

        if self.truncate:
            self.connection.delete_mapping(self.database_name, self.document_type)
            self.connection.refresh(self.database_name)

    def append(self, obj):
        """Queue one record for bulk indexing.

        ``obj`` is either a dictionary or a sequence of values that is zipped
        with the stream field names. The document id is taken from the ``id``
        key, falling back to ``_id``; ``None`` is passed when neither is set.
        """
        if isinstance(obj, dict):
            record = obj
        else:
            record = dict(zip(self.fields.names(), obj))

        if self.expand:
            record = expand_record(record)

        # Test for "missing" explicitly instead of relying on truthiness, so a
        # valid id such as 0 is not silently replaced by ``_id``.
        doc_id = record.get('id')
        if doc_id is None or doc_id == "":
            doc_id = record.get('_id')
        self.connection.index(record, self.database_name, self.document_type, doc_id, bulk=True)

    def finalize(self):
        """Force the pending bulk operations to be sent to the server."""
        self.connection.flush_bulk(forced=True)
示例#2
0
class ESDataTarget(DataTarget):
    """Data target that writes records into an ElasticSearch index via ``pyes``.

    Records passed to :meth:`append` are indexed with ``bulk=True``;
    :meth:`finalize` forces the pending bulk queue to be flushed.
    """
    def __init__(self,
                 document_type,
                 index="test",
                 host="127.0.0.1",
                 port="9200",
                 truncate=False,
                 expand=False,
                 **elasticsearch_args):
        """Creates a ElasticSearch data target stream.

        :Attributes:
            * document_type: ElasticSearch document type name
            * index: index (database) name, default is ``test``
            * host: ElasticSearch database server host, default is ``127.0.0.1``
            * port: ElasticSearch port (string or integer), default is ``9200``
            * expand: expand dictionary values and treat children as top-level keys with dot '.'
                separated key path to the child.
            * truncate: delete the mapping of the document_type (and with it the
                existing data) during initialization. Default: False
            * elasticsearch_args: the ``create`` and ``replace`` flags are consumed by
                ``initialize()``; every other keyword is passed to the ``pyes.es.ES``
                constructor.
        """
        super(ESDataTarget, self).__init__()
        self.document_type = document_type
        self.index = index
        self.host = host
        self.port = port
        self.elasticsearch_args = elasticsearch_args
        self.expand = expand
        self.truncate = truncate
        self._fields = None

    def initialize(self):
        """
        Connect to ElasticSearch and prepare the target index.

        Steps, in order:

        * open a ``pyes`` connection to ``host:port`` and make the index and
          document type its defaults;
        * if ``create`` was requested, create the index; an already existing
          index is left untouched;
        * if ``replace`` was requested and the index was not just created,
          delete the index and create it again;
        * if ``truncate`` is set, delete the mapping of the document type;
        * if the document type has no mapping, put the one built by
          ``_get_mapping()``.
        """
        from pyes.es import ES
        # TypeMissingException is caught below, so it has to be imported too.
        from pyes.exceptions import IndexAlreadyExistsException, TypeMissingException

        args = self.elasticsearch_args.copy()
        # ``create`` and ``replace`` are options of this class, not of pyes,
        # so they must be removed before the rest is handed to ES().
        create = args.pop("create", False)
        replace = args.pop("replace", False)

        server = self.host or ""
        if self.port:
            # str() keeps an integer port from breaking the concatenation.
            server += ":" + str(self.port)

        self.connection = ES(server, **args)
        self.connection.default_indices = self.index
        self.connection.default_types = self.document_type

        created = False
        if create:
            try:
                self.connection.create_index(self.index)
                self.connection.refresh(self.index)
                created = True
            except IndexAlreadyExistsException:
                # The index is already there: nothing to create.
                pass

        # A freshly created index is empty, so replacing it would be pointless.
        if replace and not created:
            self.connection.delete_index_if_exists(self.index)
            # NOTE(review): the index was just deleted, so this refresh targets
            # a missing index - confirm that pyes/ElasticSearch tolerate it.
            self.connection.refresh(self.index)
            self.connection.create_index(self.index)
            self.connection.refresh(self.index)

        if self.truncate:
            self.connection.delete_mapping(self.index, self.document_type)
            self.connection.refresh(self.index)

        # Make sure the document type has a mapping; create it when missing
        # (which is also the case right after a truncate).
        try:
            self.connection.get_mapping(self.document_type, self.index)
        except TypeMissingException:
            self.connection.put_mapping(self.document_type,
                                        self._get_mapping(), self.index)

    def _get_mapping(self):
        """Build an ES optimized mapping for the given fields.

        Fields whose storage type is ``unknown`` (or any type not listed
        below) are left out so that ElasticSearch detects the type itself.
        """
        from pyes.mappings import DocumentObjectField, IntegerField, StringField, BooleanField, FloatField, DateField

        # Storage type -> pyes mapping field class.
        field_classes = {
            "string": StringField,
            "text": StringField,
            "integer": IntegerField,
            "boolean": BooleanField,
            "date": DateField,
            "float": FloatField,
        }

        document = DocumentObjectField(name=self.document_type)
        # ``or ()`` tolerates fields that have not been set yet (None).
        for field in self.fields or ():
            field_class = field_classes.get(field.storage_type)
            if field_class is None:
                # lets es detect the type
                continue
            document.add_property(field_class(name=field.name))

        return document

    def append(self, obj):
        """Queue one record for bulk indexing.

        ``obj`` is either a dictionary or a sequence of values that is zipped
        with ``self.field_names``. The document id is taken from the ``id``
        key, falling back to ``_id``; ``None`` is passed when neither is set.
        """
        if isinstance(obj, dict):
            record = obj
        else:
            record = dict(zip(self.field_names, obj))

        if self.expand:
            record = expand_record(record)

        # Test for "missing" explicitly instead of relying on truthiness, so a
        # valid id such as 0 is not silently replaced by ``_id``.
        doc_id = record.get('id')
        if doc_id is None or doc_id == "":
            doc_id = record.get('_id')
        self.connection.index(record,
                              self.index,
                              self.document_type,
                              doc_id,
                              bulk=True)

    def finalize(self):
        """Force the pending bulk operations to be sent to the server."""
        self.connection.flush_bulk(forced=True)
示例#3
0
class ESDataTarget(DataTarget):
    """ElasticSearch data target backed by a ``pyes`` connection.

    ``append()`` indexes each record with ``bulk=True`` and ``finalize()``
    forces the bulk queue to be flushed.
    """

    def __init__(self, document_type, index="test", host="127.0.0.1", port="9200", truncate=False, expand=False,
                 **elasticsearch_args):
        """Creates a ElasticSearch data target stream.

        :Attributes:
            * document_type: ElasticSearch document type name
            * index: index (database) name, default is ``test``
            * host: ElasticSearch database server host, default is ``127.0.0.1``
            * port: ElasticSearch port (string or integer), default is ``9200``
            * expand: expand dictionary values and treat children as top-level keys with dot '.'
                separated key path to the child.
            * truncate: delete the mapping of the document_type (and with it the
                existing data) during initialization. Default: False
            * elasticsearch_args: the ``create`` and ``replace`` flags are consumed by
                ``initialize()``; every other keyword is passed to the ``pyes.es.ES``
                constructor.
        """
        super(ESDataTarget, self).__init__()
        self.document_type = document_type
        self.index = index
        self.host = host
        self.port = port
        self.elasticsearch_args = elasticsearch_args
        self.expand = expand
        self.truncate = truncate
        self._fields = None

    def initialize(self):
        """
        Open the connection and bring the target index into the requested state.

        The ``create`` and ``replace`` flags are taken out of the extra
        keyword arguments given to the constructor:

        * ``create``: create the index, ignoring the error raised when it
          already exists;
        * ``replace``: unless the index was just created, delete it and
          create it again.

        Afterwards the mapping of the document type is deleted when
        ``truncate`` is set, and a mapping built by ``_get_mapping()`` is put
        whenever the document type has none.
        """
        from pyes.es import ES
        # Both exception types handled in this method are imported here;
        # TypeMissingException is needed by the mapping check at the end.
        from pyes.exceptions import IndexAlreadyExistsException, TypeMissingException

        args = self.elasticsearch_args.copy()
        # These two flags are not pyes options; strip them before calling ES().
        create = args.pop("create", False)
        replace = args.pop("replace", False)

        server = self.host or ""
        if self.port:
            # Accept integer ports as well as strings.
            server += ":" + str(self.port)

        self.connection = ES(server, **args)
        self.connection.default_indices = self.index
        self.connection.default_types = self.document_type

        created = False
        if create:
            try:
                self.connection.create_index(self.index)
                self.connection.refresh(self.index)
                created = True
            except IndexAlreadyExistsException:
                # Keep the existing index as it is.
                pass

        # No need to replace an index that was created a moment ago.
        if replace and not created:
            self.connection.delete_index_if_exists(self.index)
            # NOTE(review): this refresh is issued for an index that has just
            # been deleted - verify that it cannot raise for a missing index.
            self.connection.refresh(self.index)
            self.connection.create_index(self.index)
            self.connection.refresh(self.index)

        if self.truncate:
            self.connection.delete_mapping(self.index, self.document_type)
            self.connection.refresh(self.index)

        # check mapping: put a new one when the document type is unknown
        try:
            self.connection.get_mapping(self.document_type, self.index)
        except TypeMissingException:
            self.connection.put_mapping(self.document_type, self._get_mapping(), self.index)

    def _get_mapping(self):
        """Build an ES optimized mapping for the given fields.

        Only fields with a known storage type get an explicit property;
        ``unknown`` and unlisted storage types are skipped so ElasticSearch
        detects them.
        """
        from pyes.mappings import DocumentObjectField, IntegerField, StringField, BooleanField, FloatField, DateField

        # Lookup table replacing a chain of storage-type comparisons.
        property_types = {
            "string": StringField,
            "text": StringField,
            "integer": IntegerField,
            "boolean": BooleanField,
            "date": DateField,
            "float": FloatField,
        }

        document = DocumentObjectField(name=self.document_type)
        # Fields may still be unset (None); treat that as "no fields".
        for field in self.fields or ():
            property_type = property_types.get(field.storage_type)
            if property_type is not None:
                document.add_property(property_type(name=field.name))
            # otherwise: lets es detect the type

        return document

    def append(self, obj):
        """Index one record through the bulk queue.

        A non-dictionary ``obj`` is converted to a dictionary by zipping it
        with ``self.field_names``. The ``id`` key (or, when it is missing,
        the ``_id`` key) of the record is used as the document id.
        """
        if isinstance(obj, dict):
            record = obj
        else:
            record = dict(zip(self.field_names, obj))

        if self.expand:
            record = expand_record(record)

        # An id of 0 is valid; only None and the empty string count as
        # "no id" and trigger the fallback to ``_id``.
        doc_id = record.get('id')
        if doc_id is None or doc_id == "":
            doc_id = record.get('_id')
        self.connection.index(record, self.index, self.document_type, doc_id, bulk=True)

    def finalize(self):
        """Flush whatever is still waiting in the bulk queue."""
        self.connection.flush_bulk(forced=True)
示例#4
0
class ESDataTarget(base.DataTarget):
    """ElasticSearch data target backed by a ``pyes`` connection.

    ``append()`` indexes each record with ``bulk=True`` and ``finalize()``
    forces the bulk queue to be flushed.
    """
    def __init__(self,
                 document_type,
                 database="test",
                 host="127.0.0.1",
                 port="9200",
                 truncate=False,
                 expand=False,
                 **elasticsearch_args):
        """Creates a ElasticSearch data target stream.

        :Attributes:
            * document_type: ElasticSearch document type name
            * database: database (index) name, default is ``test``
            * host: ElasticSearch database server host, default is ``127.0.0.1``
            * port: ElasticSearch port (string or integer), default is ``9200``
            * expand: expand dictionary values and treat children as top-level keys with dot '.'
                separated key path to the child.
            * truncate: delete the mapping of the document_type (and with it the
                existing data) during initialization. Default: False
            * elasticsearch_args: the ``create`` and ``replace`` flags are consumed by
                ``initialize()``; every other keyword is passed to the ``pyes.es.ES``
                constructor.
        """
        self.document_type = document_type
        self.database_name = database
        self.host = host
        self.port = port
        self.elasticsearch_args = elasticsearch_args
        self.expand = expand
        self.truncate = truncate
        self._fields = None

    def initialize(self):
        """Open the connection and bring the target index into the requested state.

        The ``create`` and ``replace`` flags are taken out of the extra
        keyword arguments given to the constructor:

        * ``create``: create the index, ignoring the error raised when it
          already exists;
        * ``replace``: unless the index was just created, delete it and
          create it again.

        Finally, when ``truncate`` is set, the mapping of the document type
        is deleted.
        """
        # Local import: ``time`` is only needed for the pause in the replace
        # branch and may not be imported at module level.
        import time

        from pyes.es import ES
        from pyes.exceptions import IndexAlreadyExistsException

        args = self.elasticsearch_args.copy()
        # These two flags are not pyes options; strip them before calling ES().
        create = args.pop("create", False)
        replace = args.pop("replace", False)

        server = self.host or ""
        if self.port:
            # Accept integer ports as well as strings.
            server += ":" + str(self.port)

        self.connection = ES(server, **args)
        self.connection.default_indices = self.database_name
        self.connection.default_types = self.document_type

        created = False
        if create:
            try:
                self.connection.create_index(self.database_name)
                self.connection.refresh(self.database_name)
                created = True
            except IndexAlreadyExistsException:
                # Keep the existing index as it is.
                pass

        # No need to replace an index that was created a moment ago.
        if replace and not created:
            self.connection.delete_index_if_exists(self.database_name)
            # NOTE(review): fixed pause, presumably to let the deletion settle
            # before the index is created again - confirm it is required.
            time.sleep(2)
            self.connection.create_index(self.database_name)
            self.connection.refresh(self.database_name)

        if self.truncate:
            self.connection.delete_mapping(self.database_name,
                                           self.document_type)
            self.connection.refresh(self.database_name)

    def append(self, obj):
        """Index one record through the bulk queue.

        A non-dictionary ``obj`` is converted to a dictionary by zipping it
        with the stream field names. The ``id`` key (or, when it is missing,
        the ``_id`` key) of the record is used as the document id.
        """
        if isinstance(obj, dict):
            record = obj
        else:
            record = dict(zip(self.fields.names(), obj))

        if self.expand:
            record = expand_record(record)

        # An id of 0 is valid; only None and the empty string count as
        # "no id" and trigger the fallback to ``_id``.
        doc_id = record.get('id')
        if doc_id is None or doc_id == "":
            doc_id = record.get('_id')
        self.connection.index(record,
                              self.database_name,
                              self.document_type,
                              doc_id,
                              bulk=True)

    def finalize(self):
        """Flush whatever is still waiting in the bulk queue."""
        self.connection.flush_bulk(forced=True)