示例#1
0
def document_field(field):
    """
    Translate a Django model field into its ``elasticsearch_dsl`` counterpart.

    This is the default ``field_factory``. The result is:

    * ``None`` for auto-created fields and one-to-many relations (skipped),
    * ``RawMultiString`` for many-to-many relations,
    * the DSL field registered for the field's exact class, or
    * ``RawString`` when the class has no registered entry.
    """
    skip_field = field.auto_created or field.one_to_many
    if skip_field:
        return None

    if field.many_to_many:
        return RawMultiString

    # Lookup is by exact class, so subclasses of the listed Django fields
    # fall through to ``RawString``.
    # NOTE(review): ``index='not_analyzed'`` is carried over from a former
    # ``dsl.String`` declaration -- confirm it is still valid for ``dsl.Text``.
    dsl_by_django_class = dict([
        (models.DateField, dsl.Date()),
        (models.DateTimeField, dsl.Date()),
        (models.IntegerField, dsl.Long()),
        (models.PositiveIntegerField, dsl.Long()),
        (models.BooleanField, dsl.Boolean()),
        (models.NullBooleanField, dsl.Boolean()),
        (models.SlugField, dsl.Text(index='not_analyzed')),
        (models.DecimalField, dsl.Double()),
        (models.FloatField, dsl.Float()),
    ])
    django_class = field.__class__
    return dsl_by_django_class.get(django_class, RawString)
示例#2
0
class TrainingJob(elasticsearch_dsl.Document):
    """Elasticsearch document describing a single training job.

    Declares the job's bookkeeping fields, its metric fields and its
    parameter fields. The index name comes from the ``TRAINING_JOBS``
    constant, which is defined outside this class.
    """

    # Identity and bookkeeping
    id = elasticsearch_dsl.Integer()
    schema_version = elasticsearch_dsl.Integer()
    job_name = elasticsearch_dsl.Keyword()
    author = elasticsearch_dsl.Keyword()
    created_at = elasticsearch_dsl.Date()
    ended_at = elasticsearch_dsl.Date()
    # Stored as full-text (Text) fields rather than exact-match keywords.
    params = elasticsearch_dsl.Text()
    raw_log = elasticsearch_dsl.Text()
    model_url = elasticsearch_dsl.Text()

    # Metrics
    epochs = elasticsearch_dsl.Integer()
    train_acc = elasticsearch_dsl.Float()
    final_val_acc = elasticsearch_dsl.Float()
    best_val_acc = elasticsearch_dsl.Float()
    final_val_loss = elasticsearch_dsl.Float()
    best_val_loss = elasticsearch_dsl.Float()
    final_val_sensitivity = elasticsearch_dsl.Float()
    best_val_sensitivity = elasticsearch_dsl.Float()
    final_val_specificity = elasticsearch_dsl.Float()
    best_val_specificity = elasticsearch_dsl.Float()
    final_val_auc = elasticsearch_dsl.Float()
    best_val_auc = elasticsearch_dsl.Float()

    # Params
    batch_size = elasticsearch_dsl.Integer()
    val_split = elasticsearch_dsl.Float()
    seed = elasticsearch_dsl.Integer()

    rotation_range = elasticsearch_dsl.Float()
    width_shift_range = elasticsearch_dsl.Float()
    height_shift_range = elasticsearch_dsl.Float()
    shear_range = elasticsearch_dsl.Float()
    # Declared as Keyword, unlike the other *_range fields which are Float.
    zoom_range = elasticsearch_dsl.Keyword()
    horizontal_flip = elasticsearch_dsl.Boolean()
    vertical_flip = elasticsearch_dsl.Boolean()

    dropout_rate1 = elasticsearch_dsl.Float()
    dropout_rate2 = elasticsearch_dsl.Float()

    data_dir = elasticsearch_dsl.Keyword()
    gcs_url = elasticsearch_dsl.Keyword()

    mip_thickness = elasticsearch_dsl.Integer()
    height_offset = elasticsearch_dsl.Integer()
    pixel_value_range = elasticsearch_dsl.Keyword()

    # We need to keep a list of params for the parser because
    # we can't use traditional approaches to get the class attrs.
    # Keep this list in sync with the fields of the "Params" section above.
    params_to_parse = [
        'batch_size', 'val_split', 'seed', 'rotation_range',
        'width_shift_range', 'height_shift_range', 'shear_range', 'zoom_range',
        'horizontal_flip', 'vertical_flip', 'dropout_rate1', 'dropout_rate2',
        'data_dir', 'gcs_url', 'mip_thickness', 'height_offset',
        'pixel_value_range'
    ]

    class Index:
        # Index name; TRAINING_JOBS is defined elsewhere in the module.
        name = TRAINING_JOBS
示例#3
0
        def decorator(cls):
            """Attach an elasticsearch_dsl field to ``cls`` for every ``cls.schema`` entry.

            Each schema entry's ``"type"`` selects the field class. Besides the
            standard cerberus syntax, two special keys are recognised:

            * ``"elastic"``: a dict handed raw to the field class as keyword
              arguments.
            * ``"elastictype"``: only removed here; it does not influence the
              field that is created.

            Both special keys are removed from each schema entry once its field
            has been set.

            Raises:
                Exception: if an entry's type has no elasticsearch_dsl mapping.

            Returns:
                The same class object, so the function works as a class decorator.
            """
            print("setup_schema:" + cls.__name__.lower())

            import elasticsearch_dsl

            # Schema type name -> name of the elasticsearch_dsl field class.
            # The class is resolved with getattr only when a schema entry
            # needs it, so nothing is looked up for types that are not used.
            field_class_names = {
                "integer": "Integer",
                "float": "Float",
                "string": "Text",
                "bool": "Boolean",
                "date": "Date",
                "datetime": "Date",
                "number": "Integer",
                "binary": "Byte",
                "list": "Keyword",
            }

            for elem, spec in cls.schema.items():
                # The raw field __init__ keyword arguments.
                elastic = spec.get("elastic", {})
                schema_type = spec["type"]
                try:
                    class_name = field_class_names.get(schema_type)
                except TypeError:
                    # Unhashable type value (e.g. a list of types): unsupported.
                    class_name = None
                if class_name is None:
                    raise Exception(
                        "Wrong Datatype in schema: field {!r} has type {!r}".format(
                            elem, schema_type))

                field_class = getattr(elasticsearch_dsl, class_name)
                setattr(cls, elem, field_class(**elastic))

                # Strip the elastic-only keys so the schema is plain cerberus again.
                spec.pop("elastic", None)
                spec.pop("elastictype", None)

            return cls
示例#4
0
文件: base.py 项目: tjanez/resolwe
class BaseDocument(indices.BaseDocument):
    """Base search document.

    Declares identity, timestamp, contributor and owner fields. ``Slug``,
    ``Name`` and ``User`` are field types defined outside this class.
    """

    id = dsl.Integer()  # pylint: disable=invalid-name
    slug = Slug()
    version = dsl.Keyword()
    name = Name()
    created = dsl.Date()
    modified = dsl.Date()
    contributor_id = dsl.Integer()
    contributor_name = User()
    # We use a separate field for contributor sorting because we use an entirely
    # different value for it (the display name).
    contributor_sort = dsl.Keyword()
    # ``multi=True``: these fields hold a list of values per document.
    owner_ids = dsl.Integer(multi=True)
    owner_names = User(multi=True)
示例#5
0
def test_aggregate_data_schema():
    """Check that ``iu.aggregate_data_schema`` merges every subclass schema.

    A two-level class hierarchy is declared below ``Base``; the aggregated
    result must contain each subclass's key plus the two default entries.
    """
    class Base:  # noqa
        data_schema = {}

    class Sub1(Base):  # noqa
        data_schema = {'Sub1': True}

    class Sub2(Base):  # noqa
        data_schema = {'Sub2': True}

    class SubSub1(Sub2):  # noqa
        data_schema = {'SubSub1': True}

    class SubSub2(Sub2):  # noqa
        data_schema = {'SubSub2': True}

    result = iu.aggregate_data_schema(Base, include_defaults=True)

    # One flag per subclass, then the defaults added by include_defaults=True.
    expected = {
        class_name: True
        for class_name in ('Sub1', 'Sub2', 'SubSub1', 'SubSub2')
    }
    expected['_raw'] = es.Object(dynamic=True)
    expected['timestamp'] = es.Date()

    assert result == expected
示例#6
0
class Node(es.DocType):
    """
    Elasticsearch document describing a node, stored in the ``nodes`` index.
    """

    node_type = es.Keyword()

    objectID = es.Keyword()

    # ``autocomplete`` is an analyzer defined outside this class.
    name = es.Text(
        fielddata=True,
        analyzer=autocomplete
    )

    # NOTE(review): sub-fields of an ``Object`` are normally declared with
    # ``properties=``; confirm that ``fields=`` yields the intended mapping.
    user = es.Object(
        fields={
            'id': es.Keyword(),
            'name': es.Text(
                fielddata=True,
                analyzer=autocomplete)
        }
    )

    description = es.Text()

    is_free = es.Boolean()

    # NOTE(review): same ``fields=`` vs ``properties=`` question as ``user``.
    project = es.Object(
        fields={
            'id': es.Keyword(),
            'name': es.Keyword(),
            'url': es.Keyword(),
        }
    )

    media = es.Keyword()

    picture = es.Keyword()

    # ``multi=True``: the field holds a list of keywords per document.
    tags = es.Keyword(multi=True)
    license_notes = es.Text()

    created_at = es.Date()
    updated_at = es.Date()

    class Meta:
        index = 'nodes'
示例#7
0
class EmbeddingIndex(es.Document):
    """Elasticsearch document holding the metadata of one embedding index.

    Index name and connection come from ``ES_INDEX_EMBEDDING`` and
    ``ES_CLIENT``, both defined outside this class.
    """

    corpus = es.Keyword()
    number_of_documents = es.Integer()
    is_ready = es.Boolean()
    name = es.Keyword()
    description = es.Text()
    datetime_created = es.Date()
    datetime_finished = es.Date()

    by_unit = es.Keyword()  # Token/Word/Sentence/Text
    algorithm = es.Keyword()
    pooling = es.Keyword()
    # Free-form object; no sub-properties are declared for it here.
    meta_parameters = es.Object()

    class Index:
        name = ES_INDEX_EMBEDDING
        using = ES_CLIENT
示例#8
0
def doc_field(type):
    """Return the ``dsl`` field for a type name, or ``RawString`` if unknown.

    Recognised names are ``'date'``, ``'integer'``, ``'boolean'``,
    ``'double'`` and ``'float'``. A fresh set of field instances is built
    on every call.
    """
    dsl_fields = dict(
        date=dsl.Date(),
        integer=dsl.Long(),
        boolean=dsl.Boolean(),
        double=dsl.Double(),
        float=dsl.Float(),
    )
    if type in dsl_fields:
        return dsl_fields[type]
    return RawString
示例#9
0
class DataDocument(BaseDocument):
    """Document for data search.

    Extends ``BaseDocument`` with timing, status, process and tagging fields.
    ``ProcessType`` and ``Name`` are field types defined outside this class.
    """

    started = dsl.Date()
    finished = dsl.Date()
    status = dsl.Keyword()
    process = dsl.Integer()
    process_type = ProcessType()
    # Keep backward compatibility: ``type`` is declared with the same field
    # type as ``process_type``.
    type = ProcessType()
    process_name = Name()
    # ``multi=True``: the field holds a list of keywords per document.
    tags = dsl.Keyword(multi=True)

    collection = dsl.Integer()
    entity = dsl.Integer()

    class Index:
        """Meta class for data search document."""

        name = 'data'
示例#10
0
class GroupDocument(esd.DocType):
    """Elasticsearch document with camera/exposure attributes and file location.

    All string fields are declared ``not_analyzed``, i.e. indexed as exact
    values.
    """
    date = esd.Date()
    aperture = esd.Float()
    exposure = esd.Float()
    focal_length = esd.Float()
    focal_length_35 = esd.Float()
    iso = esd.Integer()
    model = esd.String(index='not_analyzed') #analyzer=esd.analyzer('keyword', tokenizer="keyword", filter=['lowercase', ]))
    lens = esd.String(index='not_analyzed')
    # File location, split into full path, directory and base name.
    path = esd.String(index='not_analyzed')
    dirname = esd.String(index='not_analyzed')
    basename = esd.String(index='not_analyzed')
示例#11
0
class Dictionary(es.Document):
    """Elasticsearch document holding the metadata of one dictionary.

    Index name and connection come from ``ES_INDEX_DICTIONARY_INDEX`` and
    ``ES_CLIENT``, both defined outside this class.
    """

    corpus = es.Keyword()
    name = es.Keyword()
    description = es.Text()
    datetime = es.Date()
    number_of_documents = es.Integer()

    is_ready = es.Boolean()

    class Index:
        name = ES_INDEX_DICTIONARY_INDEX
        using = ES_CLIENT
示例#12
0
class TopicDocument(es.Document):
    """Elasticsearch document linking one topic to one source document.

    Stores the topic id and weight together with a few attributes copied
    from the source document (id, date, source, corpus, view and comment
    counts).
    """

    topic_id = es.Keyword()
    topic_weight = es.Float()
    document_es_id = es.Keyword()
    datetime = es.Date()
    document_source = es.Keyword()
    document_corpus = es.Keyword()
    document_num_views = es.Integer()
    document_num_comments = es.Integer()

    class Index:
        # Base index name; the original note suggests a per-model variant:
        # f"{ES_INDEX_TOPIC_DOCUMENT}_{tm}"
        name = ES_INDEX_TOPIC_DOCUMENT
        using = ES_CLIENT

        settings = {
            "number_of_shards": 3,
            "number_of_replicas": 1,
            "max_result_window": 5000000,
        }
        # NOTE(review): ``settings_dynamic`` and ``mappings`` are not read
        # anywhere in this class; confirm which code consumes them.
        settings_dynamic = {
            "number_of_shards": 2,
            "number_of_replicas": 1,
            "max_result_window": 5000000,
        }
        mappings = {
            "properties": {
                "datetime": {"type": "date"},
                "document_es_id": {"type": "keyword"},
                "document_source": {"type": "keyword"},
                "document_corpus": {"type": "keyword"},
                "document_num_views": {"type": "long"},
                "document_num_comments": {"type": "long"},
                "topic_id": {"type": "keyword"},
                "topic_weight": {"type": "float"},
            }
        }
示例#13
0
class DataDocument(BaseDocument):
    """Document for data search.

    Extends ``BaseDocument`` with timing, status, process, tagging and
    relation fields. ``ProcessType`` and ``Name`` are field types defined
    outside this class.
    """

    started = dsl.Date()
    finished = dsl.Date()
    status = dsl.Keyword()
    process = dsl.Integer()
    process_type = ProcessType()
    # Keep backward compatibility: ``type`` is declared with the same field
    # type as ``process_type``.
    type = ProcessType()  # pylint: disable=invalid-name
    process_name = Name()
    tags = dsl.Keyword(multi=True)

    # Relation fields; ``multi=True`` means each holds a list of integers.
    collection = dsl.Integer(multi=True)
    parents = dsl.Integer(multi=True)
    children = dsl.Integer(multi=True)
    entity = dsl.Integer(multi=True)

    class Meta:
        """Meta class for data search document."""

        index = 'data'
示例#14
0
class TestSearchDocument(BaseDocument):
    """Search document used by tests, stored in the ``test_search`` index."""

    id = dsl.Integer()
    # ``fielddata=True`` is set on this text field.
    name = dsl.Text(fielddata=True)
    num = dsl.Integer()
    date = dsl.Date()
    # Free-form object; no sub-properties are declared for it here.
    json = dsl.Object()

    # Fields using the project-defined ``Name`` and ``ProcessType`` types.
    field_name = Name()
    field_process_type = ProcessType()
    none_test = dsl.Integer()

    class Index:
        name = "test_search"
class DataDocType(es.DocType):
    """Elasticsearch test model, stored in the ``test`` index."""
    # Exact-match (keyword) fields.
    first_name = es.Keyword()
    last_name = es.Keyword()
    # Full-text field.
    city = es.Text()
    skills = es.Keyword()
    birthday = es.Date()
    is_active = es.Boolean()
    score = es.Integer()
    description = es.Text()

    class Meta:
        index = 'test'
示例#16
0
class Mappings(es.Document):
    """Elasticsearch document describing a mapping between two topic models.

    Index name and connection come from ``ES_INDEX_MAPPINGS`` and
    ``ES_CLIENT``, both defined outside this class.
    """

    threshold = es.Keyword()
    meta_dtm_name = es.Keyword()
    topic_modelling_first = es.Keyword()
    topic_modelling_second = es.Keyword()
    # These two declarations must not end with a comma: ``es.Date(),`` binds
    # a one-element tuple, so the attribute would not be a Date field at all.
    topic_modelling_first_from = es.Date()
    topic_modelling_second_to = es.Date()
    mappings_dict = es.Text()
    scores_list = es.Keyword()
    delta_words_dict = es.Text()
    delta_count_dict = es.Text()

    class Index:
        name = ES_INDEX_MAPPINGS
        using = ES_CLIENT

        settings = {
            "index.mapping.total_fields.limit": 5000,
            "number_of_shards": 1,
            "number_of_replicas": 1,
        }

        # Explicit mapping for a subset of the fields declared above.
        # NOTE(review): ``mappings`` is not read anywhere in this class;
        # confirm which code consumes it.
        mappings = {
            "properties": {
                "threshold": {
                    "type": "keyword",
                },
                "meta_dtm_name": {
                    "type": "keyword",
                },
                "topic_modelling_first_from": {
                    "type": "date"
                },
                "topic_modelling_second_to": {
                    "type": "date"
                }
            },
        }
示例#17
0
class META_DTM(es.Document):
    """Elasticsearch document holding the settings of one meta DTM.

    Records a name, two day-based float values, a reset flag and a date
    range.
    """

    meta_name = es.Keyword()
    volume_days = es.Float()
    delta_days = es.Float()
    reset_index = es.Boolean()
    from_date = es.Date()
    to_date = es.Date()

    class Index:
        name = ES_INDEX_META_DTM
        using = ES_CLIENT

        settings = {"number_of_shards": 1, "number_of_replicas": 1}
        # One explicit mapping entry per field declared on the document.
        mappings = {
            "properties": {
                "meta_name": {"type": "keyword"},
                "volume_days": {"type": "float"},
                "delta_days": {"type": "float"},
                "reset_index": {"type": "boolean"},
                "from_date": {"type": "date"},
                "to_date": {"type": "date"},
            },
        }
示例#18
0
class TestSearchDocument(BaseDocument):
    """Search document used by tests, stored in the ``test_search`` index."""

    # pylint: disable=no-member
    id = dsl.Integer()  # pylint: disable=invalid-name
    # ``fielddata=True`` is set on this text field.
    name = dsl.Text(fielddata=True)
    num = dsl.Integer()
    date = dsl.Date()
    # Free-form object; no sub-properties are declared for it here.
    json = dsl.Object()

    # Fields using the project-defined ``Name`` and ``ProcessType`` types.
    field_name = Name()
    field_process_type = ProcessType()
    none_test = dsl.Integer()

    class Meta:
        index = 'test_search'
示例#19
0
class TopicModellingIndex(es.Document):
    """Elasticsearch document holding the metadata of one topic model.

    Index name and connection come from ``ES_INDEX_TOPIC_MODELLING`` and
    ``ES_CLIENT``, both defined outside this class.
    """

    # General description and state
    corpus = es.Keyword()
    source = es.Keyword()
    number_of_documents = es.Integer()
    is_ready = es.Boolean()
    has_topic_info = es.Boolean()
    name = es.Keyword()
    description = es.Text()
    datetime_created = es.Date()
    datetime_finished = es.Date()

    # Date range
    datetime_from = es.Date()
    datetime_to = es.Date()

    # Model configuration
    algorithm = es.Keyword()
    number_of_topics = es.Integer()
    hierarchical = es.Boolean()
    # Free-form object; no sub-properties are declared for it here.
    meta_parameters = es.Object()

    # Score fields
    perplexity = es.Float()
    purity = es.Float()
    contrast = es.Float()
    coherence = es.Float()

    # tau_* coefficient fields
    tau_smooth_sparse_theta = es.Float()
    tau_smooth_sparse_phi = es.Float()
    tau_decorrelator_phi = es.Float()
    tau_coherence_phi = es.Float()

    # Nested documents of type ``Topic`` (defined outside this class).
    topics = es.Nested(Topic)

    is_actualizable = es.Boolean()

    class Index:
        name = ES_INDEX_TOPIC_MODELLING
        using = ES_CLIENT
示例#20
0
class DataDocType(es.Document):
    """Elasticsearch test model, stored in the ``test`` index."""
    # Exact-match (keyword) fields.
    first_name = es.Keyword()
    last_name = es.Keyword()
    # Full-text field.
    city = es.Text()
    skills = es.Keyword()
    birthday = es.Date()
    is_active = es.Boolean()
    score = es.Integer()
    # Geographic point field.
    location = es.GeoPoint()
    description = es.Text()


    class Index:
        name = 'test'
示例#21
0
class AWSIdNameMapping(dsl.DocType):
    """Elasticsearch document mapping a resource id (``rid``) to a name for a key.

    Each document carries the ``key`` it belongs to, the resource id, the
    name and a date.
    """

    class Meta:
        index = 'awsidnamemapping'
    key = dsl.String(index='not_analyzed')
    rid = dsl.String(index='not_analyzed')
    name = dsl.String(index='not_analyzed')
    date = dsl.Date(format='date_optional_time||epoch_millis')

    @classmethod
    def get_id_name_mapping(cls, key):
        """Return ``{rid: name}`` for ``key``, keeping the newest name per rid.

        :param key: value matched against the ``key`` field.
        :returns: dict mapping each resource id to the name of the first hit
            seen for it; hits are requested newest-first.
        """
        s = cls.search()
        s = s.query('match', key=key).sort('-date')
        # The scan helper discards the requested sort unless told to keep it.
        # Without ``preserve_order`` the "first hit wins" loop below would
        # pick an arbitrary name rather than the most recent one.
        s = s.params(preserve_order=True)
        res = {}
        for hit in s.scan():
            if hit.rid not in res:
                res[hit.rid] = hit.name
        return res
示例#22
0
class InfoRiegoRecord(dsl.DocType):
    """Elasticsearch document for one weather record, stored in ``inforiego``.

    Holds a station code and location, the date, several float-valued
    measurements and the station's coordinates and height.
    """

    code = dsl.String()
    location = dsl.String()
    date = dsl.Date()
    # Measurements
    rain = dsl.Float()
    temperature = dsl.Float()
    rel_humidity = dsl.Float()
    radiation = dsl.Float()
    wind_speed = dsl.Float()
    wind_direction = dsl.Float()

    # Station position
    lat_lon = dsl.GeoPoint(lat_lon=True)
    station_height = dsl.Integer()

    def save(self, **kwargs):
        """Save the document; delegates unchanged to the parent ``save``."""
        return super(InfoRiegoRecord, self).save(**kwargs)

    class Meta:
        index = 'inforiego'
示例#23
0
class DocumentLocation(es.Document):
    """Elasticsearch document linking one source document to one location.

    Stores the location's id, name, level and weight together with the
    source document's id, date and source.
    """

    document_es_id = es.Keyword()
    document_datetime = es.Date()
    document_source = es.Keyword()
    location_name = es.Keyword()
    location_level = es.Keyword()
    location_weight = es.Float()
    location_id = es.Keyword()

    class Index:
        # Base index name; the original note flags a parametrised variant:
        # !!! f"{ES_INDEX_DOCUMENT_EVAL}_{tm}_{criterion.id}"
        name = ES_INDEX_DOCUMENT_LOCATION
        using = ES_CLIENT

        settings = {
            "number_of_shards": 3,
            "number_of_replicas": 1,
            "max_result_window": 5000000,
        }
        # One explicit mapping entry per field declared on the document.
        mappings = {
            "properties": {
                "document_datetime": {"type": "date"},
                "document_es_id": {"type": "keyword"},
                "document_source": {"type": "keyword"},
                "location_level": {"type": "keyword"},
                "location_name": {"type": "keyword"},
                "location_weight": {"type": "float"},
                "location_id": {"type": "keyword"},
            }
        }
示例#24
0
class PhotoDocument(esd.DocType):
    """Elasticsearch document with a photo's camera attributes and file location.

    ``model`` and ``lens`` are each stored twice: once ``not_analyzed`` and
    once (``*_ci``) through a keyword-tokenizer analyzer with a lowercase
    filter.
    """
    date = esd.Date()
    aperture = esd.Float()
    exposure = esd.Float()
    focal_length = esd.Float()
    focal_length_35 = esd.Float()
    iso = esd.Integer()
    size = esd.Integer()
    model = esd.String(index='not_analyzed') #analyzer=esd.analyzer('keyword', tokenizer="keyword", filter=['lowercase', ]))
    model_ci = esd.String(analyzer=esd.analyzer('keyword', tokenizer="keyword", filter=['lowercase', ]))
    lens = esd.String(index='not_analyzed')
    lens_ci = esd.String(analyzer=esd.analyzer('keyword', tokenizer="keyword", filter=['lowercase', ]))
    # File location, split into full path, directory and base name.
    path = esd.String(index='not_analyzed')
    dirname = esd.String(index='not_analyzed')
    basename = esd.String(index='not_analyzed')

    def extended_dict(self):
        """Return ``to_dict()`` with the document's meta id added under ``"id"``."""
        dct = self.to_dict()
        dct["id"] = self.meta.id
        return dct
示例#25
0
def aggregate_data_schema(
    base_class: Type,
    include_defaults: bool = True,
) -> Dict[str, Any]:
    """Merge the ``data_schema`` of every subclass of ``base_class`` into one mapping.

    Subclasses are discovered with ``find_subclasses``. A subclass without a
    ``data_schema`` attribute, or with an empty one, contributes nothing.
    Later subclasses overwrite keys set by earlier ones.

    When ``include_defaults`` is true, the ``_raw`` and ``timestamp`` entries
    are set last and therefore win over any subclass entry with those names.
    """
    mapping: Dict = {}

    for subclass in find_subclasses(base_class):
        schema = getattr(subclass, 'data_schema', None)
        if not schema:
            continue
        mapping.update(schema)

    if include_defaults:
        mapping.update({
            '_raw': es.Object(dynamic=True),
            'timestamp': es.Date(),
        })

    return mapping
示例#26
0
class DocumentEval(es.Document):
    """Elasticsearch document holding one evaluation value for a source document.

    Stores the float value, the source document's id, date and source, and
    two keyword fields of topic ids (top and bottom).
    """

    value = es.Float()
    document_es_id = es.Keyword()
    document_datetime = es.Date()
    document_source = es.Keyword()
    topic_ids_top = es.Keyword()
    topic_ids_bottom = es.Keyword()

    class Index:
        # Base index name; the original note flags a parametrised variant:
        # !!! f"{ES_INDEX_DOCUMENT_EVAL}_{tm}_{criterion.id}{_neg}{_m4a}{_m4a_class}"
        name = ES_INDEX_DOCUMENT_EVAL
        using = ES_CLIENT

        settings = {
            "number_of_shards": 3,
            "number_of_replicas": 1,
            "max_result_window": 5000000,
        }
        # One explicit mapping entry per field declared on the document.
        mappings = {
            "properties": {
                "document_datetime": {"type": "date"},
                "document_es_id": {"type": "keyword"},
                "document_source": {"type": "keyword"},
                "value": {"type": "float"},
                "topic_ids_top": {"type": "keyword"},
                "topic_ids_bottom": {"type": "keyword"},
            }
        }
示例#27
0
class Job(es.DocType):
    """Elasticsearch document for a job offer (index ``jobs``, type ``job-offer``).

    The class body first builds the token filters, tokenizer and analyzers
    used by the text fields, then declares the fields, and finally exposes
    a few read-only convenience properties.
    """

    class Meta:
        index = 'jobs'
        doc_type = 'job-offer'

    # --- French text analysis chain -------------------------------------

    # Elision filter configured with a list of French articles.
    french_elision = es.token_filter('french_elision',
                                     type='elision',
                                     articles_case=True,
                                     articles=[
                                         'l', 'm', 't', 'qu', 'n', 's', 'j',
                                         'd', 'c', 'jusqu', 'quoiqu', 'lorsqu',
                                         'puisqu'
                                     ])

    french_stopwords = es.token_filter('french_stopwords',
                                       type='stop',
                                       stopwords='_french_')

    # Do not include this filter if keywords is empty
    # (it is defined here but left out of ``french_analyzer`` below).
    french_keywords = es.token_filter('french_keywords',
                                      type='keyword_marker',
                                      keywords=[])

    french_stemmer = es.token_filter('french_stemmer',
                                     type='stemmer',
                                     language='light_french')

    french_analyzer = es.analyzer(
        'french_analyzer',
        tokenizer='standard',
        filter=[
            'lowercase',
            'asciifolding',
            french_elision,
            french_stopwords,
            # french_keywords,
            french_stemmer
        ],
        char_filter=['html_strip'])

    # --- Technology-name analysis chain ---------------------------------

    # Pattern tokenizer splitting on a space, a comma, or a comma plus space.
    technologies_tokenizer = es.tokenizer('comma_tokenizer',
                                          type='pattern',
                                          pattern=' |,|, ')

    # Synonym rules rewriting language spellings to canonical tokens.
    technologies_synonyms_filter = es.token_filter(
        'technologies_synonyms',
        type='synonym',
        synonyms=[
            'c => c_language', 'c++, cpp => cpp_language',
            'c/c++, c/cpp => c_language', 'c/c++, c/cpp => cpp_language',
            'c#, c♯, csharp => csharp_language',
            'f#, f♯, fsharp => fsharp_language', 'c#, c♯, csharp => dotnet',
            'f#, f♯, fsharp => dotnet', '.net => dotnet'
        ])

    technologies_analyzer = es.analyzer(
        'technologies_analyzer',
        tokenizer=technologies_tokenizer,
        filter=['lowercase', 'asciifolding', technologies_synonyms_filter])

    company_name_analyzer = es.analyzer('company_name_analyzer',
                                        tokenizer='standard',
                                        filter=['lowercase', 'asciifolding'])

    # --- Fields ----------------------------------------------------------

    id = es.Integer()

    url = es.String(index='no')
    source = es.String(index='not_analyzed')

    # ``title`` and ``description`` are analyzed as French text and also
    # expose a ``technologies`` sub-field using the technologies analyzer.
    title = es.String(
        analyzer=french_analyzer,
        fields={'technologies': es.String(analyzer=technologies_analyzer)})

    description = es.String(
        analyzer=french_analyzer,
        fields={'technologies': es.String(analyzer=technologies_analyzer)})

    company = es.String(analyzer=company_name_analyzer)

    company_url = es.String(index='no')

    address = es.String(analyzer=french_analyzer)
    address_is_valid = es.Boolean()

    # Nested entries with a ``tag`` name and an integer ``weight``.
    tags = es.Nested(doc_class=Tag,
                     properties=dict(tag=es.String(index='not_analyzed'),
                                     weight=es.Integer()))

    publication_datetime = es.Date()
    publication_datetime_is_fake = es.Boolean()

    crawl_datetime = es.Date()

    geolocation = es.GeoPoint()
    geolocation_is_valid = es.Boolean()

    def __init__(self, meta=None, **kwargs):
        """Initialise the document, then recompute the index name.

        The current index name is passed through ``compute_index_name`` and
        written back to ``self._doc_type.index``.

        NOTE(review): this runs on every instantiation and feeds the
        previous result back in -- confirm ``compute_index_name`` is
        idempotent and that ``_doc_type`` being shared is intended.
        """
        super(Job, self).__init__(meta, **kwargs)
        self._doc_type.index = compute_index_name(self.index)

    @property
    def index(self):
        """Index name currently stored on ``_doc_type``."""
        return self._doc_type.index

    @property
    def doc_type(self):
        """Document type name stored on ``_doc_type``."""
        return self._doc_type.name

    @property
    def published(self):
        """Publication datetime formatted by ``format_date`` with locale ``FR_fr``."""
        return format_date(self.publication_datetime, locale='FR_fr')

    @property
    def published_in_days(self):
        """Time elapsed since publication, formatted at day granularity (``en_US``)."""
        # NOTE(review): ``datetime.now()`` is naive; presumably the open
        # TODO concerns mixing it with a timezone-aware value -- confirm.
        delta = datetime.now() - self.publication_datetime  # TODO: bugfix
        return format_timedelta(delta, granularity='day', locale='en_US')

    @property
    def alltags(self):
        """Tags whose name is NOT in ``condition_tags``, as ``Tag2(tag, weight)``.

        Inside this method the bare name ``condition_tags`` resolves to a
        module-level name, not to the property of the same name below.
        """
        tags = []
        if self.tags:
            for tag in self.tags:
                if tag['tag'] not in condition_tags:
                    tags.append(Tag2(tag['tag'], tag['weight']))
        return tags

    @property
    def condition_tags(self):
        """Tags whose name IS in the module-level ``condition_tags``.

        Each one is wrapped as ``Tag2(tag, weight, css)`` where ``css`` comes
        from ``Tag2.get_css``.
        """
        tags = []
        if self.tags:
            for tag in self.tags:
                if tag['tag'] in condition_tags:
                    tag = Tag2(tag['tag'], tag['weight'],
                               Tag2.get_css(tag['tag']))
                    tags.append(tag)
        return tags
示例#28
0
class ResponseDocType(FjordDocType):
    """Elasticsearch document type for a feedback ``Response``.

    Declares the indexed fields, builds "more like this" searches and
    converts ``Response`` objects into indexable dicts.
    """

    id = es_dsl.Integer()
    happy = es_dsl.Boolean()
    api = es_dsl.Integer()
    url = es_dsl.String(index='not_analyzed')
    url_domain = es_dsl.String(index='not_analyzed')
    has_email = es_dsl.Boolean()
    description = es_dsl.String(analyzer='snowball')
    category = es_dsl.String(index='not_analyzed')
    description_bigrams = es_dsl.String(index='not_analyzed')
    description_terms = es_dsl.String(analyzer='standard')
    user_agent = es_dsl.String(index='not_analyzed')
    product = es_dsl.String(index='not_analyzed')
    channel = es_dsl.String(index='not_analyzed')
    version = es_dsl.String(index='not_analyzed')
    browser = es_dsl.String(index='not_analyzed')
    browser_version = es_dsl.String(index='not_analyzed')
    platform = es_dsl.String(index='not_analyzed')
    locale = es_dsl.String(index='not_analyzed')
    country = es_dsl.String(index='not_analyzed')
    device = es_dsl.String(index='not_analyzed')
    manufacturer = es_dsl.String(index='not_analyzed')
    source = es_dsl.String(index='not_analyzed')
    campaign = es_dsl.String(index='not_analyzed')
    # The field name must match the ``source_campaign`` key produced by
    # ``extract_doc``; it was previously misspelled ``souce_campaign``, so
    # this not_analyzed mapping never applied to the indexed value.
    # Existing indexes need a reindex to pick up the corrected mapping.
    source_campaign = es_dsl.String(index='not_analyzed')
    organic = es_dsl.Boolean()
    created = es_dsl.Date()

    docs = ResponseDocTypeManager()

    class Meta:
        pass

    def mlt(self):
        """Returns a search with a morelikethis query for docs like this"""
        # Short responses tend to not repeat any words, so then MLT
        # returns nothing. This fixes that by setting min_term_freq to
        # 1. Longer responses tend to repeat important words, so we can
        # set min_term_freq to 2.
        num_words = len(self.description.split(' '))
        min_term_freq = 2 if num_words > 40 else 1

        # Restrict candidates to the same product and platform when known.
        s = self.search()
        if self.product:
            s = s.filter('term', product=self.product)
        if self.platform:
            s = s.filter('term', platform=self.platform)

        s = s.query('more_like_this',
                    fields=['description'],
                    docs=[{
                        '_index': get_index_name(),
                        '_type': self._doc_type.name,
                        '_id': self.id
                    }],
                    min_term_freq=min_term_freq,
                    stop_words=list(ANALYSIS_STOPWORDS))
        return s

    @classmethod
    def get_model(cls):
        """Return the model class this document type indexes."""
        return Response

    @classmethod
    def public_fields(cls):
        """Fields that can be publicly-visible

        .. Note::

           Do NOT include fields that have PII in them.

        """
        return ('id', 'happy', 'api', 'url_domain', 'has_email', 'description',
                'category', 'description_bigrams', 'user_agent', 'product',
                'version', 'platform', 'locale', 'source', 'campaign',
                'organic', 'created')

    @property
    def truncated_description(self):
        """Shorten feedback for dashboard view."""
        return smart_truncate(self.description, length=500)

    @classmethod
    def extract_doc(cls, resp, with_id=True):
        """Converts a Response to a dict of values

        This can be used with ``ResponseDocType.from_obj()`` to create a
        ``ResponseDocType`` object or it can be used for indexing.

        :arg resp: a Response object
        :arg with_id: whether or not to include the ``_id`` value--include
            it when you're bulk indexing

        :returns: a dict

        """
        doc = {
            'id': resp.id,
            'happy': resp.happy,
            'api': resp.api,
            'url': resp.url,
            'url_domain': resp.url_domain,
            # Only whether an email was given is indexed, not the address.
            'has_email': bool(resp.user_email),
            'description': resp.description,
            'user_agent': resp.user_agent,
            'product': resp.product,
            'channel': resp.channel,
            'version': resp.version,
            'browser': resp.browser,
            'browser_version': resp.browser_version,
            'platform': resp.platform,
            'locale': resp.locale,
            'country': resp.country,
            'device': resp.device,
            'manufacturer': resp.manufacturer,
            'source': resp.source,
            'campaign': resp.campaign,
            # "<source>::<campaign>", with '--' standing in for a missing part.
            'source_campaign':
            '::'.join([(resp.source or '--'), (resp.campaign or '--')]),
            'organic': (not resp.campaign),
            'created': resp.created
        }

        # We only compute bigrams for english because the analysis
        # uses English stopwords, stemmers, ...
        if resp.locale.startswith(u'en') and resp.description:
            doc['description_bigrams'] = compute_grams(resp.description)
        else:
            doc['description_bigrams'] = []

        if with_id:
            doc['_id'] = doc['id']
        return doc
class AWSDetailedLineitem(dsl.DocType):
    """Document type stored in the ``awsdetailedlineitem`` index.

    Declares the field mapping that the query helpers defined on this class
    filter and aggregate on.

    NOTE(review): presumably one document per line of an AWS detailed
    billing report -- confirm against the code that indexes these documents.
    """
    class Meta:
        # Name of the Elasticsearch index backing this document type.
        index = 'awsdetailedlineitem'

    # String fields declared with index='not_analyzed' are the ones used in
    # term/terms filters, wildcard queries and terms aggregations by the
    # methods of this class; ``operation`` is the only string field that
    # keeps the default String mapping.
    availability_zone = dsl.String(index='not_analyzed')
    cost = dsl.Double()
    un_blended_cost = dsl.Double()
    item_description = dsl.String(index='not_analyzed')
    linked_account_id = dsl.String(index='not_analyzed')
    operation = dsl.String()
    payer_account_id = dsl.String(index='not_analyzed')
    pricing_plan_id = dsl.Long()
    product_name = dsl.String(index='not_analyzed')
    rate = dsl.Double()
    un_blended_rate = dsl.Double()
    rate_id = dsl.Long()
    record_id = dsl.String(index='not_analyzed')
    reserved_instance = dsl.Boolean()
    resource_id = dsl.String(index='not_analyzed')
    subscription_id = dsl.Long()
    # Key/value pair object; the query helpers address it as ``tag.key``
    # and ``tag.value``.
    tag = dsl.Object(
        properties={
            'key': dsl.String(index='not_analyzed'),
            'value': dsl.String(index='not_analyzed')
        })
    # Both usage dates accept either a strict ISO date (time optional) or
    # epoch milliseconds.
    usage_end_date = dsl.Date(format='strict_date_optional_time||epoch_millis')
    usage_quantity = dsl.Double()
    usage_start_date = dsl.Date(
        format='strict_date_optional_time||epoch_millis')
    usage_type = dsl.String(index='not_analyzed')

    @classmethod
    @with_cache(ttl=3600 * 3, worker_refresh=True)
    def keys_has_data(cls, keys, date_from=None, date_to=None):
        """Tell whether any line item exists for the given account key(s).

        :arg keys: a linked account id, or a list of them
        :arg date_from: optional lower bound on ``usage_start_date``; the
            date range is only applied when this is given
        :arg date_to: upper bound of the range, defaults to the current
            UTC time

        :returns: True when the search reports at least one hit
        """
        if not date_to:
            date_to = datetime.utcnow()

        account_ids = keys if isinstance(keys, list) else [keys]
        query = cls.search().filter('terms', linked_account_id=account_ids)
        if date_from:
            period = {
                'from': date_from.isoformat(),
                'to': date_to.isoformat(),
            }
            query = query.filter('range', usage_start_date=period)

        # size=0: only the hit count is needed, not the documents.
        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)
        return response['hits']['total'] > 0

    @classmethod
    @with_cache(is_json=False, ret=lambda x: datetime.strptime(x, "%Y-%m-%d"))
    def get_first_date(cls, keys):
        """Return the day of the earliest line item for the given key(s).

        :arg keys: a linked account id, or a list of them

        :returns: the part of the smallest ``usage_start_date`` that
            precedes the ``T`` separator, or None when the search reports
            no hit
        """
        account_ids = keys if isinstance(keys, list) else [keys]
        query = cls.search().filter('terms', linked_account_id=account_ids)
        # Ascending sort + size=1 fetches the oldest document only.
        query = query.sort('usage_start_date')
        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=1,
                                 request_timeout=60)
        hits = response['hits']
        if hits['total'] == 0:
            return None
        oldest = hits['hits'][0]['_source']['usage_start_date']
        return oldest.split('T')[0]

    @classmethod
    @with_cache(is_json=False, ret=lambda x: datetime.strptime(x, "%Y-%m-%d"))
    def get_last_date(cls, keys, limit=None):
        """Return the day of the latest line item for the given key(s).

        :arg keys: a linked account id, or a list of them
        :arg limit: optional upper bound on ``usage_start_date``

        :returns: the part of the largest ``usage_start_date`` that
            precedes the ``T`` separator, or None when the search reports
            no hit
        """
        account_ids = keys if isinstance(keys, list) else [keys]
        query = cls.search().filter('terms', linked_account_id=account_ids)
        if limit:
            upper_bound = {'to': limit.isoformat()}
            query = query.filter('range', usage_start_date=upper_bound)
        # Descending sort + size=1 fetches the newest document only.
        query = query.sort('-usage_start_date')
        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=1,
                                 request_timeout=60)
        hits = response['hits']
        if hits['total'] == 0:
            return None
        newest = hits['hits'][0]['_source']['usage_start_date']
        return newest.split('T')[0]

    @classmethod
    def get_first_to_now_date(cls, keys):
        """List month-spaced dates from the first line item up to now.

        :arg keys: a linked account id, or a list of them

        :returns: a list of datetimes starting at ``get_first_date(keys)``
            and advancing one month at a time while strictly earlier than
            the current UTC time; an empty list when there is no first
            date
        """
        def from_date_to_today(d):
            now = datetime.utcnow()
            while d < now:
                yield d
                d += relativedelta(months=1)

        first = cls.get_first_date(keys)
        if first is None:
            # No line item for these keys: comparing None with a datetime
            # would raise TypeError, so report an empty range instead.
            return []
        return list(from_date_to_today(first))

    @classmethod
    def get_first_to_last_date(cls, keys):
        """List month-spaced dates between the first and last line items.

        :arg keys: a linked account id, or a list of them

        :returns: a list of datetimes starting at ``get_first_date(keys)``
            and advancing one month at a time while strictly earlier than
            ``get_last_date(keys)``; an empty list when either bound is
            missing
        """
        def from_date_to_last(d, last):
            while d < last:
                yield d
                d += relativedelta(months=1)

        first = cls.get_first_date(keys)
        if first is None:
            # No line item for these keys: nothing to enumerate.
            return []
        last = cls.get_last_date(keys)
        if last is None:
            # Comparing a datetime with None would raise TypeError.
            return []
        return list(from_date_to_last(first, last))

    @classmethod
    @with_cache(6 * 3600)
    def get_available_tags(cls, keys, only_with_data=None, product_name=None):
        """List the user-defined tag names found on the given account(s).

        :arg keys: a linked account id, or a list of them
        :arg only_with_data: when truthy, it is passed to
            ``AWSStat.latest_hourly_cpu_usage_by_tag`` and
            ``AWSStat.latest_daily_cpu_usage_by_tag`` and a tag name is
            kept only if it appears in the ``'tags'`` entry of either
            result
        :arg product_name: optional product name to restrict the search to

        :returns: ``{'tags': [...]}`` with the tag names sorted
        """
        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        if product_name:
            s = s.filter('term', product_name=product_name)
        s.aggs.bucket('tag_key', 'terms', field='tag.key')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        tags = []
        for tag in res['aggregations']['tag_key']['buckets']:
            # Only keys carrying the 'user:' prefix are considered; the
            # name is the segment that follows the first colon.
            if tag['key'].startswith('user:'):
                name = tag['key'].split(':')[1]
                if not only_with_data or name in AWSStat.latest_hourly_cpu_usage_by_tag(
                        only_with_data
                )['tags'] or name in AWSStat.latest_daily_cpu_usage_by_tag(
                        only_with_data)['tags']:
                    tags.append(name)
        tags.sort()
        return dict(tags=tags)

    @classmethod
    @with_cache(ttl=6 * 3600)
    def get_cost_by_tag(cls, keys, tag, date_from=None, date_to=None):
        """Sum the cost per value of one user tag over a period.

        :arg keys: a linked account id, or a list of them
        :arg tag: tag name; documents are matched on ``tag.key`` equal to
            ``user:<tag>``
        :arg date_from: start of the period, defaults to the first instant
            of the current UTC month
        :arg date_to: end of the period, defaults to the last microsecond
            of ``date_from``'s month

        :returns: ``{'tags': [{'tag_value', 'cost'}, ...], 'total_cost'}``
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            last_day = calendar.monthrange(date_from.year,
                                           date_from.month)[1]
            date_to = date_from.replace(day=last_day, hour=23, minute=59,
                                        second=59, microsecond=999999)

        account_ids = keys if isinstance(keys, list) else [keys]
        period = {'from': date_from.isoformat(), 'to': date_to.isoformat()}
        query = (cls.search()
                 .filter('terms', linked_account_id=account_ids)
                 .filter('term', **{'tag.key': 'user:{}'.format(tag)})
                 .filter('range', usage_start_date=period))

        # One overall sum, plus one sum per distinct tag value.
        query.aggs.bucket('total_cost', 'sum', field='cost')
        by_value = query.aggs.bucket('tag_value',
                                     'terms',
                                     field='tag.value',
                                     size=0x7FFFFFFF)
        by_value.bucket('cost', 'sum', field='cost')

        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)
        aggregations = response['aggregations']

        tags = []
        for bucket in aggregations['tag_value']['buckets']:
            tags.append({
                'tag_value': bucket['key'],
                'cost': bucket['cost']['value'],
            })
        return {
            'tags': tags,
            'total_cost': aggregations['total_cost']['value'],
        }

    @classmethod
    @with_cache(ttl=6 * 3600)
    def get_cost(cls, keys, date_from, date_to=None):
        """Sum the cost of the given account(s) over a period.

        :arg keys: a linked account id, or a list of them
        :arg date_from: start of the period; a falsy value is replaced by
            the first instant of the current UTC month
        :arg date_to: end of the period, defaults to the last microsecond
            of ``date_from``'s day

        :returns: ``{'total_cost': <sum of cost>}``
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            date_to = date_from.replace(
                hour=23, minute=59, second=59, microsecond=999999)

        account_ids = keys if isinstance(keys, list) else [keys]
        period = {'from': date_from.isoformat(), 'to': date_to.isoformat()}
        query = (cls.search()
                 .filter('terms', linked_account_id=account_ids)
                 .filter('range', usage_start_date=period))
        query.aggs.bucket('total_cost', 'sum', field='cost')

        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)
        total = response['aggregations']['total_cost']['value']
        return {'total_cost': total}

    @classmethod
    @with_cache()
    def get_monthly_cost_by_tag(cls, keys, tag, date_from=None, date_to=None):
        """Sum the cost per month and per value of one user tag.

        :arg keys: a linked account id, or a list of them
        :arg tag: tag name; documents are matched on ``tag.key`` equal to
            ``user:<tag>``
        :arg date_from: start of the period, defaults to the first instant
            of the current UTC month
        :arg date_to: end of the period, defaults to the last microsecond
            of ``date_from``'s month

        :returns: ``{'months': [...]}`` where each entry holds ``month``
            (the bucket date with its last three characters removed),
            ``tags`` (list of ``{'tag_value', 'cost'}``) and ``total_cost``
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            last_day = calendar.monthrange(date_from.year,
                                           date_from.month)[1]
            date_to = date_from.replace(day=last_day, hour=23, minute=59,
                                        second=59, microsecond=999999)

        account_ids = keys if isinstance(keys, list) else [keys]
        period = {'from': date_from.isoformat(), 'to': date_to.isoformat()}
        query = (cls.search()
                 .filter('terms', linked_account_id=account_ids)
                 .filter('term', **{'tag.key': 'user:{}'.format(tag)})
                 .filter('range', usage_start_date=period))

        # Monthly histogram -> (monthly total, per tag value -> cost).
        by_month = query.aggs.bucket('intervals',
                                     'date_histogram',
                                     field='usage_start_date',
                                     interval='month',
                                     min_doc_count=1)
        by_month.bucket('total_cost', 'sum', field='cost')
        by_value = by_month.bucket('tag_value',
                                   'terms',
                                   field='tag.value',
                                   size=0x7FFFFFFF)
        by_value.bucket('cost', 'sum', field='cost')

        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        months = []
        for interval in response['aggregations']['intervals']['buckets']:
            month = interval['key_as_string'].split('T')[0][:-3]
            per_value = [{
                'tag_value': bucket['key'],
                'cost': bucket['cost']['value'],
            } for bucket in interval['tag_value']['buckets']]
            months.append({
                'month': month,
                'tags': per_value,
                'total_cost': interval['total_cost']['value'],
            })
        return {'months': months}

    @classmethod
    @with_cache()
    def get_cost_by_product(cls,
                            key,
                            date_from=None,
                            date_to=None,
                            without_discount=False,
                            only_discount=False,
                            size=0x7FFFFFFF):
        """Sum the non-zero costs of one account per product.

        :arg key: a single linked account id
        :arg date_from: start of the period, defaults to the first instant
            of the current UTC month
        :arg date_to: end of the period, defaults to the last microsecond
            of ``date_from``'s month
        :arg without_discount: exclude items whose description is
            ``PAR_APN_ProgramFee_2500``
        :arg only_discount: keep only items with that description
        :arg size: maximum number of product buckets requested

        :returns: ``{'products': [{'product', 'cost'}, ...]}``; buckets are
            requested in descending cost order and product names are
            mapped through ``SHORT_NAMES`` when an entry exists
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            last_day = calendar.monthrange(date_from.year,
                                           date_from.month)[1]
            date_to = date_from.replace(day=last_day, hour=23, minute=59,
                                        second=59, microsecond=999999)

        period = {'from': date_from.isoformat(), 'to': date_to.isoformat()}
        query = (cls.search()
                 .filter('term', linked_account_id=key)
                 .filter('range', usage_start_date=period))
        if without_discount:
            not_discount = ~dsl.Q('term',
                                  item_description='PAR_APN_ProgramFee_2500')
            query = query.query('bool', filter=[not_discount])
        if only_discount:
            query = query.filter('term',
                                 item_description='PAR_APN_ProgramFee_2500')

        by_product = query.aggs.bucket('products',
                                       'terms',
                                       field='product_name',
                                       order={'cost': 'desc'},
                                       size=size)
        by_product.bucket('cost', 'sum', field='cost')

        # Zero-cost line items are left out of the aggregation.
        non_zero = ~dsl.Q('term', cost=0)
        query = query.query('bool', filter=[non_zero])
        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        products = []
        for bucket in response['aggregations']['products']['buckets']:
            products.append({
                'product': SHORT_NAMES.get(bucket['key'], bucket['key']),
                'cost': bucket['cost']['value'],
            })
        return {'products': products}

    @classmethod
    @with_cache()
    def get_cost_by_region(cls,
                           keys,
                           tagged=False,
                           byaccount=False,
                           date_from=None,
                           date_to=None,
                           size=0x7FFFFFFF):
        """Aggregate monthly costs per availability zone.

        :arg keys: a linked account id, or a list of them
        :arg tagged: also break each region down by ``tag.value``
        :arg byaccount: group by ``linked_account_id`` before the monthly
            histogram
        :arg date_from: start of the period, defaults to the first instant
            of the current UTC month
        :arg date_to: end of the period, defaults to the last microsecond
            of ``date_from``'s month
        :arg size: maximum number of region buckets requested

        :returns: the raw ``aggregations`` section of the search response;
            nesting is [``accounts`` ->] ``intervals`` -> ``regions``
            [-> ``tags``], each region/tag bucket carrying a ``cost`` sum
        """
        date_from = date_from or datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })

        agg = s.aggs
        if byaccount:
            agg = agg.bucket('accounts', 'terms', field='linked_account_id')
        agg = agg.bucket('intervals',
                         'date_histogram',
                         field='usage_start_date',
                         interval='month',
                         min_doc_count=1)
        agg = agg.bucket('regions',
                         'terms',
                         field='availability_zone',
                         size=size)
        agg.bucket('cost', 'sum', field='cost')
        if tagged:
            agg = agg.bucket('tags', 'terms', field='tag.value')
            agg.bucket('cost', 'sum', field='cost')
        # Same 60s timeout as every other query of this class; this is the
        # most deeply nested aggregation, so the client default is the
        # least appropriate here.
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        return res['aggregations']

    @classmethod
    @with_cache()
    def get_monthly_cost(cls,
                         keys,
                         date_from=None,
                         date_to=None,
                         size=0x7FFFFFFF):
        """Sum the cost of the given account(s) per month.

        :arg keys: a linked account id, or a list of them
        :arg date_from: start of the period, defaults to the first instant
            of the current UTC month
        :arg date_to: end of the period, defaults to the last microsecond
            of ``date_from``'s month
        :arg size: accepted but not used by this query

        :returns: ``{'months': [{'month', 'total_cost'}, ...]}``
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            last_day = calendar.monthrange(date_from.year,
                                           date_from.month)[1]
            date_to = date_from.replace(day=last_day, hour=23, minute=59,
                                        second=59, microsecond=999999)

        account_ids = keys if isinstance(keys, list) else [keys]
        period = {'from': date_from.isoformat(), 'to': date_to.isoformat()}
        query = (cls.search()
                 .filter('terms', linked_account_id=account_ids)
                 .filter('range', usage_start_date=period))
        by_month = query.aggs.bucket('intervals',
                                     'date_histogram',
                                     field='usage_start_date',
                                     interval='month',
                                     min_doc_count=1)
        by_month.bucket('cost', 'sum', field='cost')

        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        months = []
        for interval in response['aggregations']['intervals']['buckets']:
            months.append({
                'month': interval['key_as_string'].split('T')[0],
                'total_cost': interval['cost']['value'],
            })
        return {'months': months}

    @classmethod
    @with_cache()
    def get_monthly_cost_by_product(cls,
                                    keys,
                                    tagged=False,
                                    date_from=None,
                                    date_to=None,
                                    size=0x7FFFFFFF):
        """Sum the non-zero costs per month and per product.

        :arg keys: a linked account id, or a list of them
        :arg tagged: also break each product down by ``tag.value``; the
            part of a product's cost not covered by its tag buckets is
            reported under the name ``untagged``
        :arg date_from: start of the period, defaults to the first instant
            of the current UTC month
        :arg date_to: end of the period, defaults to the last microsecond
            of ``date_from``'s month
        :arg size: maximum number of product buckets requested per month

        :returns: ``{'months': [{'month', 'products': [...]}, ...]}`` where
            each product holds ``product``, ``cost`` and, when ``tagged``,
            ``tags`` (list of ``{'name', 'cost'}``)
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            last_day = calendar.monthrange(date_from.year,
                                           date_from.month)[1]
            date_to = date_from.replace(day=last_day, hour=23, minute=59,
                                        second=59, microsecond=999999)

        account_ids = keys if isinstance(keys, list) else [keys]
        period = {'from': date_from.isoformat(), 'to': date_to.isoformat()}
        query = (cls.search()
                 .filter('terms', linked_account_id=account_ids)
                 .filter('range', usage_start_date=period))

        by_month = query.aggs.bucket('intervals',
                                     'date_histogram',
                                     field='usage_start_date',
                                     interval='month',
                                     min_doc_count=1)
        by_product = by_month.bucket('products',
                                     'terms',
                                     field='product_name',
                                     size=size)
        by_product.bucket('cost', 'sum', field='cost')
        if tagged:
            by_tag = by_product.bucket('tags', 'terms', field='tag.value')
            by_tag.bucket('cost', 'sum', field='cost')

        # Zero-cost line items are left out of the aggregation.
        query = query.query('bool', filter=[~dsl.Q('term', cost=0)])
        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        def tag_breakdown(tag_buckets, product_total):
            """Return the per-tag costs plus the untagged remainder."""
            entries = []
            tagged_total = 0.0
            for bucket in tag_buckets:
                value = bucket['cost']['value']
                tagged_total += value
                entries.append({'name': bucket['key'], 'cost': value})
            if product_total != tagged_total:
                entries.append({
                    'name': 'untagged',
                    'cost': product_total - tagged_total,
                })
            return entries

        months = []
        for interval in response['aggregations']['intervals']['buckets']:
            month = interval['key_as_string'].split('T')[0]
            products = []
            for product in interval['products']['buckets']:
                entry = {
                    'product': SHORT_NAMES.get(product['key'],
                                               product['key']),
                    'cost': product['cost']['value'],
                }
                if tagged:
                    entry['tags'] = tag_breakdown(product['tags']['buckets'],
                                                  product['cost']['value'])
                products.append(entry)
            months.append({'month': month, 'products': products})
        return {'months': months}

    @classmethod
    @with_cache(ttl=4 * 3600)
    def get_daily_cost_by_product(cls,
                                  keys,
                                  date_from=None,
                                  date_to=None,
                                  size=0x7FFFFFFF):
        """Sum the non-zero costs per day and per product.

        :arg keys: a linked account id, or a list of them
        :arg date_from: start of the period, defaults to midnight of the
            current UTC day
        :arg date_to: end of the period, defaults to the last microsecond
            of ``date_from``'s day
        :arg size: maximum number of product buckets requested per day

        :returns: ``{'days': [{'day', 'products': [{'product', 'cost'},
            ...]}, ...]}``
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            date_to = date_from.replace(
                hour=23, minute=59, second=59, microsecond=999999)

        account_ids = keys if isinstance(keys, list) else [keys]
        period = {'from': date_from.isoformat(), 'to': date_to.isoformat()}
        query = (cls.search()
                 .filter('terms', linked_account_id=account_ids)
                 .filter('range', usage_start_date=period))

        by_day = query.aggs.bucket('intervals',
                                   'date_histogram',
                                   field='usage_start_date',
                                   interval='day',
                                   min_doc_count=1)
        by_product = by_day.bucket('products',
                                   'terms',
                                   field='product_name',
                                   size=size)
        by_product.metric('cost', 'sum', field='cost')

        # Zero-cost line items are left out of the aggregation.
        query = query.query('bool', filter=[~dsl.Q('term', cost=0)])
        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        days = []
        for interval in response['aggregations']['intervals']['buckets']:
            day = interval['key_as_string'].split('T')[0]
            products = []
            for product in interval['products']['buckets']:
                products.append({
                    'product': SHORT_NAMES.get(product['key'],
                                               product['key']),
                    'cost': product['cost']['value'],
                })
            days.append({'day': day, 'products': products})
        return {'days': days}

    @classmethod
    @with_cache(ttl=24 * 3600)
    def get_yearly_cost_by_product(cls,
                                   keys,
                                   date_from=None,
                                   date_to=None,
                                   size=0x7FFFFFFF):
        """Sum the non-zero costs per year and per product.

        :arg keys: a linked account id, or a list of them
        :arg date_from: start of the period, defaults to the first instant
            of the current UTC year
        :arg date_to: end of the period, defaults to the last microsecond
            of December 31st of ``date_from``'s year
        :arg size: maximum number of product buckets requested per year

        :returns: ``{'years': [{'year', 'products': [{'product', 'cost'},
            ...]}, ...]}`` where ``year`` is the first four characters of
            the bucket date
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                month=1, day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            date_to = date_from.replace(month=12, day=31, hour=23,
                                        minute=59, second=59,
                                        microsecond=999999)

        account_ids = keys if isinstance(keys, list) else [keys]
        period = {'from': date_from.isoformat(), 'to': date_to.isoformat()}
        query = (cls.search()
                 .filter('terms', linked_account_id=account_ids)
                 .filter('range', usage_start_date=period))

        by_year = query.aggs.bucket('intervals',
                                    'date_histogram',
                                    field='usage_start_date',
                                    interval='year',
                                    min_doc_count=1)
        by_product = by_year.bucket('products',
                                    'terms',
                                    field='product_name',
                                    size=size)
        by_product.metric('cost', 'sum', field='cost')

        # Zero-cost line items are left out of the aggregation.
        query = query.query('bool', filter=[~dsl.Q('term', cost=0)])
        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        years = []
        for interval in response['aggregations']['intervals']['buckets']:
            year = interval['key_as_string'][:4]
            products = []
            for product in interval['products']['buckets']:
                products.append({
                    'product': SHORT_NAMES.get(product['key'],
                                               product['key']),
                    'cost': product['cost']['value'],
                })
            years.append({'year': year, 'products': products})
        return {'years': years}

    @classmethod
    @with_cache()
    def get_cost_by_resource(cls,
                             keys,
                             date_from=None,
                             date_to=None,
                             search=None):
        """Sum the cost per resource id over a period.

        :arg keys: a linked account id, or a list of them
        :arg date_from: start of the period, defaults to the first instant
            of the current UTC month
        :arg date_to: end of the period, defaults to the last microsecond
            of ``date_from``'s month
        :arg search: optional substring; only resource ids matching the
            wildcard ``*<search>*`` are considered

        :returns: a list of ``{'resource', 'cost'}`` dicts, buckets being
            requested in descending cost order
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            last_day = calendar.monthrange(date_from.year,
                                           date_from.month)[1]
            date_to = date_from.replace(day=last_day, hour=23, minute=59,
                                        second=59, microsecond=999999)

        account_ids = keys if isinstance(keys, list) else [keys]
        period = {'from': date_from.isoformat(), 'to': date_to.isoformat()}
        query = (cls.search()
                 .filter('terms', linked_account_id=account_ids)
                 .filter('range', usage_start_date=period))
        if search:
            pattern = '*{}*'.format(search)
            query = query.query('wildcard', resource_id=pattern)

        by_resource = query.aggs.bucket('resources',
                                        'terms',
                                        field='resource_id',
                                        order={'cost': 'desc'},
                                        size=0x7FFFFFFF)
        by_resource.bucket('cost', 'sum', field='cost')

        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        resources = []
        for bucket in response['aggregations']['resources']['buckets']:
            resources.append({
                'resource': bucket['key'],
                'cost': bucket['cost']['value'],
            })
        return resources

    @classmethod
    def get_monthly_cost_by_resource(cls,
                                     resource_ids,
                                     date_from=None,
                                     date_to=None):
        """Sum the cost of a set of resources per month.

        :arg resource_ids: iterable of resource ids; when it is falsy no
            search is performed
        :arg date_from: start of the period, defaults to the first instant
            of the current UTC month
        :arg date_to: end of the period, defaults to the last microsecond
            of ``date_from``'s month

        :returns: a dict mapping each monthly bucket's ``key_as_string``
            to its summed cost; empty when ``resource_ids`` is falsy
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            last_day = calendar.monthrange(date_from.year,
                                           date_from.month)[1]
            date_to = date_from.replace(day=last_day, hour=23, minute=59,
                                        second=59, microsecond=999999)

        # Nothing to look up: skip the round trip to Elasticsearch.
        if not resource_ids:
            return {}

        period = {'from': date_from.isoformat(), 'to': date_to.isoformat()}
        query = (cls.search()
                 .filter('range', usage_start_date=period)
                 .filter('terms', resource_id=list(resource_ids)))
        by_month = query.aggs.bucket('months',
                                     'date_histogram',
                                     field='usage_start_date',
                                     interval='month',
                                     min_doc_count=1)
        by_month.metric('cost', 'sum', field='cost')

        response = client.search('awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        costs = {}
        for bucket in response['aggregations']['months']['buckets']:
            costs[bucket['key_as_string']] = bucket['cost']['value']
        return costs

    @classmethod
    @with_cache()
    def get_lambda_usage(cls, keys, date_from=None, date_to=None):
        """Summarise 'AWS Lambda' usage per resource over a period.

        :arg keys: a linked account id, or a list of them
        :arg date_from: start of the period, defaults to the first instant
            of the current UTC month
        :arg date_to: end of the period, defaults to the last microsecond
            of ``date_from``'s month

        :returns: a list with one dict per resource bucket holding ``rid``
            (the resource id), ``name`` (its last ``:``-separated
            segment), ``requests`` and ``gb_seconds`` (summed quantity of
            the first usage type ending in ``-Request`` /
            ``-Lambda-GB-Second``, None when there is none), ``cost`` (the
            average of ``cost``) and ``raw_cost`` (result of
            ``lambdapricing.get_raw_cost``)
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            last_day = calendar.monthrange(date_from.year,
                                           date_from.month)[1]
            date_to = date_from.replace(day=last_day, hour=23, minute=59,
                                        second=59, microsecond=999999)

        account_ids = keys if isinstance(keys, list) else [keys]
        period = {'from': date_from.isoformat(), 'to': date_to.isoformat()}
        query = (cls.search()
                 .filter('terms', linked_account_id=account_ids)
                 .filter('term', product_name='AWS Lambda')
                 .filter('range', usage_start_date=period))

        # resources -> usage types -> item descriptions, each level with
        # its own metric.
        by_resource = query.aggs.bucket('resources',
                                        'terms',
                                        field='resource_id',
                                        size=0x7FFFFFFF)
        by_resource.metric('cost', 'avg', field='cost')
        by_type = by_resource.bucket('types',
                                     'terms',
                                     field='usage_type',
                                     size=0x7FFFFFFF)
        by_type.metric('quantity', 'sum', field='usage_quantity')
        by_description = by_type.bucket('descriptions',
                                        'terms',
                                        field='item_description',
                                        size=0x7FFFFFFF)
        by_description.metric('quantity', 'sum', field='usage_quantity')

        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        def quantity_for(type_buckets, suffix):
            """Quantity of the first bucket whose key ends with suffix."""
            matching = (bucket['quantity']['value']
                        for bucket in type_buckets
                        if bucket['key'].endswith(suffix))
            return next(matching, None)

        usages = []
        for resource in response['aggregations']['resources']['buckets']:
            type_buckets = resource['types']['buckets']
            descriptions = [
                type_bucket['descriptions']['buckets']
                for type_bucket in type_buckets
            ]
            usages.append({
                'rid': resource['key'],
                'name': resource['key'].split(':')[-1],
                'requests': quantity_for(type_buckets, '-Request'),
                'gb_seconds': quantity_for(type_buckets,
                                           '-Lambda-GB-Second'),
                'cost': resource['cost']['value'],
                'raw_cost': lambdapricing.get_raw_cost(descriptions),
            })
        return usages

    @classmethod
    @with_cache()
    def get_s3_bandwidth_costs(cls, key, date_from=None, date_to=None):
        """Sum cost and quantity per usage type for S3 on one account.

        :arg key: a single linked account id
        :arg date_from: start of the period, defaults to the first instant
            of the current UTC month
        :arg date_to: end of the period, defaults to the last microsecond
            of ``date_from``'s month

        :returns: a list of ``{'type', 'quantity', 'cost'}`` dicts, one
            per ``usage_type`` bucket of the product
            'Amazon Simple Storage Service'
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            last_day = calendar.monthrange(date_from.year,
                                           date_from.month)[1]
            date_to = date_from.replace(day=last_day, hour=23, minute=59,
                                        second=59, microsecond=999999)

        period = {'from': date_from.isoformat(), 'to': date_to.isoformat()}
        query = (cls.search()
                 .filter('term', linked_account_id=key)
                 .filter('term', product_name='Amazon Simple Storage Service')
                 .filter('range', usage_start_date=period))
        by_type = query.aggs.bucket('types',
                                    'terms',
                                    field='usage_type',
                                    size=0x7FFFFFFF)
        by_type.metric('cost', 'sum', field='cost')
        by_type.metric('gb', 'sum', field='usage_quantity')

        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        transfers = []
        for bucket in response['aggregations']['types']['buckets']:
            transfers.append({
                'type': bucket['key'],
                'quantity': bucket['gb']['value'],
                'cost': bucket['cost']['value'],
            })
        return transfers

    @classmethod
    @with_cache()
    def get_ec2_bandwidth_costs(cls, key, date_from=None, date_to=None):
        """Sum cost and quantity per usage type for EC2 on one account.

        :arg key: a single linked account id
        :arg date_from: start of the period, defaults to the first instant
            of the current UTC month
        :arg date_to: end of the period, defaults to the last microsecond
            of ``date_from``'s month

        :returns: a list of ``{'type', 'quantity', 'cost'}`` dicts, one
            per ``usage_type`` bucket of the product
            'Amazon Elastic Compute Cloud'
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            last_day = calendar.monthrange(date_from.year,
                                           date_from.month)[1]
            date_to = date_from.replace(day=last_day, hour=23, minute=59,
                                        second=59, microsecond=999999)

        period = {'from': date_from.isoformat(), 'to': date_to.isoformat()}
        query = (cls.search()
                 .filter('term', linked_account_id=key)
                 .filter('term', product_name='Amazon Elastic Compute Cloud')
                 .filter('range', usage_start_date=period))
        by_type = query.aggs.bucket('types',
                                    'terms',
                                    field='usage_type',
                                    size=0x7FFFFFFF)
        by_type.metric('cost', 'sum', field='cost')
        by_type.metric('gb', 'sum', field='usage_quantity')

        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        transfers = []
        for bucket in response['aggregations']['types']['buckets']:
            transfers.append({
                'type': bucket['key'],
                'quantity': bucket['gb']['value'],
                'cost': bucket['cost']['value'],
            })
        return transfers

    @classmethod
    def get_ec2_daily_cost(cls, key):
        """Yield the summed EC2 cost of one account for each day with data.

        This is a generator, so the search is only issued once iteration
        starts.

        Args:
            key: Value matched against ``linked_account_id``.

        Yields:
            ``(day, cost)`` tuples where ``day`` is the part of the bucket's
            ``key_as_string`` before the ``T`` and ``cost`` is the sum of
            ``cost`` for that day.
        """
        query = (cls.search()
                 .filter('term', linked_account_id=key)
                 .filter('term', product_name='Amazon Elastic Compute Cloud'))

        # min_doc_count=1 leaves out days that have no line items at all.
        per_day = query.aggs.bucket('intervals',
                                    'date_histogram',
                                    field='usage_start_date',
                                    interval='day',
                                    min_doc_count=1)
        per_day.metric('cost', 'sum', field='cost')

        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        for bucket in response['aggregations']['intervals']['buckets']:
            day = bucket['key_as_string'].split('T')[0]
            yield day, bucket['cost']['value']

    @classmethod
    @with_cache()
    def get_elb_usage_a_day(cls, keys, date_from=None, date_to=None):
        """Average daily cost, hours and bytes for each load balancer.

        Args:
            keys: One value or a list of values matched against
                ``linked_account_id``.
            date_from: Start of the window. Defaults to the first instant of
                the current UTC month.
            date_to: End of the window. Defaults to the last microsecond of
                the month that contains ``date_from``.

        Returns:
            A list of dicts, one per ``resource_id`` bucket, with the keys
            ``rid``, ``cost``, ``hours`` and ``bytes``. Each figure is the
            total over the window divided by the number of whole days in it.
        """
        date_from = date_from or datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        # A window shorter than one day has ``.days == 0``; floor the divisor
        # at 1 so such a window reports its totals instead of raising
        # ZeroDivisionError.
        # NOTE(review): with the default bounds the window is one microsecond
        # short of a full month, so ``.days`` is one less than the number of
        # calendar days -- confirm whether that is intended.
        days = max((date_to - date_from).days, 1)
        # Factor applied to the summed '*Bytes' quantities; presumably they
        # are expressed in GiB -- TODO confirm against the billing data.
        gib = Fraction(2**30)

        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        s = s.filter("prefix", resource_id="arn:aws:elasticloadbalancing")
        s = s.sort({"usage_start_date": {"order": "desc"}})
        agg = s.aggs.bucket('rid',
                            'terms',
                            field='resource_id',
                            size=0x7FFFFFFF)
        agg.metric('cost', 'sum', field='cost')
        agg = agg.bucket('types', 'terms', field='usage_type', size=0x7FFFFFFF)
        agg.metric('quantity', 'sum', field='usage_quantity')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        def total_quantity(elb, suffix):
            # Sum the quantity of every usage type whose name ends in suffix.
            return sum([
                x['quantity']['value'] for x in elb['types']['buckets']
                if x['key'].endswith(suffix)
            ])

        return [{
            'rid':
            elb['key'],
            'cost':
            elb['cost']['value'] / days,
            'hours':
            float(total_quantity(elb, 'LoadBalancerUsage') / days),
            'bytes':
            float((total_quantity(elb, 'Bytes') * gib) / days),
        } for elb in res['aggregations']['rid']['buckets']]

    @classmethod
    @with_cache()
    def get_instance_type(cls, keys, date_from=None, date_to=None):
        """Group EC2 ``BoxUsage`` line items by hour, instance type and region.

        Args:
            keys: One value or a list of values matched against
                ``linked_account_id``.
            date_from: Start of the window. Defaults to the first instant of
                the current UTC month.
            date_to: End of the window. Defaults to the last microsecond of
                the month that contains ``date_from``.

        Returns:
            A list of dicts in first-seen order, each with ``hour``,
            ``instance``, ``region``, ``ridCount`` and ``rids`` (the resource
            ids sharing that hour/instance/region). At most 10000 hits, the
            most recent first, are examined.
        """
        date_from = date_from or datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        s = cls.search()
        s = s.extra(_source=[
            'usage_start_date', 'usage_type', 'availability_zone',
            'resource_id'
        ])
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        s = s.filter("term", product_name='Amazon Elastic Compute Cloud')
        s = s.query('wildcard', usage_type='*BoxUsage:*')
        s = s.filter('exists', field='resource_id')
        s = s.sort({"usage_start_date": {"order": "desc"}})
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=10000,
                            request_timeout=60)

        def cut_region_name(zone):
            # Drop a trailing letter, e.g. 'us-east-1a' -> 'us-east-1'.
            return zone[:-1] if zone[-1].isalpha() else zone

        types = []
        # (hour, instance, region) -> the dict already stored in ``types``.
        refs = {}

        for hit in res['hits']['hits']:
            source = hit['_source']
            # A missing, empty or null zone is reported as 'unknown'; indexing
            # the last character of such a value would raise otherwise.
            zone = source.get('availability_zone')
            region = cut_region_name(zone) if zone else 'unknown'
            hour = source['usage_start_date']
            # The wildcard query guarantees a ':' in usage_type.
            instance = source['usage_type'].split(':')[1]

            ref_tuple = (hour, instance, region)
            entry = refs.get(ref_tuple)
            if entry is None:
                entry = {
                    'hour': hour,
                    'instance': instance,
                    'region': region,
                    'ridCount': 0,
                    'rids': [],
                }
                refs[ref_tuple] = entry
                types.append(entry)
            entry['rids'].append(source['resource_id'])
            entry['ridCount'] += 1
        return types

    @classmethod
    @with_cache()
    def get_instance_hour(cls,
                          keys,
                          date_from=None,
                          date_to=None,
                          min_hour=None):
        """Average number of ``BoxUsage`` line items per day for each instance.

        Args:
            keys: One value or a list of values matched against
                ``linked_account_id``.
            date_from: Start of the window. Defaults to the first instant of
                the current UTC month.
            date_to: End of the window. Defaults to the last microsecond of
                the month that contains ``date_from``.
            min_hour: When truthy, instances whose average is below this
                value are left out.

        Returns:
            A list of ``{'id': ..., 'hours': ...}`` dicts sorted by ``hours``
            in descending order. ``hours`` is the mean document count over
            the days on which the instance has at least one line item.
        """
        if not date_from:
            date_from = datetime.utcnow().replace(
                day=1, hour=0, minute=0, second=0, microsecond=0)
        if not date_to:
            last_day = calendar.monthrange(date_from.year,
                                           date_from.month)[1]
            date_to = date_from.replace(day=last_day,
                                        hour=23,
                                        minute=59,
                                        second=59,
                                        microsecond=999999)
        account_ids = keys if isinstance(keys, list) else [keys]
        window = {'from': date_from.isoformat(), 'to': date_to.isoformat()}

        query = (cls.search()
                 .filter('terms', linked_account_id=account_ids)
                 .filter('range', usage_start_date=window)
                 .filter("term", product_name='Amazon Elastic Compute Cloud')
                 .filter('prefix', resource_id='i-')
                 .query('wildcard', usage_type='*BoxUsage*'))

        per_instance = query.aggs.bucket('resource_id',
                                         'terms',
                                         field='resource_id',
                                         size=0x7FFFFFFF)
        # min_doc_count=1 keeps only the days on which the instance appears.
        per_instance.bucket('days',
                            'date_histogram',
                            field='usage_start_date',
                            interval='day',
                            min_doc_count=1)

        response = client.search(index='awsdetailedlineitem',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        instance_list = []
        for bucket in response['aggregations']['resource_id']['buckets']:
            daily_counts = [
                day['doc_count'] for day in bucket['days']['buckets']
            ]
            avg_hours = sum(daily_counts) / float(len(daily_counts))
            if min_hour and not avg_hours >= min_hour:
                continue
            instance_list.append({'id': bucket['key'], 'hours': avg_hours})

        instance_list.sort(key=lambda item: item['hours'], reverse=True)
        return instance_list

    @classmethod
    @with_cache()
    def get_s3_buckets_per_tag(cls, keys):
        """Group S3 buckets by tag key and tag value.

        Args:
            keys: One value or a list of values matched against
                ``linked_account_id``.

        Returns:
            A list shaped like::

                [{
                    "tag_key": "KEY",            # unique in the list
                    "tag_value": [{
                        "tag_value": "VALUE",    # unique in the list
                        "s3_buckets": [{
                            "bucket_name": "BUCKET_NAME",
                            "account_id": "ACCOUNT_ID"
                        }, {...}]
                    }, {...}]
                }, {...}]
        """
        def _check_if_in_list(dict_list, value, key):
            # First dict in dict_list whose ``key`` entry equals value.
            return next((item for item in dict_list if item[key] == value),
                        None)

        # Each helper appends an entry only when it had to create it, so an
        # entry that already exists is extended in place rather than being
        # added to its list a second time.
        def _parse_tag_keys_results(res):
            bucket_tagged = []
            for bucket_tag_key in res['aggregations']['tag_key']['buckets']:
                buff_tag_key = _check_if_in_list(bucket_tagged,
                                                 bucket_tag_key['key'],
                                                 'tag_key')
                if buff_tag_key is None:
                    buff_tag_key = {
                        "tag_key": bucket_tag_key['key'],
                        "tag_value": []
                    }
                    bucket_tagged.append(buff_tag_key)
                _parse_tag_values_results(bucket_tag_key, buff_tag_key)
            return bucket_tagged

        def _parse_tag_values_results(bucket_tag_key, buff_tag_key):
            for bucket_tag_value in bucket_tag_key['tag_value']['buckets']:
                buff_tag_value = _check_if_in_list(buff_tag_key['tag_value'],
                                                   bucket_tag_value['key'],
                                                   'tag_value')
                if buff_tag_value is None:
                    buff_tag_value = {
                        "tag_value": bucket_tag_value['key'],
                        "s3_buckets": []
                    }
                    buff_tag_key['tag_value'].append(buff_tag_value)
                _parse_buckets_results(buff_tag_value, bucket_tag_value)
            return buff_tag_key

        def _parse_buckets_results(buff_tag_value, bucket_tag_value):
            for bucket_resource_id in bucket_tag_value['ressource_id'][
                    'buckets']:
                known = _check_if_in_list(buff_tag_value['s3_buckets'],
                                          bucket_resource_id['key'],
                                          'bucket_name')
                if known is None:
                    buff_tag_value['s3_buckets'].append({
                        "bucket_name":
                        bucket_resource_id['key'],
                        # Only the first linked account of the bucket is kept.
                        "account_id":
                        bucket_resource_id['account_id']['buckets'][0]['key']
                    })
            return buff_tag_value

        s = cls.search()
        s = s.filter(
            'terms',
            linked_account_id=keys if isinstance(keys, list) else [keys])
        s = s.filter('term', product_name='Amazon Simple Storage Service')
        s = s.query('exists', field="tag")
        s = s.query('wildcard', item_description="*storage*")
        # NOTE(review): none of these terms aggregations passes ``size``, so
        # the server-side default bucket limit applies at every level --
        # confirm that truncating the result this way is intended.
        agg = s.aggs.bucket('tag_key', 'terms', field="tag.key")
        agg = agg.bucket('tag_value', 'terms', field='tag.value')
        agg.bucket('ressource_id', 'terms',
                   field='resource_id').bucket('account_id',
                                               'terms',
                                               field='linked_account_id')
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)

        return _parse_tag_keys_results(res)

    @classmethod
    @with_cache()
    def get_s3_bandwidth_info_and_cost_per_name(cls,
                                                key,
                                                bucket_resource_ids,
                                                date_from=None,
                                                date_to=None):
        """Cost and per-usage-type transfer totals for the given S3 buckets.

        Args:
            key: Value matched against ``linked_account_id``.
            bucket_resource_ids: One value or a list of values matched
                against ``resource_id``.
            date_from: Start of the window. Defaults to the first instant of
                the previous calendar month (UTC).
            date_to: End of the window. Defaults to the last microsecond of
                the month that contains ``date_from``.

        Returns:
            A list of dicts, one per ``resource_id`` bucket, with
            ``bucket_name``, ``cost`` (sum of ``cost``) and
            ``transfer_stats``: a list of ``{'type': usage_type, 'data': sum
            of usage_quantity}`` dicts.
        """
        # ``months=1`` (plural) is the relative form that steps back one
        # month. The singular ``month=1`` is absolute and would pin the
        # default window to January instead.
        date_from = date_from or (datetime.utcnow() - relativedelta(
            months=1)).replace(day=1, hour=0, minute=0, second=0, microsecond=0)
        date_to = date_to or date_from.replace(day=calendar.monthrange(
            date_from.year, date_from.month)[1],
                                               hour=23,
                                               minute=59,
                                               second=59,
                                               microsecond=999999)
        s = cls.search()
        s = s.filter('term', linked_account_id=key)
        s = s.filter('term', product_name='Amazon Simple Storage Service')
        s = s.filter('terms',
                     resource_id=bucket_resource_ids if isinstance(
                         bucket_resource_ids, list) else [bucket_resource_ids])
        s = s.filter('range',
                     usage_start_date={
                         'from': date_from.isoformat(),
                         'to': date_to.isoformat()
                     })
        # Keep only the usage types whose name ends in 'Bytes'.
        s = s.filter('wildcard', usage_type="*Bytes")
        agg = s.aggs.bucket('bucket_name',
                            'terms',
                            field='resource_id',
                            size=0x7FFFFFFF)
        agg.metric('cost', 'sum', field='cost')
        agg = agg.bucket('transfer_type', 'terms', field='usage_type')
        agg.metric('data', 'sum', field='usage_quantity')
        # size=0: only the aggregation results are needed, not the hits.
        res = client.search(index='awsdetailedlineitem',
                            body=s.to_dict(),
                            size=0,
                            request_timeout=60)
        data = [{
            "bucket_name":
            bucket['key'],
            "cost":
            bucket['cost']['value'],
            "transfer_stats": [{
                "type": transfer_stat['key'],
                "data": transfer_stat['data']['value']
            } for transfer_stat in bucket['transfer_type']['buckets']]
        } for bucket in res['aggregations']['bucket_name']['buckets']]
        return data
示例#30
0
class AWSMetric(dsl.DocType):
    """Document mapping for the ``awsmetric`` Elasticsearch index."""
    class Meta:
        # Name of the Elasticsearch index backing this document type.
        index = 'awsmetric'

    # Identifier the query helpers on this class filter by (presumably an
    # account key -- TODO confirm against whatever writes this index).
    key = dsl.String(index='not_analyzed')
    # Resource the value belongs to; underutilized_resources splits it on
    # '/' into a region and an id.
    resource = dsl.String(index='not_analyzed')
    # Metric name, queried as e.g. 'AWS/EC2:CPUUtilization:Maximum'.
    metric = dsl.String(index='not_analyzed')
    # Timestamp; accepts ISO date-times or epoch milliseconds.
    time = dsl.Date(format='date_optional_time||epoch_millis')
    # NOTE(review): presumably the sampling period -- unit not visible here.
    period = dsl.Integer()
    # Numeric value of the metric.
    value = dsl.Double()

    @classmethod
    def underutilized_resources(cls, keys, timespan=timedelta(days=30)):
        """List EC2 instances whose maximum CPU never went above 20%.

        Args:
            keys: Passed through ``any_key_to_string_array`` and matched
                against the ``key`` field.
            timespan: How far back from the current UTC time to look.

        Returns:
            ``{'resources': [...]}`` where each entry has ``type``, ``id``,
            ``region`` and ``underutilized``. At most 300 resources are
            examined.
        """
        keys = any_key_to_string_array(keys)
        query = cls.search()
        cutoff = (datetime.utcnow() - timespan).isoformat()
        query = (query.filter('range', time={'gt': cutoff})
                 .filter('term', metric='AWS/EC2:CPUUtilization:Maximum')
                 .filter('terms', key=keys))

        per_resource = query.aggs.bucket('resources',
                                         'terms',
                                         field='resource',
                                         size=300)
        per_resource.metric('percentiles',
                            'percentile_ranks',
                            field='value',
                            values=[20, 50])

        response = client.search(index='awsmetric',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        resources = []
        for bucket in response['aggregations']['resources']['buckets']:
            # A rank of 100 at value 20 means every sample was at or below 20.
            if bucket['percentiles']['values']['20.0'] != 100:
                continue
            res_region, res_id = bucket['key'].split('/')
            resources.append({
                'type': 'EC2 Instance',
                'id': res_id,
                'region': res_region,
                'underutilized': ['CPU usage under 20%'],
            })

        return {'resources': resources}

    @classmethod
    def hourly_cpu_usage(cls, keys, resources=None):
        """Average CPU utilization for each hour of the day.

        Args:
            keys: A single string or an iterable of strings matched against
                the ``key`` field.
            resources: Optional list of values matched against ``resource``.

        Returns:
            ``None`` when the search returns no buckets. Otherwise a list of
            24 ``{'hour': 'HH', 'cpu': float}`` dicts ordered from ``'00'``
            to ``'23'``; hours without data report ``0.0``.
        """
        query = cls.search()
        if isinstance(keys, basestring):
            keys = [keys]
        elif not isinstance(keys, list):
            keys = list(keys)
        assert all(isinstance(key, basestring) for key in keys)

        query = query.filter('terms', key=keys)
        if resources:
            query = query.filter('terms', resource=resources)
        query = query.filter('term', metric='AWS/EC2:CPUUtilization:Maximum')

        per_hour = query.aggs.bucket('intervals',
                                     'date_histogram',
                                     field='time',
                                     interval='hour',
                                     min_doc_count=1)
        per_hour.metric('utilization', 'avg', field='value')
        response = client.search(index='awsmetric',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        # Collect the hourly averages of every day under their hour of day.
        samples = defaultdict(list)
        for bucket in response['aggregations']['intervals']['buckets']:
            time_part = bucket['key_as_string'].split('T')[1]
            samples[time_part.split(':')[0]].append(
                bucket['utilization']['value'])
        if not samples:
            return None

        hours = OrderedDict(("{:02d}".format(h), 0) for h in range(0, 24))
        for hour, values in samples.iteritems():
            hours[hour] = sum(values) / len(values)
        return [{
            'hour': hour,
            'cpu': float(cpu)
        } for hour, cpu in hours.iteritems()]

    @classmethod
    def days_of_the_week_cpu_usage(cls, keys, resources=None):
        """Average CPU utilization for each day of the week.

        Args:
            keys: A single string or an iterable of strings matched against
                the ``key`` field.
            resources: Optional list of values matched against ``resource``.

        Returns:
            ``None`` when the search returns no buckets. Otherwise a list of
            seven ``{'day': name, 'cpu': float}`` dicts in
            ``calendar.day_name`` order; days without data report ``0.0``.
        """
        query = cls.search()
        if isinstance(keys, basestring):
            keys = [keys]
        elif not isinstance(keys, list):
            keys = list(keys)
        assert all(isinstance(key, basestring) for key in keys)

        query = query.filter('terms', key=keys)
        if resources:
            query = query.filter('terms', resource=resources)
        query = query.filter('term', metric='AWS/EC2:CPUUtilization:Maximum')

        per_day = query.aggs.bucket('intervals',
                                    'date_histogram',
                                    field='time',
                                    interval='day',
                                    min_doc_count=1)
        per_day.metric('utilization', 'avg', field='value')
        response = client.search(index='awsmetric',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        # Collect the daily averages under their weekday index.
        samples = defaultdict(list)
        for bucket in response['aggregations']['intervals']['buckets']:
            date_part = bucket['key_as_string'].split('T')[0]
            weekday = datetime.strptime(date_part,
                                        '%Y-%m-%d').date().weekday()
            samples[weekday].append(bucket['utilization']['value'])
        if not samples:
            return None

        days = OrderedDict((index, 0) for index in range(0, 7))
        for weekday, values in samples.iteritems():
            days[weekday] = sum(values) / len(values)
        return [{
            'day': calendar.day_name[weekday],
            'cpu': float(cpu)
        } for weekday, cpu in days.iteritems()]

    @classmethod
    def daily_cpu_utilization(cls, key):
        """Yield the average ``AWS/EC2:CPUUtilization:Maximum`` value per day.

        This is a generator, so the search is only issued once iteration
        starts.

        Args:
            key: Value matched against the ``key`` field.

        Yields:
            ``(day, average)`` tuples where ``day`` is the part of the
            bucket's ``key_as_string`` before the ``T``. Days without any
            document are skipped.
        """
        query = (cls.search()
                 .filter('term', key=key)
                 .filter('term', metric='AWS/EC2:CPUUtilization:Maximum'))

        per_day = query.aggs.bucket('intervals',
                                    'date_histogram',
                                    field='time',
                                    interval='day',
                                    min_doc_count=1)
        per_day.metric('utilization', 'avg', field='value')
        response = client.search(index='awsmetric',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        for bucket in response['aggregations']['intervals']['buckets']:
            day = bucket['key_as_string'].split('T')[0]
            yield day, bucket['utilization']['value']

    @classmethod
    def get_cpu_usage(cls, key, timespan=timedelta(days=30)):
        """Yield the average ``AWS/EC2:CPUUtilization:Maximum`` per resource.

        This is a generator, so the search is only issued once iteration
        starts.

        Args:
            key: Value matched against the ``key`` field.
            timespan: How far back from the current UTC time to look.

        Yields:
            ``(resource, average)`` tuples, one per bucket of the
            ``resource`` terms aggregation (capped at 300 buckets).
        """
        query = cls.search()
        cutoff = (datetime.utcnow() - timespan).isoformat()
        query = (query.filter('range', time={'gt': cutoff})
                 .filter('term', metric='AWS/EC2:CPUUtilization:Maximum')
                 .filter('term', key=key))

        per_resource = query.aggs.bucket('resources',
                                         'terms',
                                         field='resource',
                                         size=300)
        per_resource.metric('utilization', 'avg', field='value')
        response = client.search(index='awsmetric',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        for bucket in response['aggregations']['resources']['buckets']:
            yield bucket['key'], bucket['utilization']['value']

    @classmethod
    def get_instance_read_iops_usage(cls, key, timespan=timedelta(days=30)):
        """Yield the average ``AWS/EC2:DiskReadOps:Average`` per resource.

        This is a generator, so the search is only issued once iteration
        starts.

        Args:
            key: Value matched against the ``key`` field.
            timespan: How far back from the current UTC time to look.

        Yields:
            ``(resource, average)`` tuples, one per bucket of the
            ``resource`` terms aggregation (capped at 300 buckets).
        """
        query = cls.search()
        cutoff = (datetime.utcnow() - timespan).isoformat()
        query = (query.filter('range', time={'gt': cutoff})
                 .filter('term', metric='AWS/EC2:DiskReadOps:Average')
                 .filter('term', key=key))

        per_resource = query.aggs.bucket('resources',
                                         'terms',
                                         field='resource',
                                         size=300)
        per_resource.metric('utilization', 'avg', field='value')
        response = client.search(index='awsmetric',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        for bucket in response['aggregations']['resources']['buckets']:
            yield bucket['key'], bucket['utilization']['value']

    @classmethod
    def get_instance_write_iops_usage(cls, key, timespan=timedelta(days=30)):
        """Yield the average ``AWS/EC2:DiskWriteOps:Average`` per resource.

        This is a generator, so the search is only issued once iteration
        starts.

        Args:
            key: Value matched against the ``key`` field.
            timespan: How far back from the current UTC time to look.

        Yields:
            ``(resource, average)`` tuples, one per bucket of the
            ``resource`` terms aggregation (capped at 300 buckets).
        """
        query = cls.search()
        cutoff = (datetime.utcnow() - timespan).isoformat()
        query = (query.filter('range', time={'gt': cutoff})
                 .filter('term', metric='AWS/EC2:DiskWriteOps:Average')
                 .filter('term', key=key))

        per_resource = query.aggs.bucket('resources',
                                         'terms',
                                         field='resource',
                                         size=300)
        per_resource.metric('utilization', 'avg', field='value')
        response = client.search(index='awsmetric',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        for bucket in response['aggregations']['resources']['buckets']:
            yield bucket['key'], bucket['utilization']['value']

    @classmethod
    def get_instance_read_bytes_usage(cls, key, timespan=timedelta(days=30)):
        """Yield the average ``AWS/EBS:DiskReadBytes:Average`` per resource.

        This is a generator, so the search is only issued once iteration
        starts.

        Args:
            key: Value matched against the ``key`` field.
            timespan: How far back from the current UTC time to look.

        Yields:
            ``(resource, average)`` tuples, one per bucket of the
            ``resource`` terms aggregation (capped at 300 buckets).
        """
        query = cls.search()
        cutoff = (datetime.utcnow() - timespan).isoformat()
        # NOTE(review): the sibling instance IOPS helpers query 'AWS/EC2:Disk*'
        # while this one uses the 'AWS/EBS:' prefix -- confirm the name
        # matches what is actually stored in the index.
        query = (query.filter('range', time={'gt': cutoff})
                 .filter('term', metric='AWS/EBS:DiskReadBytes:Average')
                 .filter('term', key=key))

        per_resource = query.aggs.bucket('resources',
                                         'terms',
                                         field='resource',
                                         size=300)
        per_resource.metric('utilization', 'avg', field='value')
        response = client.search(index='awsmetric',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        for bucket in response['aggregations']['resources']['buckets']:
            yield bucket['key'], bucket['utilization']['value']

    @classmethod
    def get_instance_write_bytes_usage(cls, key, timespan=timedelta(days=30)):
        """Yield the average ``AWS/EBS:DiskWriteBytes:Average`` per resource.

        This is a generator, so the search is only issued once iteration
        starts.

        Args:
            key: Value matched against the ``key`` field.
            timespan: How far back from the current UTC time to look.

        Yields:
            ``(resource, average)`` tuples, one per bucket of the
            ``resource`` terms aggregation (capped at 300 buckets).
        """
        query = cls.search()
        cutoff = (datetime.utcnow() - timespan).isoformat()
        # NOTE(review): the sibling instance IOPS helpers query 'AWS/EC2:Disk*'
        # while this one uses the 'AWS/EBS:' prefix -- confirm the name
        # matches what is actually stored in the index.
        query = (query.filter('range', time={'gt': cutoff})
                 .filter('term', metric='AWS/EBS:DiskWriteBytes:Average')
                 .filter('term', key=key))

        per_resource = query.aggs.bucket('resources',
                                         'terms',
                                         field='resource',
                                         size=300)
        per_resource.metric('utilization', 'avg', field='value')
        response = client.search(index='awsmetric',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        for bucket in response['aggregations']['resources']['buckets']:
            yield bucket['key'], bucket['utilization']['value']

    @classmethod
    def get_volume_read_iops_usage(cls, key, timespan=timedelta(days=30)):
        """Yield the average ``AWS/EBS:VolumeReadOps:Average`` per resource.

        This is a generator, so the search is only issued once iteration
        starts.

        Args:
            key: Value matched against the ``key`` field.
            timespan: How far back from the current UTC time to look.

        Yields:
            ``(resource, average)`` tuples, one per bucket of the
            ``resource`` terms aggregation (capped at 300 buckets).
        """
        query = cls.search()
        cutoff = (datetime.utcnow() - timespan).isoformat()
        query = (query.filter('range', time={'gt': cutoff})
                 .filter('term', metric='AWS/EBS:VolumeReadOps:Average')
                 .filter('term', key=key))

        per_resource = query.aggs.bucket('resources',
                                         'terms',
                                         field='resource',
                                         size=300)
        per_resource.metric('utilization', 'avg', field='value')
        response = client.search(index='awsmetric',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        for bucket in response['aggregations']['resources']['buckets']:
            yield bucket['key'], bucket['utilization']['value']

    @classmethod
    def get_volume_write_iops_usage(cls, key, timespan=timedelta(days=30)):
        """Yield the average ``AWS/EBS:VolumeWriteOps:Average`` per resource.

        This is a generator, so the search is only issued once iteration
        starts.

        Args:
            key: Value matched against the ``key`` field.
            timespan: How far back from the current UTC time to look.

        Yields:
            ``(resource, average)`` tuples, one per bucket of the
            ``resource`` terms aggregation (capped at 300 buckets).
        """
        query = cls.search()
        cutoff = (datetime.utcnow() - timespan).isoformat()
        query = (query.filter('range', time={'gt': cutoff})
                 .filter('term', metric='AWS/EBS:VolumeWriteOps:Average')
                 .filter('term', key=key))

        per_resource = query.aggs.bucket('resources',
                                         'terms',
                                         field='resource',
                                         size=300)
        per_resource.metric('utilization', 'avg', field='value')
        response = client.search(index='awsmetric',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        for bucket in response['aggregations']['resources']['buckets']:
            yield bucket['key'], bucket['utilization']['value']

    @classmethod
    def get_volume_read_bytes_usage(cls, key, timespan=timedelta(days=30)):
        """Yield the average ``AWS/EBS:VolumeReadBytes:Average`` per resource.

        This is a generator, so the search is only issued once iteration
        starts.

        Args:
            key: Value matched against the ``key`` field.
            timespan: How far back from the current UTC time to look.

        Yields:
            ``(resource, average)`` tuples, one per bucket of the
            ``resource`` terms aggregation (capped at 300 buckets).
        """
        query = cls.search()
        cutoff = (datetime.utcnow() - timespan).isoformat()
        query = (query.filter('range', time={'gt': cutoff})
                 .filter('term', metric='AWS/EBS:VolumeReadBytes:Average')
                 .filter('term', key=key))

        per_resource = query.aggs.bucket('resources',
                                         'terms',
                                         field='resource',
                                         size=300)
        per_resource.metric('utilization', 'avg', field='value')
        response = client.search(index='awsmetric',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        for bucket in response['aggregations']['resources']['buckets']:
            yield bucket['key'], bucket['utilization']['value']

    @classmethod
    def get_volume_write_bytes_usage(cls, key, timespan=timedelta(days=30)):
        """Yield the average ``AWS/EBS:VolumeWriteBytes:Average`` per resource.

        This is a generator, so the search is only issued once iteration
        starts.

        Args:
            key: Value matched against the ``key`` field.
            timespan: How far back from the current UTC time to look.

        Yields:
            ``(resource, average)`` tuples, one per bucket of the
            ``resource`` terms aggregation (capped at 300 buckets).
        """
        query = cls.search()
        cutoff = (datetime.utcnow() - timespan).isoformat()
        query = (query.filter('range', time={'gt': cutoff})
                 .filter('term', metric='AWS/EBS:VolumeWriteBytes:Average')
                 .filter('term', key=key))

        per_resource = query.aggs.bucket('resources',
                                         'terms',
                                         field='resource',
                                         size=300)
        per_resource.metric('utilization', 'avg', field='value')
        response = client.search(index='awsmetric',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        for bucket in response['aggregations']['resources']['buckets']:
            yield bucket['key'], bucket['utilization']['value']

    @classmethod
    def get_network_in_usage(cls, key, timespan=timedelta(days=30)):
        """Yield the average ``AWS/EC2:NetworkIn:Average`` per resource.

        This is a generator, so the search is only issued once iteration
        starts.

        Args:
            key: Value matched against the ``key`` field.
            timespan: How far back from the current UTC time to look.

        Yields:
            ``(resource, average)`` tuples, one per bucket of the
            ``resource`` terms aggregation (capped at 300 buckets).
        """
        query = cls.search()
        cutoff = (datetime.utcnow() - timespan).isoformat()
        query = (query.filter('range', time={'gt': cutoff})
                 .filter('term', metric='AWS/EC2:NetworkIn:Average')
                 .filter('term', key=key))

        per_resource = query.aggs.bucket('resources',
                                         'terms',
                                         field='resource',
                                         size=300)
        per_resource.metric('utilization', 'avg', field='value')
        response = client.search(index='awsmetric',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        for bucket in response['aggregations']['resources']['buckets']:
            yield bucket['key'], bucket['utilization']['value']

    @classmethod
    def get_network_out_usage(cls, key, timespan=timedelta(days=30)):
        """Yield the average ``AWS/EC2:NetworkOut:Average`` per resource.

        This is a generator, so the search is only issued once iteration
        starts.

        Args:
            key: Value matched against the ``key`` field.
            timespan: How far back from the current UTC time to look.

        Yields:
            ``(resource, average)`` tuples, one per bucket of the
            ``resource`` terms aggregation (capped at 300 buckets).
        """
        query = cls.search()
        cutoff = (datetime.utcnow() - timespan).isoformat()
        query = (query.filter('range', time={'gt': cutoff})
                 .filter('term', metric='AWS/EC2:NetworkOut:Average')
                 .filter('term', key=key))

        per_resource = query.aggs.bucket('resources',
                                         'terms',
                                         field='resource',
                                         size=300)
        per_resource.metric('utilization', 'avg', field='value')
        response = client.search(index='awsmetric',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        for bucket in response['aggregations']['resources']['buckets']:
            yield bucket['key'], bucket['utilization']['value']

    @classmethod
    def get_s3_space_usage(cls, key, timespan=timedelta(days=30)):
        """Yield the average ``AWS/S3:BucketSizeBytes:Average`` per resource.

        This is a generator, so the search is only issued once iteration
        starts.

        Args:
            key: Value matched against the ``key`` field.
            timespan: How far back from the current UTC time to look.

        Yields:
            ``(resource, average)`` tuples, one per bucket of the
            ``resource`` terms aggregation (capped at 300 buckets).
        """
        query = cls.search()
        cutoff = (datetime.utcnow() - timespan).isoformat()
        query = (query.filter('range', time={'gt': cutoff})
                 .filter('term', metric='AWS/S3:BucketSizeBytes:Average')
                 .filter('term', key=key))

        per_resource = query.aggs.bucket('resources',
                                         'terms',
                                         field='resource',
                                         size=300)
        per_resource.metric('utilization', 'avg', field='value')
        response = client.search(index='awsmetric',
                                 body=query.to_dict(),
                                 size=0,
                                 request_timeout=60)

        for bucket in response['aggregations']['resources']['buckets']:
            yield bucket['key'], bucket['utilization']['value']