class ESAlchemy(object):
    """Expose the hits of an ``HQESQuery`` as ``ESAlchemyRow`` objects.

    The columns (names and datatypes) are taken from
    ``config.indicators.get_columns()``.
    """

    def __init__(self, index_name, config):
        """Store the arguments and build an ``HQESQuery`` for ``index_name``."""
        self.index_name = index_name
        self.config = config
        self.es = HQESQuery(index_name)

    def __getitem__(self, sliced_or_int):
        """Index or slice the underlying query and convert the hits to rows.

        Returns a single row when ``sliced_or_int`` is an integer, otherwise
        the list of converted rows.  Raises ``IndexError`` for an integer
        index when the query yields no hits.
        """
        # NOTE(review): the result of ``self.es[...]`` is iterated even for an
        # integer index, so it is presumably a sequence in both cases.
        hits = self.es[sliced_or_int]
        hits = [self._hit_to_row(hit) for hit in hits]
        # ``long`` only exists on Python 2.  ``six.integer_types`` is
        # ``(int, long)`` there and ``(int,)`` on Python 3, so this check no
        # longer raises NameError on Python 3.
        if isinstance(sliced_or_int, six.integer_types):
            return hits[0]
        return hits

    def _hit_to_row(self, hit):
        """Build an ``ESAlchemyRow`` from ``hit``, keyed by database column name.

        ``hit`` is indexed by each column's ``database_column_name``; a
        missing key propagates whatever error ``hit`` raises for it.
        """

        def mapping_to_datatype(column, value):
            # Falsy values (None, '', 0, ...) are passed through untouched.
            if not value:
                return value

            datatype = column.datatype
            if datatype == 'datetime':
                # Timestamps may or may not carry fractional seconds: try the
                # plain format first and fall back to the microsecond one.
                try:
                    return datetime.datetime.strptime(
                        value, "%Y-%m-%dT%H:%M:%S")
                except ValueError:
                    return datetime.datetime.strptime(
                        value, "%Y-%m-%dT%H:%M:%S.%f")
            elif datatype == 'date':
                # Note: this yields a datetime.datetime, not a datetime.date.
                return datetime.datetime.strptime(value, "%Y-%m-%d")
            return value

        return ESAlchemyRow(self.column_ordering, {
            col.database_column_name: mapping_to_datatype(
                col, hit[col.database_column_name])
            for col in self.columns
        })

    @property
    def columns(self):
        """Columns returned by ``config.indicators.get_columns()``."""
        return self.config.indicators.get_columns()

    @property
    @memoized
    def column_ordering(self):
        """Database column names, in the order of ``columns`` (memoized)."""
        return [col.database_column_name for col in self.columns]

    @property
    def column_descriptions(self):
        """One ``{"name": <column name>}`` dict per entry in ``column_ordering``."""
        return [{"name": col} for col in self.column_ordering]

    def count(self):
        """Return ``self.es.count()``."""
        return self.es.count()

    def distinct_values(self, column, size):
        """Return the keys of a terms aggregation on ``column``.

        ``size`` is forwarded to the terms aggregation.  ``None`` is appended
        to the keys when the accompanying missing aggregation reports at
        least one document.
        """
        # missing aggregation can be removed on upgrade to ES 2.0
        missing_agg_name = column + '_missing'
        query = self.es.terms_aggregation(
            column, column, size=size, sort_field="_term").size(0)
        query = query.aggregation(MissingAggregation(missing_agg_name, column))
        results = query.run()
        missing_result = getattr(results.aggregations, missing_agg_name).result
        result = getattr(results.aggregations, column).keys
        if missing_result['doc_count'] > 0:
            result.append(None)
        return result
class ESAlchemy(object):
    """Row-oriented view over an ``HQESQuery``.

    Hits are turned into ``ESAlchemyRow`` objects whose columns are those
    reported by ``config.indicators.get_columns()``.
    """

    def __init__(self, index_name, config):
        """Remember ``index_name``/``config`` and create the backing query."""
        self.index_name = index_name
        self.config = config
        self.es = HQESQuery(index_name)

    def __getitem__(self, sliced_or_int):
        """Return one row for an integer index, a list of rows otherwise."""
        rows = [self._hit_to_row(raw_hit) for raw_hit in self.es[sliced_or_int]]
        if isinstance(sliced_or_int, six.integer_types):
            return rows[0]
        return rows

    def _hit_to_row(self, hit):
        """Convert a single hit into an ``ESAlchemyRow``."""

        def coerce(col, raw):
            # Anything falsy is returned exactly as received.
            if not raw:
                return raw

            kind = col.datatype
            if kind == 'date':
                # Parsed into a datetime.datetime (not a datetime.date).
                return datetime.datetime.strptime(raw, "%Y-%m-%d")
            if kind != 'datetime':
                return raw

            # 'datetime': second precision first, then microsecond precision.
            try:
                return datetime.datetime.strptime(raw, "%Y-%m-%dT%H:%M:%S")
            except ValueError:
                return datetime.datetime.strptime(raw, "%Y-%m-%dT%H:%M:%S.%f")

        ordering = self.column_ordering
        values = {}
        for col in self.columns:
            name = col.database_column_name
            values[name] = coerce(col, hit[name])
        return ESAlchemyRow(ordering, values)

    @property
    def columns(self):
        """The columns from ``config.indicators.get_columns()``."""
        indicators = self.config.indicators
        return indicators.get_columns()

    @property
    @memoized
    def column_ordering(self):
        """Memoized list of ``database_column_name`` for every column."""
        return [column.database_column_name for column in self.columns]

    @property
    def column_descriptions(self):
        """List of ``{"name": ...}`` dicts, one per ordered column name."""
        return [{"name": column_name} for column_name in self.column_ordering]

    def count(self):
        """Delegate to the backing query's ``count()``."""
        return self.es.count()

    def distinct_values(self, column, size):
        """Return the terms-aggregation keys for ``column``.

        ``None`` is appended when the missing aggregation counted at least
        one document.
        """
        # missing aggregation can be removed on upgrade to ES 2.0
        missing_name = column + '_missing'

        terms_query = self.es.terms_aggregation(
            column, column, size=size, sort_field="_term")
        query = terms_query.size(0).aggregation(
            MissingAggregation(missing_name, column))

        aggregations = query.run().aggregations
        missing = getattr(aggregations, missing_name).result
        keys = getattr(aggregations, column).keys
        if missing['doc_count'] > 0:
            keys.append(None)
        return keys
class ESAlchemy(object):
    """Expose the hits of an ``HQESQuery`` as ``ESAlchemyRow`` objects.

    The columns (names and datatypes) are taken from
    ``config.indicators.get_columns()``.
    """

    def __init__(self, index_name, config):
        """Store the arguments and build an ``HQESQuery`` for ``index_name``."""
        self.index_name = index_name
        self.config = config
        self.es = HQESQuery(index_name)

    def __getitem__(self, sliced_or_int):
        """Index or slice the underlying query and convert the hits to rows.

        Returns a single row when ``sliced_or_int`` is an integer, otherwise
        the list of converted rows.  Raises ``IndexError`` for an integer
        index when the query yields no hits.
        """
        # NOTE(review): the result of ``self.es[...]`` is iterated even for an
        # integer index, so it is presumably a sequence in both cases.
        hits = self.es[sliced_or_int]
        hits = [self._hit_to_row(hit) for hit in hits]
        # ``long`` only exists on Python 2.  ``six.integer_types`` is
        # ``(int, long)`` there and ``(int,)`` on Python 3, so this check no
        # longer raises NameError on Python 3.
        if isinstance(sliced_or_int, six.integer_types):
            return hits[0]
        return hits

    def _hit_to_row(self, hit):
        """Build an ``ESAlchemyRow`` from ``hit``, keyed by database column name.

        ``hit`` is indexed by each column's ``database_column_name``; a
        missing key propagates whatever error ``hit`` raises for it.
        """

        def mapping_to_datatype(column, value):
            # Falsy values (None, '', 0, ...) are passed through untouched.
            if not value:
                return value

            datatype = column.datatype
            if datatype == 'datetime':
                # Timestamps may or may not carry fractional seconds: try the
                # plain format first and fall back to the microsecond one.
                try:
                    return datetime.datetime.strptime(
                        value, "%Y-%m-%dT%H:%M:%S")
                except ValueError:
                    return datetime.datetime.strptime(
                        value, "%Y-%m-%dT%H:%M:%S.%f")
            elif datatype == 'date':
                # Note: this yields a datetime.datetime, not a datetime.date.
                return datetime.datetime.strptime(value, "%Y-%m-%d")
            return value

        return ESAlchemyRow(
            self.column_ordering,
            {
                col.database_column_name: mapping_to_datatype(
                    col, hit[col.database_column_name])
                for col in self.columns
            })

    @property
    def columns(self):
        """Columns returned by ``config.indicators.get_columns()``."""
        return self.config.indicators.get_columns()

    @property
    @memoized
    def column_ordering(self):
        """Database column names, in the order of ``columns`` (memoized)."""
        return [col.database_column_name for col in self.columns]

    @property
    def column_descriptions(self):
        """One ``{"name": <column name>}`` dict per entry in ``column_ordering``."""
        return [{"name": col} for col in self.column_ordering]

    def count(self):
        """Return ``self.es.count()``."""
        return self.es.count()

    def distinct_values(self, column, size):
        """Return the keys of a terms aggregation on ``column``.

        ``size`` is forwarded to the terms aggregation; the query itself is
        run with ``size(0)``.
        """
        query = self.es.terms_aggregation(column, column, size=size).size(0)
        results = query.run()
        return getattr(results.aggregations, column).keys