def _analyze_explores(self, model=None, explore=None, sortkey=None,
                      limit=None, min_queries=0, timeframe=90):
    """Build one usage-summary row per matching explore.

    Args:
        model: forwarded to ``fetcher.get_explores`` to select explores.
        explore: forwarded to ``fetcher.get_explores`` to select explores.
        sortkey: forwarded to ``dc.sort`` along with the valid column names.
        limit: forwarded to ``dc.limit``.
        min_queries: forwarded to ``fetcher.get_used_explore_fields``.
        timeframe: forwarded to ``fetcher.get_used_explore_fields``.

    Returns:
        The rows after ``dc.sort`` and ``dc.limit``. Each row is a dict with
        the keys ``model``, ``explore``, ``is_hidden``, ``has_description``,
        ``join_count``, ``unused_joins``, ``field_count``, ``unused_fields``
        and ``query_count``.

    Raises:
        Exception: if no row could be built for any explore.
    """
    # NOTE(review): this method sorts/limits through `dc` while
    # `_analyze_fields` uses `styler` -- confirm both names are in scope.
    explores = fetcher.get_explores(self, model=model, explore=explore,
                                    verbose=1)
    info = []
    for e in explores:
        # in case explore does not exist (bug - #32748)
        if e is None:
            continue

        _used_fields = fetcher.get_used_explore_fields(
            self, e['model_name'], e['scopes'], timeframe, min_queries)
        used_fields = list(_used_fields.keys())
        exposed_fields = fetcher.get_explore_fields(self, explore=e,
                                                    scoped_names=1)
        unused_fields = set(exposed_fields) - set(used_fields)

        explore_usage = fetcher.get_used_explores(
            self, model=e['model_name'], explore=e['name'])

        # Every scope except the explore itself counts as a join.
        all_joins = set(e['scopes'])
        all_joins.remove(e['name'])
        # Used fields are scoped as model.explore.view.field, so index 2 is
        # the view (join) the field belongs to.
        used_joins = {field.split('.')[2] for field in used_fields}

        info.append({
            'model': e['model_name'],
            'explore': e['name'],
            'is_hidden': e['hidden'],
            'has_description': 'Yes' if e['description'] else 'No',
            'join_count': len(all_joins),
            'unused_joins': len(all_joins - used_joins),
            'field_count': len(exposed_fields),
            'unused_fields': len(unused_fields),
            # A missing or falsy usage entry is reported as 0.
            'query_count': explore_usage.get(e['name']) or 0
        })

    if not info:
        self.analyze_logger.error('No matching explores found')
        raise Exception('No matching explores found')

    valid_values = list(info[0].keys())
    info = dc.sort(info, valid_values, sortkey)
    info = dc.limit(info, limit=limit)
    return info
def _analyze_fields(self, model=None, explore=None, sortkey=None,
                    limit=None, min_queries=0, timeframe=90):
    """Build one field-level summary row per matching explore.

    Args:
        model: forwarded to ``fetcher.get_explores`` to select explores.
        explore: forwarded to ``fetcher.get_explores`` to select explores.
        sortkey: forwarded to ``styler.sort`` along with the valid column
            names.
        limit: forwarded to ``styler.limit``.
        min_queries: forwarded to ``fetcher.get_used_explore_fields``.
        timeframe: forwarded to ``fetcher.get_used_explore_fields``.

    Returns:
        The rows after ``styler.sort`` and ``styler.limit``. Each row is a
        dict with the keys ``model``, ``explore``, ``field_count``,
        ``unused_fields``, ``missing_description``, ``dimensions`` and
        ``measures``.

    Raises:
        Exception: if no row could be built for any explore.
    """
    print('Retrieving explores for fields...')
    explores = fetcher.get_explores(self, model=model, explore=explore,
                                    verbose=1)
    info = []
    total = len(explores)
    for progress, e in enumerate(explores, 1):
        # The guard must come before the progress message: the message
        # subscripts `e`, which raises TypeError for a missing explore.
        if e is None:
            continue
        print('Analyzing {}.{}, {} of {} explores'.format(
            e['model_name'], e['name'], progress, total))

        _used_fields = fetcher.get_used_explore_fields(
            self, e['model_name'], e['scopes'], timeframe, min_queries)
        used_fields = list(_used_fields.keys())
        exposed_fields = fetcher.get_explore_fields(self, explore=e,
                                                    scoped_names=1)
        unused_fields = set(exposed_fields) - set(used_fields)

        dimensions = list(e['fields']['dimensions'])
        measures = list(e['fields']['measures'])
        # A field counts as undocumented when its description is falsy.
        missing_description = sum(
            1 for field in dimensions + measures
            if not field['description'])

        info.append({
            'model': e['model_name'],
            'explore': e['name'],
            'field_count': len(exposed_fields),
            'unused_fields': len(unused_fields),
            'missing_description': missing_description,
            'dimensions': len(dimensions),
            'measures': len(measures)
        })

    if not info:
        self.analyze_logger.error('No matching explores found')
        raise Exception('No matching explores found')

    valid_values = list(info[0].keys())
    info = styler.sort(info, valid_values, sortkey)
    info = styler.limit(info, limit=limit)
    return info
def _vacuum_explores(self, model=None, explore=None, timeframe=90,
                     min_queries=0):
    """List unused joins and unused fields for each matching explore.

    Args:
        model: forwarded to ``fetcher.get_explores`` to select explores.
        explore: forwarded to ``fetcher.get_explores`` to select explores.
        timeframe: forwarded to ``fetcher.get_used_explore_fields``.
        min_queries: forwarded to ``fetcher.get_used_explore_fields``.

    Returns:
        A list of dicts with the keys ``model``, ``explore``,
        ``unused_joins`` and ``unused_fields``. Both ``unused_*`` values are
        newline-joined, sorted strings; ``unused_joins`` is ``'N/A'`` when
        every join is used.

    Raises:
        Exception: if no row could be built for any explore.
    """
    explores = fetcher.get_explores(self, model=model, explore=explore,
                                    verbose=1)
    info = []
    for e in explores:
        # Skip explores that could not be fetched, as the analyze methods
        # do for the same fetcher call (bug - #32748).
        if e is None:
            continue

        # get field usage from i__looker using all the views inside explore
        # returns fields in the form of model.explore.view.field
        _used_fields = fetcher.get_used_explore_fields(
            self, e['model_name'], e['scopes'], timeframe, min_queries)
        used_fields = list(_used_fields.keys())

        # get field picker fields in the form of model.explore.view.field
        exposed_fields = fetcher.get_explore_fields(self, explore=e,
                                                    scoped_names=1)
        _unused_fields = set(exposed_fields) - set(used_fields)

        # Every scope except the explore itself counts as a join.
        all_joins = set(e['scopes'])
        all_joins.remove(e['name'])
        used_joins = {field.split('.')[2] for field in used_fields}
        # Sorted so the report is stable from run to run.
        unused_joins = '\n'.join(sorted(all_joins - used_joins)) or 'N/A'

        # only keep fields that belong to used joins (unused joins fields
        # don't matter) or to the base view. Views are compared by exact
        # name: a prefix test would also keep the fields of an unused view
        # whose name merely starts with a used view's name.
        kept_views = used_joins | {e['name']}
        if any(kept_views):
            unused_fields = []
            for field in _unused_fields:
                parts = field.split('.')
                if len(parts) > 2 and parts[2] in kept_views:
                    # remove the model.explore scoping
                    unused_fields.append('.'.join(parts[2:]))
            unused_fields = '\n'.join(sorted(unused_fields))
        else:
            # No view name to match against at all.
            unused_fields = color.format('ALL', 'fail', 'color')

        info.append({
            'model': e['model_name'],
            'explore': e['name'],
            'unused_joins': unused_joins,
            'unused_fields': unused_fields
        })

    if not info:
        self.vacuum_logger.error('No matching explores found')
        raise Exception('No matching explores found')
    return info
def _vacuum_fields(self, model=None, explore=None, timeframe=90,
                   min_queries=0):
    """List, per view, the exposed fields that no matching explore uses.

    A field counts as used when it appears in the usage data returned by
    ``fetcher.get_used_explore_fields`` or is referenced inside braces in a
    join's ``sql_on``. Usage is pooled across all matching explores before
    the per-view report is built.

    Args:
        model: forwarded to ``fetcher.get_explores`` to select explores.
        explore: forwarded to ``fetcher.get_explores`` to select explores.
        timeframe: forwarded to ``fetcher.get_used_explore_fields``.
        min_queries: forwarded to ``fetcher.get_used_explore_fields``.

    Returns:
        A list of ``{'view': ..., 'unused_fields': ...}`` dicts ordered by
        view name, where ``unused_fields`` is a newline-joined, sorted
        string (empty when the view has nothing to report).

    Raises:
        Exception: if no view was collected.
    """
    explores = fetcher.get_explores(self, model=model, explore=explore,
                                    verbose=1)
    info = []
    master_exposed_fields = set()
    master_used_fields = set()
    total = len(explores)
    for progress, e in enumerate(explores, 1):
        # Skip explores that could not be fetched (bug - #32748); the
        # progress message below subscripts `e`.
        if e is None:
            continue
        print('Analyzing {}.{}, {} of {} explores'.format(
            e['model_name'], e['name'], progress, total))

        # get field usage from i__looker using all the views inside explore
        # returns fields in the form of model.explore.view.field
        _used_fields = fetcher.get_used_explore_fields(
            self, e['model_name'], e['scopes'], timeframe, min_queries)
        # get field picker fields in the form of model.explore.view.field
        exposed_fields = fetcher.get_explore_fields(self, explore=e,
                                                    scoped_names=1)

        # Fields referenced inside {...} in a join condition count as used.
        for join in e['joins']:
            if join['sql_on'] is not None:
                master_used_fields.update(
                    re.findall(r'\{(.*?)\}', join['sql_on']))

        # Strip the model and explore so fields compare across explores.
        for field in _used_fields.keys():
            master_used_fields.add('.'.join(field.split('.')[2:]))
        for field in exposed_fields:
            master_exposed_fields.add('.'.join(field.split('.')[2:]))

    # Every view seen in a used or an exposed field.
    distinct_views = {
        field.split('.')[0]
        for field in master_used_fields | master_exposed_fields
    }

    # Fields to ignore if they contain the following:
    ignore_list = [
        'week', 'quarter', 'year', 'month', 'raw', 'date', 'time'
    ]

    # Filter the unused fields once, then group them by their view.
    unused_by_view = {}
    for field in master_exposed_fields - master_used_fields:
        # always keep id fields and basic count fields
        field_name = field.split('.')[1]
        if field_name in ('id', 'count') or 'id' in field_name.split('_'):
            continue
        if any(ignore in field for ignore in ignore_list):
            continue
        unused_by_view.setdefault(field.split('.')[0], []).append(field)

    for view in sorted(distinct_views):
        # Views whose name contains a digit are left out of the report.
        if any(char.isdigit() for char in view):
            continue
        # NOTE(review): the old `is not None` check on the joined string was
        # always true, so views with nothing unused are still listed with an
        # empty string -- confirm whether they should be skipped instead.
        info.append({
            'view': view,
            'unused_fields': '\n'.join(sorted(unused_by_view.get(view, [])))
        })

    if not info:
        self.vacuum_logger.error('No matching explores found')
        raise Exception('No matching explores found')
    return info