Пример #1
0
    def _analyze_explores(self,
                          model=None,
                          explore=None,
                          sortkey=None,
                          limit=None,
                          min_queries=0,
                          timeframe=90):
        """Summarise every matching explore as one row of usage statistics.

        For each explore returned by ``fetcher.get_explores`` a dict is built
        with the keys ``model``, ``explore``, ``is_hidden``,
        ``has_description``, ``join_count``, ``unused_joins``,
        ``field_count``, ``unused_fields`` and ``query_count``.

        ``model`` and ``explore`` are forwarded to ``fetcher.get_explores``;
        ``timeframe`` and ``min_queries`` are forwarded to
        ``fetcher.get_used_explore_fields``. ``sortkey`` and ``limit`` are
        handed to ``dc.sort`` and ``dc.limit`` respectively.

        Returns the value produced by ``dc.limit``.

        Raises ``Exception`` (after logging an error) when no row could be
        built.
        """
        rows = []
        for exp in fetcher.get_explores(self,
                                        model=model,
                                        explore=explore,
                                        verbose=1):
            # in case explore does not exist (bug - #32748)
            if exp is None:
                continue

            usage = fetcher.get_used_explore_fields(
                self, exp['model_name'], exp['scopes'], timeframe, min_queries)
            queried = list(usage.keys())
            exposed = fetcher.get_explore_fields(self,
                                                 explore=exp,
                                                 scoped_names=1)
            never_queried = set(exposed) - set(queried)
            field_total = len(exposed)
            usage_by_explore = fetcher.get_used_explores(
                self, model=exp['model_name'], explore=exp['name'])

            # every scope except the explore itself counts as a join
            joins = set(exp['scopes'])
            joins.remove(exp['name'])
            # third dotted component of a used field name is its view/join
            joins_seen = {name.split('.')[2] for name in queried}
            idle_join_total = len(joins - joins_seen)

            described = 'Yes' if exp['description'] else 'No'

            # a missing or falsy entry is reported as zero queries
            run_total = usage_by_explore.get(exp['name']) or 0

            rows.append({
                'model': exp['model_name'],
                'explore': exp['name'],
                'is_hidden': exp['hidden'],
                'has_description': described,
                'join_count': len(joins),
                'unused_joins': idle_join_total,
                'field_count': field_total,
                'unused_fields': len(never_queried),
                'query_count': run_total
            })

        if not rows:
            self.analyze_logger.error('No matching explores found')
            raise Exception('No matching explores found')

        # the keys of the first row double as the list of valid sort keys
        sortable = list(rows[0].keys())
        rows = dc.sort(rows, sortable, sortkey)
        return dc.limit(rows, limit=limit)
Пример #2
0
    def _analyze_fields(self,
                        model=None,
                        explore=None,
                        sortkey=None,
                        limit=None,
                        min_queries=0,
                        timeframe=90):
        """Build per-explore field statistics.

        For every explore returned by ``fetcher.get_explores`` one dict is
        produced with the keys ``model``, ``explore``, ``field_count``,
        ``unused_fields``, ``missing_description``, ``dimensions`` and
        ``measures``. Progress is reported on stdout via ``print``.

        ``model`` and ``explore`` are forwarded to ``fetcher.get_explores``;
        ``timeframe`` and ``min_queries`` are forwarded to
        ``fetcher.get_used_explore_fields``. ``sortkey`` and ``limit`` are
        handed to ``styler.sort`` and ``styler.limit`` respectively.

        Returns the value produced by ``styler.limit``.

        Raises ``Exception`` (after logging an error) when no row could be
        built.
        """
        print('Retrieving explores for fields...')
        explores = fetcher.get_explores(self,
                                        model=model,
                                        explore=explore,
                                        verbose=1)
        info = []
        for progress, e in enumerate(explores, start=1):
            # The None check must come before anything subscripts ``e``;
            # previously the progress message dereferenced ``e`` first and
            # raised TypeError for a None entry.
            if e is None:
                continue
            print('Analyzing {}.{}, {} of {} explores'.format(
                e['model_name'], e['name'], progress, len(explores)))

            _used_fields = fetcher.get_used_explore_fields(
                self, e['model_name'], e['scopes'], timeframe, min_queries)
            used_fields = list(_used_fields.keys())
            exposed_fields = fetcher.get_explore_fields(self,
                                                        explore=e,
                                                        scoped_names=1)
            unused_fields = set(exposed_fields) - set(used_fields)

            # count dimensions and measures, and how many of either lack a
            # description
            missing_description = 0
            dimensions = 0
            measures = 0
            for dim in e['fields']['dimensions']:
                dimensions += 1
                if not dim['description']:
                    missing_description += 1
            for measure in e['fields']['measures']:
                measures += 1
                if not measure['description']:
                    missing_description += 1

            info.append({
                'model': e['model_name'],
                'explore': e['name'],
                'field_count': len(exposed_fields),
                'unused_fields': len(unused_fields),
                'missing_description': missing_description,
                'dimensions': dimensions,
                'measures': measures
            })
        if not info:
            self.analyze_logger.error('No matching explores found')
            raise Exception('No matching explores found')
        # the keys of the first row double as the list of valid sort keys
        valid_values = list(info[0].keys())
        info = styler.sort(info, valid_values, sortkey)
        info = styler.limit(info, limit=limit)
        return info
Пример #3
0
    def _vacuum_explores(self, model=None, explore=None, timeframe=90,
                         min_queries=0):
        """List unused joins and unused fields for every matching explore.

        ``model`` and ``explore`` are forwarded to ``fetcher.get_explores``;
        ``timeframe`` and ``min_queries`` are forwarded to
        ``fetcher.get_used_explore_fields``.

        Returns a list of dicts with the keys ``model``, ``explore``,
        ``unused_joins`` (newline-joined join names, or ``'N/A'`` when there
        are none) and ``unused_fields`` (newline-joined, sorted
        ``view.field`` names).

        Raises ``Exception`` (after logging an error) when no row could be
        built.
        """
        explores = fetcher.get_explores(self,
                                        model=model,
                                        explore=explore,
                                        verbose=1)
        info = []
        for e in explores:
            # skip entries for explores that could not be retrieved instead
            # of failing on the first subscript
            if e is None:
                continue

            # get field usage from i__looker using all the views inside explore
            # returns fields in the form of model.explore.view.field
            _used_fields = fetcher.get_used_explore_fields(self,
                                                           e['model_name'],
                                                           e['scopes'],
                                                           timeframe,
                                                           min_queries)
            used_fields = list(_used_fields.keys())
            # get field picker fields in the form of model.explore.view.field
            exposed_fields = fetcher.get_explore_fields(self,
                                                        explore=e,
                                                        scoped_names=1)
            _unused_fields = set(exposed_fields) - set(used_fields)

            # every scope except the explore itself counts as a join
            all_joins = set(e['scopes'])
            all_joins.remove(e['name'])
            used_joins = {i.split('.')[2] for i in used_fields}

            unused_joins = '\n'.join(all_joins - used_joins) or 'N/A'

            # only keep fields that belong to used joins (unused joins fields
            # don't matter) if there's at least one used join (including the
            # base view). else don't match anything
            kept_views = list(used_joins)
            kept_views.append(e['name'])
            pattern = '|'.join(kept_views) or 'ALL'
            if pattern != 'ALL':
                # Escape the view names and require the name to end at a '.'
                # (or end of string): a plain prefix match would let a used
                # view such as "users" also select fields of an unused join
                # such as "users_facts".
                view_re = re.compile(r'^(?:{0})(?:\.|$)'.format(
                    '|'.join(re.escape(view) for view in kept_views)))
                unused_fields = []
                for field in _unused_fields:
                    # remove scoping (model.explore.) before matching
                    unscoped = '.'.join(field.split('.')[2:])
                    if view_re.match(unscoped):
                        unused_fields.append(unscoped)
                unused_fields = '\n'.join(sorted(unused_fields))
            else:
                unused_fields = color.format(pattern, 'fail', 'color')

            info.append({
                'model': e['model_name'],
                'explore': e['name'],
                'unused_joins': unused_joins,
                'unused_fields': unused_fields
            })
        if not info:
            self.vacuum_logger.error('No matching explores found')
            raise Exception('No matching explores found')
        return info
Пример #4
0
def test_get_explore_fields_gets_fields(
    fc: fetcher.Fetcher, test_model, test_explores_stats
):
    """fetcher.get_explore_fields() should return an explore's fields."""
    expected = test_explores_stats[0]
    model_name = test_model["name"]
    explore_name = expected["name"]

    # look the explore up through the fetcher first
    found = fc.get_explores(model=model_name, explore=explore_name)
    assert isinstance(found, list)

    first = found[0]
    assert isinstance(first, models.LookmlModelExplore)
    assert first.model_name == model_name
    assert first.name == explore_name

    # then check the field list extracted from it
    field_names = fc.get_explore_fields(first)
    assert isinstance(field_names, list)
    assert field_names == expected["all_fields"]
Пример #5
0
def test_get_explore_fields_gets_fields_for_dimension_or_measure_only_explores(
    fc: fetcher.Fetcher, test_model, test_dimensions_or_measures_only_explores
):
    """fetcher.get_explore_fields() should still return the field names when an
    explore defines only dimensions or only measures.
    """
    wanted = test_dimensions_or_measures_only_explores[0]

    found = fc.get_explores(model=test_model["name"], explore=wanted["name"])
    assert isinstance(found, list)

    subject = found[0]
    assert subject.name == wanted["name"]
    # the fixture explore must not have both kinds of field populated
    assert not (subject.fields.dimensions and subject.fields.measures)

    wanted_names = [entry["name"] for entry in wanted["fields"]]
    got_names = fc.get_explore_fields(subject)
    assert got_names == wanted_names
Пример #6
0
    def _vacuum_fields(self,
                       model=None,
                       explore=None,
                       timeframe=90,
                       min_queries=0):
        """Report, per view, the exposed fields that were never used.

        Usage is pooled across all matching explores: a field counts as used
        when it appears in any explore's usage data or is referenced inside
        ``{...}`` in a join's ``sql_on``. Progress is reported on stdout via
        ``print``.

        ``model`` and ``explore`` are forwarded to ``fetcher.get_explores``;
        ``timeframe`` and ``min_queries`` are forwarded to
        ``fetcher.get_used_explore_fields``.

        Returns a list of ``{'view': ..., 'unused_fields': ...}`` dicts, one
        per view name (sorted, skipping names that contain a digit), where
        ``unused_fields`` is a newline-joined string of ``view.field`` names.

        Raises ``Exception`` (after logging an error) when the result is
        empty.
        """
        explores = fetcher.get_explores(self,
                                        model=model,
                                        explore=explore,
                                        verbose=1)
        info = []
        master_exposed_fields = set()
        master_used_fields = set()
        distinct_views = set()
        for progress, e in enumerate(explores, start=1):
            # skip entries for explores that could not be retrieved instead
            # of failing on the first subscript
            if e is None:
                continue
            print('Analyzing {}.{}, {} of {} explores'.format(
                e['model_name'], e['name'], progress, len(explores)))
            # get field usage from i__looker using all the views inside explore
            # returns fields in the form of model.explore.view.field
            _used_fields = fetcher.get_used_explore_fields(
                self, e['model_name'], e['scopes'], timeframe, min_queries)
            used_fields = list(_used_fields.keys())

            # get field picker fields in the form of model.explore.view.field
            exposed_fields = fetcher.get_explore_fields(self,
                                                        explore=e,
                                                        scoped_names=1)

            # Get fields used in joins: anything between braces in sql_on
            for join in e['joins']:
                if join['sql_on'] is not None:
                    # raw string: '\{' is not a valid str escape sequence
                    for field in re.findall(r'\{(.*?)\}', join['sql_on']):
                        master_used_fields.add(field)
                        distinct_views.add(field.split('.')[0])
            # Get used fields
            for field in used_fields:
                # strip out the model and explore
                field = '.'.join(field.split('.')[2:])
                master_used_fields.add(field)
                distinct_views.add(field.split('.')[0])
            # Get all fields
            for field in exposed_fields:
                # strip out the model and explore
                field = '.'.join(field.split('.')[2:])
                master_exposed_fields.add(field)
                distinct_views.add(field.split('.')[0])

        # Fields to ignore if they contain the following:
        ignore_list = [
            'week', 'quarter', 'year', 'month', 'raw', 'date', 'time'
        ]

        # Get all unused fields and organize them by their view in a single
        # pass instead of re-filtering the whole set once per view.
        master_unused_fields = master_exposed_fields - master_used_fields
        unused_by_view = {}
        for field in master_unused_fields:
            parts = field.split('.')
            field_name = parts[1]
            # always keep id fields and basic count fields ('id' on its own
            # is covered by the split('_') membership test)
            if field_name == 'count' or 'id' in field_name.split('_'):
                continue
            # the substring test runs against the full "view.field" name
            if any(ignore in field for ignore in ignore_list):
                continue
            unused_by_view.setdefault(parts[0], []).append(field)

        for view in sorted(distinct_views):
            # views whose name contains a digit are not reported
            if any(char.isdigit() for char in view):
                continue
            # NOTE(review): a view with no unused fields is still reported,
            # with an empty string. The former "is not None" check on the
            # joined string was always true, so that behaviour is preserved
            # here - confirm whether such views should be omitted instead.
            info.append({
                'view': view,
                'unused_fields': '\n'.join(unused_by_view.get(view, []))
            })
        if not info:
            self.vacuum_logger.error('No matching explores found')
            raise Exception('No matching explores found')
        return info