Пример #1
0
def test_run_field_rules():
    """run_field_rules should apply each enabled field-level rule to the
    fields of a view and emit one result record per (rule, field) pair.

    Checks the first record: DescriptionRule passing on the 'city_code'
    dimension of the minimal multiline fixture view.
    """
    config = {
        "rules": {
            "field_level_rules": [
                {"name": "DescriptionRule", "run": True},
                {"name": "YesNoNameRule", "run": True},
            ]
        },
    }
    linter = LookMlLinter(config)
    lookml = LookML("test/minimal_multiline.view.lkml")
    v = lookml.views()[0]

    # NOTE: removed a stray debug print("v", v) — it added noise to test
    # output and asserted nothing.
    out = linter.run_field_rules(v, "dimension", "dimensions", "xxx", [])
    assert out[0] == {
        "file": "xxx",
        "rule": "DescriptionRule",
        "passed": 1,
        "type": "dimension",
        "fieldname": "city_code",
    }
Пример #2
0
def test_run_field_rules():
    """Field-level rules run across a view's dimensions and produce one
    result dict per rule/field combination; verify the first record is
    DescriptionRule passing on 'city_code'."""
    field_rules = [
        {'name': 'DescriptionRule', 'run': True},
        {'name': 'YesNoNameRule', 'run': True},
    ]
    config = {'rules': {'field_level_rules': field_rules}}

    linter = LookMlLinter(config)
    view = LookML("test/minimal_multiline.view.lkml").views()[0]

    print("v", view)

    results = linter.run_field_rules(view, 'dimension', 'dimensions', "xxx", [])
    expected_first = {
        'file': 'xxx',
        'rule': 'DescriptionRule',
        'passed': 1,
        'type': 'dimension',
        'fieldname': 'city_code',
    }
    assert results[0] == expected_first
Пример #3
0
    def run(self):
        '''
            run the set of file and field-level rules against all files
            matching the configured file globs, then write the results to
            the configured outputs (csv, simple_biquery, and/or bigquery).

            Returns:
                tuple: (file_out, field_out) — lists of per-rule result
                dicts, each tagged with the glob, timestamp, and repo URL.
        '''

        file_out = []
        field_out = []

        # single timestamp shared by all records produced in this run
        timestr = datetime.datetime.now().isoformat()

        no_orphans_rule = None
        if "NoOrphansRule" in self.other_rules_to_run():
            no_orphans_rule = NoOrphansRule(self.config)

        globstrings = self.config['infile_globs']
        for globstring in globstrings:
            filepaths = glob.glob(globstring)
            for filepath in filepaths:

                simple_filepath = os.path.basename(filepath)

                logging.info("Processing %s", filepath)

                lookml = LookML(filepath)

                file_out = self.run_file_rules(lookml, simple_filepath,
                                               file_out)

                if lookml.has_views():
                    # field-level rules only inspect the first view in the
                    # file — presumably one view per file; TODO confirm
                    v = lookml.views()[0]
                    field_out = self.run_field_rules(v, 'dimension',
                                                     'dimensions',
                                                     simple_filepath,
                                                     field_out)
                    field_out = self.run_field_rules(v, 'dimension_group',
                                                     'dimension_groups',
                                                     simple_filepath,
                                                     field_out)
                    field_out = self.run_field_rules(v, 'measure', 'measures',
                                                     simple_filepath,
                                                     field_out)

                if no_orphans_rule:
                    no_orphans_rule.process_lookml(lookml)

            # add the originating glob as metadata on each record created
            # above (plain loop: a comprehension was being used purely for
            # its side effects)
            for record in field_out + file_out:
                if 'glob' not in record:
                    record['glob'] = globstring

        # for this rule, we can only assess who failed after all files are processed
        if no_orphans_rule:
            file_out = no_orphans_rule.finish_up(file_out)

        # NOTE(review): 'simple_biquery' looks like a typo for
        # 'simple_bigquery', but it is the key existing configs use —
        # kept as-is for backward compatibility.
        if 'simple_biquery' in self.config['output']:
            simple_bq_writer = SimpleBqWriter()
        if 'bigquery' in self.config['output']:
            bq_writer = BqWriter()

        if len(file_out) > 0:
            df = pd.DataFrame(file_out)
            df['time'] = timestr
            df['repo'] = self.config['git']['url']

            if 'csv' in self.config['output']:
                self.write_file_csv(df)

            if 'simple_biquery' in self.config['output']:
                simple_bq_writer.upload(df, self.config,
                                        'file_destination_table')

            if 'bigquery' in self.config['output']:
                bq_writer.upload(df, self.config, 'file_destination_table')

        if len(field_out) > 0:
            df = pd.DataFrame(field_out)
            df['time'] = timestr
            df['repo'] = self.config['git']['url']

            if 'csv' in self.config['output']:
                self.write_field_csv(df)

            if 'simple_biquery' in self.config['output']:
                simple_bq_writer.upload(df, self.config,
                                        'field_destination_table')

            if 'bigquery' in self.config['output']:
                bq_writer.upload(df, self.config, 'field_destination_table')

        return file_out, field_out