Пример #1
0
 def test_run(self):
     """Run an experiment built from ``self.config`` and check its labels table.

     The experiment is obtained from the ``prepare_experiment`` context
     manager; after ``run()`` its labels table name and database engine are
     handed to ``table_should_have_data``.
     """
     with prepare_experiment(self.config) as prepared:
         prepared.run()
         labels_table = prepared.labels_table_name
         engine = prepared.db_engine
         table_should_have_data(labels_table, engine)
Пример #2
0
    def _run(self, cohort_config):
        """Validate the ``cohort_config`` section of a configuration.

        Checks, in order:

        * Only the keys 'dense_states', 'entities_table', 'query' and 'name'
          are accepted.
        * At most one of 'dense_states', 'entities_table' and 'query' may be
          present.
        * 'dense_states': a 'table_name' is required; that table and its
          'entity_id', 'state', 'start_time' and 'end_time' columns are handed
          to the ``table_should_have_data`` / ``column_should_be_*`` helpers;
          finally a non-empty 'state_filters' entry is required.
        * 'entities_table': the table and its 'entity_id' column are handed to
          the same helpers.
        * 'query': must contain the ``{as_of_date}`` placeholder, and
          ``explain <query>`` must execute with the placeholder replaced by
          the sample date '2016-01-01'.

        Args:
            cohort_config (dict): the cohort section of the configuration.

        Raises:
            ValueError: if any of the checks above fails. When the sample
                query fails, the database error is chained as ``__cause__``.
        """
        mutex_keys = {'dense_states', 'entities_table', 'query'}
        available_keys = mutex_keys | {'name'}
        used_keys = set(cohort_config)
        bad_keys = used_keys - available_keys
        if bad_keys:
            raise ValueError(
                dedent('''
                Section: cohort_config -
                The following given keys: '{}'
                are invalid. Available keys are: '{}'
                '''.format(bad_keys, available_keys)))
        used_mutex_keys = mutex_keys & used_keys
        if len(used_mutex_keys) > 1:
            raise ValueError(
                dedent('''
                Section: cohort_config -
                Only one of the following keys can be sent: '{}'
                Found '{}'
                '''.format(mutex_keys, used_mutex_keys)))

        if 'dense_states' in cohort_config:
            state_config = cohort_config['dense_states']
            if 'table_name' not in state_config:
                raise ValueError(
                    dedent('''
                Section: cohort_config -
                If 'dense_states' is used as cohort config,
                a table name must be present'''))
            dense_state_table = state_config['table_name']
            table_should_have_data(dense_state_table, self.db_engine)
            column_should_be_intlike(dense_state_table, 'entity_id',
                                     self.db_engine)
            column_should_be_stringlike(dense_state_table, 'state',
                                        self.db_engine)
            column_should_be_timelike(dense_state_table, 'start_time',
                                      self.db_engine)
            column_should_be_timelike(dense_state_table, 'end_time',
                                      self.db_engine)
            # Truthiness check: a missing, empty or null 'state_filters' all
            # produce the same ValueError (len(None) used to raise TypeError).
            if ('state_filters' not in state_config
                    or not state_config['state_filters']):
                raise ValueError(
                    dedent('''
                Section: cohort_config -
                If 'dense_states' is used as cohort config,
                at least one state filter must be present'''))
        elif 'entities_table' in cohort_config:
            entities_table = cohort_config['entities_table']
            table_should_have_data(entities_table, self.db_engine)
            column_should_be_intlike(entities_table, 'entity_id',
                                     self.db_engine)
        elif 'query' in cohort_config:
            query = cohort_config['query']
            if '{as_of_date}' not in query:
                raise ValueError(
                    dedent('''
                Section: cohort_config -
                If 'query' is used as cohort_config,
                {as_of_date} must be present'''))
            dated_query = query.replace('{as_of_date}', '2016-01-01')
            logging.info('Validating cohort query')
            conn = self.db_engine.connect()
            try:
                conn.execute(f'explain {dated_query}')
            except Exception as e:
                # Dedent the template *before* interpolating: a multi-line
                # query or error text would otherwise defeat dedent and leave
                # the whole message indented.
                message = dedent('''
                    Section: cohort_config -
                    given query can not be run with a sample as_of_date .
                    query: "{}"
                    Full error: {}''').format(query, e)
                raise ValueError(message) from e
            finally:
                # Always release the connection, whether or not the query ran.
                conn.close()
Пример #3
0
    def _run(self, cohort_config):
        """Check that a ``cohort_config`` mapping is well formed.

        The mapping may carry a 'name' plus exactly one cohort source out of
        'dense_states', 'entities_table' and 'query'; any other key is
        rejected. Depending on the source that is present:

        * 'dense_states' must provide a 'table_name'. The table and its
          'entity_id', 'state', 'start_time' and 'end_time' columns are passed
          to ``table_should_have_data`` and the ``column_should_be_*``
          helpers, after which a non-empty 'state_filters' is required.
        * 'entities_table' is passed to ``table_should_have_data`` and its
          'entity_id' column to ``column_should_be_intlike``.
        * 'query' must contain ``{as_of_date}``; the placeholder is replaced
          with the sample date '2016-01-01' and ``explain <query>`` is
          executed on a connection from ``self.db_engine``.

        Args:
            cohort_config (dict): the cohort section of the configuration.

        Raises:
            ValueError: on unknown keys, on more than one cohort source, on a
                missing 'table_name' / 'state_filters' / ``{as_of_date}``, or
                when the sample query fails (the database error is chained as
                ``__cause__``).
        """
        mutex_keys = {"dense_states", "entities_table", "query"}
        available_keys = mutex_keys | {"name"}
        used_keys = set(cohort_config)
        bad_keys = used_keys - available_keys
        if bad_keys:
            raise ValueError(
                dedent("""
                Section: cohort_config -
                The following given keys: '{}'
                are invalid. Available keys are: '{}'
                """.format(bad_keys, available_keys)))
        used_mutex_keys = mutex_keys & used_keys
        if len(used_mutex_keys) > 1:
            raise ValueError(
                dedent("""
                Section: cohort_config -
                Only one of the following keys can be sent: '{}'
                Found '{}'
                """.format(mutex_keys, used_mutex_keys)))

        if "dense_states" in cohort_config:
            state_config = cohort_config["dense_states"]
            if "table_name" not in state_config:
                raise ValueError(
                    dedent("""
                Section: cohort_config -
                If 'dense_states' is used as cohort config,
                a table name must be present"""))
            dense_state_table = state_config["table_name"]
            table_should_have_data(dense_state_table, self.db_engine)
            column_should_be_intlike(dense_state_table, "entity_id",
                                     self.db_engine)
            column_should_be_stringlike(dense_state_table, "state",
                                        self.db_engine)
            column_should_be_timelike(dense_state_table, "start_time",
                                      self.db_engine)
            column_should_be_timelike(dense_state_table, "end_time",
                                      self.db_engine)
            # A null 'state_filters' is reported like an empty one instead of
            # failing with a TypeError from len(None).
            if ("state_filters" not in state_config
                    or not state_config["state_filters"]):
                raise ValueError(
                    dedent("""
                Section: cohort_config -
                If 'dense_states' is used as cohort config,
                at least one state filter must be present"""))
        elif "entities_table" in cohort_config:
            entities_table = cohort_config["entities_table"]
            table_should_have_data(entities_table, self.db_engine)
            column_should_be_intlike(entities_table, "entity_id",
                                     self.db_engine)
        elif "query" in cohort_config:
            query = cohort_config["query"]
            if "{as_of_date}" not in query:
                raise ValueError(
                    dedent("""
                Section: cohort_config -
                If 'query' is used as cohort_config,
                {as_of_date} must be present"""))
            dated_query = query.replace("{as_of_date}", "2016-01-01")
            logging.info("Validating cohort query")
            conn = self.db_engine.connect()
            try:
                conn.execute(f"explain {dated_query}")
            except Exception as e:
                # Interpolate after dedenting so that newlines inside the
                # query or the error text cannot break the dedent.
                message = dedent("""
                    Section: cohort_config -
                    given query can not be run with a sample as_of_date .
                    query: "{}"
                    Full error: {}""").format(query, e)
                raise ValueError(message) from e
            finally:
                # The connection was previously never closed; release it on
                # both the success and the failure path.
                conn.close()