示例#1
0
    def _extract_numerical_feature_sample(self) -> pd.DataFrame:
        """Pulls a random sample of values for the selected numerical features.

        The query text is rendered by `utils.configure_sql` from the template
        at `_EXTRACT_NUM_FEATURE_SAMPLE_SQL_PATH` and then run through
        `viz_utils.execute_sql` using `self._bq_client`.

        Returns:
          The output of `viz_utils.execute_sql` for the rendered query.
        """
        logging.info('Extracting a random sample of numerical features.')

        # Step 1: render the query from its template.
        logging.info('Creating the sql code.')
        column_list_sql = self._create_column_list_sql(
            self._numerical_feature_list)
        template_values = dict(
            bq_features_table=self._features_table_path,
            label_column=self._label_column,
            positive_class_label=self._positive_class_label,
            negative_class_label=self._negative_class_label,
            num_pos_instances=self._num_pos_instances,
            num_neg_instances=self._num_neg_instances,
            sql_code_segment=column_list_sql,
        )
        sample_query = utils.configure_sql(
            _EXTRACT_NUM_FEATURE_SAMPLE_SQL_PATH, template_values)
        logging.info('Finished creating the sql code.')

        # Step 2: run the rendered query.
        logging.info('Executing the sql code.')
        sample_df = viz_utils.execute_sql(self._bq_client, sample_query)
        logging.info('Finished executing the sql code.')

        return sample_df
示例#2
0
    def test_congigure_sql_creates_tuple_given_list_of_strings(self):
        # A comma-separated string parameter is expected to be rendered the
        # same way as formatting the template with a tuple of its items.
        template = 'SELECT * FROM test_table WHERE test_column IN {test_list};'
        params = {'test_list': 'value1,value2'}
        value_tuple = ('value1', 'value2')
        expected = template.format(test_list=value_tuple)
        fake_open = absltest.mock.mock_open(read_data=template)

        # NOTE(review): presumably configure_sql reads its template through
        # open(), so the patched open() serves `template` - confirm in utils.
        with absltest.mock.patch('builtins.open', fake_open):
            rendered = utils.configure_sql(self.test_sql, params)
            self.assertEqual(expected, rendered)
示例#3
0
    def test_congigure_sql_replaces_params(self):
        # Every placeholder in the template is expected to be replaced by the
        # matching entry of the params mapping.
        template = 'SELECT * FROM {project}.{dataset}.{table};'
        params = dict(
            project='test_project',
            dataset='test_dataset',
            table='test_table',
        )
        expected = template.format(**params)
        fake_open = absltest.mock.mock_open(read_data=template)

        # NOTE(review): presumably configure_sql reads its template through
        # open(), so the patched open() serves `template` - confirm in utils.
        with absltest.mock.patch('builtins.open', fake_open):
            rendered = utils.configure_sql(self.test_sql, params)
            self.assertEqual(expected, rendered)
    def _calc_numerical_fact_stats(self) -> pd.DataFrame:
        """Computes statistics for the selected numerical fact variables.

        The query text is rendered by `utils.configure_sql` from the template
        at `_CALC_NUM_FACT_STATS_SQL_PATH` and run through
        `viz_utils.execute_sql` using `self._bq_client`.

        Returns:
          The query output, with its 'date' column converted by
          `pd.to_datetime`.
        """
        logging.info('Calculating statistics from numerical facts.')
        logging.info('Reading the sql query from the file.')
        template_values = dict(
            bq_facts_table=self._numerical_facts_table_path,
        )
        stats_query = utils.configure_sql(
            _CALC_NUM_FACT_STATS_SQL_PATH, template_values)

        stats_df = viz_utils.execute_sql(self._bq_client, stats_query)
        logging.info('Finished calculating statistics from numerical facts.')

        # Overwrite the column on the returned frame itself (no copy is made).
        parsed_dates = pd.to_datetime(stats_df['date'])
        stats_df['date'] = parsed_dates
        return stats_df
示例#5
0
    def _calc_categorical_fact_stats(self) -> pd.DataFrame:
        """Computes statistics for the selected categorical fact variables.

        The query text is rendered by `utils.configure_sql` from the template
        at `_CALC_CAT_FACT_STATS_SQL_PATH` and run through
        `viz_utils.execute_sql` using `self._bq_client`.

        Returns:
          The output of `viz_utils.execute_sql` for the rendered query.
        """
        logging.info('Calculating statistics from categorical facts.')
        logging.info('Reading the sql query from the file.')
        template_values = dict(
            bq_facts_table=self._facts_table_path,
            categorical_fact_list=self._categorical_facts,
            number_top_levels=self._number_top_levels,
        )
        stats_query = utils.configure_sql(
            _CALC_CAT_FACT_STATS_SQL_PATH, template_values)

        stats_df = viz_utils.execute_sql(self._bq_client, stats_query)
        logging.info('Finished calculating statistics from categorical facts.')

        return stats_df
示例#6
0
    def _calc_categorical_feature_stats(self) -> pd.DataFrame:
        """Computes statistics for the selected categorical features.

        The query text is rendered by `utils.configure_sql` from the template
        at `_CALC_CAT_FEATURE_STATS_SQL_PATH` and run through
        `viz_utils.execute_sql` using `self._bq_client`.

        Returns:
          The output of `viz_utils.execute_sql` for the rendered query.
        """
        logging.info('Calculating statistics from categorical features.')

        # Step 1: render the query from its template.
        logging.info('Creating the sql code.')
        struct_columns_sql = self._create_struct_column_list_sql(
            self._categorical_feature_list)
        template_values = dict(
            bq_features_table=self._features_table_path,
            sql_code_segment=struct_columns_sql,
        )
        stats_query = utils.configure_sql(
            _CALC_CAT_FEATURE_STATS_SQL_PATH, template_values)
        logging.info('Finished creating the sql code.')

        # Step 2: run the rendered query.
        logging.info('Executing the sql code.')
        stats_df = viz_utils.execute_sql(self._bq_client, stats_query)
        logging.info('Finished executing the sql code.')

        return stats_df