class TestAbstractDatasets(TestBase):
    """Base class with shared helpers for dataset controller tests.

    Provides fixture paths, default formulae, and ``_test_*`` /
    ``_put_row_updates`` helpers that subclasses use to exercise the
    ``Datasets`` controller and validate its JSON responses.
    """

    NUM_COLS = 15  # column count of the good_eats.csv fixture
    NUM_ROWS = 19  # row count of the good_eats.csv fixture

    def setUp(self):
        TestBase.setUp(self)
        self.controller = Datasets()
        self._file_name = 'good_eats.csv'
        self._update_file_name = 'good_eats_update.json'
        self._update_check_file_path = '%sgood_eats_update_values.json' % (
            self.FIXTURE_PATH)
        self.default_formulae = [
            'amount',
            'amount + 1',
            'amount - 5',
        ]

    def _put_row_updates(self, dataset_id=None, file_name=None, validate=True):
        """Send a row update read from a fixture file to the controller.

        :param dataset_id: dataset to update, defaults to ``self.dataset_id``.
        :param file_name: update fixture name, defaults to
            ``self._update_file_name``.
        :param validate: when True, assert the response is a dict containing
            a ``Dataset.ID`` key.

        Side effect: loads the expected post-update values into
        ``self._update_values`` for later comparison.
        """
        if not dataset_id:
            dataset_id = self.dataset_id

        if not file_name:
            file_name = self._update_file_name

        # BUG FIX: the handle returned by open() was never closed; use a
        # context manager so it is released deterministically.
        with open('%s%s' % (self.FIXTURE_PATH, file_name), 'r') as fp:
            update = fp.read()

        result = json.loads(self.controller.update(dataset_id=dataset_id,
                                                   update=update))

        if validate:
            self.assertTrue(isinstance(result, dict))
            self.assertTrue(Dataset.ID in result)

        # set up the (default) values to test against
        with open(self._update_check_file_path, 'r') as fp:
            self._update_values = json.loads(fp.read())

    def _load_schema(self):
        """Return the schema dict from the dataset's info endpoint."""
        return json.loads(
            self.controller.info(self.dataset_id))[Dataset.SCHEMA]

    def _check_dframes_are_equal(self, dframe1, dframe2):
        """Assert two dataframes hold the same rows, ignoring order."""
        rows1 = comparable(dframe1)
        rows2 = comparable(dframe2)

        self.__check_dframe_is_subset(rows1, rows2)
        self.__check_dframe_is_subset(rows2, rows1)

    def __check_dframe_is_subset(self, rows1, rows2):
        """Assert every row of ``rows1`` also appears in ``rows2``."""
        for row in rows1:
            self.assertTrue(row in rows2,
                            '\nrow:\n%s\n\nnot in rows2:\n%s' % (row, rows2))

    def _post_calculations(self, formulae=None, group=None):
        """Create one calculation per formula on the current dataset.

        BUG FIX: ``formulae`` previously used a mutable default argument
        (``[]``); a ``None`` sentinel is used instead.
        """
        if formulae is None:
            formulae = []

        schema = self._load_schema()
        controller = Calculations()

        for idx, formula in enumerate(formulae):
            # use a synthetic name when the formula would collide with an
            # existing schema column (or no schema exists yet)
            name = 'calc_%d' % idx if not schema or\
                formula in schema.keys() else formula
            controller.create(self.dataset_id, formula=formula, name=name,
                              group=group)

    def _test_summary_built(self, result):
        # check that summary is created
        self.assertTrue(isinstance(result, dict))
        self.assertTrue(Dataset.ID in result)

        self.dataset_id = result[Dataset.ID]

        results = self.controller.summary(
            self.dataset_id,
            select=self.controller.SELECT_ALL_FOR_SUMMARY)

        return self._test_summary_results(results)

    def _test_summary_results(self, results):
        """Decode a summary response and assert it is a dict."""
        results = json.loads(results)
        self.assertTrue(isinstance(results, dict))
        return results

    def _test_aggregations(self, groups=None):
        """Check the aggregations map and return the linked dataset (decoded).

        BUG FIX: ``groups`` previously used a mutable default argument
        (``['']``); a ``None`` sentinel is used instead.
        """
        if groups is None:
            groups = ['']

        results = json.loads(self.controller.aggregations(self.dataset_id))
        self.assertTrue(isinstance(results, dict))
        self.assertEqual(len(results.keys()), len(groups))
        self.assertEqual(results.keys(), groups)
        linked_dataset_id = results[groups[0]]
        self.assertTrue(isinstance(linked_dataset_id, basestring))

        # inspect linked dataset
        return json.loads(self.controller.show(linked_dataset_id))

    def _test_summary_no_group(self, results, dataset_id=None, group=None):
        """Assert an ungrouped summary covers every non-grouped column."""
        if not dataset_id:
            dataset_id = self.dataset_id

        group = [group] if group else []
        result_keys = results.keys()

        # minus the column that we are grouping on
        self.assertEqual(len(result_keys), self.NUM_COLS - len(group))

        columns = [col for col in
                   self.get_data(self._file_name).columns.tolist()
                   if not col in [MONGO_ID] + group]

        dataset = Dataset.find_one(dataset_id)
        labels_to_slugs = dataset.schema.labels_to_slugs

        for col in columns:
            slug = labels_to_slugs[col]
            self.assertTrue(slug in result_keys,
                            'col (slug): %s in: %s' % (slug, result_keys))
            self.assertTrue(SUMMARY in results[slug].keys())
class TestProfile(TestBase):
    """Profile dataset creation and summary endpoints at several sizes."""

    # size label -> (width exponent, length multiplier)
    TEST_CASE_SIZES = {
        'tiny': (1, 1),
        'small': (2, 2),
        'large': (4, 40),
    }

    def setUp(self):
        TestBase.setUp(self)
        self.datasets = Datasets()
        self.tmp_file = NamedTemporaryFile(delete=False)

    def tearDown(self):
        # remove the temp CSV created in setUp
        os.unlink(self.tmp_file.name)

    def _expand_width(self, df, exponent):
        """Double the column count ``exponent`` times, then relabel every
        column with its positional index as a string."""
        for _ in range(exponent):
            suffixed = df.rename(columns={
                col: '%s-%s' % (col, pos)
                for (pos, col) in enumerate(df.columns)
            })
            df = df.join(suffixed)

        df.rename(columns={col: str(pos)
                           for (pos, col) in enumerate(df.columns)},
                  inplace=True)

        return df

    def _grow_test_data(self, dataset_name, width_exp, length_factor):
        """Load a fixture, widen it, and tile it ``length_factor`` times."""
        widened = self._expand_width(self.get_data(dataset_name), width_exp)
        return concat([widened] * length_factor)

    def test_tiny_profile(self):
        self._test_profile('tiny')

    def test_small_profile(self):
        self._test_profile('small')

    def test_large_profile(self):
        self._test_profile('large')

    @run_profiler
    def _test_profile(self, size):
        """Run the full create/save/info/summary cycle for one size."""
        print('bamboo/bamboo: %s' % size)
        self._test_create_data(*self.TEST_CASE_SIZES[size])
        print('saving dataset')
        self._test_save_dataset()
        self._test_get_info()
        self._test_get_summary()
        self._test_get_summary_with_group('province')
        self._test_get_summary_with_group('school_zone')

    def _test_create_data(self, width_exp, length_factor):
        self.data = self._grow_test_data('kenya_secondary_schools_2007.csv',
                                         width_exp, length_factor)
        print('bamboo/bamboo rows: %s, columns: %s' % (len(
            self.data), len(self.data.columns)))

    def _test_save_dataset(self):
        self.data.to_csv(self.tmp_file)
        self.tmp_file.close()
        upload = MockUploadedFile(open(self.tmp_file.name, 'r'))
        response = json.loads(self.datasets.create(csv_file=upload))
        self.assertTrue(isinstance(response, dict))
        self.assertTrue(Dataset.ID in response)
        self.dataset_id = response[Dataset.ID]

    def _test_get_info(self):
        info = json.loads(self.datasets.info(self.dataset_id))
        self.assertTrue(isinstance(info, dict))

    def _test_get_summary(self):
        summary = json.loads(self.datasets.summary(
            self.dataset_id,
            select=self.datasets.SELECT_ALL_FOR_SUMMARY))
        self.assertTrue(isinstance(summary, dict))

    def _test_get_summary_with_group(self, group):
        summary = json.loads(self.datasets.summary(
            self.dataset_id, group=group,
            select=self.datasets.SELECT_ALL_FOR_SUMMARY))
        self.assertTrue(isinstance(summary, dict))
class TestProfile(TestBase):
    """Profiling tests for dataset endpoints.

    NOTE(review): this class duplicates the ``TestProfile`` defined
    earlier in this module; because both share a name, this later
    definition shadows the earlier one at import time — confirm whether
    one of the two copies should be removed.
    """

    # maps a profile size label to (width exponent, length multiplier)
    TEST_CASE_SIZES = {
        'tiny': (1, 1),
        'small': (2, 2),
        'large': (4, 40),
    }

    def setUp(self):
        TestBase.setUp(self)
        self.datasets = Datasets()
        self.tmp_file = NamedTemporaryFile(delete=False)

    def tearDown(self):
        # clean up the temp file allocated in setUp
        os.unlink(self.tmp_file.name)

    def _expand_width(self, df, exponent):
        """Repeatedly self-join the frame to double its width, then give
        every column a purely numeric string label."""
        for _ in range(exponent):
            mapping = dict(
                (name, '%s-%s' % (name, idx))
                for (idx, name) in enumerate(df.columns))
            df = df.join(df.rename(columns=mapping))

        final_names = dict(
            (name, str(idx)) for (idx, name) in enumerate(df.columns))
        df.rename(columns=final_names, inplace=True)

        return df

    def _grow_test_data(self, dataset_name, width_exp, length_factor):
        """Build a test frame: widen a fixture, then stack it repeatedly."""
        base = self.get_data(dataset_name)
        base = self._expand_width(base, width_exp)
        return concat([base] * length_factor)

    def test_tiny_profile(self):
        self._test_profile('tiny')

    def test_small_profile(self):
        self._test_profile('small')

    def test_large_profile(self):
        self._test_profile('large')

    @run_profiler
    def _test_profile(self, size):
        """Drive the create/save/info/summary sequence for one size."""
        print('bamboo/bamboo: %s' % size)
        self._test_create_data(*self.TEST_CASE_SIZES[size])
        print('saving dataset')
        self._test_save_dataset()
        self._test_get_info()
        self._test_get_summary()
        self._test_get_summary_with_group('province')
        self._test_get_summary_with_group('school_zone')

    def _test_create_data(self, width_exp, length_factor):
        self.data = self._grow_test_data(
            'kenya_secondary_schools_2007.csv', width_exp, length_factor)
        print('bamboo/bamboo rows: %s, columns: %s' % (
            len(self.data), len(self.data.columns)))

    def _test_save_dataset(self):
        self.data.to_csv(self.tmp_file)
        self.tmp_file.close()
        csv_upload = MockUploadedFile(open(self.tmp_file.name, 'r'))
        created = json.loads(self.datasets.create(csv_file=csv_upload))
        self.assertTrue(isinstance(created, dict))
        self.assertTrue(Dataset.ID in created)
        self.dataset_id = created[Dataset.ID]

    def _test_get_info(self):
        decoded = json.loads(self.datasets.info(self.dataset_id))
        self.assertTrue(isinstance(decoded, dict))

    def _test_get_summary(self):
        decoded = json.loads(self.datasets.summary(
            self.dataset_id, select=self.datasets.SELECT_ALL_FOR_SUMMARY))
        self.assertTrue(isinstance(decoded, dict))

    def _test_get_summary_with_group(self, group):
        decoded = json.loads(self.datasets.summary(
            self.dataset_id, group=group,
            select=self.datasets.SELECT_ALL_FOR_SUMMARY))
        self.assertTrue(isinstance(decoded, dict))