示例#1
0
    def _test_calculator(self, delay=True):
        """
        Run every formula in ``self.calculations`` and verify the outcome.

        For each formula a column labelled ``test-<position>`` is calculated,
        through ``calculate_column.delay`` when *delay* is true and by calling
        ``calculate_column`` directly otherwise.  After each calculation this
        checks that the re-read dataframe contains the new column, that the
        column count grew by one, that the dataset schema lists the expected
        slugs and labels, and that every row of the new column matches the
        column whose label equals the formula.
        """
        dframe = Observation.find(self.dataset, as_df=True)
        start_num_cols = len(dframe.columns.tolist())

        # split the label -> slug mapping into two parallel, growable lists
        initial_labels, initial_slugs = zip(
            *build_labels_to_slugs(self.dataset).items())
        label_list = list(initial_labels)
        slugified_key_list = list(initial_slugs)

        for position, formula in enumerate(self.calculations):
            label = 'test-%s' % position

            if delay:
                task = calculate_column.delay(
                    self.dataset, dframe, formula, label)
                # the task must already be finished and must have succeeded
                self.assertTrue(task.ready())
                self.assertTrue(task.successful())
            else:
                calculate_column(self.dataset, dframe, formula, label)

            # rebuild the mapping so that it includes the new column
            labels_to_slugs = build_labels_to_slugs(self.dataset)
            slug = labels_to_slugs[label]

            # the updated dataframe must have been persisted
            dframe = Observation.find(self.dataset, as_df=True)
            self.assertTrue(slug in dframe.columns)

            # one column is added per calculation processed so far
            expected_num_cols = start_num_cols + position + 1
            self.assertEqual(expected_num_cols, len(dframe.columns.tolist()))

            # the schema must be present on the stored dataset and be a dict
            dataset = Dataset.find_one(self.dataset[DATASET_ID])
            self.assertTrue(SCHEMA in dataset.keys())
            schema = dataset[SCHEMA]
            self.assertTrue(isinstance(schema, dict))

            # schema keys are the slugified column names
            slugified_key_list.append(slug)
            self.assertEqual(sorted(schema.keys()), sorted(slugified_key_list))

            # schema labels are the original (unslugified) names
            label_list.append(label)
            schema_labels = sorted(schema[col][LABEL] for col in schema.keys())
            self.assertEqual(schema_labels, sorted(label_list))

            # the formula is the label of an existing column; compare the
            # calculated column against that column row by row
            stored_slug = labels_to_slugs[formula]

            for _, row in dframe.iterrows():
                try:
                    calculated = np.float64(row[slug])
                    expected = np.float64(row[stored_slug])
                    # np.nan != np.nan, so only compare when at least one of
                    # the two values is a real number
                    if not (np.isnan(calculated) and np.isnan(expected)):
                        msg = self._equal_msg(calculated, expected, stored_slug)
                        self.assertAlmostEqual(
                            calculated, expected, self.places, msg)
                except ValueError:
                    # values that cannot be converted to floats are compared
                    # for plain equality
                    msg = self._equal_msg(
                        row[slug], row[stored_slug], stored_slug)
                    self.assertEqual(row[slug], row[stored_slug], msg)
示例#2
0
        except ParseError, err:
            # do not save record, return error
            return {ERROR: err}

        record = {
            DATASET_ID: dataset[DATASET_ID],
            cls.FORMULA: formula,
            cls.NAME: name,
        }
        cls.collection.insert(record)

        # invalidate summary ALL since we have a new column
        stats = dataset.get(STATS)
        if stats:
            del stats[ALL]
            del dataset[STATS]
            Dataset.update(dataset, {STATS: stats})

        # call remote calculate and pass calculation id
        calculate_column.delay(dataset, dframe, formula, name)
        return mongo_remove_reserved_keys(record)

    @classmethod
    def find(cls, dataset):
        """
        Look up the calculation records belonging to *dataset*.

        Queries ``cls.collection`` for records whose ``DATASET_ID`` field
        equals ``dataset[DATASET_ID]`` and returns them as a list, each record
        passed through ``mongo_remove_reserved_keys``.
        """
        query = {DATASET_ID: dataset[DATASET_ID]}
        cursor = cls.collection.find(query)
        return [mongo_remove_reserved_keys(doc) for doc in cursor]