Пример #1
0
    def test_None_to_nan_conversion_all_none(self):
        """Check that ``to_dataframe`` returns a DataFrame for the all-none fixture.

        The collection is loaded through ``InternalDtypeTestSerializer`` and
        the conversion itself is the regression check: it used to raise.
        """
        BaseCollection.serializer_class = InternalDtypeTestSerializer

        collection = BaseCollection()
        # NOTE(review): fixture is spelled ``dtyp_...`` while sibling tests use
        # ``dtype_...`` -- confirm the attribute name against setUp.
        collection.load_data(self.dtyp_test_data_all_none)

        frame = collection.to_dataframe()  # would raise here
        self.assertIsInstance(frame, pd.DataFrame)
Пример #2
0
    def test_base_collection_is_iterable(self):
        """Check that a loaded collection can be iterated.

        The collection itself is iterated (not the raw ``self.data`` fixture,
        which would pass regardless of how the collection behaves). Every
        yielded item is expected to be an ``InternalObject`` and the number of
        items must match the number of loaded records.
        """
        base = BaseCollection()
        base.load_data(self.data)

        # Materialize through the iterator protocol so __iter__ is exercised.
        items = list(base)

        self.assertEqual(len(items), len(self.data))
        for item in items:
            self.assertIsInstance(item, InternalObject)
Пример #3
0
    def test_base_collection_to_dataframe(self):
        """Check that ``to_dataframe`` equals a DataFrame built from ``self.data``."""
        collection = BaseCollection()
        collection.load_data(self.data)

        actual = collection.to_dataframe()
        # from_dict is a classmethod, so it is called on the class directly.
        expected = pd.DataFrame.from_dict(self.data)

        assert_frame_equal(actual, expected)
Пример #4
0
    def test_empty_collection_raises_CollectionLoadError_if_passed_empty_record_collection(
            self):
        """Check that an empty record list raises when ``raise_on_empty=True``."""
        BaseCollection.serializer_class = InternalSerializer
        empty_records = []

        collection = BaseCollection()
        # Callable form of assertRaises: load_data is invoked by the assertion.
        self.assertRaises(
            CollectionLoadError,
            collection.load_data,
            empty_records,
            raise_on_empty=True,
        )
Пример #5
0
    def test_load_data_from_dataframe(self):
        """Check that loading a DataFrame stores ``InternalObject`` instances."""
        frame = pd.DataFrame(self.data)
        collection = BaseCollection()

        collection.load_data(frame)

        # Inspect the private storage directly, one stored object at a time.
        for stored in collection._data:
            self.assertIsInstance(stored, InternalObject)
Пример #6
0
    def test_ma_kwargs_in_constructor_pass_to_serializer(self):
        """Check that extra constructor kwargs shape the collection's ``data``.

        Two collections are built from the same records: one with
        ``unknown=EXCLUDE`` and one with ``unknown=INCLUDE`` plus
        ``only=['id', 'name']``. In both cases the keys that the serializer
        does not declare are expected to be absent from ``data``.
        """
        class TestMaKwargsSerializer(BaseSerializer):
            id = fields.Integer(required=True)
            name = fields.String()
            mydate = fields.Date()

            class Meta:
                dateformat = '%Y-%m-%d'

        BaseCollection.serializer_class = TestMaKwargsSerializer

        raw_records = [
            {'id': 1, 'name': 'hep'},
            {
                'id': 2,
                'name': 'tups',
                'mydate': '2017-07-01',
                'random': 'thing',
            },
            # Only a key the serializer does not declare besides the id.
            {'id': 3, 'who': 'am i'},
        ]

        # Case 1: undeclared keys are excluded, declared ones are kept.
        excluding = BaseCollection(raw_records, unknown=EXCLUDE)
        expected_excluding = [
            {'id': 1, 'name': 'hep'},
            {'id': 2, 'mydate': '2017-07-01', 'name': 'tups'},
            {'id': 3},
        ]
        self.assertListEqual(excluding.data, expected_excluding)

        # Case 2: the ``only`` restriction limits the output to id and name.
        restricted = BaseCollection(
            raw_records, unknown=INCLUDE, only=['id', 'name'])
        expected_restricted = [
            {'id': 1, 'name': 'hep'},
            {'id': 2, 'name': 'tups'},
            {'id': 3},
        ]
        self.assertListEqual(restricted.data, expected_restricted)
Пример #7
0
    def test_empty_collection_returns_empty_dataframe_in_to_dataframe(self):
        """Check that an empty collection converts to a zero-length DataFrame."""
        BaseCollection.serializer_class = InternalSerializer
        no_records = []

        collection = BaseCollection()
        collection.load_data(no_records)
        frame = collection.to_dataframe()

        self.assertIsInstance(frame, pd.DataFrame)
        self.assertEqual(len(frame), 0)
Пример #8
0
    def test_non_required_fields_not_present_do_not_raise_key_error_in_to_dataframe(
            self):
        """Check that records carrying only ``bdbid`` round-trip via a DataFrame."""
        # The remaining fields of this serializer are not required.
        BaseCollection.serializer_class = InternalSerializer

        sparse_records = [{'bdbid': 1}, {'bdbid': 2}]

        collection = BaseCollection()
        collection.load_data(sparse_records)
        round_tripped = collection.to_dataframe().to_dict('records')

        self.assertEqual(sparse_records, round_tripped)
Пример #9
0
    def test_non_required_date_fields_do_not_raiseTypeError_in_to_dataframe(
            self):
        """Check how ``None`` and string dates appear after ``to_dataframe``.

        A ``None`` date is expected to render as ``NaT`` and a date string as
        the matching ``pd.Timestamp``.
        """
        class TestDateSerializer(BaseSerializer):
            test_date = fields.Date('%Y-%m-%d', allow_none=True)

        # The date field on this serializer is not required.
        BaseCollection.serializer_class = TestDateSerializer

        collection = BaseCollection()
        collection.load_data([{'test_date': None}, {'test_date': '2017-07-01'}])

        rows = collection.to_dataframe().to_dict('records')

        self.assertEqual(str(rows[0]['test_date']), 'NaT')
        self.assertEqual(pd.Timestamp('2017-07-01'), rows[1]['test_date'])
Пример #10
0
    def test_base_collection_concatenation(self):
        """Check that two loaded collections can be combined with ``+``.

        NOTE(review): the result is never asserted on, so this only verifies
        that the operator does not raise -- consider asserting on the result
        once the intended semantics of ``+`` are confirmed.
        """
        left = BaseCollection()
        left.load_data(self.data)

        right = BaseCollection()
        right.load_data(self.data)

        combined = left + right
Пример #11
0
    def test_base_collection_dataframe_with_dtypes(self):
        """Check that the none-containing dtype fixture yields null cells.

        ``BaseCollection.serializer_class`` is patched for the duration of the
        test and restored in a ``finally`` block, so a failing assertion or an
        exception while loading cannot leak the patched serializer into other
        tests.
        """
        # NOTE: patching a different serializer here
        BaseCollection.serializer_class = InternalDtypeTestSerializer
        try:
            # Loading the regular dtype fixture must simply not raise.
            base = BaseCollection()
            base.load_data(self.dtype_test_data)

            base2 = BaseCollection()
            base2.load_data(self.dtype_test_data_none)
            df = base2.to_dataframe()

            self.assertTrue(df.isnull().values.any())
        finally:
            # NOTE: must patch this back, even when the test fails
            BaseCollection.serializer_class = InternalSerializer
Пример #12
0
    def test_non_required_int_fields_do_not_raise_TypeError_in_to_dataframe(
            self):
        """Check how ``None`` and integer values appear after ``to_dataframe``.

        The ``None`` entry is expected to come back as NaN and the integer
        entry to compare equal to ``2.0``.
        """
        class TestIntSerializer(BaseSerializer):
            test_id = fields.Integer(allow_none=True)

        # The integer field on this serializer is not required.
        BaseCollection.serializer_class = TestIntSerializer

        collection = BaseCollection()
        collection.load_data([{'test_id': None}, {'test_id': 2}])

        rows = collection.to_dataframe().to_dict('records')
        missing_value = rows[0]['test_id']
        present_value = rows[1]['test_id']

        # NOTE: coerced to nan and float
        self.assertTrue(np.isnan(missing_value))
        self.assertEqual(present_value, 2.0)
Пример #13
0
    def test_new_collection_instances_register_on_serializer_and_internal(
            self):
        """Check that ``BaseCollection`` is registered on both helper objects.

        The class must appear in ``registered_colls`` of the instance's
        ``serializer`` and of its ``internal``. Each membership is asserted
        separately so that a missing registration on ``internal`` cannot be
        masked by the ``serializer`` result.
        """
        base = BaseCollection()

        self.assertIn(BaseCollection, base.serializer.registered_colls)
        self.assertIn(BaseCollection, base.internal.registered_colls)
Пример #14
0
    def test_base_collection_raises_ValidationError(self):
        """Check that each of three bad fixtures makes ``load_data`` raise."""
        collection = BaseCollection()

        # Three cases where the data is bad, checked one after another.
        self.assertRaises(
            ValidationError, collection.load_data, self.data_with_none)
        self.assertRaises(
            ValidationError, collection.load_data,
            self.data_with_missing_field)
        self.assertRaises(
            ValidationError, collection.load_data, self.data_bad_input)
Пример #15
0
    def test_non_required_datetimes_not_present_do_not_raise_utils_key_error(
            self):
        """Check that records missing optional date fields load and format.

        Background: when a date field was not required and not provided, a
        KeyError was being raised in RecordUtils. That error is swallowed and
        only the date fields present in the loaded data are parsed.
        """
        BaseCollection.serializer_class = DateStringFormatTestSerializer

        # Each record omits a different optional field (or none at all).
        sparse_records = [
            {'a': 1, 'b': datetime(2017, 5, 4, 10, 10, 10)},
            {'a': 2, 'c': date(2018, 5, 4)},
            {
                'a': 3,
                'b': datetime(2017, 7, 4, 10, 10, 10),
                'c': date(2019, 5, 4),
            },
        ]

        collection = BaseCollection()
        collection.load_data(sparse_records)

        expected = [
            {'a': 1, 'b': '2017-05-04 10:10:10'},
            {'a': 2, 'c': '2018-05-04'},
            {'a': 3, 'b': '2017-07-04 10:10:10', 'c': '2019-05-04'},
        ]

        self.assertListEqual(expected, collection.data)
Пример #16
0
    def test_pandas_timestamp_correctly_parsed_by_load_data(self):
        """Check that ``pd.Timestamp`` values load into formatted strings.

        The same expectation is checked for two input shapes: a plain list of
        record dicts and a DataFrame built from those records. The DataFrame
        is now actually passed to ``load_data``, so that second input path is
        exercised rather than repeating the list case.
        """
        BaseCollection.serializer_class = DateStringFormatTestSerializer

        records = [
            {
                'a': 1,
                'b': pd.Timestamp(2017, 5, 4, 10, 10, 10),
                'c': pd.Timestamp(2017, 5, 4)
            },
            {
                'a': 2,
                'b': pd.Timestamp(2017, 6, 4, 10, 10, 10),
                'c': pd.Timestamp(2018, 5, 4)
            },
            {
                'a': 3,
                'b': pd.Timestamp(2017, 7, 4, 10, 10, 10),
                'c': pd.Timestamp(2019, 5, 4)
            },
        ]

        expected = [{
            'a': 1,
            'b': '2017-05-04 10:10:10',
            'c': '2017-05-04'
        }, {
            'a': 2,
            'b': '2017-06-04 10:10:10',
            'c': '2018-05-04'
        }, {
            'a': 3,
            'b': '2017-07-04 10:10:10',
            'c': '2019-05-04'
        }]

        # Case 1: a list of record dicts.
        b = BaseCollection()
        b.load_data(records)

        self.assertListEqual(expected, b.data)

        # Case 2: the same records wrapped in a DataFrame.
        df = pd.DataFrame.from_records(records)
        b = BaseCollection()
        b.load_data(df)

        self.assertListEqual(b.data, expected)
Пример #17
0
    def test_data_load_raises_validation_error(self):
        """Check that constructing a collection from a bad record raises."""
        class TestDataLoadSerializer(BaseSerializer):
            id = fields.Integer(required=True)
            name = fields.String()
            mydate = fields.Date()

            class Meta:
                dateformat = '%Y-%m-%d'

        BaseCollection.serializer_class = TestDataLoadSerializer

        # The second record has no ``id`` at all, only an undeclared key.
        payload = [{'id': 1, 'name': 'hep'}, {'bad': 'idea'}]

        self.assertRaises(ValidationError, BaseCollection, payload)
Пример #18
0
    def test_data_load_via_init_works_as_expected(self):
        """Check that constructor-loaded data iterates as ``InternalObject``."""
        class TestInitSerializer(BaseSerializer):
            id = fields.Integer(required=True)
            name = fields.String()
            mydate = fields.Date()

            class Meta:
                dateformat = '%Y-%m-%d'

        BaseCollection.serializer_class = TestInitSerializer

        payload = [
            {'id': 1, 'name': 'hep'},
            {'id': 2, 'name': 'tups', 'mydate': '2017-07-01'},
        ]

        collection = BaseCollection(payload)

        # Iterating the collection itself, not its private storage.
        for element in collection:
            self.assertIsInstance(element, InternalObject)
Пример #19
0
    def test_base_collection_correctly_loads_good_data(self):
        """Check that ``load_data`` stores ``InternalObject`` instances."""
        collection = BaseCollection()
        collection.load_data(self.data)

        # Every entry of the private storage must be an InternalObject.
        for stored in collection._data:
            self.assertIsInstance(stored, InternalObject)
Пример #20
0
    def test_base_collection_returns_len(self):
        """Check that ``len`` of a loaded collection matches the fixture size."""
        collection = BaseCollection()
        collection.load_data(self.data)

        actual_size = len(collection)
        expected_size = len(self.data)

        self.assertEqual(actual_size, expected_size)
Пример #21
0
    def test_base_collection_raises_CollectionLoadError(self):
        """Check that ``load_data`` raises once ``_serializer`` is ``None``."""
        collection = BaseCollection()

        # Patch the private serializer away to provoke the load error.
        collection._serializer = None

        self.assertRaises(
            CollectionLoadError, collection.load_data, self.data)