    def test_drop_duplicates(self):
        class MyRecord(DictClass):
            first_name: str
            last_name: str
            sex: str

        data_table = DataTable(
            record_class=MyRecord,
            base_dict={
                "first_name": ["ABC", "ABC", "DEF", "DEF", "DEF"],
                "last_name": ["XYZ", "XYZ", "MNO", "MNO", "MNO"],
                "sex": ["M", "M", "F", "M", "F"],
            },
        )

        assert isinstance(data_table.get_record(0), MyRecord)

        deduplicated_data_table = data_table.drop_duplicates()
        assert isinstance(deduplicated_data_table.get_record(0), MyRecord)
        assert {
            "first_name": ["ABC", "DEF", "DEF"],
            "last_name": ["XYZ", "MNO", "MNO"],
            "sex": ["M", "F", "M"],
        } == deduplicated_data_table

        deduplicated_data_table = data_table.drop_duplicates(
            subset=("first_name", ))
        assert isinstance(deduplicated_data_table.get_record(0), MyRecord)
        assert {
            "first_name": ["ABC", "DEF"],
            "last_name": ["XYZ", "MNO"],
            "sex": ["M", "F"],
        } == deduplicated_data_table

        deduplicated_data_table = data_table.drop_duplicates(
            subset=("last_name", "sex"))
        assert isinstance(deduplicated_data_table.get_record(0), MyRecord)
        assert {
            "first_name": ["ABC", "DEF", "DEF"],
            "last_name": ["XYZ", "MNO", "MNO"],
            "sex": ["M", "F", "M"],
        } == deduplicated_data_table

        # it should raise an error as the provided column name is invalid
        with pytest.raises(DataTableError):
            _ = data_table.drop_duplicates(subset=("invalid_column", ))

        # not normalized table
        data_table = DataTable(
            record_class=MyRecord,
            base_dict={
                "first_name": ["ABC", "ABC", "DEF", "DEF", "DEF"],
                "last_name": ["XYZ", "XYZ", "MNO", "MNO"],
                "sex": ["M", "M", "F", "M", "F"],
            },
        )

        # it should raise an error as the table is not normalized
        with pytest.raises(DataTableError):
            _ = data_table.drop_duplicates()
    def test_cached_batch_get(self):
        self.client_mock.batch_get_item.return_value = {
            "Responses": {
                "my_table_name": [{
                    "pk": "my_pk",
                    "sk": "my_sk",
                    "data": "value"
                }]
            }
        }
        data_table = DataTable().add_record({"pk": "my_pk", "sk": "my_sk"})
        result = list(self.result.cached_batch_get(data_table).get_records())
        assert result == [{"pk": "my_pk", "sk": "my_sk", "data": "value"}]
        self.client_mock.batch_get_item.assert_called_with(
            RequestItems={
                "my_table_name": {
                    "Keys": [{
                        "pk": "my_pk",
                        "sk": "my_sk"
                    }]
                }
            },
            ReturnConsumedCapacity="NONE",
        )
        self.client_mock.batch_get_item.reset_mock()
        result = list(self.result.cached_batch_get(data_table).get_records())
        self.client_mock.batch_get_item.assert_not_called()

        assert list(self.result.cached_batch_get(
            DataTable()).get_records()) == []
    def test_batch_delete(self):
        self.client_mock.batch_write_item.return_value = {
            "Responses": {
                "my_table_name": [{
                    "pk": "my_pk",
                    "sk": "my_sk",
                    "data": "value"
                }]
            }
        }
        data_table = DataTable().add_record({"pk": "my_pk", "sk": "my_sk"})
        assert list(self.result.batch_delete(data_table).get_records()) == [{
            "pk": "my_pk",
            "sk": "my_sk"
        }]
        self.client_mock.batch_write_item.assert_called_with(
            RequestItems={
                "my_table_name": [{
                    "DeleteRequest": {
                        "Key": {
                            "pk": "my_pk",
                            "sk": "my_sk"
                        }
                    }
                }]
            },
            ReturnConsumedCapacity="NONE",
            ReturnItemCollectionMetrics="NONE",
        )

        assert list(self.result.batch_delete(DataTable()).get_records()) == []
    def test_get_records() -> None:
        data_table = DataTable({"a": [1, 2], "b": [3, 4]})
        records = data_table.get_records()
        assert next(records) == {"a": 1, "b": 3}
        assert next(records) == {"a": 2, "b": 4}

        with pytest.raises(StopIteration):
            next(records)
    def test_get_column() -> None:
        data_table = DataTable({"a": [1, 2], "b": [3, DataTable.NOT_SET]})
        assert data_table.get_column("a") == [1, 2]
        assert data_table.get_column("b") == [3, None]
        assert data_table.get_column("c") == [None, None]

        with pytest.raises(DataTableError):
            DataTable({"a": [1], "b": []}).get_column("a")
    def test_is_normalized() -> None:
        assert DataTable({"a": [1, 2], "b": [3, 4]}).is_normalized()
        assert not DataTable({"a": [1, 2], "b": [3]}).is_normalized()
        assert not DataTable({
            "a": [1, 2],
            "b": [3, 4],
            "c": []
        }).is_normalized()
    def test_get_lengths() -> None:
        data_table = DataTable({"a": [1, 2, 3], "b": [1, 2]})
        assert data_table["a"] == [1, 2, 3]
        assert data_table["b"] == [1, 2]
        assert data_table.max_length == 3
        assert data_table.min_length == 2
        assert data_table.get_lengths() == [3, 2]
        assert DataTable().get_lengths() == []
        assert DataTable({"a": [None]}).get_lengths() == [1]
Example #8
    def batch_get(self,
                  data_table: DataTable[_RecordType],
                  consistent_read: bool = False) -> DataTable[_RecordType]:
        """
        Get multiple records as a DataTable from DB.

        `data_table` must have all columns to calculate table keys.

        Example:

            ```python
            # UserTable is a subclass of a DynamoTable
            user_table = UserTable()

            # we should provide table keys or fields to calculate them
            # in our case, PK is calculated from `email` field.
            users_table = DataTable[UserRecord]().add_record(
                {
                    "email": "*****@*****.**",
                },
                {
                    "email": "*****@*****.**",
                },
            )
            user_records = user_table.batch_get(users_table)

            for user_record in user_records:
                # print found records
                # if a record was not found, it is still returned,
                # but only with the data you provided
                print(user_record)
            ```

        Arguments:
            data_table -- Request data table.
            consistent_read -- `ConsistentRead` boto3 parameter.

        Returns:
            DataTable with existing records.
        """

        if not data_table:
            return data_table.copy()
        get_data_table = DataTable()
        for record in data_table.get_records():
            record = self._convert_record(record)
            record = self.normalize_record(record)
            record.update(self._get_record_keys(record))
            get_data_table.add_record(record)

        results: DataTable[Any] = (
            self.dynamo_query_class.build_batch_get_item(
                consistent_read=consistent_read, logger=self._logger).table(
                    table_keys=self.table_keys,
                    table=self.table).execute(data_table=get_data_table))
        return DataTable(record_class=self.record_class).add_table(results)
    def test_add_record() -> None:
        data_table = DataTable({"a": [1], "b": [3]})
        result = data_table.add_record({"a": 5, "c": 4}, {"c": 5})
        assert result is data_table
        assert data_table == {
            "a": [1, 5, data_table.NOT_SET],
            "b": [3, data_table.NOT_SET, data_table.NOT_SET],
            "c": [data_table.NOT_SET, 4, 5],
        }

        with pytest.raises(DataTableError):
            DataTable({"a": [1], "b": []}).add_record({"a": 1})
Example #10
    def _validate_data_table_has_table_keys(self,
                                            data_table: DataTable) -> None:
        for table_key in self.table_keys:
            if data_table.has_set_column(table_key):
                continue

            if data_table.has_column(table_key):
                raise DynamoQueryError(
                    f'Column "{table_key}" has missing values in input data,'
                    f" but present in table keys {self.table_keys}")

            raise DynamoQueryError(
                f'Column "{table_key}" is missing in input data,'
                f" but present in table keys {self.table_keys}")
Example #11
    def _validate_required_value_keys(self, data_table: DataTable) -> None:
        for name, expression in self._expressions.items():
            required_value_keys = expression.get_format_values()
            for required_value_key in required_value_keys:
                if data_table.has_set_column(required_value_key):
                    continue

                if data_table.has_column(required_value_key):
                    raise DynamoQueryError(
                        f'Column "{required_value_key}"" has missing values in input data,'
                        f' but present in {name} = "{expression}"')

                raise DynamoQueryError(
                    f'Column "{required_value_key}" is missing in input data,'
                    f' but present in {name} = "{expression}"')
    def test_batch_get_records(self):
        self.client_mock.batch_get_item.return_value = {
            "Responses": {
                "my_table_name": [{
                    "pk": "my_pk",
                    "sk": "my_sk",
                    "data": "value"
                }]
            }
        }
        assert list(
            self.result.batch_get_records([{
                "pk": "my_pk",
                "sk": "my_sk"
            }])) == [{
                "pk": "my_pk",
                "sk": "my_sk",
                "data": "value"
            }]
        self.client_mock.batch_get_item.assert_called_with(
            RequestItems={
                "my_table_name": {
                    "Keys": [{
                        "pk": "my_pk",
                        "sk": "my_sk"
                    }]
                }
            },
            ReturnConsumedCapacity="NONE",
        )

        assert list(self.result.batch_get(DataTable()).get_records()) == []
    def test_has_set_column() -> None:
        data_table = DataTable({"a": [1, 2], "b": [DataTable.NOT_SET, 3]})
        assert data_table.has_set_column()
        assert data_table.has_set_column("a")
        assert not data_table.has_set_column("b")
        assert not data_table.has_set_column("c")
        assert not data_table.has_set_column("b", "a")
        assert not data_table.has_set_column("c")
        assert not data_table.has_set_column("a", "c")
    def test_normalize() -> None:
        data_table = DataTable({"a": [1, 2, 3], "b": [3, 4], "c": []})
        data_table.normalize()
        assert data_table == {
            "a": [1, 2, 3],
            "b": [3, 4, data_table.NOT_SET],
            "c": [data_table.NOT_SET, data_table.NOT_SET, data_table.NOT_SET],
        }
        assert data_table.get_record(0) == {"a": 1, "b": 3, "c": None}

        data_table = DataTable({"a": [1, 2], "b": [3, 4]})
        data_table.normalize()
        assert data_table == {"a": [1, 2], "b": [3, 4]}
    def test_set() -> None:
        data_table = DataTable({"a": [1, 2], "b": [DataTable.NOT_SET]})
        result = data_table.set("a", 1, "value_a").set("b", 0, "value_b")
        assert result is data_table
        assert data_table == DataTable({"a": [1, "value_a"], "b": ["value_b"]})

        with pytest.raises(DataTableError):
            data_table.set("b", 1, "value_b")

        with pytest.raises(DataTableError):
            data_table.set("c", 0, "value_c")
    def test_errors() -> None:
        filter_expression_mock = MagicMock()
        projection_expression_mock = MagicMock()
        table_resource_mock = MagicMock()
        query = DynamoQuery.build_query(
            key_condition_expression=ConditionExpression("key", "contains"),
            index_name="my_index",
            filter_expression=filter_expression_mock,
            projection_expression=projection_expression_mock,
            limit=100,
        ).table(table=table_resource_mock, table_keys=("pk", "sk"))

        with pytest.raises(DynamoQueryError):
            query.execute_dict({"key": "value"})

        query = DynamoQuery.build_query(
            key_condition_expression=ConditionExpression("key"),
            index_name="my_index",
            filter_expression=filter_expression_mock,
            projection_expression=projection_expression_mock,
            limit=100,
        ).table(table=table_resource_mock, table_keys=("pk", "sk"))

        with pytest.raises(DynamoQueryError):
            query.execute_dict({"key1": "value"})

        with pytest.raises(DynamoQueryError):
            query.execute(DataTable({"key": [1, 2], "b": [3]}))

        with pytest.raises(DynamoQueryError):
            query.execute(DataTable({"key": [3, DataTable.NOT_SET]}))

        with pytest.raises(DynamoQueryError):
            DynamoQuery.build_batch_get_item().table(
                table=table_resource_mock, table_keys=("pk", "sk")).execute(
                    DataTable({
                        "pk": ["test"],
                        "sk": [DataTable.NOT_SET]
                    }))

        with pytest.raises(DynamoQueryError):
            DynamoQuery.build_batch_get_item().table(
                table=table_resource_mock,
                table_keys=("pk", "sk")).execute(DataTable({"pk": ["test"]}))
    def test_has_column() -> None:
        data_table = DataTable({"a": [1, 2], "b": [3, 4]})
        assert data_table.has_column()
        assert data_table.has_column("a")
        assert data_table.has_column("b")
        assert data_table.has_column("b", "a")
        assert not data_table.has_column("c")
        assert not data_table.has_column("a", "c")
Example #18
    def _execute_method_scan(
        self,
        data_table: DataTable,
    ) -> DataTable:
        self._validate_last_evaluated_key()
        self._validate_required_value_keys(data_table)

        result = DataTable[Dict[str, Any]]()
        for record in data_table.get_records():
            result.add_table(self._execute_paginated_query(data=record))
        return result
Example #19
    def _execute_method_batch_get_item(self,
                                       data_table: DataTable) -> DataTable:
        self._validate_data_table_has_table_keys(data_table)

        record_chunks = chunkify(data_table.get_records(), self.MAX_BATCH_SIZE)
        table_name = self.table_resource.name
        response_table = DataTable[Dict[str, Any]]()
        for record_chunk in record_chunks:
            key_data_list = []
            for record in record_chunk:
                key_data = {
                    k: v
                    for k, v in record.items() if k in self.table_keys
                }
                key_data_list.append(key_data)
            request_items = {
                table_name: {
                    "Keys": key_data_list,
                    "ConsistentRead": self._consistent_read
                }
            }
            response = self._batch_get_item(
                RequestItems=request_items,
                **self._extra_params,
            )
            if response.get("Responses", {}).get(table_name):
                response_table.add_record(*response["Responses"][table_name])

        result = DataTable[Dict[str, Any]]()
        for record in data_table.get_records():
            key_data = {
                k: v
                for k, v in record.items() if k in self.table_keys
            }
            response_records = response_table.filter_records(
                key_data).get_records()
            for response_record in response_records:
                record.update(response_record)
            result.add_record(record)

        return result
    def test_filter_records_not_equals() -> None:
        data_table = DataTable({"a": [1, 2, 1], "b": [3, 4, 5]})
        assert data_table.filter_records({"a": 1},
                                         operand=Filter.NOT_EQUALS) == {
                                             "a": [2],
                                             "b": [4],
                                         }
        assert data_table.filter_records({
            "a": 2,
            "b": 4
        },
                                         operand=Filter.NOT_EQUALS) == {
                                             "a": [1, 1],
                                             "b": [3, 5],
                                         }

        assert data_table.filter_records({
            "a": 1,
            "b": 4
        },
                                         operand=Filter.NOT_EQUALS) == {
                                             "a": [1, 2, 1],
                                             "b": [3, 4, 5],
                                         }

        with pytest.raises(DataTableError):
            DataTable({
                "a": [1, 2, 1],
                "b": [3, 4]
            }).filter_records({"a": 1}, operand=Filter.NOT_EQUALS)
    def test_add_table() -> None:
        data_table = DataTable({"a": [5], "b": [6]})
        assert DataTable({
            "a": [1, 2],
            "b": [3, 4]
        }).add_table(data_table) == {
            "a": [1, 2, 5],
            "b": [3, 4, 6],
        }
        assert DataTable({
            "a": [1, 2],
            "b": [3, 4]
        }).add_table(data_table, data_table) == {
            "a": [1, 2, 5, 5],
            "b": [3, 4, 6, 6],
        }

        with pytest.raises(DataTableError):
            DataTable({
                "a": [1, 2],
                "b": [3, 4]
            }).add_table(DataTable({
                "a": [5],
                "b": []
            }))

        with pytest.raises(DataTableError):
            DataTable({"a": [5], "b": []}).add_table(data_table)
Example #22
    def _execute_method_get_item(self, data_table: DataTable) -> DataTable:
        self._validate_data_table_has_table_keys(data_table)
        result = DataTable[Dict[str, Any]]()
        for record in data_table.get_records():
            key_data = {
                k: v
                for k, v in record.items() if k in self.table_keys
            }
            result_record = self._execute_item_query(key_data=key_data,
                                                     item_data=record)
            if result_record is not None:
                record.update(result_record)
            result.add_record(record)
        return result
    def test_builtin_copy() -> None:
        base_dict = {"a": [[1, 2, 3]]}
        data_table = DataTable(base_dict)

        data_table_copy = copy(data_table)
        assert isinstance(data_table_copy, DataTable)
        assert data_table_copy is not data_table
        assert data_table_copy["a"] is not data_table["a"]
        assert data_table_copy["a"][0] is base_dict["a"][0]

        data_table_deepcopy = deepcopy(data_table)
        assert isinstance(data_table_deepcopy, DataTable)
        assert data_table_deepcopy is not data_table
        assert data_table_deepcopy["a"] is not data_table["a"]
        assert data_table_deepcopy["a"][0] is not base_dict["a"][0]
    def test_get_record() -> None:
        data_table = DataTable({"a": [1, 2], "b": [3, 4]})
        assert data_table.get_record(0) == {"a": 1, "b": 3}
        assert data_table.get_record(1) == {"a": 2, "b": 4}

        with pytest.raises(DataTableError):
            data_table.get_record(2)

        with pytest.raises(DataTableError):
            DataTable({"a": [1, 2], "b": [3]}).get_record(2)
Example #25
    def _execute_method_batch_update_item(self,
                                          data_table: DataTable) -> DataTable:
        self._validate_data_table_has_table_keys(data_table)

        record_chunks = chunkify(data_table.get_records(), self.MAX_BATCH_SIZE)
        table_name = self.table_resource.name
        for record_chunk in record_chunks:
            request_list = []
            for record in record_chunk:
                request_list.append({"PutRequest": {"Item": dict(record)}})
            request_items = {table_name: request_list}
            self._batch_write_item(
                RequestItems=request_items,
                **self._extra_params,
            )

        return data_table
Example #26
    def batch_upsert_records(
            self,
            records: Iterable[_RecordType],
            set_if_not_exists_keys: Iterable[str] = (),
    ) -> None:
        """
        Upsert records to DB.

        See `DynamoTable.batch_upsert`.

        Arguments:
            records -- Full or partial records data.
            set_if_not_exists_keys -- List of keys to set only if they do not exist in DB.
        """
        for records_chunk in chunkify(records, self.max_batch_size):
            upsert_data_table = DataTable(
                record_class=self.record_class).add_record(*records_chunk)
            self.batch_upsert(upsert_data_table,
                              set_if_not_exists_keys=set_if_not_exists_keys)
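A minimal usage sketch for `batch_upsert_records`, not from the source: it reuses the hypothetical `UserTable` class from the `batch_get` docstring example and assumes `age` and `created_at` attributes exist on the record.

    # Hypothetical usage sketch: upsert two partial records in one call;
    # "created_at" is only written when the attribute does not yet exist in DB.
    user_table = UserTable()
    user_table.batch_upsert_records(
        [
            {"email": "first@example.com", "age": 30},
            {"email": "second@example.com", "age": 25},
        ],
        set_if_not_exists_keys=("created_at", ),
    )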
Example #27
    def _execute_method_update_item(self, data_table: DataTable) -> DataTable:
        self._validate_data_table_has_table_keys(data_table)
        self._validate_required_value_keys(data_table)

        result = DataTable[Dict[str, Any]]()
        for record in data_table.get_records():
            if self.UPDATE_EXPRESSION not in self._expressions:
                raise DynamoQueryError(
                    f"{self} must have {self.UPDATE_EXPRESSION} or `update` method."
                )
            key_data = {
                k: v
                for k, v in record.items() if k in self.table_keys
            }
            result_record = self._execute_item_query(
                key_data=key_data,
                item_data=record,
            )
            if result_record is not None:
                result.add_record(result_record)
        return result
Example #28
    def _execute_method_batch_delete_item(self,
                                          data_table: DataTable) -> DataTable:
        self._validate_data_table_has_table_keys(data_table)

        record_chunks = chunkify(data_table.get_records(), self.MAX_BATCH_SIZE)
        table_name = self.table_resource.name
        for record_chunk in record_chunks:
            request_list = []
            for record in record_chunk:
                key_data = {
                    k: v
                    for k, v in record.items() if k in self.table_keys
                }
                request_item = {"DeleteRequest": {"Key": key_data}}
                if request_item not in request_list:
                    request_list.append(request_item)
            request_items = {table_name: request_list}
            self._batch_write_item(
                RequestItems=request_items,
                **self._extra_params,
            )

        return data_table
Example #29
    def _execute_method_query(self, data_table: DataTable) -> DataTable:
        self._validate_last_evaluated_key()
        self._validate_required_value_keys(data_table)

        for operator in self._expressions[
                self.KEY_CONDITION_EXPRESSION].get_operators():
            if operator not in (
                    Operator.EQ.value,
                    Operator.LT.value,
                    Operator.GT.value,
                    Operator.LTE.value,
                    Operator.GTE.value,
                    Operator.BETWEEN.value,
                    Operator.BEGINS_WITH.value,
            ):
                raise DynamoQueryError(
                    f"{self.KEY_CONDITION_EXPRESSION} does not support operator"
                    f' "{operator}".')

        result = DataTable[Dict[str, Any]]()
        for record in data_table.get_records():
            result.add_table(self._execute_paginated_query(data=record))
        return result
Example #30
    def batch_get_records(
            self,
            records: Iterable[_RecordType],
            consistent_read: bool = False) -> Iterator[_RecordType]:
        """
        Get records as an iterator from DB.

        See `DynamoTable.batch_get`.

        Arguments:
            records -- Full or partial records data.
            consistent_read -- `ConsistentRead` boto3 parameter.

        Yields:
            Found or not found record data.
        """
        for records_chunk in chunkify(records, self.max_batch_size):
            get_data_table = DataTable(
                record_class=self.record_class).add_record(*records_chunk)
            result_data_table = self.batch_get(get_data_table,
                                               consistent_read=consistent_read)
            for record in result_data_table.get_records():
                yield self._convert_record(record)
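A minimal usage sketch for `batch_get_records`, not from the source: it again reuses the hypothetical `UserTable` class from the `batch_get` docstring example.

    # Hypothetical usage sketch: fetch records by the fields used to build keys.
    user_table = UserTable()
    records = user_table.batch_get_records(
        [
            {"email": "first@example.com"},
            {"email": "second@example.com"},
        ],
        consistent_read=True,
    )
    for user_record in records:
        # records missing in DB are yielded back with only the data provided
        print(user_record)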