Пример #1
0
    def test_full_monty(self):
        """A string carrying slug, arguments, and a custom title parses into all three parts."""
        plain = Aggy.parse_aggy_string("sum:c|Sum of Fun")
        self.assertEqual(plain.slug, "sum")
        self.assertEqual(plain.title, "Sum of Fun")
        self.assertEqual(plain.agg_args, ["c"])

        # Quoted segments contain the delimiter characters (comma and pipe) themselves.
        quoted = Aggy.parse_aggy_string(
            'count:a,"Hello,world"|"Counts, are |Fun|"')
        self.assertEqual(quoted.slug, "count")
        self.assertEqual(quoted.title, "Counts, are |Fun|")
        self.assertEqual(quoted.agg_args, ["a", "Hello,world"])
Пример #2
0
    def test_custom_title(self):
        """Text after the pipe becomes the title; the slug stays intact and no args are produced."""
        unquoted = Aggy.parse_aggy_string("count|Hello")
        self.assertEqual(unquoted.title, "Hello")
        self.assertEqual(unquoted.agg_args, [])
        self.assertEqual(unquoted.slug, "count")

        # A quoted title keeps its embedded comma and pipe.
        quoted = Aggy.parse_aggy_string('count|"Hello,goodday|and bye"')
        self.assertEqual(quoted.title, "Hello,goodday|and bye")
        self.assertEqual(quoted.slug, "count")
Пример #3
0
 def test_basic(self):
     """A bare slug parses to an Aggy with no arguments and a derived title."""
     ag = Aggy.parse_aggy_string("count")
     # assertIsInstance (rather than a bare `assert`) still runs under
     # `python -O` and reports the offending object on failure.
     self.assertIsInstance(ag, Aggy)
     self.assertEqual(ag.slug, "count")
     self.assertEqual(ag.title, "count_of")
     self.assertEqual(ag.agg_args, [])
     self.assertIsInstance(ag.aggregation, agate.Aggregation)
Пример #4
0
    def _validate_aggy_column_arguments(self, aggy: Aggy,
                                        table: agate.Table) -> NoReturnType:
        """
        Validate an Aggy's arguments against a table, typecasting where needed.

        Aggy is csvpivot/agate.Table agnostic, so this method:

        - makes sure that aggy's ostensible column_name argument is actually in the table
        - typecasts the second argument of count() to match the datatype of the column that it counts

        Side effect: for count() with at least two arguments, `aggy._args[1]`
        is replaced in place with the typecast value.

        Raises:
            ColumnNameError: the first argument is not in `table.column_names`.
            ColumnIdentifierError: for count(), no column in `table.columns`
                is named after the first argument.
            agate.CastError: for count(), the second argument cannot be cast
                to the data type of the referenced column.
        """
        # Nothing to validate when the aggregation takes no arguments.
        if not aggy._args:
            return

        ############################
        # checking valid column name
        # (For now, all possible Aggregates use the first argument (if it exists) as the column_name to aggregate)
        first_arg = aggy._args[0]
        # the following is redundant with the filter_rows() check already done
        if first_arg not in table.column_names:
            raise ColumnNameError(
                "ColumnNameError: " +
                f"Attempted to perform `{aggy.slug}('{first_arg}', ...)`. " +
                f"But '{first_arg}' was expected to be a valid column name, i.e. from: {table.column_names}",
            )

        ################################################################
        # if the aggregation is count(), and there are 2 arguments
        #   then the 2nd argument is typecasted against the table.column
        #   (i.e. the column name referenced by the first arg)
        if len(aggy._args) < 2 or aggy.slug != "count":
            return

        col_name, cval = aggy.agg_args[0:2]

        # get column from first arg, which is presumably a column_name
        column = next((c for c in table.columns if c.name == col_name), None)
        if column is None:
            # The original message interpolated an undefined name (`argtext`),
            # which would have raised NameError here; describe the call instead.
            raise ColumnIdentifierError(
                f"'{col_name}' – from `{aggy.slug}('{col_name}', ...)` – is expected to be a column name, but it was not found in the table: {table.column_names}"
            )
        dtype = column.data_type

        # attempt a data_type conversion
        try:
            dval = dtype.cast(cval)
        except agate.CastError as err:
            typename: str = type(dtype).__name__
            # Chain the original error so the underlying cast failure stays visible.
            raise agate.CastError(
                f"You attempted to count '{cval}' in column '{col_name}', which has datatype {typename}. But '{cval}' could not be converted to {typename}."
            ) from err

        # modify agg_args so count() compares against a value of the column's type
        aggy._args[1] = dval
Пример #5
0
    def main(self):
        """
        Read the input, apply the requested aggregations, and write the result as CSV.

        With a pivot column set, the table is pivoted using only the first
        aggregation; otherwise the table is grouped by each pivot row name in
        turn and every aggregation is applied.

        Returns None when the input is empty, otherwise 0.
        """
        if self.additional_input_expected():
            self.argparser.error(
                "You must provide an input file or piped data.")

        # UniformReader (DRY later)
        self.read_input()
        if self.is_empty:
            return

        # TODO ugly access of internal variables, DRY later:
        # NOTE(review): the filtered rows are stored on self._rows, but the
        # aggregation below runs on self.i_rows — confirm this is intended.
        self._rows = self._filter_input_rows(self.i_rows, self.i_column_names)
        self._column_names = self.i_rows.column_names

        # extract aggies, falling back to a plain count when none were given
        requested = self.args.aggregates_list
        aggies: list = (requested.copy() if requested
                        else [Aggy.parse_aggy_string("count")])

        for aggy in aggies:
            self._validate_aggy_column_arguments(aggy, table=self.i_rows)

        source: agate.Table = self.i_rows
        result: agate.Table

        if self.pivot_column_name:
            # in this mode, only one aggregation is allowed. This is enforced inside run()
            # Also, this aggregation:
            # - is performed for each group i.e. cell of the aggregated data
            # - aggy.title is not used and thus ignored
            #   - TODO: warn user that title is ignored?
            result = source.pivot(
                key=self.pivot_row_names or None,
                pivot=self.pivot_column_name or None,
                aggregation=aggies[0].aggregation,
            )
        else:
            # user wants multiple aggregations for rows, so this is essentially a group_by
            grouped = source
            for row_name in self.pivot_row_names:
                grouped = grouped.group_by(key=row_name)
            named_aggregations = [(aggy.title, aggy.aggregation)
                                  for aggy in aggies]
            result = grouped.aggregate(named_aggregations)

        result.to_csv(self.output_file, **self.writer_kwargs)
        return 0
Пример #6
0
 def __call__(self, parser, namespace, values, option_string=None):
     """Parse the raw option text into an Aggy, then delegate to the parent action with it."""
     parsed = Aggy.parse_aggy_string(values)
     super().__call__(parser, namespace, parsed, option_string)
Пример #7
0
 def test_pipes_everywhere(self):
     """Pipes inside quoted arguments and quoted titles are kept as literal text."""
     parsed = Aggy.parse_aggy_string(
         'count:hello,"foo|bar"|"Actual Title|really"')
     self.assertEqual(parsed.slug, "count")
     self.assertEqual(parsed.agg_args, ["hello", "foo|bar"])
     self.assertEqual(parsed.title, "Actual Title|really")
Пример #8
0
 def test_with_multiple_args(self):
     """Several arguments are split on unquoted commas and feed the derived title."""
     parsed = Aggy.parse_aggy_string(r'count:hello,"foo, Bar!t"')
     self.assertEqual(parsed.agg_args, ["hello", "foo, Bar!t"])
     self.assertEqual(parsed.slug, "count")
     self.assertEqual(parsed.title, "count_of_hello_foo_bar_t")
Пример #9
0
 def test_with_single_arg(self):
     """A single argument after the colon is parsed and appended to the derived title."""
     parsed = Aggy.parse_aggy_string("sum:c")
     self.assertEqual(parsed.agg_args, ["c"])
     self.assertEqual(parsed.slug, "sum")
     self.assertEqual(parsed.title, "sum_of_c")