Пример #1
0
 def test_from_rows(self):
     """
     `ColumnGroup.from_rows` takes the column types and a list of rows
     whose first row holds the column names; the result must equal a
     `ColumnGroup` built column by column.
     """
     col_types = (int, str)
     rows = [("colA", "colB"), (1, "a"), (2, "b"), (3, "c")]
     actual = ColumnGroup.from_rows(col_types, rows)

     col_a = Column("colA", int, [1, 2, 3])
     col_b = Column("colB", str, ["a", "b", "c"])
     self.assertEqual(ColumnGroup([col_a, col_b]), actual)
Пример #2
0
def read_csv(csv_path: Union[str, Path],
             mcsv_path: Optional[Union[str, Path]] = None,
             nrows=100) -> Optional[Data]:
    """
    Read a CSV file, described by a MetaCSV file, into a `Data` object.

    * `csv_path` is the path of the CSV file (`str` or `Path`)
    * `mcsv_path` is the path of the MetaCSV file (`str` or `Path`); when
      omitted, it is `csv_path` with the suffix replaced by ".mcsv"
    * `nrows` is the maximum number of rows taken from the reader, the
      header row included; a negative value means no limit

    Return the `Data`, or `None` if the MetaCSV file does not exist (in
    that case `missing_mcsv` is called with the CSV path).
    """
    csv_path = Path(csv_path)
    if mcsv_path is None:
        mcsv_path = csv_path.with_suffix(".mcsv")
    else:
        # The signature accepts a `str`, but `is_file` below needs a `Path`.
        mcsv_path = Path(mcsv_path)

    if not mcsv_path.is_file():
        missing_mcsv(csv_path)  # util command to open a window
        return None

    with mcsv.open_csv(csv_path, "r", mcsv_path) as mcsv_reader:
        if nrows >= 0:
            reader = islice(mcsv_reader, nrows)
        else:
            reader = mcsv_reader
        # The first row yielded by the reader is the header.
        header = [to_standard(n) for n in next(reader)]
        # `zip(header, descriptions, *rows)` transposes the remaining rows:
        # each tuple is (name, description, value of row 1, value of row 2...)
        column_group = ColumnGroup([
            Column(name, description,
                   values) for name, description, *values in zip(
                       header, mcsv_reader.descriptions, *reader)
        ])

        return Data(
            column_group,
            DataSource.create(to_standard(csv_path.stem), csv_path,
                              mcsv_reader.meta_csv_data))
Пример #3
0
def data_from_rows(col_types, rows):
    """
    Build a `Data` from a sequence of rows.

    * `col_types` are the types of the columns, in column order
    * `rows` are the rows; the first row holds the column names

    The `Data` is created with `None` as its second argument.
    """
    # Transpose: one tuple per column, name first, then the values.
    transposed = list(zip(*rows))
    built_columns = []
    for col_type, column_cells in zip(col_types, transposed):
        name = column_cells[0]
        values = column_cells[1:]
        built_columns.append(Column(name, col_type, values))
    return Data(ColumnGroup(built_columns), None)
Пример #4
0
    def update(self, func, col_name=None, col_type=None):
        """
        Replace one column by the result of a function applied to each of
        its values.

        Syntax: `data[x].update(func)`

        * `x` is an index (exactly one column must be selected)
        * `func` is a function of `data[x]` (use numeric indices)
        * `col_name` is the name of the updated column; the current name
          is kept when omitted
        * `col_type` is the type of the updated column; when omitted it is
          obtained from `_get_new_col_type`

        >>> test_data = original_test_data.copy()
        >>> print(test_data)
         A B C D
         1 3 2 4
         5 2 2 7
         3 4 7 8
        >>> test_data[1].update(lambda x: x*3)
        >>> print(test_data)
         A  B C D
         1  9 2 4
         5  6 2 7
         3 12 7 8
        """
        assert len(self._indices) == 1
        position = self._indices[0]
        current = self._data_column_group[position]

        # Fall back on the current column for whatever was not provided.
        if col_type is None:
            col_type = self._get_new_col_type(func, current.col_type)
        if col_name is None:
            col_name = current.name

        # The replacement is written into the list returned by `columns`.
        group_columns = self._data_column_group.columns
        group_columns[position] = Column(
            col_name, col_type, list(map(func, current.col_values)))
Пример #5
0
 def _append_other_columns_and_put_rows(self, other_handle: "DataHandle",
                                        new_rows):
     """
     Replace the columns of this handle's column group by the columns of
     this handle followed by those of `other_handle`, filled with
     `new_rows`.

     * `other_handle` is the handle whose columns are appended
     * `new_rows` are the rows passed to `replace_rows`
     """
     groups = (self._data_column_group, other_handle._data_column_group)
     # Same names and types as the existing columns, but without values.
     empty_columns = [
         Column(col.name, col.col_type, [])
         for group in groups
         for col in group
     ]
     merged_group = ColumnGroup(empty_columns)
     merged_group.replace_rows(new_rows)
     self._data_column_group.replace_columns(merged_group.columns)
Пример #6
0
    def merge(self, func, col_name, col_type=None):
        """
        Create a new col by merging some columns. Those columns are
        consumed during the process.

        The new column takes the place of the first selected column; the
        other selected columns are removed.

        Syntax: `data[x].merge(func, col_name, [col_type])`

        * `x` is an index, slice or tuple of slices/indices of column_index
        * `func` is the function to apply to `x` values
        * `col_name` is the name of the new column
        * `col_type` is the type of the new column

        >>> test_data = original_test_data.copy()
        >>> print(test_data)
         A B C D
         1 3 2 4
         5 2 2 7
         3 4 7 8
        >>> test_data[:3].merge(lambda x, y, z: x+y+z, "E", int)
        >>> print(test_data)
          E D
          6 4
          9 7
         14 8
        """
        if col_type is None:
            col_type = self._get_new_col_type(func, Any)

        target = self._indices[0]
        # Every selected column but the first one is removed. The target is
        # never removed, even if its index is repeated in the selection.
        consumed = set(self._indices[1:])
        consumed.discard(target)
        columns = [
            col for i, col in enumerate(self._data_column_group.columns)
            if i not in consumed
        ]
        column = Column(
            col_name, col_type,
            [func(*vs) for vs in self._data_column_group.rows(self._indices)])

        # The indices are not guaranteed to be sorted: each removed column
        # located before the target shifts the target one step to the left.
        position = target - sum(1 for i in consumed if i < target)
        columns[position] = column

        self._data_column_group.replace_columns(columns)
Пример #7
0
    def create(self, func, col_name, col_type=None, index=None):
        """
        Add a column computed from the selected columns. The selected
        columns are left in place.

        Syntax: `data[x].create(func, col_name, [col_type, [index]])`

        * `x` is an index, slice or tuple of slices/indices of column_index
        * `func` is the function to apply to `x` values
        * `col_name` is the name of the new column
        * `col_type` is the type of the new column
        * `index` is the index of the new column; the column is appended
          when omitted
        >>> test_data = original_test_data.copy()
        >>> print(test_data)
         A B C D
         1 3 2 4
         5 2 2 7
         3 4 7 8
        >>> test_data[:3].create(lambda x, y, z: x+y+z, "E", int, 1)
        >>> print(test_data)
         A  E B C D
         1  6 3 2 4
         5  9 2 2 7
         3 14 4 7 8
        """
        if col_type is None:
            col_type = self._get_new_col_type(func, Any)

        group = self._data_column_group
        all_columns = group.columns
        # One value per row: `func` receives the values of the selection.
        new_values = [func(*row) for row in group.rows(self._indices)]
        created = Column(col_name, col_type, new_values)

        if index is not None:
            all_columns.insert(index, created)
        else:
            all_columns.append(created)

        group.replace_columns(all_columns)
Пример #8
0
 def test_col_eq(self):
     """Two columns with the same name, type and values are equal."""
     left = Column("colA", int, [1, 2, 3])
     right = Column("colA", int, [1, 2, 3])
     self.assertEqual(left, right)
Пример #9
0
                    description = TextFieldDescription.INSTANCE
                b.description_by_col_index(i, description)
            meta_csv_data = b.build()
        else:
            b = MetaCSVDataBuilder()
            cur_data = self._data_source.meta_csv_data
            b.encoding(cur_data.encoding)
            b.bom(cur_data.bom)
            b._dialect = cur_data.dialect
            for i, col in enumerate(self._column_group):
                description = self._data_source.get_description(col.col_info)
                b.description_by_col_index(i, description)
            meta_csv_data = b.build()
        return meta_csv_data


if __name__ == "__main__":
    import doctest

    # Fixture made available to the doctests under the name
    # `original_test_data`: four int columns A, B, C, D of three values.
    _fixture_columns = ColumnGroup([
        Column("A", int, [1, 5, 3]),
        Column("B", int, [3, 2, 4]),
        Column("C", int, [2, 2, 7]),
        Column("D", int, [4, 7, 8]),
    ])
    _fixture = Data(_fixture_columns, None)

    doctest.testmod(extraglobs={'original_test_data': _fixture})