示例#1
0
 def test_last_row_simple(self):
     """Join two one-row constant tables and check the last joined row.

     Two single-row tables (columns ``xmin``/``xmax`` and ``ymin``/``ymax``)
     are wrapped in ``Constant`` instances and connected to one ``Join``.
     After the scheduler has been started and joined, the last row of the
     join's table must carry the four original values.
     """
     s = self.scheduler()
     t1 = Table(name=get_random_name("cst1"),
                data={
                    'xmin': [1],
                    'xmax': [2]
                })
     t2 = Table(name=get_random_name("cst2"),
                data={
                    'ymin': [3],
                    'ymax': [4]
                })
     cst1 = Constant(t1, scheduler=s)
     cst2 = Constant(t2, scheduler=s)
     join = Join(scheduler=s)
     # Both constants are assigned to the same input slot name of the join.
     join.input.table = cst1.output.table
     join.input.table = cst2.output.table
     pr = Print(proc=self.terse, scheduler=s)
     pr.input.df = join.output.table
     s.start()
     s.join()
     #res = join.trace_stats(max_runs=1)
     #pd.set_option('display.expand_frame_repr', False)
     #print(res)
     df = join.table()
     last = df.last()
     expected = {'xmin': 1, 'xmax': 2, 'ymin': 3, 'ymax': 4}
     # One assertion per column: a failure then names the offending column
     # and shows both values instead of a bare "False is not true".
     for column, value in expected.items():
         self.assertEqual(last[column], value, column)
示例#2
0
    def __init__(self,
                 name,
                 index,
                 base=None,
                 storagegroup=None,
                 dshape=None,
                 fillvalue=None,
                 shape=None,
                 chunks=None,
                 data=None,
                 indices=None):
        """Create a new column.

        ``name``, ``index`` and ``base`` are handed to the base-class
        initialiser. If ``self.index`` is None afterwards, the column is
        completed through ``_complete_column`` and ``data``, when given, is
        appended with ``indices``.

        Args:
            name: column name.
            index: index object, or None.
            base: forwarded to the base class.
            storagegroup: storage group to use. When None, the storage group
                of ``index`` is used if ``index`` is not None, otherwise
                ``Group.default`` is called with a random ``column_`` name.
            dshape, fillvalue, shape, chunks: forwarded to
                ``_complete_column``.
            data: optional initial values.
            indices: optional indices appended together with ``data``.

        Raises:
            ValueError: if ``data`` is given with non-empty ``indices`` of a
                different length.
        """
        super(Column, self).__init__(name, index, base=base)
        if storagegroup is None:
            if index is not None:
                storagegroup = index.storagegroup
            else:
                storagegroup = Group.default(name=get_random_name('column_'))
        self._storagegroup = storagegroup
        self.dataset = None
        self._dshape = None
        if self.index is None:
            if data is not None:  # check before creating everything
                length = len(data)
                # NOTE(review): the truthiness test skips the check for empty
                # indices -- confirm that this is intended.
                if indices and length != len(indices):
                    # Interpolate the message; passing the values as extra
                    # exception arguments left the "%d" placeholders unfilled.
                    raise ValueError('Bad index length (%d/%d)' %
                                     (len(indices), length))
            self._complete_column(dshape, fillvalue, shape, chunks, data)
            if data is not None:
                self.append(data, indices)
示例#3
0
    def __init__(
        self,
        name: Optional[str],
        data: Any = None,
        dshape: Optional[Union[str, DataShape]] = None,
        fillvalues: Optional[Dict[str, Any]] = None,
        storagegroup: Optional[Group] = None,
        chunks: Optional[Chunks] = None,
        create: Optional[bool] = None,
        indices: Optional[Index] = None,
    ):
        """Create a table, or load an existing one from its storage group.

        The table is loaded (``_load_table``) when no dshape is available, or
        when ``create`` is falsy and the storage group already carries the
        ``metadata.ATTR_TABLE`` attribute. Otherwise it is created
        (``_create_table``). ``data``, when given, is appended afterwards.

        Args:
            name: table name; a random ``table_`` name is used when None.
            data: optional initial content. When given, ``create`` is forced
                to True and, if ``dshape`` is None, the dshape is extracted
                from the parsed data.
            dshape: data shape, passed to ``dshape_create``.
            fillvalues: None or a mapping, forwarded to ``_create_table``.
            storagegroup: None or a ``Group``; when None,
                ``Group.default(name, create=create)`` is used.
            chunks: None, an integer or a mapping; stored as ``_chunks``.
            create: whether the table has to be created.
            indices: forwarded to ``append`` together with ``data``.

        Raises:
            ValueError: on an invalid ``fillvalues``, ``chunks`` or
                ``storagegroup``, or when creation is requested without a
                dshape.
            RuntimeError: if no default storage group can be obtained.
        """
        # pylint: disable=too-many-arguments, too-many-branches
        super(Table, self).__init__()
        # The messages below use f-strings rather than "%": with "%" a tuple
        # argument is unpacked as the format arguments and raises TypeError
        # instead of the intended ValueError.
        if not (fillvalues is None or isinstance(fillvalues, Mapping)):
            raise ValueError(
                f"Invalid fillvalues ({fillvalues}) "
                "should be None or a dictionary")
        if not (chunks is None or isinstance(chunks,
                                             (integer_types, Mapping))):
            raise ValueError(
                f"Invalid chunks ({chunks}) should be None or a dictionary")
        if data is not None:
            if create is not None and create is not True:
                logger.warning("creating a Table with data and create=False")
            create = True

        self._chunks = chunks
        # self._nrow = 0
        self._name: str = get_random_name("table_") if name is None else name
        # TODO: attach all randomly named tables to a dedicated, common parent node
        if not (storagegroup is None or isinstance(storagegroup, Group)):
            raise ValueError(
                f"Invalid storagegroup ({storagegroup}) "
                "should be None or a Group")
        if storagegroup is None:
            assert Group.default
            storagegroup = Group.default(self._name, create=create)
        if storagegroup is None:
            raise RuntimeError("Cannot get a valid default storage Group")
        self._storagegroup = storagegroup
        if dshape is None:
            if data is None:
                self._dshape = EMPTY_DSHAPE
            else:
                data = self.parse_data(data)
                self._dshape = dshape_extract(data) or EMPTY_DSHAPE
        else:
            self._dshape = dshape_create(dshape)
            assert dshape_table_check(self._dshape)
        if create and self._dshape is EMPTY_DSHAPE:
            raise ValueError("Cannot create a table without a dshape")
        if self._dshape is EMPTY_DSHAPE or (not create and metadata.ATTR_TABLE
                                            in self._storagegroup.attrs):
            self._load_table()
        else:
            self._create_table(fillvalues or {})
        if data is not None:
            self.append(data, indices=indices)
示例#4
0
    def __init__(self,
                 name,
                 data=None,
                 dshape=None,
                 fillvalues=None,
                 storagegroup=None,
                 chunks=None,
                 create=None,
                 indices=None):
        """Create a table, or load an existing one from its storage group.

        The table is loaded (``_load_table``) when no dshape is available, or
        when ``create`` is falsy and the storage group already carries the
        ``metadata.ATTR_TABLE`` attribute. Otherwise it is created
        (``_create_table``). ``data``, when given, is appended afterwards.

        Args:
            name: table name; a random ``table_`` name is used when None.
            data: optional initial content. When given, ``create`` is forced
                to True and, if ``dshape`` is None, the dshape is extracted
                from the parsed data.
            dshape: data shape, passed to ``dshape_create``.
            fillvalues: None or a mapping, forwarded to ``_create_table``.
            storagegroup: None or a ``Group``; when None,
                ``Group.default(name, create=create)`` is used.
            chunks: None, an integer or a mapping; stored as ``_chunks``.
            create: whether the table has to be created.
            indices: forwarded to ``append`` together with ``data``.

        Raises:
            ValueError: on an invalid ``fillvalues``, ``chunks`` or
                ``storagegroup``, or when creation is requested without a
                dshape.
            RuntimeError: if no default storage group can be obtained.
        """
        # pylint: disable=too-many-arguments, too-many-branches
        super(Table, self).__init__()
        # The rejected value is wrapped in a 1-tuple: with a bare "% value" a
        # tuple would be unpacked as the format arguments and raise TypeError
        # instead of the intended ValueError.
        if not (fillvalues is None or isinstance(fillvalues, Mapping)):
            raise ValueError(
                'Invalid fillvalues (%s) should be None or a dictionary' %
                (fillvalues,))
        if not (chunks is None or isinstance(chunks,
                                             (integer_types, Mapping))):
            raise ValueError(
                'Invalid chunks (%s) should be None or a dictionary' %
                (chunks,))
        if data is not None:
            # Warn only when the caller asked for something other than
            # creation; an explicit create=True is consistent with data.
            if create is not None and create is not True:
                logger.warning('creating a Table with data and create=False')
            create = True

        self._chunks = chunks
        #self._nrow = 0
        self._name = get_random_name('table_') if name is None else name
        # TODO: attach all randomly named tables to a dedicated, common parent node
        if not (storagegroup is None or isinstance(storagegroup, Group)):
            raise ValueError(
                'Invalid storagegroup (%s) should be None or a Group' %
                (storagegroup,))
        if storagegroup is None:
            storagegroup = Group.default(self._name, create=create)
        if storagegroup is None:
            raise RuntimeError('Cannot get a valid default storage Group')
        self._storagegroup = storagegroup
        if dshape is None:
            if data is None:
                self._dshape = None
            else:
                data = self.parse_data(data)
                self._dshape = dshape_extract(data)
        else:
            self._dshape = dshape_create(dshape)
            assert dshape_table_check(self._dshape)
        if create and self._dshape is None:
            raise ValueError('Cannot create a table without a dshape')
        if self._dshape is None or (not create and metadata.ATTR_TABLE
                                    in self._storagegroup.attrs):
            self._load_table()
        else:
            self._create_table(fillvalues or {})
        if data is not None:
            self.append(data, indices=indices)
示例#5
0
 def __init__(self,
              name: Optional[str] = None,
              parent: Optional[GroupImpl] = None):
     """Initialise the group and register it in ``parent`` when one is given.

     A random ``mmapstorage_`` name is generated when ``name`` is None.

     Raises:
         ValueError: if ``parent`` already holds an entry with that name.
     """
     group_name = get_random_name("mmapstorage_") if name is None else name
     super(MMapGroup, self).__init__(group_name, parent=parent)
     # Refuse to shadow an entry already registered under the parent.
     if parent is not None and group_name in parent.dict:
         raise ValueError(
             "Cannot create group {}, already exists".format(group_name))
     if parent is not None:
         parent.dict[group_name] = self
     self._is_init = False
示例#6
0
 def test_last_row_simple(self) -> None:
     """Join two one-row constant tables and check the last joined row.

     Two single-row tables (columns ``xmin``/``xmax`` and ``ymin``/``ymax``)
     are wrapped in ``Constant`` instances and connected to one ``Join``.
     After the scheduler has run, the last row of the join's table must
     carry the four original values.
     """
     s = self.scheduler()
     t1 = Table(name=get_random_name("cst1"), data={"xmin": [1], "xmax": [2]})
     t2 = Table(name=get_random_name("cst2"), data={"ymin": [3], "ymax": [4]})
     cst1 = Constant(t1, scheduler=s)
     cst2 = Constant(t2, scheduler=s)
     join = Join(scheduler=s)
     # Both constants are assigned through the same ``input[0]`` accessor.
     join.input[0] = cst1.output.result
     join.input[0] = cst2.output.result
     pr = Print(proc=self.terse, scheduler=s)
     pr.input[0] = join.output.result
     aio.run(s.start())
     # res = join.trace_stats(max_runs=1)
     # pd.set_option('display.expand_frame_repr', False)
     # print(res)
     last = notNone(join.table.last())
     expected = {"xmin": 1, "xmax": 2, "ymin": 3, "ymax": 4}
     # One assertion per column: a failure then names the offending column
     # and shows both values instead of a bare "False is not true".
     for column, value in expected.items():
         self.assertEqual(last[column], value, column)
示例#7
0
 def create_group(name=None, create=True):
     """Return a group registered under the root of the 'mmap' engine.

     If ``name`` is already present in the root: with a truthy ``create`` a
     new group is built under a random name derived from the first 16
     characters of ``name``; otherwise the existing entry is returned as is.
     If ``name`` is not present, a new ``MMapGroup`` is built with it.
     """
     mmap_root = StorageEngine.engines()['mmap']
     if name in mmap_root.dict:
         if not create:
             # TODO : specify this behaviour (the existing entry is returned
             # without checking that it is an MMapGroup).
             return mmap_root.dict[name]
         # Name clash while creating: fall back to a fresh random name.
         name = get_random_name(name[:16]+'_')
     return MMapGroup(name, parent=mmap_root)
示例#8
0
    def __init__(
        self,
        name: str,
        index: Optional[IndexTable],
        base: Optional[BaseColumn] = None,
        storagegroup: Optional[Group] = None,
        dshape: Optional[Union[None, DataShape, str]] = None,
        fillvalue: Optional[Any] = None,
        shape: Optional[Shape] = None,
        chunks: Optional[Chunks] = None,
        indices: Optional[Index] = None,
        data: Optional[Any] = None,
    ) -> None:
        """Create a new column.

        When ``index`` is None a new ``IndexTable`` is created, the column is
        completed through ``_complete_column`` and ``data``, when given, is
        appended with ``indices``.

        Args:
            name: column name, passed to the base class.
            index: index table, or None to create a new one.
            base: forwarded to the base class.
            storagegroup: storage group to use. When None, the
                ``storagegroup`` attribute of the index is used if it has
                one, otherwise ``Group.default`` is called with a random
                ``column_`` name.
            dshape: a ``DataShape``, or a string passed to ``dshape_create``.
            fillvalue, shape, chunks: forwarded to ``_complete_column``.
            indices: optional indices appended together with ``data``.
            data: optional initial values.

        Raises:
            ValueError: if ``index`` is None and ``data`` is given with
                non-empty ``indices`` of a different length.
        """
        indexwasnone: bool = index is None
        if index is None:
            if data is not None:  # check before creating everything
                length = len(data)
                if indices and length != len(indices):
                    # Interpolate the message; passing the values as extra
                    # exception arguments left the "%d" placeholders unfilled.
                    raise ValueError("Bad index length (%d/%d)" %
                                     (len(indices), length))
            index = IndexTable()
        super(Column, self).__init__(name, index, base=base)
        if storagegroup is None:
            if index is not None and hasattr(index, "storagegroup"):
                # i.e. isinstance(index, Table)
                storagegroup = getattr(index, "storagegroup")
                assert isinstance(storagegroup, Group)
            else:
                assert Group.default
                storagegroup = Group.default(name=get_random_name("column_"))
        self._storagegroup = storagegroup
        self.dataset: Optional[Dataset] = None
        self._dshape: DataShape = EMPTY_DSHAPE
        if isinstance(dshape, DataShape):
            self._dshape = dshape
        elif isinstance(dshape, str):
            self._dshape = dshape_create(dshape)
        # Completion and initial append only happen for a column that had to
        # create its own index.
        if indexwasnone:
            self._complete_column(dshape, fillvalue, shape, chunks, data)
            if data is not None:
                self.append(data, indices)
示例#9
0
    def __init__(self, column, min_column=None, max_column=None, reset_index=False, **kwds):
        """Set up the module for ``column`` and allocate its result table.

        A required ``table`` input slot is declared before the base class is
        initialised with ``table_slot='stats'``. The names of the two output
        columns default to ``_<column>_min`` and ``_<column>_max``; both are
        declared as ``float64`` in the schema of the result table.
        """
        table_slot = SlotDescriptor('table', type=Table, required=True)
        self._add_slots(kwds, 'input_descriptors', [table_slot])
        super(Stats, self).__init__(table_slot='stats', **kwds)
        self._column = column
        self.default_step_size = 10000

        self._min_column = ('_' + str(column) + '_min'
                            if min_column is None else min_column)
        self._max_column = ('_' + str(column) + '_max'
                            if max_column is None else max_column)
        self._reset_index = reset_index
        # self.schema = [(self._min_column, np.dtype(float), np.nan),
        #                (self._max_column, np.dtype(float), np.nan),]
        schema_parts = ['{', self._min_column, ': float64, ',
                        self._max_column, ': float64}']
        self.schema = ''.join(schema_parts)
        self._table = Table(get_random_name('stats_'), dshape=self.schema)
示例#10
0
 def create_group(name: Optional[str] = None, create: bool = True) -> Group:
     """Return a group registered under the root of the 'mmap' engine.

     If ``name`` is already present in the root: with a truthy ``create`` a
     new group is built under a random name derived from the first 16
     characters of ``name``; otherwise the existing entry is returned.
     If ``name`` is not present, a new ``MMapGroup`` is built with it,
     unless ``create`` is ``False``.

     Raises:
         ValueError: if the existing entry is not a ``GroupImpl``, or if
             ``create`` is ``False`` and no entry named ``name`` exists.
     """
     root = StorageEngine.engines()["mmap"]
     assert isinstance(root, GroupImpl)
     if name in root.dict:
         if not create:
             existing = root.dict[name]
             if not isinstance(existing, GroupImpl):
                 raise ValueError(
                     f"Cannot create group {name}, already exists as {type(existing)}"
                 )
             return existing
         # TODO : specify this behaviour
         # Name clash while creating: fall back to a fresh random name.
         name = get_random_name(name[:16] + "_")
     elif create is False:
         raise ValueError(f"group {name} does not exist")
     return MMapGroup(name, parent=root)
示例#11
0
 def __init__(self, name=None, parent=None):
     """Open or create the directory backing this group.

     A random ``mmapstorage_`` name is generated when ``name`` is None.
     If the directory returned by ``self.path()`` exists, it must be a
     directory containing the metadata file, from which the attributes are
     read. Otherwise the directory is created and the attributes are
     written to a new metadata file. The group is finally registered in
     ``parent`` when one is given.

     Raises:
         OSError: if the path exists but is not a directory (``os.makedirs``
             may also raise).
         ValueError: if the existing directory has no metadata file, or if
             ``parent`` already holds an entry named ``name``.
     """
     if name is None:
         name = get_random_name("mmapstorage_")
     super(MMapGroup, self).__init__(name, parent=parent)
     self._directory = self.path()
     metadata = os.path.join(self._directory, METADATA_FILE)
     self._metadata = metadata
     if os.path.exists(self._directory):
         if not os.path.isdir(self._directory):
             raise OSError('Cannot create group %s'%self._directory)
         if not os.path.isfile(metadata):
             # Message fixed: it used to contain a stray, unbalanced quote.
             raise ValueError('Cannot create group %s, unsuitable directory'%
                              self._directory)
         _read_attributes(self._attrs.attrs, metadata)
     else:
         os.makedirs(self._directory) # can raise exceptions
         _write_attributes(self._attrs.attrs, metadata)
     if parent is not None:
         if name in parent.dict:
             raise ValueError('Cannot create group {}, already exists'.format(name))
         parent.dict[name] = self
示例#12
0
    def __init__(self,
                 column: Union[str, int],
                 min_column: Optional[str] = None,
                 max_column: Optional[str] = None,
                 reset_index: bool = False,
                 **kwds: Any) -> None:
        """Set up the module for ``column`` and allocate its result table.

        The names of the two output columns default to ``_<column>_min`` and
        ``_<column>_max``; both are declared as ``float64`` in the schema of
        the table stored in ``self.result``.
        """
        super(Stats, self).__init__(**kwds)
        self._column = column
        self.default_step_size = 10000

        self._min_column: str = (f"_{column!s}_min"
                                 if min_column is None else min_column)
        self._max_column: str = (f"_{column!s}_max"
                                 if max_column is None else max_column)
        self._reset_index = reset_index
        # self.schema = [(self._min_column, np.dtype(float), np.nan),
        #                (self._max_column, np.dtype(float), np.nan),]
        schema_parts = ("{", self._min_column, ": float64, ",
                        self._max_column, ": float64}")
        self.schema = "".join(schema_parts)
        self.result = Table(get_random_name("stats_"), dshape=self.schema)