示例#1
0
 def validate(self):
     """Verify that every field stores a list/array and all lengths agree.

     :raises DataChunkError: if a field value is neither a list nor a numpy
         array, or if two fields hold a different number of elements.
     """
     expected_len = None
     for fname, values in self.items():
         if not isinstance(values, (list, np.ndarray)):
             raise DataChunkError(
                 "Data-chunk field values must be numpy arrays "
                 "or lists, while '%s' field contains: '%s'."
                 % (fname, type(values).__name__)
             )
         size = len(values)
         if expected_len is None:
             # The first field fixes the length all other fields must match.
             expected_len = size
         elif size != expected_len:
             raise DataChunkError("All data-chunk field value arrays/lists "
                                  "must be of the same size.")
示例#2
0
 def iter(self):
     """Generate the data-units of the chunk one by one, in index order.

     Being a generator, nothing (including the validity check) runs until
     the first item is requested.

     :raises DataChunkError: if the data-chunk is reported as invalid by
         ``self._is_valid()``.
     """
     if not self._is_valid():
         raise DataChunkError("Can't iterate over an invalid data-chunk.")
     position = 0
     total = len(self)  # evaluated once, like the original range()
     while position < total:
         yield self[position]
         position += 1
示例#3
0
    def absorb_and_yield_if_full(self, data_chunk):
        """Accumulate data-units, emitting collected ones on a group change.

        Walks over ``data_chunk`` unit by unit. Whenever the value of the
        ``self.id_fname`` field differs from the previously seen one,
        everything produced by ``self.yield_remaining()`` is yielded and
        ``self.reset()`` is called; the current unit is then stored in
        ``self._coll``.

        :param data_chunk: chunk whose units are absorbed; it is indexed both
            as ``data_chunk[indx, fname]`` and as ``data_chunk[fname]``.
        :raises DataChunkError: if a field of ``data_chunk`` is absent from
            the already initialised collector.
        """
        for pos in range(len(data_chunk)):
            curr_id = data_chunk[pos, self.id_fname]

            # NOTE(review): this is a truthiness test, so a falsy previous id
            # (e.g. 0 or '') never triggers a flush - confirm this is intended.
            if self._prev_group_id and curr_id != self._prev_group_id:
                for pending in self.yield_remaining():
                    yield pending
                self.reset()
            self._prev_group_id = curr_id

            # Lazily create one empty container per field, mirroring the
            # container kind (and dtype for arrays) of the incoming chunk.
            if len(self._coll) == 0:
                for fn in data_chunk.fnames:
                    source = data_chunk[fn]
                    if isinstance(source, np.ndarray):
                        self._coll[fn] = np.array([], dtype=source.dtype)
                    else:
                        self._coll[fn] = []

            for fn in data_chunk.fnames:
                val = data_chunk[pos, fn]
                if fn not in self._coll:
                    raise DataChunkError("Input chunks have different field "
                                         "names.")
                store = self._coll[fn]
                if isinstance(store, np.ndarray):
                    # np.append returns a new array, so it must be re-bound.
                    self._coll[fn] = np.append(store, val)
                else:
                    store.append(val)
    def append(self, data_unit):
        """
        Appends a new data-unit to the end of a valid data-chunk.

        If the chunk has no fields yet, they are created from the keys of
        `data_unit`. Otherwise `data_unit` must provide exactly the existing
        field names, so that all fields keep the same length after the call.

        :param data_unit: data unit or dict with field name and value pairs.
        :raises TypeError: if `data_unit` is not a dict, OrderedDict or
            DataUnit, or if a DataUnit reports a storage type other than
            list or numpy array.
        :raises DataChunkError: if the data-chunk is invalid.
        :raises ValueError: if the keys of `data_unit` do not match the
            existing field names (unknown or missing keys).
        :raises NotImplementedError: if an existing field is neither a list
            nor a numpy array.
        """
        allowed_types = (dict, OrderedDict, DataUnit)
        if not isinstance(data_unit, allowed_types):
            rpr = [at.__name__ for at in allowed_types]
            raise TypeError("'data_unit' must be %s." % " or ".join(rpr))
        if not self._is_valid():
            raise DataChunkError("Can't append a new data-unit to an "
                                 "invalid data-chunk.")
        if len(self.fnames) > 0:
            unknown = [k for k in data_unit if k not in self]
            # A unit that omits an existing field would leave the fields with
            # different lengths, i.e. an invalid chunk, so reject it up front
            # before anything is mutated.
            missing = [fn for fn in self.fnames if fn not in data_unit]
            if unknown or missing:
                raise ValueError(
                    "Please provide all keys matching existing "
                    "field names.")
        else:
            # Empty chunk: create one container per key of the data-unit.
            for k in data_unit:
                if isinstance(data_unit, DataUnit):
                    ds_type = data_unit.ds_type(k)
                    if ds_type == list:
                        ds = []
                    elif ds_type == np.ndarray:
                        # NOTE(review): np.array([]) uses numpy's default
                        # dtype, so appended values are cast to it.
                        ds = np.array([])
                    else:
                        raise TypeError("Can't handle '%s' type." % ds_type)
                else:
                    ds = []
                self[k] = ds

        for k in data_unit:
            if isinstance(self[k], np.ndarray):
                # np.append returns a new array, so the field is re-assigned.
                self[k] = np.append(self[k], data_unit[k])
            elif isinstance(self[k], list):
                self[k].append(data_unit[k])
            else:
                raise NotImplementedError
示例#5
0
 def _valid(dc):
     """Check that `dc` is a DataChunk instance and run its own validation.

     :param dc: object expected to be a DataChunk.
     :raises DataChunkError: if `dc` is not a DataChunk instance; whatever
         `dc.validate()` raises is propagated as well.
     """
     if not isinstance(dc, DataChunk):
         raise DataChunkError("The data-chunk is an invalid object.")
     # Validate the argument itself: the previous code referenced
     # `data_chunk`, which is not a parameter of this function.
     dc.validate()