示例#1
0
 def process(self, name, X):
   """Slice the selected arrays of `X` along `self.axis`.

   `self.data_idx` is resolved with `axis_normalize` against `len(X)`;
   arrays at any other position are passed through unchanged.

   Parameters
   ----------
   name : object
     returned as-is
   X : sequence of arrays
     selected elements must expose `ndim` and support tuple indexing

   Returns
   -------
   (name, list)
     the list holds one entry per element of `X`, in the same order
   """
   selected = axis_normalize(axis=self.data_idx,
                             ndim=len(X),
                             return_tuple=True)
   outputs = []
   for position, arr in enumerate(X):
     if position not in selected:
       outputs.append(arr)
       continue
     n_dims = arr.ndim
     target = self.axis % n_dims  # non-negative position of the sliced axis
     if isinstance(self.slices, (slice, int)):
       # a single slice/int: full slices everywhere except the target axis
       selector = tuple(self.slices if dim == target else slice(None)
                        for dim in range(n_dims))
       arr = arr[selector]
     else:
       # several indices: extract every piece, then join the pieces
       selectors = [tuple(key if dim == target else slice(None)
                          for dim in range(n_dims))
                    for key in self.slices]
       arr = np.concatenate([arr[sel] for sel in selectors],
                            axis=self.axis)
     # always hand back a C-contiguous array
     outputs.append(np.ascontiguousarray(arr))
   return name, outputs
示例#2
0
 def process(self, name, X):
   """Cut the selected arrays of `X` into frames along their first axis.

   Returns `None` when `self.end` is 'cut' and any array in `X` is
   shorter than `self.frame_length`, or when `self.end` is 'ignore' and
   any array in `X` is longer than `self.frame_length`.

   Otherwise every array whose position is selected by `self.data_idx`
   goes through `segment_axis`; the other arrays are kept as they are.

   Returns
   -------
   None or (name, list)
   """
   mode = self.end
   # ====== reject inputs of unsuitable length ====== #
   if mode == 'cut' and \
   any(arr.shape[0] < self.frame_length for arr in X):
     return None
   if mode == 'ignore' and \
   any(arr.shape[0] > self.frame_length for arr in X):
     return None
   # whatever passes the 'ignore' filter is segmented in 'pad' mode
   if mode == 'ignore':
     mode = 'pad'
   # ====== which positions of X hold data ====== #
   selected = axis_normalize(axis=self.data_idx, ndim=len(X),
                             return_tuple=True)
   # ====== framing ====== #
   framed = []
   for position, arr in enumerate(X):
     if position in selected:
       if mode == 'mix':
         # 'mix': cut when long enough, pad otherwise
         end_mode = 'cut' if arr.shape[0] >= self.frame_length else 'pad'
       else:
         end_mode = mode
       arr = segment_axis(a=arr,
                          frame_length=self.frame_length,
                          step_length=self.step_length, axis=0,
                          end=end_mode, pad_value=self.pad_value,
                          pad_mode=self.pad_mode)
     framed.append(arr)
   return name, framed
示例#3
0
 def process(self, name, X):
   """Insert a new axis at `self.axis` into every selected array of `X`.

   Positions are selected by resolving `self.data_idx` with
   `axis_normalize`; other arrays are returned unchanged.

   Returns
   -------
   (name, list)
   """
   selected = axis_normalize(axis=self.data_idx,
                             ndim=len(X),
                             return_tuple=True)
   expanded = []
   for position, arr in enumerate(X):
     if position in selected:
       arr = np.expand_dims(arr, axis=self.axis)
     expanded.append(arr)
   return name, expanded
示例#4
0
 def shape_transform(self, shapes):
   """Infer the output shapes: only the last dimension changes.

   For every selected `(shape, ids)` pair the last dimension becomes
   `shape[-1] // self.size - 2`; `ids` and all other pairs are kept.

   Returns
   -------
   list of (shape, ids)
   """
   selected = axis_normalize(axis=self.data_idx,
                             ndim=len(shapes),
                             return_tuple=True)
   transformed = []
   for position, (shp, ids) in enumerate(shapes):
     if position in selected:
       last_dim = shp[-1] // self.size - 2
       shp = tuple(shp[:-1] + (last_dim,))
     transformed.append((shp, ids))
   return transformed
示例#5
0
 def process(self, name, X):
   """Horizontally stack the selected arrays of `X` into a single array.

   Nothing is merged unless `X` has more than one element and more than
   one position is selected by `self.data_idx`. When merging happens, the
   stacked array comes first, followed by the non-selected arrays in
   their original order.

   Returns
   -------
   (name, list)
   """
   selected = axis_normalize(axis=self.data_idx,
                             ndim=len(X),
                             return_tuple=True)
   if len(X) > 1 and len(selected) > 1:
     to_merge, untouched = [], []
     for position, arr in enumerate(X):
       if position in selected:
         to_merge.append(arr)
       else:
         untouched.append(arr)
     X = [np.hstack(to_merge)] + untouched
   return name, X
示例#6
0
 def process(self, name, X):
   """Index the selected arrays with the index looked up for `name`.

   Every array at a position selected by `self.data_idx` is indexed with
   `self._get_index(name)`. When `self.mvn` is set, selected arrays that
   are not at a `self.label_idx` position are additionally passed to
   `_mvn` with `varnorm=self.varnorm`.

   Returns
   -------
   (name, list)
   """
   data_positions = axis_normalize(axis=self.data_idx,
                                   ndim=len(X),
                                   return_tuple=True)
   label_positions = axis_normalize(axis=self.label_idx,
                                    ndim=len(X),
                                    return_tuple=True)
   index = self._get_index(name)
   # ====== indexing ====== #
   outputs = []
   for position, arr in enumerate(X):
     if position in data_positions:
       arr = arr[index]
       # labels are indexed but never normalized
       if self.mvn:
         if position not in label_positions:
           arr = _mvn(arr, varnorm=self.varnorm)
     outputs.append(arr)
   return name, outputs
示例#7
0
 def shape_transform(self, shapes):
   """Infer the output shapes: `self.length` is inserted as dimension 1.

   A selected shape `(n, ...)` becomes `(n, self.length, ...)`; `ids`
   and non-selected pairs are kept as they are.

   Returns
   -------
   list of (shape, ids)
   """
   selected = axis_normalize(axis=self.data_idx, ndim=len(shapes),
                             return_tuple=True)
   return [((shp[0], self.length) + shp[1:], ids)
           if position in selected else (shp, ids)
           for position, (shp, ids) in enumerate(shapes)]
示例#8
0
 def shape_transform(self, shapes):
   """Infer the shapes after the size of `self.axis` has been multiplied.

   When `self.delta > 0`, the dimension `self.axis` of every selected
   shape is multiplied by `n`, where `n` is `self.delta + 1` if
   `self.keep_original` is set and `self.delta` otherwise (presumably
   the number of arrays that get concatenated along that axis). The
   number of dimensions does not change.

   Parameters
   ----------
   shapes : list of (shape, ids)

   Returns
   -------
   list of (shape, ids)
     `shapes` itself when `self.delta <= 0`
   """
   if self.delta <= 0:
     return shapes
   data_idx = axis_normalize(axis=self.data_idx,
                             ndim=len(shapes),
                             return_tuple=True)
   n = (self.delta + 1) if self.keep_original else self.delta
   new_shapes = []
   for i, (shp, ids) in enumerate(shapes):
     if i in data_idx:
       shp = tuple(shp)
       # resolve a negative axis first: with axis=-1, `axis + 1` would
       # otherwise become 0 and the whole shape would be appended again
       axis = self.axis % len(shp)
       # replace the dimension in place; the tail must start AFTER it,
       # otherwise the original size is kept as an extra dimension
       shp = shp[:axis] + (shp[axis] * n,) + shp[axis + 1:]
     new_shapes.append((shp, ids))
   return new_shapes
示例#9
0
 def process(self, name, X):
   """Concatenate the result of `delta` onto the selected arrays.

   Does nothing unless `self.delta > 0`. For each selected array, the
   list returned by `delta(x, order=self.delta, axis=self.axis)` is
   concatenated along `self.axis`, preceded by the array itself when
   `self.keep_original` is set.

   Returns
   -------
   (name, list)
   """
   if self.delta <= 0:
     return name, X
   selected = axis_normalize(axis=self.data_idx,
                             ndim=len(X),
                             return_tuple=True)
   outputs = []
   for position, arr in enumerate(X):
     if position in selected:
       head = [arr] if self.keep_original else []
       parts = head + delta(arr, order=self.delta, axis=self.axis)
       arr = np.concatenate(parts, axis=self.axis)
     outputs.append(arr)
   return name, outputs
示例#10
0
 def process(self, name, X):
   """Transform the selected arrays of `X` with `self._pca`.

   `self._pca.whiten` is temporarily set to `self.whiten` while the
   selected arrays go through
   `self._pca.transform(x, n_components=self.nb_components)`, and is put
   back to its previous value afterwards, even if `transform` raises.

   Returns
   -------
   (name, list)
   """
   data_idx = axis_normalize(axis=self.data_idx,
                             ndim=len(X),
                             return_tuple=True)
   pca = self._pca
   previous_whiten = pca.whiten
   pca.whiten = self.whiten
   try:
     X = [pca.transform(x, n_components=self.nb_components)
          if i in data_idx else x
          for i, x in enumerate(X)]
   finally:
     # restore unconditionally so a failing transform cannot leave
     # `self._pca` with the temporary whiten setting
     pca.whiten = previous_whiten
   return name, X
示例#11
0
文件: data.py 项目: imito/odin
 def __init__(self, data, axis=-1):
   """Validate the inputs and remember the concatenation axis.

   Parameters
   ----------
   data : object
     converted with `as_tuple`; every element must expose `ndim` and
     `shape`
   axis : int
     axis to concatenate along; it must not refer to the first axis

   Raises
   ------
   ValueError
     if fewer than 2 elements are given, if `axis` refers to axis 0, or
     if the elements differ in `ndim` or in the size of their first
     dimension
   """
   data = as_tuple(data)
   if len(data) < 2:
     raise ValueError("2 or more Data must be given to `DataConcat`")
   if axis == 0:
     raise ValueError("Cannot concatenate axis=0")
   # more than ONE distinct value already means the inputs disagree
   if len(set(d.ndim for d in data)) > 1:
     raise ValueError("All Data must have the same number of dimension (i.e. `ndim`)")
   if len(set(d.shape[0] for d in data)) > 1:
     raise ValueError("All Data must have the same length (i.e. first dimension)")
   # check again after normalization so that a negative axis pointing at
   # the first dimension (e.g. axis=-ndim) is rejected as well
   # NOTE(review): assumes `axis_normalize` returns a non-negative int here
   axis = axis_normalize(int(axis), ndim=data[0].ndim)
   if axis == 0:
     raise ValueError("Cannot concatenate axis=0")
   super(DataConcat, self).__init__(data, read_only=True)
   self._is_data_list = False
   self._axis = axis
示例#12
0
 def process(self, name, X):
   """Rescale every selected array of `X` to the range [0, 1].

   Selected arrays are cast to float32 and mapped through
   `(x - min) / (max - min)`, with min and max taken over the whole
   array. A constant array (max == min) becomes all zeros instead of
   being divided by zero. Arrays that are not selected by
   `self.data_idx` are passed through unchanged, so the output always
   has as many entries as `X`.

   Returns
   -------
   (name, list)
   """
   data_idx = axis_normalize(axis=self.data_idx,
                             ndim=len(X),
                             return_tuple=True)
   # ====== scaling features to [0, 1] ====== #
   X_new = []
   for i, x in enumerate(X):
     if i in data_idx:
       x = x.astype('float32')
       min_ = x.min()
       value_range = x.max() - min_
       x = x - min_
       # skip the division for a zero range: `x - min_` is already 0
       if value_range != 0:
         x = x / value_range
     # appended for every position, not only for the scaled ones
     X_new.append(x)
   return name, X_new
示例#13
0
 def shape_transform(self, shapes):
   """Infer the shapes and index lists obtained after framing.

   For every selected `(shape, ids)` pair, each `(name, n_samples)` in
   `ids` is converted into a number of frames according to `self.end`:

   * 'cut'    : 0 if shorter than `frame_length`, otherwise
                1 + floor((n_samples - frame_length) / step_length)
   * 'ignore' : 0 if longer than `frame_length`, otherwise 1
   * 'mix'    : 1 if shorter than `frame_length`, otherwise as 'cut'
   * others   : 1 if shorter than `frame_length`, otherwise
                1 + ceil((n_samples - frame_length) / step_length)

   Entries that yield no frame are left out of the new index list. The
   new shape is `(total_frames, frame_length) + shape[1:]`.

   Parameters
   ----------
   shapes : list of (shape, ids)
     `ids` is an iterable of `(name, n_samples)` pairs

   Returns
   -------
   list of (shape, ids)
     non-selected pairs are returned unchanged
   """
   data_idx = axis_normalize(axis=self.data_idx, ndim=len(shapes),
                             return_tuple=True)
   end = self.end
   frame_length = self.frame_length
   step_length = self.step_length

   def _count_frames(n_samples):
     # number of frames a single entry of `n_samples` rows turns into
     if end == 'ignore':
       return 0 if n_samples > frame_length else 1
     if n_samples < frame_length:
       # too short: dropped by 'cut', one frame in every other mode
       return 0 if end == 'cut' else 1
     n_steps = (n_samples - frame_length) / step_length
     if end in ('cut', 'mix'):
       return int(1 + np.floor(n_steps))
     return int(1 + np.ceil(n_steps))

   # ====== update the shapes and the indices ====== #
   new_shapes = []
   for idx, (shp, ids) in enumerate(shapes):
     if idx in data_idx:
       n = 0
       ids_new = []
       for name, n_samples in ids:
         n_frames = _count_frames(n_samples)
         if n_frames > 0:
           ids_new.append((name, n_frames))
         n += n_frames
       # return the rebuilt indices together with the new shape, so the
       # per-name lengths add up to the new first dimension
       ids = ids_new
       shp = (n, frame_length) + tuple(shp[1:])
     new_shapes.append((shp, ids))
   return new_shapes
示例#14
0
 def process(self, name, X):
   """Pool pairs of neighbouring columns of every selected array.

   Each selected array loses its first two and last two columns, is
   regrouped into pairs along a new last axis, reduced over that axis
   with `self.pool_func`, and reshaped back to `(n_rows, -1)`.
   Non-selected arrays are passed through unchanged.

   Returns
   -------
   (name, list)
   """
   selected = axis_normalize(axis=self.data_idx,
                             ndim=len(X),
                             return_tuple=True)
   outputs = []
   for position, arr in enumerate(X):
     if position in selected:
       n_rows = arr.shape[0]
       trimmed = arr[:, 2:-2]
       pairs = trimmed.reshape(n_rows, -1, 2)
       pooled = self.pool_func(pairs, axis=-1)
       arr = pooled.reshape(n_rows, -1)
     outputs.append(arr)
   return name, outputs
示例#15
0
 def shape_transform(self, shapes):
   """Infer the shapes after a length-1 dimension is inserted.

   For every selected `(shape, ids)` pair a `1` is inserted at
   `self.axis`. A negative `self.axis` counts from the end of the
   resulting shape, i.e. -1 appends the new dimension and -2 places it
   just before the last one.
   NOTE(review): this assumes the matching data transformation follows
   `np.expand_dims` semantics for negative axes - confirm.

   Parameters
   ----------
   shapes : list of (shape, ids)

   Returns
   -------
   list of (shape, ids)
     non-selected pairs are returned unchanged
   """
   data_idx = axis_normalize(axis=self.data_idx,
                             ndim=len(shapes),
                             return_tuple=True)
   new_shapes = []
   for idx, (shp, ids) in enumerate(shapes):
     if idx in data_idx:
       shp = list(shp)
       axis = self.axis
       if axis < 0:
         # the output has len(shp) + 1 dimensions, so a negative axis is
         # offset by that count (ADD the negative value, do not subtract)
         axis += len(shp) + 1
       shp.insert(axis, 1)
       shp = tuple(shp)
     new_shapes.append((shp, ids))
   return new_shapes
示例#16
0
 def process(self, name, X):
   """Stack frames of every selected array with `stack_frames`.

   1-D arrays first get a trailing axis. Each selected array is passed
   to `stack_frames` (frame length `self.length`, step 1) and the result
   is reshaped to `(-1, self.length) + trailing_shape`, where
   `trailing_shape` is the input shape without its first dimension.
   Non-selected arrays are passed through unchanged.

   Returns
   -------
   (name, list)
   """
   selected = axis_normalize(axis=self.data_idx, ndim=len(X),
                             return_tuple=True)
   # ====== stacking ====== #
   stacked = []
   for position, arr in enumerate(X):
     if position in selected:
       if arr.ndim == 1:
         arr = np.expand_dims(arr, axis=-1)
       trailing_shape = arr.shape[1:]
       frames = stack_frames(arr, frame_length=self.length,
                             step_length=1, keep_length=True,
                             make_contigous=True)
       arr = np.reshape(frames,
                        newshape=(-1, self.length) + trailing_shape)
     stacked.append(arr)
   return name, stacked
示例#17
0
文件: helper.py 项目: imito/odin
 def _apply(self, X, mask=None):
   """Scan `X` along `self.time_axis`, calling every op on each input.

   The ops in `self._apply_ops` are first called on an all-zero sample
   (index 0 taken on the time axis of `zeros_like(X)`); zero tensors
   shaped like those results serve as the initializer of
   `K.scan_tensors`.

   Returns
   -------
   `outputs[0]` when there is exactly one op, otherwise `outputs` as
   returned by `K.scan_tensors`.
   """
   def _step_fn(outs, ins):
     # the previous outputs are not used, only the current input
     return [f(ins) for f in self._apply_ops]
   # ====== run the ops once to learn what the initializer looks like ====== #
   n_dims = X.shape.ndims
   time_axis = axis_normalize(self.time_axis, ndim=n_dims)
   with tf.device("/cpu:0"):
     # index 0 on the time axis, full slices on every other axis
     first_step = [0 if dim == time_axis else slice(None, None)
                   for dim in range(n_dims)]
     sample = tf.zeros_like(X)[first_step]
   initializer = [tf.zeros_like(f(sample)) for f in self._apply_ops]
   # ====== scan ====== #
   outputs = K.scan_tensors(_step_fn,
                            sequences=X, mask=mask, initializer=initializer,
                            axis=time_axis,
                            backward=self.backward, reverse=self.reverse,
                            reshape_outputs=True)
   if len(self._apply_ops) == 1:
     return outputs[0]
   return outputs
示例#18
0
 def shape_transform(self, shapes):
   """Infer the shapes and index lists after slicing along `self.axis`.

   For selected pairs:

   * `self.axis == 0`: every length in `ids` is mapped through
     `self._from_indices`, and the first dimension becomes the sum of
     the mapped lengths;
   * otherwise: only the dimension `self.axis` (modulo the number of
     dimensions) is replaced by `self._from_indices` of its size.

   Returns
   -------
   list of (shape, ids)
     non-selected pairs are returned unchanged
   """
   selected = axis_normalize(axis=self.data_idx,
                             ndim=len(shapes),
                             return_tuple=True)
   results = []
   for position, (shp, ids) in enumerate(shapes):
     if position in selected:
       if self.axis == 0:
         # slicing the first dimension changes every per-name length
         ids = [(name, self._from_indices(length))
                for name, length in ids]
         total = sum(entry[1] for entry in ids)
         shp = (total,) + shp[1:]
       else:
         target = self.axis % len(shp)  # handles a negative axis
         new_size = self._from_indices(shp[target])
         dims = list(shp)
         dims[target] = new_size
         shp = tuple(dims)
     results.append((shp, ids))
   return results
示例#19
0
 def shape_transform(self, shapes):
   """Infer the shapes and index lists after indexing by name.

   For every selected pair, each name in `ids` gets the length
   `np.sum(self._get_index(name))`, and the first dimension of the shape
   becomes the sum of those lengths. Non-selected pairs are returned
   unchanged.

   Returns
   -------
   list of (shape, ids)
   """
   selected = axis_normalize(axis=self.data_idx,
                             ndim=len(shapes),
                             return_tuple=True)
   results = []
   for position, (shp, ids) in enumerate(shapes):
     if position in selected:
       # ====== new length of every name ====== #
       # one index lookup per name
       ids = [(name, np.sum(self._get_index(name)))
              for name, _ in ids]
       # ====== new first dimension ====== #
       total = 0
       for _, length in ids:
         total += length
       shp = (total,) + shp[1:]
     results.append((shp, ids))
   return results
示例#20
0
 def shape_transform(self, shapes):
   """Infer the shapes after the selected entries are merged into one.

   `shapes` is returned untouched when it has at most one entry or when
   at most one position is selected by `self.data_idx`. Otherwise the
   selected shapes collapse into a single pair placed first: it keeps
   the leading dimensions and the `ids` of the first selected entry, and
   its last dimension is the sum of all selected last dimensions. The
   non-selected pairs follow in their original order.

   Returns
   -------
   list of (shape, ids)
   """
   selected = axis_normalize(axis=self.data_idx,
                             ndim=len(shapes),
                             return_tuple=True)
   # nothing to merge
   if len(shapes) <= 1 or len(selected) <= 1:
     return shapes
   # ====== split into merged / untouched ====== #
   to_merge = [(shp, ids) for position, (shp, ids) in enumerate(shapes)
               if position in selected]
   untouched = [(shp, ids) for position, (shp, ids) in enumerate(shapes)
                if position not in selected]
   # ====== horizontal stacking ====== #
   first_shape, first_ids = to_merge[0]
   last_dim = sum(shp[-1] for shp, _ in to_merge)
   merged = (first_shape[:-1] + (last_dim,), first_ids)
   return [merged] + untouched
示例#21
0
    def _apply(self, X, mask=None):
        def _step_fn(outs, ins):
            return [f(ins) for f in self._apply_ops]

        # ====== need to apply the ops to know initializer information ====== #
        ndim = X.shape.ndims
        axis = axis_normalize(self.time_axis, ndim=ndim)
        with tf.device("/cpu:0"):
            sample = tf.zeros_like(X)
            sample = sample[[
                slice(None, None) if i != axis else 0 for i in range(ndim)
            ]]
        initializer = [tf.zeros_like(f(sample)) for f in self._apply_ops]
        # ====== scan ====== #
        outputs = K.scan_tensors(_step_fn,
                                 sequences=X,
                                 mask=mask,
                                 initializer=initializer,
                                 axis=axis,
                                 backward=self.backward,
                                 reverse=self.reverse,
                                 reshape_outputs=True)
        return outputs[0] if len(self._apply_ops) == 1 else outputs
示例#22
0
 def process(self, name, X):
   """Normalize every selected array of `X`.

   Selected arrays are cast to float32, then:

   1. if both `self.mean` and `self.std` are set, the array becomes
      `(x - mean) / (std + 1e-20)`;
   2. one local normalization is applied, chosen by the first keyword
      found in `self.local_normalize`:
      'normal' / 'true' - zero mean and unit std along `self.axis`,
      'sigmoid'         - min-max rescaling to [0, 1],
      'tanh'            - min-max rescaling to [-1, 1],
      'mean'            - subtraction of the mean over axis 0.

   Non-selected arrays are passed through unchanged.

   Returns
   -------
   (name, list)
   """
   selected = axis_normalize(axis=self.data_idx, ndim=len(X),
                             return_tuple=True)
   outputs = []
   for position, arr in enumerate(X):
     if position not in selected:
       outputs.append(arr)
       continue
     arr = arr.astype('float32')
     # ====== global normalization ====== #
     if self.mean is not None and self.std is not None:
       arr = (arr - self.mean) / (self.std + 1e-20)
     # ====== local normalization ====== #
     mode = self.local_normalize
     if 'normal' in mode or 'true' in mode:
       centered = arr - arr.mean(self.axis, keepdims=True)
       arr = centered / (arr.std(self.axis, keepdims=True) + 1e-20)
     elif 'sigmoid' in mode:
       low, high = np.min(arr), np.max(arr)
       arr = (arr - low) / (high - low)
     elif 'tanh' in mode:
       low, high = np.min(arr), np.max(arr)
       arr = 2 * (arr - low) / (high - low) - 1
     elif 'mean' in mode:
       arr -= arr.mean(0)
     outputs.append(arr)
   return name, outputs
示例#23
0
def _get_data_label_idx(data_idx, label_idx, ndim):
  """Normalize both index specifications and make them disjoint.

  Both arguments are resolved with `axis_normalize` against `ndim`;
  every position that is also a label position is then removed from the
  data positions.

  Returns
  -------
  (list, label positions)
    the filtered data positions as a list, and the label positions
    exactly as returned by `axis_normalize`
  """
  data_positions = axis_normalize(axis=data_idx, ndim=ndim,
                                  return_tuple=True)
  label_positions = axis_normalize(axis=label_idx, ndim=ndim,
                                   return_tuple=True)
  # a position cannot be both data and label: label wins
  data_only = []
  for position in data_positions:
    if position not in label_positions:
      data_only.append(position)
  return data_only, label_positions