Exemplo n.º 1
0
    def __init__(self,
                 datastore,
                 group,
                 name,
                 timestamp=None,
                 write_mode='write'):
        """
        Set up a writer for an 'indexedstring' field.

        :param datastore: object providing ``timestamp`` (used as the default
            timestamp) and ``chunksize``
        :param group: passed through to the base class initialiser
        :param name: passed through to the base class initialiser
        :param timestamp: timestamp recorded for the field; when None,
            ``datastore.timestamp`` is used
        :param write_mode: passed through to the base class initialiser
        """
        if timestamp is None:
            timestamp = datastore.timestamp
        fieldtype = 'indexedstring'
        super().__init__(datastore, group, name, write_mode,
                         (('fieldtype', fieldtype), ('timestamp', timestamp),
                          ('chunksize', datastore.chunksize)))
        self.fieldtype = fieldtype
        self.timestamp = timestamp
        self.datastore = datastore
        self.chunk_accumulated = 0

        # self.field is expected to be set by the base initialiser; the
        # 'index' entry is seeded with a single leading 0 if it is absent.
        if 'index' not in self.field:
            DataWriter.write(self.field, 'index', [0], 1)
Exemplo n.º 2
0
 def write_part(self, values):
     """
     Convert a batch of date-time byte strings to timestamps and write them to 'values'.

     The layout of each entry is chosen by its length after stripping
     whitespace: 32 characters (seconds plus microseconds at [20:26]),
     25 characters or 19 characters (seconds only). Empty entries are
     stored as 0.

     :param values: sized, indexable sequence of byte strings
     :raises ValueError: if a non-empty entry has any other length
     """
     # NOTE(review): for the 32 and 25 character layouts the characters after
     # the parsed fields (presumably a UTC offset) are never read, so the
     # datetime is naive - confirm this is intended.
     timestamps = np.zeros(len(values), dtype=np.float64)
     for i, raw in enumerate(values):
         value = raw.strip()
         if value == b'':
             timestamps[i] = 0
             continue

         length = len(value)
         if length not in (32, 25, 19):
             raise ValueError(
                 f"Date field '{self.field}' has unexpected format '{value}'"
             )
         parts = [int(value[0:4]), int(value[5:7]), int(value[8:10]),
                  int(value[11:13]), int(value[14:16]), int(value[17:19])]
         if length == 32:
             # Only the longest layout carries a microsecond component.
             parts.append(int(value[20:26]))
         timestamps[i] = datetime(*parts).timestamp()
     DataWriter.write(self.field, 'values', timestamps, len(timestamps))
Exemplo n.º 3
0
 def write_part_raw(self, index, values):
     """
     Write already-built index and value arrays to the field.

     :param index: ndarray whose dtype must be int64; written under 'index'
     :param values: ndarray whose dtype must be uint8 or 'S1'; written under 'values'
     :raises ValueError: if either array has a different dtype
     """
     index_ok = index.dtype == np.int64
     if not index_ok:
         raise ValueError(f"'index' must be an ndarray of '{np.int64}'")

     values_ok = any(values.dtype == allowed for allowed in (np.uint8, 'S1'))
     if not values_ok:
         raise ValueError(f"'values' must be an ndarray of '{np.uint8}' or 'S1'")

     # Index first, then values, each with its own length as the count.
     for dataset_name, data in (('index', index), ('values', values)):
         DataWriter.write(self.field, dataset_name, data, len(data))
Exemplo n.º 4
0
 def write(self, part):
     """
     Write ``part`` in one go and then finalise via ``complete()``.

     The data is handed to ``DataWriter.write`` together with its length and
     the dtype of ``self._dataset``.

     :param part: sized collection of elements to write
     """
     element_count = len(part)
     storage_dtype = self._dataset.dtype
     DataWriter.write(self._field, self._name, part, element_count,
                      dtype=storage_dtype)
     self.complete()
Exemplo n.º 5
0
    def write_part(self, values):
        """
        Buffer a batch of strings in indexed string form, writing out full buffers.

        Every string is encoded and copied byte by byte into ``self.values``;
        the running byte total after each string goes into ``self.indices``.
        Whenever a buffer holds ``datastore.chunksize`` entries it is passed to
        ``DataWriter.write`` and its fill position is reset to 0. Partially
        filled buffers are left in place.

        :param values: an iterable of str
        """
        if not self.ever_written:
            # First call: the leading index entry is the starting offset.
            self.indices[0] = self.accumulated
            self.index_index = 1
            self.ever_written = True

        for text in values:
            for byte in text.encode():
                self.values[self.value_index] = byte
                self.value_index += 1
                if self.value_index == self.datastore.chunksize:
                    DataWriter.write(self.field, 'values', self.values,
                                     self.value_index)
                    self.value_index = 0
                self.accumulated += 1

            # One index entry per string: the cumulative byte count so far.
            self.indices[self.index_index] = self.accumulated
            self.index_index += 1
            if self.index_index == self.datastore.chunksize:
                DataWriter.write(self.field, 'index', self.indices,
                                 self.index_index)
                self.index_index = 0
Exemplo n.º 6
0
def timestamp_field_constructor(
        session, group, name, timestamp=None, chunksize=None):
    """
    Create a field tagged as 'timestamp' with an initial empty 'values' write.

    The field comes from ``base_field_contructor``; its 'fieldtype' attribute
    is set to 'timestamp' and an empty list (count 0, dtype 'float64') is
    passed to ``DataWriter.write`` under 'values'. Nothing is returned.
    """
    new_field = base_field_contructor(session, group, name, timestamp, chunksize)
    new_field.attrs['fieldtype'] = 'timestamp'
    DataWriter.write(new_field, 'values', [], 0, 'float64')
Exemplo n.º 7
0
def indexed_string_field_constructor(
        session, group, name, timestamp=None, chunksize=None):
    """
    Create a field tagged as 'indexedstring' with empty 'index' and 'values' writes.

    The field comes from ``base_field_contructor``; an empty list (count 0)
    is passed to ``DataWriter.write`` for 'index' with dtype 'int64' and then
    for 'values' with dtype 'uint8'. Nothing is returned.
    """
    new_field = base_field_contructor(session, group, name, timestamp, chunksize)
    new_field.attrs['fieldtype'] = 'indexedstring'
    for dataset_name, dataset_dtype in (('index', 'int64'), ('values', 'uint8')):
        DataWriter.write(new_field, dataset_name, [], 0, dataset_dtype)
Exemplo n.º 8
0
def numeric_field_constructor(
        session, group, name, nformat, timestamp=None, chunksize=None):
    """
    Create a field tagged as 'numeric,<nformat>' with an initial empty 'values' write.

    The field comes from ``base_field_contructor``; ``nformat`` is stored in
    the 'nformat' attribute and also used as the dtype argument of the empty
    'values' write. Nothing is returned.

    :param nformat: format name embedded in the fieldtype and used as dtype
    """
    new_field = base_field_contructor(session, group, name, timestamp, chunksize)
    attributes = new_field.attrs
    attributes['fieldtype'] = f'numeric,{nformat}'
    attributes['nformat'] = nformat
    DataWriter.write(new_field, 'values', [], 0, nformat)
Exemplo n.º 9
0
def fixed_string_field_constructor(
        session, group, name, length, timestamp=None, chunksize=None):
    """
    Create a field tagged as 'fixedstring,<length>' with an initial empty 'values' write.

    The field comes from ``base_field_contructor``; ``length`` is stored in
    the 'strlen' attribute and the empty 'values' write uses the dtype string
    'S<length>'. Nothing is returned.

    :param length: string length embedded in the fieldtype and in the dtype
    """
    new_field = base_field_contructor(session, group, name, timestamp, chunksize)
    attributes = new_field.attrs
    attributes['fieldtype'] = f'fixedstring,{length}'
    attributes['strlen'] = length
    storage_dtype = f"S{length}"
    DataWriter.write(new_field, 'values', [], 0, storage_dtype)
Exemplo n.º 10
0
    def write_part(self, values):
        """
        Convert a batch of 'YYYY-MM-DD' byte strings to timestamps and write them to 'values'.

        Empty entries (``b''``) are stored as 0; every other entry is decoded
        and parsed with the '%Y-%m-%d' format.

        :param values: sized, indexable sequence of byte strings
        """
        timestamps = np.zeros(len(values), dtype=np.float64)
        for position, value in enumerate(values):
            if value == b'':
                timestamps[position] = 0
                continue
            parsed = datetime.strptime(value.decode(), '%Y-%m-%d')
            timestamps[position] = parsed.timestamp()
        DataWriter.write(self.field, 'values', timestamps, len(timestamps))
Exemplo n.º 11
0
 def flush(self):
     """
     Write out whatever is still buffered, then delegate to the base ``flush``.

     The value buffer is written when it holds data or when the field has no
     'values' entry yet; the index buffer is written only when it holds data.
     Both fill positions are reset to 0 after their write.
     """
     if not (self.value_index == 0 and 'values' in self.field):
         DataWriter.write(self.field, 'values', self.values, self.value_index)
         self.value_index = 0

     if self.index_index != 0:
         DataWriter.write(self.field, 'index', self.indices, self.index_index)
         self.index_index = 0

     super().flush()
Exemplo n.º 12
0
 def flush(self):
     """
     Write the key mapping held in ``self.keys``, then delegate to the base ``flush``.

     Key names go to 'key_names' with an h5py string dtype; the matching
     values go to 'key_values' as an int8 array, in the same order.
     """
     key_values = np.zeros(len(self.keys), dtype='int8')
     key_strs = []
     for position, (key_name, key_value) in enumerate(self.keys.items()):
         key_strs.append(key_name)
         key_values[position] = key_value

     DataWriter.write(self.field, 'key_values', key_values, len(key_values))
     DataWriter.write(self.field, 'key_names', key_strs, len(key_strs),
                      dtype=h5py.string_dtype())
     super().flush()
Exemplo n.º 13
0
 def clear(self):
     """
     Reset the field to an empty state.

     Clears the index and values datasets, writes an empty list (count 0) for
     each of them with dtypes 'int64' and 'uint8' respectively, re-reads both
     dataset handles from the field and zeroes the running total.
     """
     self._accumulated = 0
     layout = ((self._index_name, 'int64'), (self._values_name, 'uint8'))

     # Clear both datasets before writing either one, as before.
     for dataset_name, _ in layout:
         DataWriter.clear_dataset(self._field, dataset_name)
     for dataset_name, dataset_dtype in layout:
         DataWriter.write(self._field, dataset_name, [], 0, dataset_dtype)

     self._index_dataset = self._field[self._index_name]
     self._values_dataset = self._field[self._values_name]
Exemplo n.º 14
0
 def __init__(self, datastore, group, name, write_mode, attributes):
     """
     Create the group backing this field, moving any existing field to the trash first.

     :param datastore: object providing ``get_trash_group(field)``
     :param group: parent group in which the field is created
     :param name: name of the field inside ``group``
     :param write_mode: must be one of ``write_modes``; 'overwrite' allows
         replacing an existing field
     :param attributes: passed to ``DataWriter.create_group`` for the new group
     :raises ValueError: if ``write_mode`` is not one of ``write_modes``
     :raises KeyError: if the field already exists and ``write_mode`` is not
         'overwrite'
     """
     self.trash_field = None
     if write_mode not in write_modes:
         raise ValueError(f"'write_mode' must be one of {write_modes}")

     if name in group:
         if write_mode != 'overwrite':
             error = (f"Field '{name}' already exists. Set 'write_mode' to 'overwrite' "
                      "if you want to overwrite the existing contents")
             raise KeyError(error)

         field = group[name]
         trash = datastore.get_trash_group(field)
         leaf_name = name.split('/')[-1]
         dest_name = trash.name + f"/{leaf_name}"
         group.move(field.name, dest_name)
         # The field was moved under its leaf name, so it must be looked up by
         # that name; using the full `name` fails when it contains '/'.
         self.trash_field = trash[leaf_name]

     DataWriter.create_group(group, name, attributes)
     self.field = group[name]
     self.name = name
Exemplo n.º 15
0
    def write_part(self, values):
        """
        Writes a list of strings in indexed string form to a field.

        The running total starts from the last entry of the field's 'index'.
        Bytes are collected in a buffer of ``self.chunksize`` entries that is
        written to 'values' each time it fills up; one cumulative offset per
        string is written to 'index' at the end, followed by any bytes left
        in the buffer.

        :param values: a list of utf8 strings (str is encoded; anything else
            is iterated as-is)
        """
        running_total = self.field['index'][-1]
        offsets = np.zeros(len(values), dtype=np.int64)
        buffer = np.zeros(self.chunksize, dtype=np.uint8)
        offset_count = 0
        buffered = 0

        for item in values:
            encoded = item.encode() if isinstance(item, str) else item
            for byte in encoded:
                buffer[buffered] = byte
                buffered += 1
                if buffered == len(buffer):
                    DataWriter.write(self.field, 'values', buffer, buffered)
                    buffered = 0
                running_total += 1
            offsets[offset_count] = running_total
            offset_count += 1

        if offset_count > 0:
            DataWriter.write(self.field, 'index', offsets, offset_count)
        if buffered > 0:
            DataWriter.write(self.field, 'values', buffer, buffered)
Exemplo n.º 16
0
 def complete(self):
     """
     Write out any buffered values and index entries and reset the buffers.

     The value buffer is written when it holds data. The index buffer is
     written when it holds data; if the index dataset is still empty at that
     point, a leading 0 is written first.
     """
     if self._value_index != 0:
         DataWriter.write(self._field, self._values_name, self._raw_values,
                          self._value_index)
         self._value_index = 0

     if self._index_index != 0:
         # Check the dataset that is actually written below; the name was
         # previously hard-coded as 'index' here.
         if len(self._field[self._index_name]) == 0:
             DataWriter.write(self._field, self._index_name, [0], 1)
         DataWriter.write(self._field, self._index_name, self._raw_indices,
                          self._index_index)
         self._index_index = 0
Exemplo n.º 17
0
def categorical_field_constructor(
        session, group, name, nformat, key, timestamp=None, chunksize=None):
    """
    Create a field tagged as 'categorical,<nformat>' together with its key mapping.

    The field comes from ``base_field_contructor``. An empty 'values' write
    uses ``nformat`` as dtype; the values of ``key`` are written to
    'key_values' with dtype 'int8' and its keys to 'key_names' with a
    variable-length string dtype. Nothing is returned.

    :param nformat: format name embedded in the fieldtype and used as dtype
    :param key: mapping whose keys and values are written in iteration order
    """
    new_field = base_field_contructor(session, group, name, timestamp, chunksize)
    attributes = new_field.attrs
    attributes['fieldtype'] = f'categorical,{nformat}'
    attributes['nformat'] = nformat
    DataWriter.write(new_field, 'values', [], 0, nformat)

    key_values = list(key.values())
    key_names = list(key.keys())
    DataWriter.write(new_field, 'key_values', key_values, len(key_values), 'int8')
    DataWriter.write(new_field, 'key_names', key_names, len(key_names),
                     h5py.special_dtype(vlen=str))
Exemplo n.º 18
0
 def write_part(self, part):
     """
     Buffer a batch of strings in indexed string form, writing out full buffers.

     Every string is encoded and copied byte by byte into ``_raw_values``;
     the running byte total after each string goes into ``_raw_indices``.
     A buffer is written through ``DataWriter.write`` once it holds
     ``_chunksize`` entries. Before the first index write, a leading 0 is
     written if the index dataset is still empty.

     :param part: an iterable of str
     """
     for text in part:
         for byte in text.encode():
             self._raw_values[self._value_index] = byte
             self._value_index += 1
             if self._value_index == self._chunksize:
                 DataWriter.write(self._field, self._values_name,
                                  self._raw_values, self._value_index)
                 self._value_index = 0
             self._accumulated += 1

         # One index entry per string: the cumulative byte count so far.
         self._raw_indices[self._index_index] = self._accumulated
         self._index_index += 1
         if self._index_index == self._chunksize:
             # Check the dataset that is actually written below; the name was
             # previously hard-coded as 'index' here.
             if len(self._field[self._index_name]) == 0:
                 DataWriter.write(self._field, self._index_name, [0], 1)
             DataWriter.write(self._field, self._index_name,
                              self._raw_indices, self._index_index)
             self._index_index = 0
Exemplo n.º 19
0
 def write_part(self, values):
     """
     Pass ``values`` unchanged to ``DataWriter.write`` under 'values'.

     :param values: sized collection; its length is used as the count
     """
     value_count = len(values)
     DataWriter.write(self.field, 'values', values, value_count)
Exemplo n.º 20
0
 def clear(self):
     """
     Clear this field's dataset through ``DataWriter._clear_dataset``.

     TODO: unlink the dataset
     """
     parent, dataset_name = self._field, self._name
     DataWriter._clear_dataset(parent, dataset_name)
Exemplo n.º 21
0
 def write_part(self, values):
     """
     Write a batch of numeric values to 'values', converting the dtype if needed.

     The array is cast with ``astype(self.nformat)`` unless its dtype already
     satisfies ``np.issubdtype`` against ``np.dtype(self.nformat)``.

     :param values: ndarray of values to write
     """
     target_dtype = np.dtype(self.nformat)
     needs_cast = not np.issubdtype(values.dtype, target_dtype)
     if needs_cast:
         values = values.astype(self.nformat)
     DataWriter.write(self.field, 'values', values, len(values))
Exemplo n.º 22
0
 def complete(self):
     """Pass the dataset stored under ``self._name`` to ``DataWriter.flush``."""
     dataset = self._field[self._name]
     DataWriter.flush(dataset)
Exemplo n.º 23
0
 def flush(self):
     """
     Flush the field through ``DataWriter.flush`` and drop the trash reference.

     The reference to the trashed field is reset to None rather than deleted,
     so the attribute still exists and a repeated ``flush()`` does not raise
     AttributeError.
     """
     DataWriter.flush(self.field)
     # NOTE(review): this only releases the Python reference; nothing here
     # removes the trashed field from the file - confirm that is handled elsewhere.
     self.trash_field = None