Пример #1
0
 def iterate_seqs(self, chunk_size=None, chunk_step=None, used_data_keys=None):
     """
     Iterate over all sequences and yield the chunk boundaries of each one.

     If ``chunk_size`` is 0, every sequence is yielded once as a whole.
     Otherwise a window of ``chunk_size`` positions is moved over the sequence
     in steps of ``chunk_step``. The window end is clipped to the sequence length,
     and iteration over a sequence stops once the remaining length after a step
     is at most ``self.min_chunk_size``.

     :param int|None chunk_size: window length. ``self.chunk_size`` is used if None
     :param int|None chunk_step: window shift. ``self.chunk_step`` is used if None
     :param set(str)|None used_data_keys: if given, only these data-keys are considered for chunking
     :return: generator which yields tuples (seq index, seq start, seq end)
     :rtype: typing.Iterator[(int,NumbersDict,NumbersDict)]
     :raises Exception: if a data-key has a length > 1 which differs from the reference key's length
     """
     if chunk_size is None:
         chunk_size = self.chunk_size
     if chunk_step is None:
         chunk_step = self.chunk_step
     s = 0
     while self.is_less_than_num_seqs(s):
         length = self.get_seq_length(s)
         if chunk_size == 0:
             # Chunking disabled: the whole sequence is one single "chunk".
             yield (s, length.constant_like(0), length)
         else:
             default_key = "data"
             if used_data_keys is not None:
                 length = NumbersDict({k: length[k] for k in used_data_keys})
                 if used_data_keys and default_key not in used_data_keys:
                     # "data" was filtered out above, so it cannot serve as the reference
                     # for the length comparisons below. Pick a deterministic replacement.
                     default_key = sorted(used_data_keys)[0]
             t = length.constant_like(0)
             # There are usually the 'data' (input) and 'classes' (targets) data-keys in `length`
             # but there can be others.
             # We expect them all of the same length so that we can do chunking.
             # In case that some length is 0 or 1,
             # we treat it special and always return the full seq repeated for every chunk.
             keys_with_full_seqs = []
             for key in length.keys():
                 if length[key] == length[default_key]:
                     continue  # ok
                 if length[key] <= 1:
                     keys_with_full_seqs.append(key)
                     continue
                 raise Exception(
                     "Chunking with multiple data-keys of different length: %r"
                     % length)
             while length[default_key] > t[default_key]:
                 chunk_start = NumbersDict(t)
                 # Clip the window end to the sequence length.
                 chunk_end = NumbersDict.min([t + chunk_size, length])
                 for key in keys_with_full_seqs:
                     # Not chunked: always cover the complete (very short) sequence.
                     chunk_start[key] = 0
                     chunk_end[key] = length[key]
                 if length.value is None:
                     # Mirror the missing broadcast value of `length` in the results.
                     chunk_start.value = None
                     chunk_end.value = None
                 yield (s, chunk_start, chunk_end)
                 t += chunk_step
                 # Do not emit a trailing chunk which would be too short.
                 if length[default_key] - t[default_key] <= self.min_chunk_size:
                     break
         s += 1
Пример #2
0
 def iterate_seqs(self, chunk_size=None, chunk_step=None, used_data_keys=None):
     """
     Walk over all sequences and generate the chunk boundaries for each of them.

     With a chunk size of 0, each sequence is emitted once, unchunked.
     Otherwise, per sequence, a window of ``chunk_size`` is shifted by ``chunk_step``
     until the end of the sequence (measured on a reference data-key) is reached,
     or until the remainder is at most ``self.min_chunk_size``.
     Chunk size and step can differ per data-key.

     :param int|NumbersDict|None chunk_size: ``self.chunk_size`` is used if None
     :param int|NumbersDict|None chunk_step: ``self.chunk_step`` is used if None
     :param set(str)|None used_data_keys: if given, restrict the chunking to these data-keys
     :return: generator which yields tuples (seq index, seq start, seq end)
     :rtype: typing.Iterator[(int,NumbersDict,NumbersDict)]
     """
     raw_size = self.chunk_size if chunk_size is None else chunk_size
     raw_step = self.chunk_step if chunk_step is None else chunk_step
     size = NumbersDict(raw_size)
     step = NumbersDict(raw_step)

     seq_idx = 0
     while self.is_less_than_num_seqs(seq_idx):
         seq_len = self.get_seq_length(seq_idx)

         if size == 0:
             # No chunking: a single entry spanning the whole sequence.
             yield (seq_idx,
                    NumbersDict.constant_like(0, numbers_dict=seq_len),
                    seq_len)
             seq_idx += 1
             continue

         # Determine the reference key which drives the chunk loop.
         ref_key = "data"
         if used_data_keys is not None:
             seq_len = NumbersDict(
                 {name: seq_len[name] for name in used_data_keys})
             if ref_key not in used_data_keys:
                 ref_key = sorted(used_data_keys)[0]
             if step[ref_key] == 0:  # allow some keys with zero chunk-step
                 assert step.max_value() > 0
                 keys_with_positive_step = [
                     name for name in sorted(used_data_keys)
                     if step[name] > 0
                 ]
                 ref_key = keys_with_positive_step[0]
         assert step[ref_key] > 0

         pos = NumbersDict.constant_like(0, numbers_dict=seq_len)

         # Usually `seq_len` has the 'data' (input) and 'classes' (targets) keys,
         # but there can be more. For chunking, they are expected to agree in length.
         # Keys with a length of 0 or 1 are the exception:
         # they are returned in full, repeated for every chunk.
         full_seq_keys = []
         for key in seq_len.keys():
             same_step = step[key] == step[ref_key]
             if same_step and seq_len[key] == seq_len[ref_key]:
                 continue  # consistent with the reference key
             if seq_len[key] <= 1:
                 full_seq_keys.append(key)
                 continue
             if same_step:
                 raise Exception(
                     "Chunking with multiple data-keys of different length: %r"
                     % seq_len)
             # Different step: both keys must at least produce the same number of full chunks.
             nr_of_full_chunks_key = (
                 (seq_len[key] - size[key]) // step[key] + 1)
             nr_of_full_chunks_ref_key = (
                 (seq_len[ref_key] - size[ref_key]) // step[ref_key] + 1)
             assert nr_of_full_chunks_key == nr_of_full_chunks_ref_key

         while seq_len[ref_key] > pos[ref_key]:
             start = NumbersDict(pos)
             end = NumbersDict.min([pos + size, seq_len])
             for key in full_seq_keys:
                 start[key] = 0
                 end[key] = seq_len[key]
             if seq_len.value is None:
                 start.value = None
                 end.value = None
             yield (seq_idx, start, end)
             pos += step
             remaining = seq_len[ref_key] - pos[ref_key]
             if remaining <= self.min_chunk_size:
                 break

         seq_idx += 1
Пример #3
0
 def iterate_seqs(self, chunk_size=None, chunk_step=None, used_data_keys=None):
   """
   Enumerates (seq index, start, end) entries for all sequences, with chunking applied.

   If the chunk size equals 0, there is exactly one entry per sequence, covering all of it.
   Otherwise each sequence is cut into windows of ``chunk_size``, shifted by ``chunk_step``.
   Per-key sizes and steps are supported via :class:`NumbersDict`.

   :param int|NumbersDict|None chunk_size: falls back to ``self.chunk_size`` if None
   :param int|NumbersDict|None chunk_step: falls back to ``self.chunk_step`` if None
   :param set(str)|None used_data_keys: if given, only these data-keys take part in the chunking
   :return: generator which yields tuples (seq index, seq start, seq end)
   :rtype: typing.Iterator[(int,NumbersDict,NumbersDict)]
   """
   if chunk_size is None:
     chunk_size = self.chunk_size
   if chunk_step is None:
     chunk_step = self.chunk_step
   sizes = NumbersDict(chunk_size)
   steps = NumbersDict(chunk_step)
   seq_index = 0
   while self.is_less_than_num_seqs(seq_index):
     lengths = self.get_seq_length(seq_index)
     if sizes == 0:
       zero_start = NumbersDict.constant_like(0, numbers_dict=lengths)
       yield (seq_index, zero_start, lengths)
     else:
       main_key = "data"
       if used_data_keys is not None:
         lengths = NumbersDict({data_key: lengths[data_key] for data_key in used_data_keys})
         if main_key not in used_data_keys:
           main_key = sorted(used_data_keys)[0]
         if steps[main_key] == 0:  # allow some keys with zero chunk-step
           assert steps.max_value() > 0
           main_key = [data_key for data_key in sorted(used_data_keys) if steps[data_key] > 0][0]
       assert steps[main_key] > 0
       offset = NumbersDict.constant_like(0, numbers_dict=lengths)
       # `lengths` usually covers the 'data' (input) and 'classes' (targets) data-keys, maybe more.
       # To be able to chunk, we expect that they all have the same length.
       # A length of 0 or 1 is handled as a special case:
       # such a key gets its full seq, repeated for every chunk.
       whole_seq_keys = []
       for key in lengths.keys():
         steps_match = steps[key] == steps[main_key]
         if steps_match and lengths[key] == lengths[main_key]:
           continue  # ok
         if lengths[key] <= 1:  # special case as explained above
           whole_seq_keys.append(key)
         elif steps_match:
           raise Exception("Chunking with multiple data-keys of different length: %r" % lengths)
         else:
           # Differing steps are fine as long as the number of full chunks is the same.
           num_full_chunks = (lengths[key] - sizes[key]) // steps[key] + 1
           num_full_chunks_main = (lengths[main_key] - sizes[main_key]) // steps[main_key] + 1
           assert num_full_chunks == num_full_chunks_main
       while lengths[main_key] > offset[main_key]:
         begin = NumbersDict(offset)
         finish = NumbersDict.min([offset + sizes, lengths])
         for key in whole_seq_keys:
           begin[key] = 0
           finish[key] = lengths[key]
         if lengths.value is None:
           begin.value = None
           finish.value = None
         yield (seq_index, begin, finish)
         offset += steps
         # Stop if what is left over is not more than the minimum chunk size.
         if lengths[main_key] - offset[main_key] <= self.min_chunk_size:
           break
     seq_index += 1