def str_slice(x, start=0, stop=None):  # TODO: support n
    """Slice substrings from each string element in a column.

    :param int start: The start position for the slice operation.
    :param int stop: The stop position for the slice operation. When None
        (the default) the open-ended `slice_string_end` variant is used and
        only `start` is passed on.
    :returns: an expression containing the sliced substrings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.slice(start=2, stop=5)
    Expression = str_pandas_slice(text, start=2, stop=5)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0  met
    1   ry
    2   co
    3    r
    4   y.
    """
    strings = _to_string_sequence(x)
    if stop is not None:
        return strings.slice_string(start, stop)
    # No stop position given: use the variant that only takes a start.
    return strings.slice_string_end(start)
def str_cat(x, other):
    """Concatenate two string columns on a row-by-row basis.

    :param expression other: The expression of the other column to be concatenated.
    :returns: an expression containing the concatenated columns.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.cat(df.text)
    Expression = str_cat(text, text)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0      SomethingSomething
    1  very prettyvery pretty
    2      is comingis coming
    3                  ourour
    4                way.way.
    """
    left = _to_string_sequence(x)
    right = _to_string_sequence(other)
    # Wrap the concatenated sequence back into an arrow-backed string column.
    return column.ColumnStringArrow.from_string_sequence(left.concat(right))
def process(self, thread_index, i1, i2, filter_mask, ar):
    """Feed one chunk of values into this task's set.

    :param thread_index: not used by this method.
    :param i1: chunk start, forwarded to the selection mask evaluation.
    :param i2: chunk end, forwarded to the selection mask evaluation.
    :param filter_mask: not used by this method.
    :param ar: the chunk of values to add.
    :raises vaex.RowLimitException: when `unique_limit` is set and the set's
        count exceeds it after this chunk.
    """
    from vaex.column import _to_string_sequence

    if self.set is None:
        # The set is created lazily, on the first chunk that arrives.
        self.set = self.ordered_set_type()

    if self.selection:
        selection_mask = self.df.evaluate_selection_mask(self.selection, i1=i1, i2=i2, cache=True)
        ar = filter(ar, selection_mask)

    if self.dtype.is_list and self.flatten:
        ar = ar.values

    if self.dtype_item.is_string:
        ar = _to_string_sequence(ar)
    else:
        ar = vaex.array_types.to_numpy(ar)

    if np.ma.isMaskedArray(ar):
        self.set.update(ar, np.ma.getmaskarray(ar))
    else:
        self.set.update(ar)

    if self.unique_limit is None:
        return
    # null and nan are not counted here, since this is just an early bail out
    if self.set.count > self.unique_limit:
        raise vaex.RowLimitException(
            f'Resulting set would have >= {self.unique_limit} unique combinations'
        )
def str_isspace(x):
    """Check if all characters in a string sample are whitespaces.

    :returns: an expression evaluated to True if a sample contains only whitespaces, otherwise False.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', ' ', ' ']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3
      4

    >>> df.text.str.isspace()
    Expression = str_isspace(text)
    Length: 5 dtype: bool (expression)
    ----------------------------------
    0  False
    1  False
    2  False
    3   True
    4   True
    """
    strings = _to_string_sequence(x)
    return strings.isspace()
def str_isalpha(x):
    """Check if all characters in a string sample are alphabetic.

    :returns: an expression evaluated to True if a sample contains only alphabetic characters, otherwise False.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.isalpha()
    Expression = str_isalpha(text)
    Length: 5 dtype: bool (expression)
    ----------------------------------
    0   True
    1  False
    2  False
    3   True
    4  False
    """
    strings = _to_string_sequence(x)
    return strings.isalpha()
def str_zfill(x, width):
    """Pad strings in a column by prepending "0" characters.

    :param int width: The minimum length of the resulting string. Strings shorter than `width` will be prepended with zeros.
    :returns: an expression containing the modified strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.zfill(width=12)
    Expression = str_zfill(text, width=12)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0  000Something
    1  0very pretty
    2  000is coming
    3  000000000our
    4  00000000way.
    """
    strings = _to_string_sequence(x)
    # Pad with '0' on the left only (left flag True, right flag False).
    padded = strings.pad(width, '0', True, False)
    return column.ColumnStringArrow(
        padded.bytes, padded.indices, padded.length, padded.offset, string_sequence=padded
    )
def str_upper(x):
    """Converts all strings in a column to uppercase.

    :returns: an expression containing the converted strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.upper()
    Expression = str_upper(text)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0    SOMETHING
    1  VERY PRETTY
    2    IS COMING
    3          OUR
    4         WAY.
    """
    strings = _to_string_sequence(x)
    uppered = strings.upper()
    return column.ColumnStringArrow(
        uppered.bytes, uppered.indices, uppered.length, uppered.offset, string_sequence=uppered
    )
def str_startswith(x, pat):
    """Check if a start of a string matches a pattern.

    :param str pat: A string pattern. Regular expressions are not supported.
    :returns: an expression which is evaluated to True if the pattern is found at the start of a string sample, False otherwise.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.startswith(pat='is')
    Expression = str_startswith(text, pat='is')
    Length: 5 dtype: bool (expression)
    ----------------------------------
    0  False
    1  False
    2   True
    3  False
    4  False
    """
    strings = _to_string_sequence(x)
    return strings.startswith(pat)
def str_replace(x, pat, repl, n=-1, flags=0, regex=False):
    """Replace occurrences of a pattern/regex in a column with some other string.

    :param str pat: string or a regex pattern
    :param str repl: a replacement string
    :param int n: number of replacements to be made from the start. If -1 make all replacements.
    :param int flags: forwarded unchanged to the underlying replace
        (NOTE(review): presumably regex flags -- confirm).
    :param bool regex: forwarded unchanged to the underlying replace
        (NOTE(review): presumably selects regex matching for `pat` -- confirm).
    :returns: an expression containing the string replacements.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.replace(pat='et', repl='__')
    Expression = str_replace(text, pat='et', repl='__')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0    Som__hing
    1  very pr__ty
    2    is coming
    3          our
    4         way.
    """
    strings = _to_string_sequence(x)
    replaced = strings.replace(pat, repl, n, flags, regex)
    return column.ColumnStringArrow(
        replaced.bytes, replaced.indices, replaced.length, replaced.offset, string_sequence=replaced
    )
def str_repeat(x, repeats):
    """Duplicate each string in a column.

    :param int repeats: number of times each string sample is to be duplicated.
    :returns: an expression containing the duplicated strings

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.repeat(3)
    Expression = str_repeat(text, 3)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0        SomethingSomethingSomething
    1  very prettyvery prettyvery pretty
    2        is comingis comingis coming
    3                          ourourour
    4                       way.way.way.
    """
    strings = _to_string_sequence(x)
    repeated = strings.repeat(repeats)
    return column.ColumnStringArrow(
        repeated.bytes, repeated.indices, repeated.length, repeated.offset, string_sequence=repeated
    )
def str_pad(x, width, side='left', fillchar=' '):
    """Pad strings in a given column.

    :param int width: The total width of the string
    :param str side: If 'left' then pad on the left, if 'right' then pad on the right side of the string,
        if 'both' then pad on both sides. Any other value enables neither side.
    :param str fillchar: The character used for padding.
    :returns: an expression containing the padded strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.pad(width=10, side='left', fillchar='!')
    Expression = str_pad(text, width=10, side='left', fillchar='!')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0   !Something
    1  very pretty
    2   !is coming
    3   !!!!!!!our
    4   !!!!!!way.
    """
    pad_left = side in ('left', 'both')
    pad_right = side in ('right', 'both')
    padded = _to_string_sequence(x).pad(width, fillchar, pad_left, pad_right)
    return column.ColumnStringArrow(
        padded.bytes, padded.indices, padded.length, padded.offset, string_sequence=padded
    )
def str_capitalize(x):
    """Capitalize the first letter of a string sample.

    :returns: an expression containing the capitalized strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.capitalize()
    Expression = str_capitalize(text)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0    Something
    1  Very pretty
    2    Is coming
    3          Our
    4         Way.
    """
    strings = _to_string_sequence(x)
    capitalized = strings.capitalize()
    return column.ColumnStringArrow(
        capitalized.bytes, capitalized.indices, capitalized.length, capitalized.offset,
        string_sequence=capitalized
    )
def str_isupper(x):
    """Check if all characters in a string sample are uppercase characters.

    :returns: an expression evaluated to True if a sample contains only uppercase characters, otherwise False.

    Example:

    >>> import vaex
    >>> text = ['SOMETHING', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  SOMETHING
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.isupper()
    Expression = str_isupper(text)
    Length: 5 dtype: bool (expression)
    ----------------------------------
    0   True
    1  False
    2  False
    3  False
    4  False
    """
    strings = _to_string_sequence(x)
    return strings.isupper()
def str_contains(x, pattern, regex=True):
    """Check if a string pattern or regex is contained within a sample of a string column.

    :param str pattern: A string or regex pattern
    :param bool regex: forwarded unchanged to the underlying search
        (NOTE(review): presumably True means `pattern` is treated as a regular expression -- confirm).
    :returns: an expression which is evaluated to True if the pattern is found in a given sample, and it is False otherwise.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.contains('very')
    Expression = str_contains(text, 'very')
    Length: 5 dtype: bool (expression)
    ----------------------------------
    0  False
    1   True
    2  False
    3  False
    4  False
    """
    strings = _to_string_sequence(x)
    return strings.search(pattern, regex)
def str_lstrip(x, to_strip=None):
    """Remove leading characters from a string sample.

    :param str to_strip: The string to be removed. None (the default) is forwarded as an
        empty string, which the C++ implementation gives the same meaning as None. An explicit
        empty string means there is nothing to strip, so `x` is returned unchanged.
    :returns: an expression containing the modified string column.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.lstrip(to_strip='very ')
    Expression = str_lstrip(text, to_strip='very ')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0  Something
    1     pretty
    2  is coming
    3        our
    4       way.
    """
    if to_strip == '':
        # Nothing to strip. Return the input as-is instead of treating `x` as a string
        # sequence: `x` is the raw input and need not expose bytes/indices/length/offset.
        return x
    # in c++ we give empty string the same meaning as None
    sl = _to_string_sequence(x).lstrip('' if to_strip is None else to_strip)
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
def str_ljust(x, width, fillchar=' '):
    """Fills the right side of string samples with a specified character such that the strings are left-hand justified.

    :param int width: The minimal width of the strings.
    :param str fillchar: The character used for filling.
    :returns: an expression containing the filled strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.ljust(width=10, fillchar='!')
    Expression = str_ljust(text, width=10, fillchar='!')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0   Something!
    1  very pretty
    2   is coming!
    3   our!!!!!!!
    4   way.!!!!!!
    """
    strings = _to_string_sequence(x)
    # Pad on the right only (left flag False, right flag True).
    padded = strings.pad(width, fillchar, False, True)
    return column.ColumnStringArrow(
        padded.bytes, padded.indices, padded.length, padded.offset, string_sequence=padded
    )
def str_lower(x):
    """Converts string samples to lower case.

    :returns: an expression containing the converted strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.lower()
    Expression = str_lower(text)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0    something
    1  very pretty
    2    is coming
    3          our
    4         way.
    """
    strings = _to_string_sequence(x)
    lowered = strings.lower()
    return column.ColumnStringArrow(
        lowered.bytes, lowered.indices, lowered.length, lowered.offset, string_sequence=lowered
    )
def str_find(x, sub, start=0, end=None):
    """Returns the lowest indices in each string in a column, where the provided substring is fully contained within a sample. If the substring is not found, -1 is returned.

    :param str sub: A substring to be found in the samples
    :param int start: forwarded to the underlying find
        (NOTE(review): presumably the position where the search starts -- confirm).
    :param int end: forwarded to the underlying find; when None, 0 is passed together with a
        flag saying that no end was given (NOTE(review): presumably searches to the end of each string -- confirm).
    :returns: an expression containing the lowest indices specifying the start of the substring.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.find(sub="et")
    Expression = str_find(text, sub='et')
    Length: 5 dtype: int64 (expression)
    -----------------------------------
    0   3
    1   7
    2  -1
    3  -1
    4  -1
    """
    strings = _to_string_sequence(x)
    no_end_given = end is None
    # The underlying call needs an integer end; 0 is a placeholder when no end was given.
    end_position = 0 if no_end_given else end
    return strings.find(sub, start, end_position, no_end_given, True)
def str_byte_length(x):
    """Returns the number of bytes in a string sample.

    :returns: an expression contains the number of bytes in each sample of a string column.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.byte_length()
    Expression = str_byte_length(text)
    Length: 5 dtype: int64 (expression)
    -----------------------------------
    0   9
    1  11
    2   9
    3   3
    4   4
    """
    strings = _to_string_sequence(x)
    return strings.byte_length()
def str_endswith(x, pat):
    """Check if the end of each string sample matches the specified pattern.

    :param str pat: A string pattern
        (NOTE(review): regex support is not evident from this wrapper -- confirm).
    :returns: an expression evaluated to True if the pattern is found at the end of a given sample, False otherwise.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.endswith(pat="ing")
    Expression = str_endswith(text, pat='ing')
    Length: 5 dtype: bool (expression)
    ----------------------------------
    0   True
    1  False
    2   True
    3  False
    4  False
    """
    strings = _to_string_sequence(x)
    return strings.endswith(pat)
def str_count(x, pat, regex=False):
    """Count the occurrences of a pattern in sample of a string column.

    :param str pat: A string or regex pattern
    :param bool regex: forwarded unchanged to the underlying count
        (NOTE(review): presumably True means `pat` is treated as a regular expression -- confirm).
    :returns: an expression containing the number of times a pattern is found in each sample.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.count(pat="et", regex=False)
    Expression = str_count(text, pat='et', regex=False)
    Length: 5 dtype: int64 (expression)
    -----------------------------------
    0  1
    1  1
    2  0
    3  0
    4  0
    """
    strings = _to_string_sequence(x)
    return strings.count(pat, regex)
def str_match(x, pattern):
    """Check if a string sample matches a given regular expression.

    :param str pattern: a string or regex to match to a string sample.
    :returns: an expression which is evaluated to True if a match is found, False otherwise.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.match(pattern='our')
    Expression = str_match(text, pattern='our')
    Length: 5 dtype: bool (expression)
    ----------------------------------
    0  False
    1  False
    2  False
    3   True
    4  False
    """
    strings = _to_string_sequence(x)
    return strings.match(pattern)
def str_center(x, width, fillchar=' '):
    """Fills the left and right side of the strings with additional characters, such that the sample has a total of `width` characters.

    :param int width: The total number of characters of the resulting string sample.
    :param str fillchar: The character used for filling.
    :returns: an expression containing the filled strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.center(width=11, fillchar='!')
    Expression = str_center(text, width=11, fillchar='!')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0  !Something!
    1  very pretty
    2  !is coming!
    3  !!!!our!!!!
    4  !!!!way.!!!
    """
    strings = _to_string_sequence(x)
    # Pad on both sides (left flag True, right flag True).
    padded = strings.pad(width, fillchar, True, True)
    return column.ColumnStringArrow(
        padded.bytes, padded.indices, padded.length, padded.offset, string_sequence=padded
    )
def str_split(x, pattern=None):  # TODO: support n
    """Split each string in a column on a separator.

    :param str pattern: The separator to split on. None (the default) is forwarded to the
        underlying split as an empty string
        (NOTE(review): presumably that means splitting on whitespace -- confirm).
    :returns: the result of the underlying `split` call on the string sequence.
    :raises ValueError: if `pattern` is an empty string.
    """
    strings = _to_string_sequence(x)
    if isinstance(strings, vaex.strings.StringArray):
        # StringArray is converted to its arrow form before split is called on it.
        strings = strings.to_arrow()
    if pattern == '':
        raise ValueError('empty separator')
    separator = pattern if pattern is not None else ''
    return strings.split(separator)
def process(self, thread_index, i1, i2, filter_mask, ar):
    """Feed one chunk of values into `self.set`, optionally recording the inverse mapping.

    :param thread_index: not used by this method.
    :param i1: chunk start; used for the selection mask, the recorded chunk tuple and the debug log.
    :param i2: chunk end; used likewise.
    :param filter_mask: not used by this method.
    :param ar: the chunk of values to add.

    When `self.return_inverse` is set, the `(values, map_index)` pair returned by the set update
    is appended to `self.chunks` as `(i1, i2, values, map_index)`. `self._check_row_limit()` is
    called before any work, right before the update and again afterwards.
    """
    from vaex.column import _to_string_sequence

    self._check_row_limit()
    if self.selection:
        selection_mask = self.df.evaluate_selection_mask(self.selection, i1=i1, i2=i2, cache=True)
        ar = filter(ar, selection_mask)
    if len(ar) == 0:
        return
    if self.dtype.is_list and self.flatten:
        ar = ar.values
    if self.dtype_item.is_string:
        ar = _to_string_sequence(ar)
    else:
        ar = vaex.array_types.to_numpy(ar)
        # NOTE(review): numpy strides are in bytes, so this also copies every 1-d array
        # whose itemsize is larger than one byte -- confirm that is intended.
        if ar.strides != (1, ):
            ar = ar.copy()
    chunk_size = 1024 * 1024
    self._check_row_limit()

    # Masked arrays pass their mask as an extra positional argument; everything else
    # about the update call is shared between the four cases.
    if np.ma.isMaskedArray(ar):
        update_args = (ar, np.ma.getmaskarray(ar), -1)
    else:
        update_args = (ar, -1)
    update_kwargs = dict(chunk_size=chunk_size, bucket_size=chunk_size * 4)
    if self.return_inverse:
        values, map_index = self.set.update(
            *update_args, return_values=self.return_inverse, **update_kwargs)
        self.chunks.append((i1, i2, values, map_index))
    else:
        self.set.update(*update_args, **update_kwargs)

    # Only compute the set size when debug output will actually be emitted. The previous
    # `logger.level >= logging.DEBUG` test was inverted: it skipped loggers inheriting
    # DEBUG from a parent (level NOTSET == 0) and did the work for WARNING and above.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(
            f"set uses {sys.getsizeof(self.set):,} bytes (offset {i1:,}, length {i2-i1:,})"
        )
    self._check_row_limit()
def write(self, values):
    """Append a chunk of strings to the output column.

    :param values: the strings to append; an empty chunk writes no string data.

    Advances `self.to_offset` by the number of values and `self.string_byte_offset` by the
    amount reported by `fill_from`. Once `self.to_offset` reaches `self.count`, the final
    entry of the output indices is set to the total byte offset.
    """
    n_values = len(values)
    if n_values:
        source = _to_string_sequence(values)
        # Slice of the output sequence that this chunk should fill.
        target = self.to_array.string_sequence.slice(
            self.to_offset, self.to_offset + n_values, self.string_byte_offset)
        self.string_byte_offset += target.fill_from(source)
        self.to_offset += n_values
    if self.to_offset == self.count:
        # last offset
        self.to_array.indices[self.count] = self.string_byte_offset
def isin(self, values):
    """Test membership of `values` against the internal set.

    :param values: values to look up. String-like values (or any values when the item dtype
        is str) are converted to a string sequence first. For masked arrays the lookup is done
        on the underlying data and masked positions are reported as False.
    :returns: the result of `self._internal.isin`.
    """
    treat_as_strings = vaex.column._is_stringy(values) or self.dtype_item == str
    if treat_as_strings:
        string_column = vaex.column._to_string_column(values)
        return self._internal.isin(_to_string_sequence(string_column))
    if not np.ma.isMaskedArray(values):
        return self._internal.isin(values)
    found = self._internal.isin(values.data)
    # A masked entry is never considered to be in the set.
    found[values.mask] = False
    return found
def map(thread_index, i1, i2, ar):
    """Update the per-thread counter with one chunk of values.

    :param thread_index: index into `counters`; the counter is created on first use.
    :param i1: not used by this function.
    :param i2: not used by this function.
    :param ar: the chunk of values; converted to a string sequence when `dtype` is `str_type`.
    :returns: always 0.
    """
    if counters[thread_index] is None:
        counters[thread_index] = counter_type()
    counter = counters[thread_index]
    if dtype == str_type:
        original_ar = ar
        ar = _to_string_sequence(original_ar)
        if not transient:
            # For non-transient data the conversion must hand back the cached sequence.
            assert ar is original_ar.string_sequence
    if np.ma.isMaskedArray(ar):
        counter.update(ar, np.ma.getmaskarray(ar))
    else:
        counter.update(ar)
    return 0
def map(thread_index, i1, i2, ar):
    """Update the per-thread counter with one chunk of values.

    :param thread_index: index into `counters`; the counter is created on first use.
    :param i1: not used by this function.
    :param i2: not used by this function.
    :param ar: the chunk of values; list-typed data is replaced by its `.values` when
        `flatten` is set, then converted to a string sequence or a numpy array.
    :returns: always 0.
    """
    if counters[thread_index] is None:
        counters[thread_index] = counter_type()
    counter = counters[thread_index]
    if data_type.is_list and flatten:
        ar = ar.values
    if data_type_item.is_string:
        ar = _to_string_sequence(ar)
    else:
        ar = vaex.array_types.to_numpy(ar)
    if np.ma.isMaskedArray(ar):
        counter.update(ar, np.ma.getmaskarray(ar))
    else:
        counter.update(ar)
    return 0
def map(self, keys, check_missing=False):
    '''Map key values to unique integers.

    :param keys: the keys to look up; converted to a string sequence when they are not one of
        the supported array types or when this object's dtype is str, otherwise to numpy.
    :param bool check_missing: when True, the result is a masked array in which entries
        equal to -1 are masked.
    :returns: the ordinals from `self._internal.map_ordinal`; positions masked in `keys`
        are set to `self.null_value`.
    '''
    from vaex.column import _to_string_sequence

    if isinstance(keys, vaex.array_types.supported_array_types) and not self.dtype == str:
        keys = vaex.array_types.to_numpy(keys)
    else:
        # sometimes the dtype can be object, but seen as a string array
        keys = _to_string_sequence(keys)
    ordinals = self._internal.map_ordinal(keys)
    if np.ma.isMaskedArray(keys):
        ordinals[keys.mask] = self.null_value
    if not check_missing:
        return ordinals
    return np.ma.array(ordinals, mask=ordinals==-1)
def _store_values(to_array, index, values, fill_value):
    """Assign `values` to `to_array[index]`, taking masks on either side into account."""
    if not np.ma.isMaskedArray(values):
        to_array[index] = values
    elif np.ma.isMaskedArray(to_array):
        # both masked: copy data and mask separately
        to_array.data[index] = values.filled(fill_value)
        to_array.mask[index] = values.mask
    else:
        # masked values into a plain array: masked entries become the fill value
        to_array[index] = values.filled(fill_value)


def _export_column(dataset_input, dataset_output, column_name, full_mask, shuffle, sort, selection, N,
                   order_array, order_array_inverse, progress_status):
    """Copy one column from `dataset_input` into the matching column of `dataset_output`.

    The column is evaluated chunk by chunk (chunk bounds come from `vaex.utils.subdivide` with
    the module-level `max_length`) and written into the output column.

    :param dataset_input: source dataset; provides the dtype and the evaluated values.
    :param dataset_output: destination dataset; `columns[column_name]` is written to.
    :param str column_name: name of the column to copy.
    :param full_mask: boolean mask over the unfiltered rows; only used when `selection` is truthy.
    :param shuffle: if truthy (or `sort` is), the column is first built in memory and
        written to the output in one go at the end.
    :param sort: see `shuffle`.
    :param selection: if truthy, only rows where `full_mask` is set are written, packed contiguously.
    :param N: output length; only used in the debug log message.
    :param order_array: target positions for non-string values when shuffling/sorting.
    :param order_array_inverse: ordering used to reorder string columns when shuffling/sorting.
    :param progress_status: object whose `value` is increased by the chunk length (under the
        module-level `progress_lock`) and whose `cancelled` flag stops the chunk loop.

    Note that after a cancellation only the chunk loop stops; the final string index and the
    in-memory-to-disk copy below are still performed.
    """
    block_scope = dataset_input._block_scope(0, vaex.execution.buffer_size_default)
    to_array = dataset_output.columns[column_name]
    dtype = dataset_input.dtype(column_name)
    is_string = dtype == str_type
    reorder = shuffle or sort
    if reorder:
        # we need to create an in-memory copy, otherwise we will do random writes which is VERY inefficient
        to_array_disk = to_array
        if np.ma.isMaskedArray(to_array):
            to_array = np.empty_like(to_array_disk)
        elif is_string:
            # we create an empty column copy
            to_array = to_array._zeros_like()
        else:
            to_array = np.zeros_like(to_array_disk)
    to_offset = 0  # we need this for selections
    count = len(dataset_input) if not selection else dataset_input.length_unfiltered()
    # TODO: if no filter, selection or mask, we can choose the quick path for str
    string_byte_offset = 0

    for i1, i2 in vaex.utils.subdivide(count, max_length=max_length):
        logger.debug("from %d to %d (total length: %d, output length: %d)", i1, i2, len(dataset_input), N)
        block_scope.move(i1, i2)
        if selection:
            mask = full_mask[i1:i2]
            values = dataset_input.evaluate(column_name, i1=i1, i2=i2, filtered=False)
            values = values[mask]
            no_values = len(values)
            if no_values:
                if is_string:
                    assert isinstance(to_array, ColumnStringArrow)
                    from_sequence = _to_string_sequence(values)
                    to_sequence = to_array.string_sequence.slice(to_offset, to_offset + no_values, string_byte_offset)
                    string_byte_offset += to_sequence.fill_from(from_sequence)
                else:
                    fill_value = np.nan if dtype.kind == "f" else None
                    if dtype.type == np.datetime64:
                        values = values.view(np.int64)
                    _store_values(to_array, slice(to_offset, to_offset + no_values), values, fill_value)
                to_offset += no_values
        else:
            values = dataset_input.evaluate(column_name, i1=i1, i2=i2)
            if is_string:
                # for strings, we don't take sorting/shuffling into account when building the structure
                assert isinstance(to_array, ColumnStringArrow)
                from_sequence = _to_string_sequence(values)
                to_sequence = to_array.string_sequence.slice(i1, i2, string_byte_offset)
                string_byte_offset += to_sequence.fill_from(from_sequence)
            else:
                assert len(values) == (i2 - i1)
                fill_value = np.nan if dtype.kind == "f" else None
                if dtype.type == np.datetime64:
                    values = values.view(np.int64)
                # When reordering, values go to their final positions; otherwise they are
                # written in place. The in-place path used to repeat the "both masked"
                # condition in its elif, so masked values written into a plain array
                # were never filled.
                index = order_array[i1:i2] if reorder else slice(i1, i2)
                _store_values(to_array, index, values, fill_value)
        with progress_lock:
            progress_status.value += i2 - i1
        if progress_status.cancelled:
            break

    if is_string:
        # write out the last index
        last_index = to_offset if selection else count
        to_array.indices[last_index] = string_byte_offset

    if reorder:
        # write to disk in one go
        if is_string:
            # strings are sorted afterwards
            view = to_array.string_sequence.lazy_index(order_array_inverse)
            to_array_disk.string_sequence.fill_from(view)
        elif np.ma.isMaskedArray(to_array) and np.ma.isMaskedArray(to_array_disk):
            to_array_disk.data[:] = to_array.data
            to_array_disk.mask[:] = to_array.mask
        else:
            to_array_disk[:] = to_array