def _encode_complex(self, obj, context):
    """Encode a non-primitive object by trying a fixed sequence of strategies.

    The strategies are attempted in this order and the first one that
    succeeds wins:

    1. a callable ``__awkward_serialize__`` hook on ``obj`` (called with this
       encoder; errors propagate);
    2. a ``tojson`` attribute on ``obj`` paired with ``fromjson`` on its type;
    3. NumPy arrays, via ``self._encode_numpy`` (errors propagate);
    4. objects that carry ``__module__`` and a name and do not live in
       ``__main__``, encoded as ``{"function": spec}``;
    5. ``self.encode_json`` (only ``TypeError`` is treated as "not applicable");
    6. ``self.encode_python``.

    Returns:
        The encoded form, or None when every strategy fails.
    """
    if callable(getattr(obj, "__awkward_serialize__", None)):
        return obj.__awkward_serialize__(self)

    if hasattr(obj, "tojson") and hasattr(type(obj), "fromjson"):
        try:
            return self.encode_call(self._obj2spec(type(obj).fromjson),
                                    self.encode_json(obj.tojson()))
        except Exception:
            # Best effort: fall through to the next strategy.  ``Exception``
            # (not a bare ``except``) so that SystemExit/KeyboardInterrupt
            # are never swallowed.
            pass

    if isinstance(obj, numpy.ndarray):
        return self._encode_numpy(obj, context)

    if (hasattr(obj, "__module__")
            and (hasattr(obj, "__qualname__") or hasattr(obj, "__name__"))
            and obj.__module__ != "__main__"):
        try:
            return {"function": self._obj2spec(obj)}
        except Exception:
            pass

    try:
        return self.encode_json(obj)
    except TypeError:
        pass

    try:
        return self.encode_python(obj)
    except Exception:
        pass

    # Every strategy failed; the caller decides how to report it.
    return None
def typeof(obj):
    """Classify a Python object by the kind of fillable that can hold it.

    Returns:
        * None for ``None`` and for an empty dict;
        * one of the ``*Fillable`` classes for booleans, numbers, bytes,
          strings and generic iterables;
        * the set of keys for a non-empty dict;
        * ``(fields, type)`` for namedtuple-like tuples;
        * ``(set of public attribute names, type)`` for any other object.

    Raises:
        TypeError: if a dict has a key that is not a ``str``.
    """
    if obj is None:
        return None

    # bool must be tested before Number: the original ordering does the same.
    if isinstance(obj, (bool, numpy.bool_)):
        return BoolFillable
    if isinstance(obj, (numbers.Number, awkward0.numpy.number)):
        return NumberFillable
    if isinstance(obj, bytes):
        return BytesFillable
    if isinstance(obj, awkward0.util.string):
        return StringFillable

    if isinstance(obj, dict):
        if not all(isinstance(key, str) for key in obj):
            raise TypeError("only dicts with str-typed keys may be converted")
        return set(obj) if len(obj) != 0 else None

    looks_like_namedtuple = (isinstance(obj, tuple)
                             and hasattr(obj, "_fields")
                             and obj._fields is type(obj)._fields)
    if looks_like_namedtuple:
        return obj._fields, type(obj)

    if isinstance(obj, Iterable):
        return JaggedFillable

    public = set(name for name in obj.__dict__ if not name.startswith("_"))
    return public, type(obj)
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
    """Apply a NumPy ufunc to masked arrays, computing only unmasked elements.

    Every MaskedArray operand contributes its ``boolmask(maskedwhen=False)``;
    the masks are AND-ed, all array-like operands are compressed to the
    surviving positions, the ufunc is evaluated, and each array result is
    wrapped in an IndexedMaskedArray whose index is -1 at dropped positions.

    Raises:
        NotImplementedError: if ``out`` is passed.
    Returns:
        NotImplemented for any ufunc method other than ``"__call__"``.
    """
    if "out" in kwargs:
        raise NotImplementedError("in-place operations not supported")
    if method != "__call__":
        return NotImplemented

    # Positions to keep: unmasked in every MaskedArray operand.
    tokeep = None
    for operand in inputs:
        if not isinstance(operand, MaskedArray):
            continue
        operand._valid()
        unmasked = operand.boolmask(maskedwhen=False)
        tokeep = unmasked if tokeep is None else tokeep & unmasked
    assert tokeep is not None

    arraytypes = (self.numpy.ndarray, awkward0.array.base.AwkwardArray)

    def compress(operand):
        # IndexedMaskedArray must be tested before its base MaskedArray.
        if isinstance(operand, IndexedMaskedArray):
            return operand._content[operand._mask[tokeep]]
        if isinstance(operand, MaskedArray):
            return operand._content[tokeep]
        if isinstance(operand, arraytypes):
            return operand[tokeep]
        # Anything else: compress it only if it can be iterated.
        try:
            for _ in operand:
                break
        except TypeError:
            return operand
        return self.numpy.array(operand, copy=False)[tokeep]

    selected = [compress(operand) for operand in inputs]

    # compute only the non-masked elements
    result = getattr(ufunc, method)(*selected, **kwargs)

    # put the masked out values back
    index = self.numpy.full(len(tokeep), -1, dtype=self.INDEXTYPE)
    index[tokeep] = self.numpy.arange(self.numpy.count_nonzero(tokeep))

    def rewrap(values):
        wrapper = self.Methods.maybemixin(type(values), IndexedMaskedArray)
        return wrapper(index, values, maskedwhen=-1)

    if isinstance(result, tuple):
        return tuple(rewrap(x) if isinstance(x, arraytypes) else x
                     for x in result)
    return rewrap(result)
def _prepare(self, ufunc, identity, dtype):
    """Produce a dense array for a reduction, with masked slots set to the identity.

    Args:
        ufunc: the reducing function; only forwarded to nested ``_prepare``.
        identity: value placed at masked positions.  ``+inf``/``-inf`` are
            replaced by True/False for boolean data and by the integer
            type's max/min for integer data.
        dtype: requested dtype, or None to infer it from the content
            (boolean content takes the dtype of ``type(identity)``).

    Returns:
        For Table content, a Table whose columns were prepared one by one;
        otherwise an array shaped like the mask (plus the content's trailing
        dimensions), filled with ``identity`` and overwritten with the
        content at unmasked positions.
    """
    if isinstance(self._content, awkward0.array.table.Table):
        out = self._content.copy(contents={})
        for n, x in self._content._contents.items():
            out[n] = self.copy(content=x)._prepare(ufunc, identity, dtype)
        return out

    if isinstance(self._content, self.numpy.ndarray):
        if dtype is None and issubclass(self._content.dtype.type,
                                        (bool, self.numpy.bool_)):
            dtype = self.numpy.dtype(type(identity))
        if dtype is None:
            content = self._content
        else:
            content = self._content.astype(dtype)
    else:
        content = self._content._prepare(ufunc, identity, dtype)

    # dtype can still be None here (no dtype requested, non-boolean content).
    # The identity substitution below dereferences ``dtype.type``, so fall
    # back to the dtype of the prepared content, which is also the dtype of
    # the output array.
    if dtype is None:
        dtype = content.dtype

    if identity == self.numpy.inf:
        if issubclass(dtype.type, (bool, self.numpy.bool_)):
            identity = True
        elif self._util_isintegertype(dtype.type):
            identity = self.numpy.iinfo(dtype.type).max
    elif identity == -self.numpy.inf:
        if issubclass(dtype.type, (bool, self.numpy.bool_)):
            identity = False
        elif self._util_isintegertype(dtype.type):
            identity = self.numpy.iinfo(dtype.type).min

    out = self.numpy.full(self._mask.shape + content.shape[1:], identity,
                          dtype=content.dtype)
    out[self.isunmasked] = content[self.mask[self.mask >= 0]]
    return out
def __call__(self, obj, context=""):
    """Encode ``obj``, emitting a back-reference for objects already seen.

    Primitives (anything ``self._encode_primitive`` handles) are returned
    directly.  Otherwise an identifier is read from ``self.seen[obj]``
    immediately after ``obj`` was found to be absent, so ``self.seen`` is
    presumably a mapping that assigns identifiers on first lookup.

    Returns:
        The primitive encoding, ``{"ref": ident}`` for a repeated object, or
        the complex encoding with an ``"id"`` entry added if it had none.

    Raises:
        TypeError: if ``self._encode_complex`` returns None.
        RuntimeError: if the encoding carries an ``"id"`` that is neither
            False nor the identifier recorded in ``self.seen``.
    """
    primitive = self._encode_primitive(obj)
    if primitive is not None:
        return primitive

    if obj in self.seen:
        return {"ref": self.seen[obj]}

    ident = self.seen[obj]
    encoded = self._encode_complex(obj, context)
    if encoded is None:
        raise TypeError("failed to encode {0} (type: {1})".format(
            repr(obj), type(obj)))

    if "id" not in encoded:
        encoded["id"] = ident
    elif encoded["id"] is False:
        # The encoding opted out of identity tracking: forget the object.
        del self.seen[obj]
    elif encoded["id"] != self.seen[obj]:
        raise RuntimeError("unexpected id change")
    return encoded
def _prepare(self, ufunc, identity, dtype):
    """Concatenate the non-empty chunks into one flat array for a reduction.

    The output dtype is ``dtype`` when given; otherwise it is taken from the
    first non-empty chunk (boolean chunks use the dtype of
    ``type(identity)``).

    Returns:
        The concatenated array, or a one-element array holding ``identity``
        (dtype ``self.DEFAULTTYPE`` unless ``dtype`` is given) when no chunk
        has a positive size.
    """
    self.knowchunksizes()

    out = None
    pos = 0
    for chunkid, chunk in enumerate(self._chunks):
        size = self._chunksizes[chunkid]
        if not size > 0:
            continue

        this = chunk[:size]
        if out is None:
            # First non-empty chunk: settle the dtype and allocate once.
            if dtype is None and issubclass(this.dtype.type,
                                            (bool, self.numpy.bool_)):
                dtype = self.numpy.dtype(type(identity))
            if dtype is None:
                dtype = this.dtype
            shape = (sum(self._chunksizes), ) + this.shape[1:]
            out = self.numpy.empty(shape, dtype=dtype)

        newpos = pos + this.shape[0]
        out[pos:newpos] = this
        pos = newpos

    if out is not None:
        return out

    if dtype is None:
        dtype = self.DEFAULTTYPE
    return self.numpy.array([identity], dtype=dtype)
def _topandas(self, seen):
    """Return a pandas-mixin VirtualArray standing in for this array.

    ``seen`` maps ``id(array)`` to the already-converted object so that an
    array reachable through several paths is converted only once.  The new
    VirtualArray generates its content lazily through ``self._topandas_doit``.
    """
    import awkward0.pandas

    key = id(self)
    if key in seen:
        return seen[key]

    out = self.VirtualArray(self._topandas_doit, (self,),
                            cache=self.cache,
                            persistentkey=self.persistentkey,
                            type=self.type,
                            nbytes=self.nbytes,
                            persistvirtual=self.persistvirtual)
    seen[key] = out
    out.__class__ = awkward0.pandas.mixin(type(self))
    return out
def maybemixin(sample, awkwardtype):
    """Return ``awkwardtype``, carrying over ``sample``'s Methods bases if any.

    When ``sample`` is not a ``Methods`` subclass, ``awkwardtype`` is returned
    unchanged.  Otherwise a new class named ``<awkwardtype>Methods`` is built
    from the direct bases of ``sample`` that are not AwkwardArray subclasses,
    followed by ``awkwardtype``.
    """
    if not issubclass(sample, Methods):
        return awkwardtype

    assert issubclass(sample, awkward0.array.base.AwkwardArray)

    bases = [base for base in sample.__bases__
             if not issubclass(base, awkward0.array.base.AwkwardArray)]
    bases.append(awkwardtype)
    return type(awkwardtype.__name__ + "Methods", tuple(bases), {})
def _topandas(self, seen):
    """Return a pandas-mixin copy of this array with its contents converted.

    ``seen`` maps ``id(array)`` to the already-converted object.  The copy is
    registered in ``seen`` before the contents are visited, so a reference
    back to this array resolves to the same copy.  Contents that are not
    AwkwardArrays are kept as they are.
    """
    import awkward0.pandas

    key = id(self)
    if key in seen:
        return seen[key]

    out = self.copy()
    seen[key] = out
    out.__class__ = awkward0.pandas.mixin(type(self))

    converted = []
    for item in out._contents:
        if isinstance(item, awkward0.array.base.AwkwardArray):
            item = item._topandas(seen)
        converted.append(item)
    out._contents = converted
    return out
def _topandas(self, seen):
    """Return a pandas-mixin copy of this array with its named contents converted.

    ``seen`` maps ``id(array)`` to the already-converted object.  The copy is
    registered in ``seen`` before the contents are visited, so a reference
    back to this array resolves to the same copy.  Contents that are not
    AwkwardArrays are kept as they are; the key order is preserved.
    """
    import awkward0.pandas

    key = id(self)
    if key in seen:
        return seen[key]

    out = self.copy()
    seen[key] = out
    out.__class__ = awkward0.pandas.mixin(type(self))

    converted = OrderedDict()
    for name, item in out._contents.items():
        if isinstance(item, awkward0.array.base.AwkwardArray):
            item = item._topandas(seen)
        converted[name] = item
    out._contents = converted
    return out
def _prepare(self, ufunc, identity, dtype):
    """Prepare the materialized array (``self.array``) for a reduction.

    NumPy content is returned as is when no dtype applies, or cast otherwise;
    boolean content with no requested dtype takes the dtype of
    ``type(identity)``.  Any other content is delegated to its own
    ``_prepare``.
    """
    array = self.array
    if not isinstance(array, self.numpy.ndarray):
        return array._prepare(ufunc, identity, dtype)

    if dtype is None and issubclass(array.dtype.type,
                                    (bool, self.numpy.bool_)):
        dtype = self.numpy.dtype(type(identity))

    return array if dtype is None else array.astype(dtype)
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
    """Apply a NumPy ufunc to table rows.

    Each ``Table.Row`` operand is replaced by the length-1 slice of its table
    at the row's index.  If no operand is a non-Row iterable, Table results
    are converted back to a Row (index 0) with ``_showdict`` enabled on the
    row's table; otherwise the raw result is returned.

    Raises:
        NotImplementedError: if ``out`` is passed.
    Returns:
        NotImplemented for any ufunc method other than ``"__call__"``.
    """
    if "out" in kwargs:
        raise NotImplementedError("in-place operations not supported")
    if method != "__call__":
        return NotImplemented

    # A single row comes back only when every operand is a Row or a scalar.
    torow = all(isinstance(x, Table.Row) or not isinstance(x, Iterable)
                for x in inputs)

    operands = [
        x._table[x._index:x._index + 1] if isinstance(x, Table.Row) else x
        for x in inputs
    ]

    result = getattr(ufunc, method)(*operands, **kwargs)
    if not torow:
        return result

    def firstrow(table):
        rowtype = awkward0.array.objects.Methods.maybemixin(
            type(table), self._table.Table.Row)
        row = rowtype(table, 0)
        row._table._showdict = True
        return row

    if isinstance(result, tuple):
        return tuple(firstrow(x) if isinstance(x, Table) else x
                     for x in result)
    return firstrow(result)
def dense(self):
    """Expand the array into a plain NumPy array filled with ``self.default``.

    The output has ``self.shape`` and ``self.dtype``; positions selected by
    ``boolmask(maskedwhen=True)`` are overwritten with the matching content
    (looked up through ``self._inverse``).

    Raises:
        NotImplementedError: if the content is not a NumPy array.
    """
    self._valid()

    if not isinstance(self._content, self.numpy.ndarray):
        raise NotImplementedError(type(self._content))

    out = self.numpy.full(self.shape, self.default, dtype=self.dtype)
    if len(self._index) != 0:
        mask = self.boolmask(maskedwhen=True)
        out[mask] = self._content[self._inverse[mask]]
    return out
def _prepare(self, ufunc, identity, dtype):
    """Prepare every column of the table for a reduction.

    Args:
        ufunc: the reducing function; only forwarded to nested ``_prepare``.
        identity: the reduction's identity; its Python type chooses the dtype
            for boolean columns when ``dtype`` is None.
        dtype: requested dtype for all columns, or None to keep each
            column's own dtype.

    Returns:
        A copy of the table (made with ``self.copy(contents={})``) holding
        the prepared columns under the same names.
    """
    out = self.copy(contents={})
    for n, x in self._contents.items():
        if isinstance(x, self.numpy.ndarray):
            # Decide the dtype per column.  Rebinding ``dtype`` itself would
            # make a boolean column's promoted dtype leak into every later
            # column (e.g. a float column after a bool column would be cast
            # to an integer type).
            coldtype = dtype
            if coldtype is None and issubclass(x.dtype.type,
                                               (bool, self.numpy.bool_)):
                coldtype = self.numpy.dtype(type(identity))
            if coldtype is not None:
                x = x.astype(coldtype)
        else:
            x = x._prepare(ufunc, identity, dtype)
        out[n] = x
    return out
def __getattr__(self, where):
    """Resolve unknown attributes as column names.

    Names listed by ``dir(super(AwkwardArray, self))`` are looked up through
    the superclass; any other name must be one of ``self.columns`` and is
    fetched with ``self[where]``.

    Raises:
        AttributeError: if there is no such column, or if fetching the column
            raised (the original error's type and text go into the message).
    """
    if where in dir(super(AwkwardArray, self)):
        return super(AwkwardArray, self).__getattribute__(where)

    if where not in self.columns:
        raise AttributeError("no column named {0}".format(repr(where)))

    try:
        return self[where]
    except Exception as err:
        raise AttributeError(
            "while trying to get column {0}, an exception occurred:\n{1}: {2}"
            .format(repr(where), type(err), str(err)))
def _prepare(self, ufunc, identity, dtype):
    """Produce a dense array for a reduction, with masked slots neutralized.

    Table content is prepared column by column.  For NumPy content:

    * ``ufunc is None``: a float32 array of zeros with NaN where the content
      is NaN; masked positions also become NaN;
    * ``ufunc is numpy.count_nonzero``: an int8 array of ones with zeros where
      the content is NaN or zero;
    * otherwise the content itself, cast to ``dtype`` when one applies
      (boolean content defaults to the dtype of ``type(identity)``).

    Unless ``ufunc is None``, masked positions are set to ``identity``, with
    ``+inf``/``-inf`` replaced by True/False for boolean data and by the
    integer type's max/min for integer data.  The returned array never
    aliases ``self._content``.
    """
    if isinstance(self._content, awkward0.array.table.Table):
        out = self._content.copy(contents={})
        for n, x in self._content._contents.items():
            out[n] = self.copy(content=x)._prepare(ufunc, identity, dtype)
        return out

    np = self.numpy
    booltypes = (bool, np.bool_)
    source = self._content

    if not isinstance(source, np.ndarray):
        content = source._prepare(ufunc, identity, dtype)
    else:
        if dtype is None and issubclass(source.dtype.type, booltypes):
            dtype = np.dtype(type(identity))

        if ufunc is None:
            content = np.zeros(source.shape, dtype=np.float32)
            content[np.isnan(source)] = np.nan
        elif ufunc is np.count_nonzero:
            content = np.ones(source.shape, dtype=np.int8)
            content[np.isnan(source)] = 0
            content[source == 0] = 0
        elif dtype is None:
            content = source
        else:
            content = source.astype(dtype)

    # Masked slots are overwritten below: never write into the original
    # content or into a view of someone else's buffer.
    if content is source or not content.flags.owndata:
        content = content.copy()

    if ufunc is None:
        content[self.ismasked] = np.nan
        return content

    dtype = content.dtype
    if identity == np.inf:
        if issubclass(dtype.type, booltypes):
            identity = True
        elif self._util_isintegertype(dtype.type):
            identity = np.iinfo(dtype.type).max
    elif identity == -np.inf:
        if issubclass(dtype.type, booltypes):
            identity = False
        elif self._util_isintegertype(dtype.type):
            identity = np.iinfo(dtype.type).min

    content[self.ismasked] = identity
    return content
def valid(self, exception=False, message=False):
    """Run ``self._valid()`` and report the outcome.

    Args:
        exception: if true, re-raise whatever ``_valid`` raised.
        message: if true (and ``exception`` is false), report as text.

    Returns:
        With ``message``: None when valid, otherwise ``"<type>: <text>"``.
        Without ``message``: True when valid, False otherwise.
    """
    try:
        self._valid()
    except Exception as err:
        if exception:
            raise err
        if message:
            return "{0}: {1}".format(type(err), str(err))
        return False

    return None if message else True
def _concatenate_axis0(cls, tables):
    """Concatenate tables row-wise; all must have the same set of fields.

    The result is a deep copy of the first table with empty contents, filled
    column by column.  NumPy columns use ``cls.numpy.concatenate``; any other
    column type uses its own ``concatenate``, chosen from the column type in
    the first table.

    Raises:
        ValueError: if two adjacent tables differ in their field names.
    """
    for left, right in zip(tables[:-1], tables[1:]):
        if set(left._contents) != set(right._contents):
            raise ValueError(
                "cannot concatenate Tables with different fields")

    first = tables[0]
    out = first.deepcopy(contents=OrderedDict())
    for n in first._contents:
        content_type = type(first._contents[n])
        if content_type == cls.numpy.ndarray:
            joiner = cls.numpy.concatenate
        else:
            joiner = content_type.concatenate
        out._contents[n] = joiner([t[n] for t in tables], axis=0)

    out._valid()
    return out
def _normalize_arrays(cls, arrays):
    """Convert a mix of scalars, flat arrays and jagged arrays to aligned float64 arrays.

    Scalars are broadcast to the length of the first iterable argument and
    every argument goes through ``awkward0.util.toarray`` with float64.  If
    any argument is a JaggedArray, the first one's ``starts``/``stops`` become
    the reference structure and every argument that is not a JaggedArray with
    exactly that structure is added to a zero-filled jagged array of the
    reference structure.

    Raises:
        TypeError: if no argument is iterable.

    Returns:
        A list with one normalized array per input argument.
    """
    awk = cls.awkward0

    length = None
    for candidate in arrays:
        if isinstance(candidate, Iterable):
            length = len(candidate)
            break
    if length is None:
        raise TypeError(
            "cannot construct an array if all arguments are scalar")

    arrays = list(arrays)
    jaggedtype = [awk.JaggedArray] * len(arrays)
    starts, stops = None, None
    for i, item in enumerate(arrays):
        isjagged = isinstance(item, awk.JaggedArray)
        if starts is None and isjagged:
            starts, stops = item.starts, item.stops
        if isjagged:
            jaggedtype[i] = type(item)
        if not isinstance(item, Iterable):
            item = awk.numpy.full(length, item)
        arrays[i] = awk.util.toarray(item, awk.numpy.float64)

    if starts is None:
        return arrays

    for i, item in enumerate(arrays):
        aligned = (isinstance(item, awk.JaggedArray)
                   and awk.numpy.array_equal(starts, item.starts)
                   and awk.numpy.array_equal(stops, item.stops))
        if not aligned:
            content = awk.numpy.zeros(stops.max(), dtype=awk.numpy.float64)
            # invoke jagged broadcasting to align arrays
            arrays[i] = jaggedtype[i](starts, stops, content) + item

    return arrays
def __getitem__(self, where):
    """Index the bit-masked array.

    * A string slice selects from the content and returns a new array that
      shares this one's attributes.
    * An empty tuple returns ``self``.
    * An integer returns ``self.masked`` for a masked element, otherwise the
      content element (further indexed by the remaining items of ``where``).
    * Anything else returns a copy with the selected mask and content.

    Raises:
        ValueError: if further indices are applied to a masked element.
    """
    self._valid()

    if self._util_isstringslice(where):
        content = self._content[where]
        cls = awkward0.array.objects.Methods.maybemixin(
            type(content), self.BitMaskedArray)
        out = cls.__new__(cls)
        out.__dict__.update(self.__dict__)
        out._content = content
        return out

    if isinstance(where, tuple):
        if len(where) == 0:
            return self
    else:
        where = (where, )
    head, tail = where[0], where[1:]

    if self._util_isinteger(head):
        if self._maskwhere(head) == self._maskedwhen:
            if tail != ():
                raise ValueError(
                    "masked element ({0}) is not subscriptable".format(
                        self.masked))
            return self.masked
        return self._content[(head, ) + tail]

    mask = self._maskwhere(head)
    if tail != ():
        # With maskedwhen=True any set bit is a masked element; with
        # maskedwhen=False any cleared bit is.
        hitsmasked = mask.any() if self._maskedwhen else not mask.all()
        if hitsmasked:
            raise ValueError(
                "masked element ({0}) is not subscriptable".format(
                    self.masked))

    return self.copy(mask=self.bool2bit(mask, lsborder=self._lsborder),
                     content=self._content[(head, ) + tail],
                     lsborder=self._lsborder)
def _prepare(self, ufunc, identity, dtype):
    """Flatten the union into one NumPy array for a reduction.

    The output dtype is ``dtype`` when given, the dtype of ``type(identity)``
    for a boolean union, and ``self.dtype`` otherwise.  Each content is
    prepared recursively when it is not already a NumPy array, then copied
    into the positions carrying its tag.

    Raises:
        TypeError: if a content is still not a NumPy array after preparation.

    Returns:
        The flattened array, or None when the union has no contents.
    """
    if dtype is None and issubclass(self.dtype.type,
                                    (bool, self.numpy.bool_)):
        dtype = self.numpy.dtype(type(identity))
    if dtype is None:
        dtype = self.dtype

    index = self._index[:len(self._tags)]
    out = None
    for tag, content in enumerate(self._contents):
        if not isinstance(content, self.numpy.ndarray):
            content = content._prepare(ufunc, identity, dtype)
        if not isinstance(content, self.numpy.ndarray):
            raise TypeError(
                "cannot reduce a UnionArray of non-primitive type")

        selected = (self._tags == tag)
        values = content[index[selected]]
        if out is None:
            shape = self._tags.shape[:1] + values.shape[1:]
            out = self.numpy.full(shape, identity, dtype=dtype)
        out[selected] = values

    return out
def __getitem__(self, where):
    """Index the indexed-masked array.

    * A string slice selects from the content and returns a new array that
      shares this one's attributes.
    * An empty tuple returns ``self``.
    * An integer returns ``self.masked`` when the mask entry equals
      ``self._maskedwhen``, otherwise the content element the mask entry
      points to (further indexed by the remaining items of ``where``).
    * Anything else returns a copy whose mask is the selected mask entries.

    Raises:
        ValueError: if further indices are applied to a masked element.
    """
    self._valid()

    if self._util_isstringslice(where):
        content = self._content[where]
        cls = awkward0.array.objects.Methods.maybemixin(
            type(content), self.IndexedMaskedArray)
        out = cls.__new__(cls)
        out.__dict__.update(self.__dict__)
        out._content = content
        return out

    if isinstance(where, tuple):
        if len(where) == 0:
            return self
    else:
        where = (where, )
    head, tail = where[0], where[1:]

    scalar = self._util_isinteger(head)
    maskindex = self._mask[head]

    if scalar:
        if maskindex == self._maskedwhen:
            if tail != ():
                raise ValueError(
                    "masked element ({0}) is not subscriptable".format(
                        self.masked))
            return self.masked
        return self._content[(maskindex, ) + tail]

    if tail != () and (maskindex == self._maskedwhen).any():
        raise ValueError(
            "masked element ({0}) is not subscriptable".format(self.masked))

    # NOTE(review): ``tail`` is not applied to the returned copy here.
    return self.copy(mask=maskindex)
def __getitem__(self, where):
    """Index the array through its index.

    * A string slice selects from the content and returns a new array that
      shares this one's attributes.
    * An empty tuple returns ``self``.
    * Otherwise the leading items of ``where`` (one per index dimension)
      select entries of ``self._index``; those entries, followed by the
      remaining items, index the content.  An empty selection yields an empty
      array of the content's dtype, indexed by the remaining items.
    """
    self._valid()

    if self._util_isstringslice(where):
        content = self._content[where]
        cls = awkward0.array.objects.Methods.maybemixin(
            type(content), self.IndexedArray)
        out = cls.__new__(cls)
        out.__dict__.update(self.__dict__)
        out._content = content
        return out

    if isinstance(where, tuple):
        if len(where) == 0:
            return self
    else:
        where = (where, )

    ndim = len(self._index.shape)
    tail = where[ndim:]
    head = self._index[where[:ndim]]

    if len(head.shape) != 0 and len(head) == 0:
        return self.numpy.empty(0, dtype=self._content.dtype)[tail]
    return self._content[(head, ) + tail]
def concatenate(isclassmethod, cls_or_self, arrays, axis=0):
    """Concatenate arrays, usable both as a classmethod and as an instance method.

    Args:
        isclassmethod: True when ``cls_or_self`` is the class; otherwise it is
            an instance, which is prepended to ``arrays``.
        cls_or_self: the class or the instance the call was made on.
        arrays: the arrays to concatenate (at least one).
        axis: 0 or 1.

    Returns:
        * ``numpy.concatenate`` of the inputs when all are plain NumPy arrays;
        * a UnionArray (axis 0 only) when the inputs resolve to different
          types;
        * otherwise the result of the first array type's
          ``_concatenate_axis0`` / ``_concatenate_axis1``.

    Raises:
        ValueError: if ``arrays`` is empty.
        NotImplementedError: for ``axis > 0`` with mixed types, or ``axis > 1``.
    """
    if len(arrays) < 1:
        raise ValueError("at least one array needed to concatenate")

    if isclassmethod:
        cls = cls_or_self
    else:
        cls = type(cls_or_self)
        arrays = (cls_or_self, ) + tuple(arrays)

    def resolve(t):
        # Only the first base is ever followed; a type without bases
        # resolves to None.
        bases = t.__bases__
        if len(bases) == 0:
            return None
        if issubclass(t, AwkwardArray):
            return resolve(bases[0])
        return t

    if all(type(x) == cls.numpy.ndarray for x in arrays):
        return cls.numpy.concatenate(arrays, axis=axis)

    reference = resolve(type(arrays[0]))
    if not all(resolve(type(x)) == reference for x in arrays):
        if axis != 0:
            raise NotImplementedError("axis > 0 for different types")
        tags = cls.numpy.concatenate([
            cls.numpy.full(len(x), i, dtype=cls.TAGTYPE)
            for i, x in enumerate(arrays)
        ])
        return cls.UnionArray.fget(None).fromtags(tags, arrays)

    # NOTE(review): the boolean returned by valid() is discarded here.
    for x in arrays:
        x.valid()

    if axis == 0:
        return type(arrays[0])._concatenate_axis0(arrays)
    if axis == 1:
        return type(arrays[0])._concatenate_axis1(arrays)
    raise NotImplementedError("axis > 1")
def mixin(methods, awkwardtype):
    """Build a class named ``<awkwardtype>Methods`` from ``methods`` and ``awkwardtype``.

    ``methods`` must not be an AwkwardArray subclass and ``awkwardtype`` must
    not already be a ``Methods`` subclass; both are checked with assertions.
    """
    assert not issubclass(methods, awkward0.array.base.AwkwardArray)
    assert not issubclass(awkwardtype, Methods)

    name = awkwardtype.__name__ + "Methods"
    bases = (methods, awkwardtype)
    return type(name, bases, {})
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
    """Apply a NumPy ufunc chunk by chunk.

    All ChunkedArray operands must be aligned with the first one.  For each
    chunk, ChunkedArray operands contribute that chunk, other arrays are
    sliced to the chunk's range and everything else is passed unchanged.
    Array results are collected per output position and wrapped in a
    ChunkedArray.

    Raises:
        NotImplementedError: if ``out`` is passed.
        ValueError: if the ChunkedArray operands are not aligned.
    Returns:
        NotImplemented for any ufunc method other than ``"__call__"``; None
        when the ufunc produced no array result.
    """
    if "out" in kwargs:
        raise NotImplementedError("in-place operations not supported")
    if method != "__call__":
        return NotImplemented

    chunked = []
    for x in inputs:
        if isinstance(x, ChunkedArray):
            x._valid()
            chunked.append(x)
    assert len(chunked) != 0
    first, rest = chunked[0], chunked[1:]

    if not all(first._aligned(x) for x in rest):
        # FIXME: we may need to handle a more general case if ChunkedArrays are inside other Awkward types
        # perhaps split at the largest possible slices such that all of them are one chunk each, and then unpack the single chunk after slicing
        raise ValueError(
            "ChunkedArrays can only be combined if they have the same chunk sizes"
        )

    arraytypes = (self.numpy.ndarray, awkward0.array.base.AwkwardArray)

    # Build every per-chunk argument list before evaluating anything.
    batches = []
    for i, slc in enumerate(first._slices()):
        batch = []
        for x in inputs:
            if isinstance(x, ChunkedArray):
                batch.append(x._chunks[i])
            elif isinstance(x, arraytypes):
                batch.append(x[slc])
            else:
                batch.append(x)
        batches.append(batch)

    out = None
    chunks = {}        # output position (None for a single result) -> pieces
    piecetypes = {}    # output position -> type of the latest piece
    for batch in batches:
        result = getattr(ufunc, method)(*batch, **kwargs)
        if isinstance(result, tuple):
            if out is None:
                out = list(result)
            for i, x in enumerate(result):
                if isinstance(x, arraytypes):
                    chunks.setdefault(i, []).append(x)
                    piecetypes[i] = type(x)
        elif isinstance(result, arraytypes):
            chunks.setdefault(None, []).append(result)
            piecetypes[None] = type(result)

    if out is None:
        if None not in chunks:
            return None
        wrapper = self.Methods.maybemixin(piecetypes[None], ChunkedArray)
        return wrapper(chunks[None])

    for i in range(len(out)):
        if i in chunks:
            wrapper = self.Methods.maybemixin(piecetypes[i], ChunkedArray)
            out[i] = wrapper(chunks[i])
    return tuple(out)
def __getitem__(self, where):
    """Index the chunked array.

    ``where`` may be a string slice (applied to every chunk), an empty tuple
    (returns ``self``), or an index whose first item is another ChunkedArray,
    an integer, a slice, or something convertible to a 1-d integer or boolean
    NumPy array.  Remaining items (``tail``) are applied to the selected
    chunks in most branches.

    Raises:
        ValueError: for a ChunkedArray index with different chunk sizes, or a
            slice with step zero.
        IndexError: if a boolean index does not have ``len(self)`` entries.
        NotImplementedError: for a non-monotonic integer index with a
            non-empty ``tail`` when the type is not NumPy-like.
        TypeError: if the index is not a 1-d integer or boolean array.
    """
    import awkward0.array.virtual
    self._valid()

    # String slice: select from every chunk and keep the chunk structure.
    if self._util_isstringslice(where):
        chunks = []
        chunksizes = []
        for chunk in self._chunks:
            chunks.append(chunk[where])
            chunksizes.append(len(chunks[-1]))
        if len(chunks) == 0:
            return self.copy(chunks=chunks, chunksizes=chunksizes)
        else:
            return awkward0.array.objects.Methods.maybemixin(
                type(chunks[0]), self.ChunkedArray)(chunks,
                                                    chunksizes=chunksizes)

    if isinstance(where, tuple) and len(where) == 0:
        return self
    if not isinstance(where, tuple):
        where = (where, )
    head, tail = where[0], where[1:]

    # ChunkedArray used as an index: apply it chunk by chunk.
    if isinstance(head, ChunkedArray):
        if not self._aligned(head):
            raise ValueError(
                "A ChunkedArray can only be used as a slice of a ChunkedArray if they have the same chunk sizes"
            )
        chunks = []
        chunksizes = []
        for c, h in zip(self.chunks, head.chunks):
            # VirtualArray chunks are replaced by their ``.array`` first.
            if isinstance(c, awkward0.array.virtual.VirtualArray):
                c = c.array
            if isinstance(h, awkward0.array.virtual.VirtualArray):
                h = h.array
            chunks.append(c[(h, ) + tail])
            chunksizes.append(len(chunks[-1]))
        return self.copy(chunks=chunks, chunksizes=chunksizes)

    if self._util_isinteger(head):
        # Single element: translate the global index into (chunk, local index).
        chunk, localhead = self.global2local(head)
        return chunk[(localhead, ) + tail]

    elif isinstance(head, slice):
        # Cases A and B resolve the slice bounds without calling len(self);
        # case C falls back to slice.indices(len(self)).
        if head.step == 0:
            raise ValueError("slice step cannot be zero")
        elif (head.start is None or head.start >= 0) and (
                head.stop is not None
                and head.stop >= 0) and (head.step is None or head.step > 0):
            # case A
            start, stop, step = head.start, head.stop, head.step
            if start is None:
                start = 0
            if step is None:
                step = 1
        elif (head.start is not None and head.start >= 0) and (
                head.stop is None
                or head.stop >= 0) and (head.step is not None
                                        and head.step < 0):
            # case B
            start, stop, step = head.start, head.stop, head.step
            if stop is None:
                stop = -1
        else:
            # case C (requires potentially expensive len(self))
            start, stop, step = head.indices(len(self))

        # if step > 0, stop can be len(self)
        # if step < 0, stop can be -1 (not a Python "negative index", but an
        # indicator to go all the way to 0)

        if start == -1:
            # case C start below 0
            start_chunkid = -1
        else:
            try:
                start_chunkid = self.global2chunkid(start)
            except IndexError:
                if start >= 0:
                    # case A or B start was set beyond len(self), clamp it
                    start, start_chunkid = len(self), len(self._chunks)
                if step < 0:
                    start -= 1
                    start_chunkid -= 1

        if stop == -1:
            # case B or C stop not set with step < 0; go all the way to 0
            stop_chunkid = -1
        else:
            try:
                stop_chunkid = self.global2chunkid(stop)
            except IndexError:
                # stop is at or beyond len(self), clamp it
                stop = len(self)
            if step > 0:
                # we want the chunkid at or to the right of stop (no -1)
                stop_chunkid = min(
                    self.numpy.searchsorted(self.offsets, stop, "right"),
                    len(self._chunks))
            else:
                # we want the chunkid to the left of stop
                stop_chunkid = max(
                    self.numpy.searchsorted(self.offsets, stop, "right") - 2,
                    -1)

        offsets = self.offsets
        chunks = []
        # ``skip`` carries the stride remainder from one chunk into the next.
        skip = 0
        for chunkid in range(start_chunkid, stop_chunkid,
                             1 if step > 0 else -1):
            # set the local_start
            if chunkid == start_chunkid:
                local_start = start - offsets[chunkid]
            else:
                if step > 0:
                    local_start = skip
                else:
                    local_start = self._chunksizes[chunkid] - 1 - skip

            if local_start < 0:
                # skip is bigger than this entire chunk
                skip -= self._chunksizes[chunkid]
                continue

            # set the local_stop and new skip
            if chunkid == stop_chunkid - (1 if step > 0 else -1):
                if stop == -1:
                    local_stop = None
                else:
                    local_stop = stop - offsets[chunkid]
            else:
                local_stop = None

            if step > 0:
                skip = (local_start - self._chunksizes[chunkid]) % step
            else:
                skip = (-1 - local_start) % -step

            # add a sliced chunk
            chunk = self._chunks[chunkid][(slice(local_start, local_stop,
                                                 step), )]
            if len(chunk) > 0:
                chunk = chunk[(slice(None), ) + tail]
                if len(chunk) > 0:
                    chunks.append(chunk)

        if len(chunks) == 0 and len(self._chunks) > 0:
            chunks.append(
                self._chunks[0][(slice(0, 0), ) +
                                tail])  # so that sliced.type == self.type

        return self.copy(chunks=chunks)

    else:
        head = self.numpy.array(head, copy=False)

        if len(head.shape) == 1 and self._util_isintegertype(
                head.dtype.type):
            # Integer (fancy) index.
            if len(head) == 0 and len(self._chunks) == 0:
                return self.copy(chunks=[])[tail]
            elif len(head) == 0:
                return self.copy(
                    chunks=[self._chunks[0][(slice(0, 0), ) + tail]])

            chunkid, head = self.global2chunkid(head, return_normalized=True)

            diff = (chunkid[1:] - chunkid[:-1])
            if (diff >= 0).all():
                # Chunk ids never decrease: split the index at the points
                # where the chunk id changes and index each chunk once.
                # NOTE(review): ``tail`` is not applied in this branch.
                diff2 = self.numpy.empty(len(chunkid), dtype=self.INDEXTYPE)
                diff2[0] = 1
                diff2[1:] = diff
                mask = (diff2 > 0)
                offsets = list(
                    self.numpy.nonzero(mask)[0]) + [len(chunkid)]
                chunks = []
                for i, cid in enumerate(chunkid[mask]):
                    localindex = head[
                        offsets[i]:offsets[i + 1]] - self.offsets[cid]
                    chunks.append(self._chunks[cid][localindex])
                return self.copy(chunks=chunks)

            elif self._util_isnumpy(self.type):
                # Arbitrary order over NumPy-like data: gather into one
                # flat output array, one source chunk at a time.
                out = self.numpy.empty((len(head), ) + self.type.shape[1:],
                                       dtype=self.type.dtype)
                self.knowchunksizes(chunkid.max() + 1)
                offsets = self.offsets
                for cid in self.numpy.unique(chunkid):
                    mask = (chunkid == cid)
                    out[mask] = self._chunks[cid][head[mask] - offsets[cid]]
                if tail == ():
                    return out
                else:
                    return out[(slice(None), ) + tail]

            elif tail == ():
                return self.IndexedArray(head, self)

            else:
                raise NotImplementedError

        elif len(head.shape) == 1 and issubclass(head.dtype.type,
                                                 (bool, self.numpy.bool_)):
            # Boolean mask: apply the matching slice of the mask to each chunk.
            if len(self) != len(head):
                raise IndexError(
                    "boolean index did not match indexed array along dimension 0; dimension is {0} but corresponding boolean dimension is {1}"
                    .format(len(self), len(head)))

            chunks = []
            for chunk, slc in zip(self._chunks, self._slices()):
                x = chunk[head[slc]]
                if len(x) > 0:
                    x = x[(slice(None), ) + tail]
                    if len(x) > 0:
                        chunks.append(x)

            return self.copy(chunks=chunks)

        else:
            raise TypeError(
                "cannot interpret shape {0}, dtype {1} as a fancy index or mask"
                .format(head.shape, head.dtype))
def __init__(self, *args, **kwds):
    """Refuse instantiation: the class is an abstract base class.

    Raises:
        TypeError: always; the message names ``type(self)``.
    """
    message = "{0} is an abstract base class; do not instantiate".format(
        type(self))
    raise TypeError(message)
def toparquet(where, obj, **options):
    """Write an array, a ChunkedArray or an iterable of arrays to a Parquet file.

    Args:
        where: destination passed to ``pyarrow.parquet.ParquetWriter`` as its
            ``where`` argument.
        obj: an AwkwardArray or NumPy array (written as one table), a
            ChunkedArray (one table per chunk), or any iterable of such
            arrays (one table per item).
        **options: forwarded to ``ParquetWriter``.  A missing ``schema`` is
            taken from the first table written.

    Raises:
        ValueError: if a ChunkedArray or iterable yields no items.
        TypeError: if ``obj`` is not iterable, or an item is not an array.
    """
    import pyarrow.parquet

    options["where"] = where

    def convert(obj, message):
        # Convert one array to a pyarrow.Table; a bare Arrow array becomes a
        # single-column table whose column name is the empty string.
        if isinstance(obj, (awkward0.array.base.AwkwardArray, numpy.ndarray)):
            out = toarrow(obj)
            if isinstance(out, pyarrow.Table):
                return out
            else:
                return pyarrow.Table.from_batches(
                    [pyarrow.RecordBatch.from_arrays([out], [""])])
        else:
            raise TypeError(message)

    def write_tables(first, rest, describe):
        # Write the converted table ``first`` and then every item of ``rest``.
        # The writer is closed even if the very first write fails.
        writer = pyarrow.parquet.ParquetWriter(**options)
        try:
            writer.write_table(first)
            for item in rest:
                writer.write_table(convert(item, describe(item)))
        finally:
            writer.close()

    def write_stream(items, describe):
        # Write all items of an iterator; the first one provides the schema
        # unless the caller supplied one.
        try:
            awkitem = next(items)
        except StopIteration:
            raise ValueError("iterable is empty")
        arritem = convert(awkitem, describe(awkitem))
        if "schema" not in options:
            options["schema"] = arritem.schema
        write_tables(arritem, items, describe)

    def no_message(item):
        return None

    def iterator_message(item):
        return "cannot write iterator of {0} to Parquet file".format(
            type(item))

    # ChunkedArray is tested first; the single-array test below also accepts
    # any AwkwardArray.
    if isinstance(obj, awkward0.array.chunked.ChunkedArray):
        write_stream(iter(obj.chunks), no_message)

    elif isinstance(obj, (awkward0.array.base.AwkwardArray, numpy.ndarray)):
        arritem = convert(obj, None)
        # NOTE(review): unlike the streaming branches, a caller-supplied
        # ``schema`` is overwritten here; kept as in the original.
        options["schema"] = arritem.schema
        write_tables(arritem, (), no_message)

    else:
        try:
            obj = iter(obj)
        except TypeError:
            raise TypeError("cannot write {0} to Parquet file".format(
                type(obj)))
        write_stream(obj, iterator_message)
def check_2_tuple_contents(two_tuple, one, two):
    """Assert that ``two_tuple`` is exactly a 2-tuple equal to ``(one, two)``.

    Each element is compared with ``==`` and the flattened comparison result
    must be all true, so the elements are expected to be array-like.
    """
    assert type(two_tuple) is tuple
    assert len(two_tuple) == 2
    for actual, expected in zip(two_tuple, (one, two)):
        assert all((actual == expected).flatten())