Exemplo n.º 1
0
    def _encode_complex(self, obj, context):
        """Encode a non-primitive object by trying several strategies in order.

        The first strategy that produces a result wins:

        1. ``obj.__awkward_serialize__(self)``, if that attribute is callable;
        2. a ``tojson``/``fromjson`` pair, encoded as a call to ``fromjson``;
        3. ``self._encode_numpy`` for ``numpy.ndarray`` instances;
        4. ``{"function": spec}`` for objects that have a ``__module__`` other
           than ``"__main__"`` and a ``__qualname__`` or ``__name__``;
        5. ``self.encode_json``;
        6. ``self.encode_python``.

        Failures in strategies 2, 4 and 6 (any ``Exception``) and a
        ``TypeError`` from strategy 5 fall through to the next strategy.
        Non-``Exception`` signals such as ``KeyboardInterrupt`` and
        ``SystemExit`` are never swallowed.

        Returns the encoded object, or None if every strategy failed.
        """
        if callable(getattr(obj, "__awkward_serialize__", None)):
            return obj.__awkward_serialize__(self)

        if hasattr(obj, "tojson") and hasattr(type(obj), "fromjson"):
            try:
                return self.encode_call(self._obj2spec(type(obj).fromjson),
                                        self.encode_json(obj.tojson()))
            except Exception:
                # best effort: fall through to the next strategy
                pass

        if isinstance(obj, numpy.ndarray):
            return self._encode_numpy(obj, context)

        has_name = hasattr(obj, "__qualname__") or hasattr(obj, "__name__")
        if (hasattr(obj, "__module__") and has_name
                and obj.__module__ != "__main__"):
            try:
                return {"function": self._obj2spec(obj)}
            except Exception:
                # best effort: fall through to the next strategy
                pass

        try:
            return self.encode_json(obj)
        except TypeError:
            pass

        try:
            return self.encode_python(obj)
        except Exception:
            pass

        # every strategy failed
        return None
Exemplo n.º 2
0
def typeof(obj):
    """Classify ``obj`` for filling.

    Returns:
        * ``None`` for ``None`` and for an empty dict;
        * ``BoolFillable``, ``NumberFillable``, ``BytesFillable`` or
          ``StringFillable`` for the corresponding scalar kinds;
        * the set of keys for a non-empty dict;
        * ``(fields, type)`` for a tuple whose ``_fields`` is the one
          defined on its type;
        * ``JaggedFillable`` for any other iterable;
        * ``(set of attribute names not starting with "_", type)`` for
          everything else.

    Raises:
        TypeError: if a dict has a key that is not a ``str``.
    """
    if obj is None:
        return None

    # the bool check comes before the generic number check
    if isinstance(obj, (bool, numpy.bool_)):
        return BoolFillable
    if isinstance(obj, (numbers.Number, awkward0.numpy.number)):
        return NumberFillable
    if isinstance(obj, bytes):
        return BytesFillable
    if isinstance(obj, awkward0.util.string):
        return StringFillable

    if isinstance(obj, dict):
        if not all(isinstance(key, str) for key in obj):
            raise TypeError("only dicts with str-typed keys may be converted")
        return set(obj) if len(obj) != 0 else None

    if (isinstance(obj, tuple) and hasattr(obj, "_fields")
            and obj._fields is type(obj)._fields):
        return obj._fields, type(obj)

    if isinstance(obj, Iterable):
        return JaggedFillable

    public = {name for name in obj.__dict__ if not name.startswith("_")}
    return public, type(obj)
Exemplo n.º 3
0
    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        """Apply a NumPy ufunc to masked arrays, computing unmasked items only.

        The ufunc is evaluated only at positions that are unmasked in every
        ``MaskedArray`` input.  Array results are wrapped in an
        ``IndexedMaskedArray`` whose index is -1 at every other position.

        Raises NotImplementedError if ``out`` is given.  Returns
        NotImplemented for any ufunc method other than ``"__call__"``.
        """
        if "out" in kwargs:
            raise NotImplementedError("in-place operations not supported")

        if method != "__call__":
            return NotImplemented

        # positions that are unmasked in every MaskedArray input
        tokeep = None
        for x in inputs:
            if isinstance(x, MaskedArray):
                x._valid()
                unmasked = x.boolmask(maskedwhen=False)
                tokeep = unmasked if tokeep is None else tokeep & unmasked

        assert tokeep is not None

        arraytypes = (self.numpy.ndarray, awkward0.array.base.AwkwardArray)

        inputs = list(inputs)
        for i, x in enumerate(inputs):
            if isinstance(x, IndexedMaskedArray):
                inputs[i] = x._content[x._mask[tokeep]]
            elif isinstance(x, MaskedArray):
                inputs[i] = x._content[tokeep]
            elif isinstance(x, arraytypes):
                inputs[i] = x[tokeep]
            else:
                # probe for iterability; non-iterables are passed through
                try:
                    for first in x:
                        break
                except TypeError:
                    pass
                else:
                    # asarray copies only when needed; array(..., copy=False)
                    # raises in NumPy 2 whenever a copy is unavoidable
                    inputs[i] = self.numpy.asarray(x)[tokeep]

        # compute only the non-masked elements
        result = getattr(ufunc, method)(*inputs, **kwargs)

        # put the masked out values back
        index = self.numpy.full(len(tokeep), -1, dtype=self.INDEXTYPE)
        index[tokeep] = self.numpy.arange(self.numpy.count_nonzero(tokeep))

        def wrap(x):
            cls = self.Methods.maybemixin(type(x), IndexedMaskedArray)
            return cls(index, x, maskedwhen=-1)

        # method is always "__call__" here, so no "at" case is needed
        if isinstance(result, tuple):
            return tuple(
                wrap(x) if isinstance(x, arraytypes) else x for x in result)
        return wrap(result)
Exemplo n.º 4
0
    def _prepare(self, ufunc, identity, dtype):
        """Build a dense array for a reduction, filling masked slots.

        Table content is prepared column by column and returned as a copy
        of the table.  Otherwise the content is cast to ``dtype`` (boolean
        content with no requested dtype is cast to the type of
        ``identity``), or prepared recursively when it is not a NumPy array.
        The result has one entry per mask position: ``identity`` where
        ``self.isunmasked`` is false and the indexed content elsewhere.

        An ``identity`` of +inf/-inf is replaced by True/False for boolean
        data and by the integer maximum/minimum for integer data.
        """
        if isinstance(self._content, awkward0.array.table.Table):
            out = self._content.copy(contents={})
            for n, x in self._content._contents.items():
                out[n] = self.copy(content=x)._prepare(ufunc, identity, dtype)
            return out

        if isinstance(self._content, self.numpy.ndarray):
            if dtype is None and issubclass(self._content.dtype.type,
                                            (bool, self.numpy.bool_)):
                dtype = self.numpy.dtype(type(identity))
            if dtype is None:
                content = self._content
            else:
                content = self._content.astype(dtype)
        else:
            content = self._content._prepare(ufunc, identity, dtype)

        # ``dtype`` may still be None here, so resolve the identity against
        # the dtype of the array that is actually being filled
        if identity == self.numpy.inf:
            kind = content.dtype.type
            if issubclass(kind, (bool, self.numpy.bool_)):
                identity = True
            elif self._util_isintegertype(kind):
                identity = self.numpy.iinfo(kind).max

        elif identity == -self.numpy.inf:
            kind = content.dtype.type
            if issubclass(kind, (bool, self.numpy.bool_)):
                identity = False
            elif self._util_isintegertype(kind):
                identity = self.numpy.iinfo(kind).min

        out = self.numpy.full(self._mask.shape + content.shape[1:],
                              identity,
                              dtype=content.dtype)
        out[self.isunmasked] = content[self.mask[self.mask >= 0]]
        return out
Exemplo n.º 5
0
    def __call__(self, obj, context=""):
        """Encode ``obj``, emitting a reference for objects already seen.

        Primitives (anything ``self._encode_primitive`` handles) are returned
        directly.  An object already in ``self.seen`` is encoded as
        ``{"ref": <id>}``.  Otherwise the object is encoded with
        ``self._encode_complex`` and tagged with an ``"id"``.

        Raises:
            TypeError: if ``self._encode_complex`` returns None.
            RuntimeError: if the encoding carries an ``"id"`` that differs
                from the one recorded in ``self.seen``.
        """
        primitive = self._encode_primitive(obj)
        if primitive is not None:
            return primitive

        if obj in self.seen:
            return {"ref": self.seen[obj]}

        # NOTE(review): presumably looking up a missing key makes self.seen
        # assign a fresh identifier -- confirm against its definition
        ident = self.seen[obj]

        encoded = self._encode_complex(obj, context)
        if encoded is None:
            raise TypeError("failed to encode {0} (type: {1})".format(
                repr(obj), type(obj)))

        if "id" not in encoded:
            encoded["id"] = ident
        elif encoded["id"] is False:
            # the encoding opted out of being referenced
            del self.seen[obj]
        elif encoded["id"] != self.seen[obj]:
            raise RuntimeError("unexpected id change")

        return encoded
Exemplo n.º 6
0
    def _prepare(self, ufunc, identity, dtype):
        """Copy all chunks into one contiguous NumPy array for a reduction.

        Only the first ``self._chunksizes[i]`` items of each chunk are used,
        and chunks whose size is not positive are skipped.  The output dtype
        is ``dtype`` when given; otherwise it is the type of ``identity`` for
        boolean data, or the dtype of the first non-empty chunk.

        If there is no non-empty chunk, a one-element array holding
        ``identity`` is returned (with ``self.DEFAULTTYPE`` when no dtype
        was requested).
        """
        self.knowchunksizes()

        flat = None
        cursor = 0
        for chunkid, chunk in enumerate(self._chunks):
            size = self._chunksizes[chunkid]
            if not size > 0:
                continue

            piece = chunk[:size]
            if flat is None:
                # the first non-empty chunk fixes dtype and trailing shape
                if dtype is None:
                    if issubclass(piece.dtype.type, (bool, self.numpy.bool_)):
                        dtype = self.numpy.dtype(type(identity))
                    else:
                        dtype = piece.dtype
                total = (sum(self._chunksizes), )
                flat = self.numpy.empty(total + piece.shape[1:], dtype=dtype)

            end = cursor + piece.shape[0]
            flat[cursor:end] = piece
            cursor = end

        if flat is not None:
            return flat

        if dtype is None:
            dtype = self.DEFAULTTYPE
        return self.numpy.array([identity], dtype=dtype)
Exemplo n.º 7
0
 def _topandas(self, seen):
     """Return a pandas-mixin VirtualArray that defers to ``_topandas_doit``.

     ``seen`` maps ``id(array)`` to its already-converted counterpart, so
     converting the same object again returns the earlier result.
     """
     import awkward0.pandas

     key = id(self)
     if key in seen:
         return seen[key]

     virtual = self.VirtualArray(self._topandas_doit, (self,),
                                 cache=self.cache,
                                 persistentkey=self.persistentkey,
                                 type=self.type,
                                 nbytes=self.nbytes,
                                 persistvirtual=self.persistvirtual)
     seen[key] = virtual
     virtual.__class__ = awkward0.pandas.mixin(type(self))
     return virtual
Exemplo n.º 8
0
 def maybemixin(sample, awkwardtype):
     """Return ``awkwardtype``, carrying over the methods mixed into ``sample``.

     If ``sample`` is not a ``Methods`` subclass, ``awkwardtype`` is returned
     unchanged.  Otherwise a new class named ``<awkwardtype>Methods`` is
     created from the bases of ``sample`` that are not AwkwardArray
     subclasses, followed by ``awkwardtype``.
     """
     if not issubclass(sample, Methods):
         return awkwardtype

     assert issubclass(sample, awkward0.array.base.AwkwardArray)
     mixins = tuple(
         base for base in sample.__bases__
         if not issubclass(base, awkward0.array.base.AwkwardArray))
     return type(awkwardtype.__name__ + "Methods",
                 mixins + (awkwardtype, ), {})
Exemplo n.º 9
0
 def _topandas(self, seen):
     """Return a pandas-mixin copy of this array, converting its contents.

     ``seen`` maps ``id(array)`` to its already-converted counterpart, so
     converting the same object again returns the earlier result.  Contents
     that are not AwkwardArrays are kept as they are.
     """
     import awkward0.pandas

     key = id(self)
     if key in seen:
         return seen[key]

     converted = self.copy()
     # register before recursing so nested conversions can find this copy
     seen[key] = converted
     converted.__class__ = awkward0.pandas.mixin(type(self))

     contents = []
     for x in converted._contents:
         if isinstance(x, awkward0.array.base.AwkwardArray):
             x = x._topandas(seen)
         contents.append(x)
     converted._contents = contents
     return converted
Exemplo n.º 10
0
 def _topandas(self, seen):
     """Return a pandas-mixin copy of this array, converting its columns.

     ``seen`` maps ``id(array)`` to its already-converted counterpart, so
     converting the same object again returns the earlier result.  Columns
     that are not AwkwardArrays are kept as they are, and column order is
     preserved.
     """
     import awkward0.pandas

     key = id(self)
     if key in seen:
         return seen[key]

     converted = self.copy()
     # register before recursing so nested conversions can find this copy
     seen[key] = converted
     converted.__class__ = awkward0.pandas.mixin(type(self))

     contents = OrderedDict()
     for n, x in converted._contents.items():
         if isinstance(x, awkward0.array.base.AwkwardArray):
             x = x._topandas(seen)
         contents[n] = x
     converted._contents = contents
     return converted
Exemplo n.º 11
0
 def _prepare(self, ufunc, identity, dtype):
     """Prepare ``self.array`` for a reduction.

     A NumPy array is cast to ``dtype``; boolean data with no requested
     dtype is cast to the type of ``identity``; otherwise the array is
     returned as it is.  Any other array type is asked to prepare itself.
     """
     materialized = self.array
     if not isinstance(materialized, self.numpy.ndarray):
         return materialized._prepare(ufunc, identity, dtype)

     if dtype is None and issubclass(materialized.dtype.type,
                                     (bool, self.numpy.bool_)):
         dtype = self.numpy.dtype(type(identity))

     return materialized if dtype is None else materialized.astype(dtype)
Exemplo n.º 12
0
        def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
            """Apply a NumPy ufunc to table rows.

            Each ``Table.Row`` input is replaced by a length-1 slice of its
            table before the ufunc is called.  If no other input is
            iterable, Table results are converted back into rows (index 0 of
            the resulting table); otherwise the result is returned as it is.

            Raises NotImplementedError if ``out`` is given.  Returns
            NotImplemented for any ufunc method other than ``"__call__"``.
            """
            if "out" in kwargs:
                raise NotImplementedError("in-place operations not supported")

            if method != "__call__":
                return NotImplemented

            # produce a Row only if every iterable input is itself a Row
            torow = not any(
                not isinstance(x, Table.Row) and isinstance(x, Iterable)
                for x in inputs)

            inputs = [
                x._table[x._index:x._index + 1]
                if isinstance(x, Table.Row) else x for x in inputs
            ]

            result = getattr(ufunc, method)(*inputs, **kwargs)

            if not torow:
                return result

            def asrow(table):
                cls = awkward0.array.objects.Methods.maybemixin(
                    type(table), self._table.Table.Row)
                row = cls(table, 0)
                row._table._showdict = True
                return row

            # method is always "__call__" here, so no "at" case is needed
            if isinstance(result, tuple):
                return tuple(
                    asrow(x) if isinstance(x, Table) else x for x in result)
            return asrow(result)
Exemplo n.º 13
0
    def dense(self):
        """Return a NumPy array of ``self.shape`` with defaults filled in.

        Positions selected by ``self.boolmask(maskedwhen=True)`` take their
        values from the content (through ``self._inverse``); all other
        positions hold ``self.default``.

        Raises:
            NotImplementedError: if the content is not a NumPy array.
        """
        self._valid()

        if not isinstance(self._content, self.numpy.ndarray):
            raise NotImplementedError(type(self._content))

        filled = self.numpy.full(self.shape, self.default, dtype=self.dtype)
        if len(self._index) != 0:
            present = self.boolmask(maskedwhen=True)
            filled[present] = self._content[self._inverse[present]]
        return filled
Exemplo n.º 14
0
 def _prepare(self, ufunc, identity, dtype):
     """Prepare every column for a reduction and return them in a copy.

     NumPy columns are cast to ``dtype`` when one is requested.  With no
     requested dtype, boolean columns are cast to the type of ``identity``
     and other NumPy columns are left as they are.  Columns of any other
     type are asked to prepare themselves.
     """
     out = self.copy(contents={})
     for n, x in self._contents.items():
         if isinstance(x, self.numpy.ndarray):
             # keep the inferred dtype local to this column, so a boolean
             # column does not change how later columns are cast
             coldtype = dtype
             if coldtype is None and issubclass(x.dtype.type,
                                                (bool, self.numpy.bool_)):
                 coldtype = self.numpy.dtype(type(identity))
             if coldtype is not None:
                 x = x.astype(coldtype)
         else:
             x = x._prepare(ufunc, identity, dtype)
         out[n] = x
     return out
Exemplo n.º 15
0
 def __getattr__(self, where):
     """Resolve an attribute, falling back to column lookup.

     Names that the parent class exposes are fetched from it.  Any other
     name is treated as a column: ``self[where]`` is returned if ``where``
     is in ``self.columns``.

     Raises:
         AttributeError: if there is no such column, or if reading the
             column raised an exception (whose type and message are
             included in the error text).
     """
     parent = super(AwkwardArray, self)
     if where in dir(parent):
         return parent.__getattribute__(where)

     if where not in self.columns:
         raise AttributeError("no column named {0}".format(repr(where)))

     try:
         return self[where]
     except Exception as err:
         raise AttributeError(
             "while trying to get column {0}, an exception occurred:\n{1}: {2}"
             .format(repr(where), type(err), str(err)))
Exemplo n.º 16
0
    def _prepare(self, ufunc, identity, dtype):
        """Build a dense array for a reduction, overwriting masked slots.

        Table content is prepared column by column and returned as a copy
        of the table.  For NumPy content:

        * ``ufunc is None``: a float32 array of zeros, NaN where the content
          is NaN;
        * ``ufunc is numpy.count_nonzero``: an int8 array of ones, 0 where
          the content is NaN or zero;
        * otherwise the content, cast to ``dtype`` when one applies (boolean
          content with no requested dtype uses the type of ``identity``).

        Other content types prepare themselves.  Masked positions are then
        set to NaN (``ufunc is None``) or to ``identity``; an identity of
        +inf/-inf becomes True/False for boolean data and the integer
        maximum/minimum for integer data.  The returned array is never the
        stored content itself.
        """
        raw = self._content

        if isinstance(raw, awkward0.array.table.Table):
            out = raw.copy(contents={})
            for n, x in raw._contents.items():
                out[n] = self.copy(content=x)._prepare(ufunc, identity, dtype)
            return out

        if not isinstance(raw, self.numpy.ndarray):
            content = raw._prepare(ufunc, identity, dtype)
        else:
            if dtype is None and issubclass(raw.dtype.type,
                                            (bool, self.numpy.bool_)):
                dtype = self.numpy.dtype(type(identity))

            if ufunc is None:
                content = self.numpy.zeros(raw.shape, dtype=self.numpy.float32)
                content[self.numpy.isnan(raw)] = self.numpy.nan
            elif ufunc is self.numpy.count_nonzero:
                content = self.numpy.ones(raw.shape, dtype=self.numpy.int8)
                content[self.numpy.isnan(raw)] = 0
                content[raw == 0] = 0
            elif dtype is None:
                content = raw
            else:
                content = raw.astype(dtype)

        # never write into the stored content or into a view of another array
        if content is raw or not content.flags.owndata:
            content = content.copy()

        if ufunc is None:
            content[self.ismasked] = self.numpy.nan
            return content

        kind = content.dtype.type
        if identity == self.numpy.inf:
            if issubclass(kind, (bool, self.numpy.bool_)):
                identity = True
            elif self._util_isintegertype(kind):
                identity = self.numpy.iinfo(kind).max
        elif identity == -self.numpy.inf:
            if issubclass(kind, (bool, self.numpy.bool_)):
                identity = False
            elif self._util_isintegertype(kind):
                identity = self.numpy.iinfo(kind).min

        content[self.ismasked] = identity
        return content
Exemplo n.º 17
0
 def valid(self, exception=False, message=False):
     """Report whether ``self._valid()`` succeeds.

     Args:
         exception: if true, re-raise the validation error.
         message: if true (and ``exception`` is false), return a
             ``"<type>: <text>"`` string on failure and None on success.

     Returns:
         True/False by default; None or an error string when ``message``
         is true.
     """
     try:
         self._valid()
     except Exception as err:
         if exception:
             raise err
         if message:
             return "{0}: {1}".format(type(err), str(err))
         return False

     return None if message else True
Exemplo n.º 18
0
    def _concatenate_axis0(cls, tables):
        """Concatenate tables with identical field names, column by column.

        The result is a deep copy of the first table whose columns are the
        axis-0 concatenation of the corresponding columns of every table.
        NumPy columns use ``numpy.concatenate``; any other column type uses
        the ``concatenate`` of the first table's column type.

        Raises:
            ValueError: if the tables do not all have the same set of fields.
        """
        first = tables[0]
        expected = set(first._contents)
        for table in tables:
            if set(table._contents) != expected:
                raise ValueError(
                    "cannot concatenate Tables with different fields")

        out = first.deepcopy(contents=OrderedDict())

        for n, column in first._contents.items():
            content_type = type(column)
            if content_type == cls.numpy.ndarray:
                joiner = cls.numpy.concatenate
            else:
                joiner = content_type.concatenate
            out._contents[n] = joiner([t[n] for t in tables], axis=0)

        out._valid()
        return out
Exemplo n.º 19
0
def _normalize_arrays(cls, arrays):
    """Convert the arguments to arrays of a common length and structure.

    Scalars are expanded to the length of the first iterable argument and
    everything is passed through ``cls.awkward0.util.toarray``.  If any
    argument is a JaggedArray, every argument that is not a JaggedArray
    with the same starts and stops as the first one is aligned to it by
    adding it to a jagged array of zeros.

    Raises:
        TypeError: if none of the arguments is iterable.
    """
    length = next(
        (len(array) for array in arrays if isinstance(array, Iterable)), None)
    if length is None:
        raise TypeError(
            "cannot construct an array if all arguments are scalar")

    awk = cls.awkward0

    arrays = list(arrays)
    jaggedtype = [awk.JaggedArray] * len(arrays)
    starts = stops = None
    for i, array in enumerate(arrays):
        if isinstance(array, awk.JaggedArray):
            # the first JaggedArray defines the reference structure
            if starts is None:
                starts, stops = array.starts, array.stops
            jaggedtype[i] = type(array)

        if not isinstance(array, Iterable):
            array = awk.numpy.full(length, array)

        arrays[i] = awk.util.toarray(array, awk.numpy.float64)

    if starts is None:
        return arrays

    for i, array in enumerate(arrays):
        aligned = (isinstance(array, awk.JaggedArray)
                   and awk.numpy.array_equal(starts, array.starts)
                   and awk.numpy.array_equal(stops, array.stops))
        if not aligned:
            zeros = awk.numpy.zeros(stops.max(), dtype=awk.numpy.float64)
            # invoke jagged broadcasting to align arrays
            arrays[i] = jaggedtype[i](starts, stops, zeros) + array

    return arrays
Exemplo n.º 20
0
    def __getitem__(self, where):
        """Select from a bit-masked array.

        * A string slice is applied to the content; the result shares this
          array's other attributes.
        * An empty tuple returns ``self``.
        * An integer returns ``self.masked`` for a masked position and the
          content item otherwise.
        * Anything else returns a copy with the selected mask and content.

        Raises:
            ValueError: if further indexes are given and a selected element
                is masked.
        """
        self._valid()

        if self._util_isstringslice(where):
            content = self._content[where]
            cls = awkward0.array.objects.Methods.maybemixin(
                type(content), self.BitMaskedArray)
            out = cls.__new__(cls)
            out.__dict__.update(self.__dict__)
            out._content = content
            return out

        if not isinstance(where, tuple):
            where = (where, )
        elif len(where) == 0:
            return self
        head, tail = where[0], where[1:]

        def notsubscriptable():
            return ValueError(
                "masked element ({0}) is not subscriptable".format(
                    self.masked))

        if self._util_isinteger(head):
            if self._maskwhere(head) == self._maskedwhen:
                if tail != ():
                    raise notsubscriptable()
                return self.masked
            return self._content[(head, ) + tail]

        mask = self._maskwhere(head)
        if tail != ():
            # is any selected element masked?
            if self._maskedwhen:
                anymasked = mask.any()
            else:
                anymasked = not mask.all()
            if anymasked:
                raise notsubscriptable()

        return self.copy(mask=self.bool2bit(mask, lsborder=self._lsborder),
                         content=self._content[(head, ) + tail],
                         lsborder=self._lsborder)
Exemplo n.º 21
0
    def _prepare(self, ufunc, identity, dtype):
        """Merge the union's contents into one NumPy array for a reduction.

        The output dtype is ``dtype`` when given; otherwise the type of
        ``identity`` for boolean data, or ``self.dtype``.  For each tag, the
        items selected by ``self._index`` are copied from that tag's content
        into the positions carrying the tag.  Returns None if there are no
        contents.

        Raises:
            TypeError: if a content is not a NumPy array even after being
                prepared.
        """
        if dtype is None:
            if issubclass(self.dtype.type, (bool, self.numpy.bool_)):
                dtype = self.numpy.dtype(type(identity))
            else:
                dtype = self.dtype

        merged = None
        index = self._index[:len(self._tags)]
        for tag, content in enumerate(self._contents):
            if not isinstance(content, self.numpy.ndarray):
                content = content._prepare(ufunc, identity, dtype)
                if not isinstance(content, self.numpy.ndarray):
                    raise TypeError("cannot reduce a UnionArray of non-primitive type")

            selected = (self._tags == tag)
            values = content[index[selected]]

            if merged is None:
                # the first content fixes the trailing shape
                shape = self._tags.shape[:1] + values.shape[1:]
                merged = self.numpy.full(shape, identity, dtype=dtype)
            merged[selected] = values

        return merged
Exemplo n.º 22
0
    def __getitem__(self, where):
        """Select from an index-masked array.

        * A string slice is applied to the content; the result shares this
          array's other attributes.
        * An empty tuple returns ``self``.
        * An integer returns ``self.masked`` when the mask entry equals
          ``self._maskedwhen``, else the content item the entry points to.
        * Anything else returns a copy with the selected mask entries.

        Raises:
            ValueError: if further indexes are given and a selected element
                is masked.
        """
        self._valid()

        if self._util_isstringslice(where):
            content = self._content[where]
            cls = awkward0.array.objects.Methods.maybemixin(
                type(content), self.IndexedMaskedArray)
            out = cls.__new__(cls)
            out.__dict__.update(self.__dict__)
            out._content = content
            return out

        if not isinstance(where, tuple):
            where = (where, )
        elif len(where) == 0:
            return self
        head, tail = where[0], where[1:]

        def notsubscriptable():
            return ValueError(
                "masked element ({0}) is not subscriptable".format(
                    self.masked))

        scalar = self._util_isinteger(head)
        maskindex = self._mask[head]

        if scalar:
            if maskindex == self._maskedwhen:
                if tail != ():
                    raise notsubscriptable()
                return self.masked
            return self._content[(maskindex, ) + tail]

        if tail != () and (maskindex == self._maskedwhen).any():
            raise notsubscriptable()
        return self.copy(mask=maskindex)
Exemplo n.º 23
0
    def __getitem__(self, where):
        """Select from an indexed array.

        * A string slice is applied to the content; the result shares this
          array's other attributes.
        * An empty tuple returns ``self``.
        * Otherwise the leading indexes (as many as the index has
          dimensions) select entries of ``self._index``, which in turn
          select from the content together with any remaining indexes.  An
          empty selection yields an empty array of the content's dtype.
        """
        self._valid()

        if self._util_isstringslice(where):
            content = self._content[where]
            cls = awkward0.array.objects.Methods.maybemixin(
                type(content), self.IndexedArray)
            out = cls.__new__(cls)
            out.__dict__.update(self.__dict__)
            out._content = content
            return out

        if not isinstance(where, tuple):
            where = (where, )
        elif len(where) == 0:
            return self

        ndim = len(self._index.shape)
        head, tail = where[:ndim], where[ndim:]

        picked = self._index[head]
        if len(picked.shape) != 0 and len(picked) == 0:
            return self.numpy.empty(0, dtype=self._content.dtype)[tail]
        return self._content[(picked, ) + tail]
Exemplo n.º 24
0
    def concatenate(isclassmethod, cls_or_self, arrays, axis=0):
        """Concatenate arrays along axis 0 or 1.

        Args:
            isclassmethod: true if ``cls_or_self`` is the class; otherwise
                it is an instance, which is prepended to ``arrays``.
            cls_or_self: the class or the instance this was called on.
            arrays: the arrays to concatenate (at least one).
            axis: 0 or 1.

        Plain NumPy arrays go to ``numpy.concatenate``.  Arrays whose
        resolved types differ are combined into a UnionArray (axis 0 only).
        Otherwise the work is delegated to the ``_concatenate_axis0`` or
        ``_concatenate_axis1`` of the first array's type.

        Raises:
            ValueError: if ``arrays`` is empty.
            NotImplementedError: for ``axis`` above 1, or for a non-zero
                axis with arrays of different types.
        """
        if len(arrays) < 1:
            raise ValueError("at least one array needed to concatenate")

        if isclassmethod:
            cls = cls_or_self
        else:
            self = cls_or_self
            cls = type(self)
            arrays = (self, ) + tuple(arrays)

        def resolve(t):
            # NOTE(review): the test is on ``t`` itself, not on its bases, so
            # any AwkwardArray subclass is followed through its first base
            # until a class that is not an AwkwardArray subclass is reached.
            # Confirm that this is the intended notion of "same type".
            if t.__bases__ and issubclass(t, AwkwardArray):
                return resolve(t.__bases__[0])
            return t

        if all(type(x) == cls.numpy.ndarray for x in arrays):
            return cls.numpy.concatenate(arrays, axis=axis)

        reference = resolve(type(arrays[0]))
        sametype = all(resolve(type(x)) == reference for x in arrays)
        if not sametype:
            if axis != 0:
                raise NotImplementedError("axis > 0 for different types")
            # tag every element with the position of the array it came from
            tags = cls.numpy.concatenate([
                cls.numpy.full(len(x), i, dtype=cls.TAGTYPE)
                for i, x in enumerate(arrays)
            ])
            return cls.UnionArray.fget(None).fromtags(tags, arrays)

        # NOTE(review): the value returned by valid() is discarded; if it
        # reports problems through its return value, nothing is checked
        # here.  Confirm the intent.
        for x in arrays:
            x.valid()

        if axis == 0:
            return type(arrays[0])._concatenate_axis0(arrays)
        if axis == 1:
            return type(arrays[0])._concatenate_axis1(arrays)
        raise NotImplementedError("axis > 1")
Exemplo n.º 25
0
 def mixin(methods, awkwardtype):
     """Create ``<awkwardtype>Methods``: ``awkwardtype`` with ``methods`` mixed in.

     ``methods`` must not be an AwkwardArray subclass and ``awkwardtype``
     must not already be a ``Methods`` subclass (both checked by assertion).
     The new class lists ``methods`` before ``awkwardtype`` in its bases.
     """
     assert not issubclass(methods, awkward0.array.base.AwkwardArray)
     assert not issubclass(awkwardtype, Methods)

     name = awkwardtype.__name__ + "Methods"
     bases = (methods, awkwardtype)
     return type(name, bases, {})
Exemplo n.º 26
0
    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        """Apply a NumPy ufunc chunk by chunk.

        All ``ChunkedArray`` inputs must be aligned with the first one.
        Other array inputs are sliced to match each chunk and remaining
        inputs are passed to every call unchanged.  Array results are
        gathered into new ChunkedArrays; for tuple results, non-array
        entries keep the value from the first chunk.  Returns None if the
        ufunc produced no array results.

        Raises NotImplementedError if ``out`` is given and ValueError if
        the ChunkedArray inputs are not aligned.  Returns NotImplemented
        for any ufunc method other than ``"__call__"``.
        """
        if "out" in kwargs:
            raise NotImplementedError("in-place operations not supported")

        if method != "__call__":
            return NotImplemented

        first = None
        rest = []
        for x in inputs:
            if isinstance(x, ChunkedArray):
                x._valid()
                if first is None:
                    first = x
                else:
                    rest.append(x)

        assert first is not None
        if not all(first._aligned(x) for x in rest):
            # FIXME: we may need to handle a more general case if ChunkedArrays are inside other Awkward types
            # perhaps split at the largest possible slices such that all of them are one chunk each, and then unpack the single chunk after slicing
            raise ValueError(
                "ChunkedArrays can only be combined if they have the same chunk sizes"
            )

        arraytypes = (self.numpy.ndarray, awkward0.array.base.AwkwardArray)

        # one argument list per chunk of the first ChunkedArray
        batches = []
        for i, slc in enumerate(first._slices()):
            batch = []
            for x in inputs:
                if isinstance(x, ChunkedArray):
                    batch.append(x._chunks[i])
                elif isinstance(x, arraytypes):
                    batch.append(x[slc])
                else:
                    batch.append(x)
            batches.append(batch)

        out = None
        chunks = {}  # output position (None for a single result) -> chunks
        types = {}  # output position -> type of the latest chunk
        for batch in batches:
            result = getattr(ufunc, method)(*batch, **kwargs)

            # method is always "__call__" here, so no "at" case is needed
            if isinstance(result, tuple):
                if out is None:
                    out = list(result)
                for i, x in enumerate(result):
                    if isinstance(x, arraytypes):
                        chunks.setdefault(i, []).append(x)
                        types[i] = type(x)

            elif isinstance(result, arraytypes):
                chunks.setdefault(None, []).append(result)
                types[None] = type(result)

        if out is None:
            if None in chunks:
                return self.Methods.maybemixin(types[None],
                                               ChunkedArray)(chunks[None])
            return None

        for i in range(len(out)):
            if i in chunks:
                out[i] = self.Methods.maybemixin(types[i],
                                                 ChunkedArray)(chunks[i])
        return tuple(out)
Exemplo n.º 27
0
    def __getitem__(self, where):
        import awkward0.array.virtual

        self._valid()

        if self._util_isstringslice(where):
            chunks = []
            chunksizes = []
            for chunk in self._chunks:
                chunks.append(chunk[where])
                chunksizes.append(len(chunks[-1]))
            if len(chunks) == 0:
                return self.copy(chunks=chunks, chunksizes=chunksizes)
            else:
                return awkward0.array.objects.Methods.maybemixin(
                    type(chunks[0]), self.ChunkedArray)(chunks,
                                                        chunksizes=chunksizes)

        if isinstance(where, tuple) and len(where) == 0:
            return self
        if not isinstance(where, tuple):
            where = (where, )
        head, tail = where[0], where[1:]

        if isinstance(head, ChunkedArray):
            if not self._aligned(head):
                raise ValueError(
                    "A ChunkedArray can only be used as a slice of a ChunkedArray if they have the same chunk sizes"
                )
            chunks = []
            chunksizes = []
            for c, h in zip(self.chunks, head.chunks):
                if isinstance(c, awkward0.array.virtual.VirtualArray):
                    c = c.array
                if isinstance(h, awkward0.array.virtual.VirtualArray):
                    h = h.array
                chunks.append(c[(h, ) + tail])
                chunksizes.append(len(chunks[-1]))
            return self.copy(chunks=chunks, chunksizes=chunksizes)

        if self._util_isinteger(head):
            chunk, localhead = self.global2local(head)
            return chunk[(localhead, ) + tail]

        elif isinstance(head, slice):
            if head.step == 0:
                raise ValueError("slice step cannot be zero")
            elif (head.start is None or head.start >= 0) and (
                    head.stop is not None and
                    head.stop >= 0) and (head.step is None or head.step > 0):
                # case A
                start, stop, step = head.start, head.stop, head.step
                if start is None:
                    start = 0
                if step is None:
                    step = 1
            elif (head.start is not None and head.start >= 0) and (
                    head.stop is None
                    or head.stop >= 0) and (head.step is not None
                                            and head.step < 0):
                # case B
                start, stop, step = head.start, head.stop, head.step
                if stop is None:
                    stop = -1
            else:
                # case C (requires potentially expensive len(self))
                start, stop, step = head.indices(len(self))

            # if step > 0, stop can be len(self)
            # if step < 0, stop can be -1 (not a Python "negative index", but an indicator to go all the way to 0)

            if start == -1:
                # case C start below 0
                start_chunkid = -1
            else:
                try:
                    start_chunkid = self.global2chunkid(start)
                except IndexError:
                    if start >= 0:
                        # case A or B start was set beyond len(self), clamp it
                        start, start_chunkid = len(self), len(self._chunks)
                    if step < 0:
                        start -= 1
                        start_chunkid -= 1

            if stop == -1:
                # case B or C stop not set with step < 0; go all the way to 0
                stop_chunkid = -1
            else:
                try:
                    stop_chunkid = self.global2chunkid(stop)
                except IndexError:
                    # stop is at or beyond len(self), clamp it
                    stop = len(self)
                if step > 0:
                    # we want the chunkid at or to the right of stop (no -1)
                    stop_chunkid = min(
                        self.numpy.searchsorted(self.offsets, stop, "right"),
                        len(self._chunks))
                else:
                    # we want the chunkid to the left of stop
                    stop_chunkid = max(
                        self.numpy.searchsorted(self.offsets, stop, "right") -
                        2, -1)

            offsets = self.offsets
            chunks = []
            skip = 0
            for chunkid in range(start_chunkid, stop_chunkid,
                                 1 if step > 0 else -1):
                # set the local_start
                if chunkid == start_chunkid:
                    local_start = start - offsets[chunkid]
                else:
                    if step > 0:
                        local_start = skip
                    else:
                        local_start = self._chunksizes[chunkid] - 1 - skip

                if local_start < 0:
                    # skip is bigger than this entire chunk
                    skip -= self._chunksizes[chunkid]
                    continue

                # set the local_stop and new skip
                if chunkid == stop_chunkid - (1 if step > 0 else -1):
                    if stop == -1:
                        local_stop = None
                    else:
                        local_stop = stop - offsets[chunkid]
                else:
                    local_stop = None
                    if step > 0:
                        skip = (local_start - self._chunksizes[chunkid]) % step
                    else:
                        skip = (-1 - local_start) % -step

                # add a sliced chunk
                chunk = self._chunks[chunkid][(slice(local_start, local_stop,
                                                     step), )]
                if len(chunk) > 0:
                    chunk = chunk[(slice(None), ) + tail]
                    if len(chunk) > 0:
                        chunks.append(chunk)

            if len(chunks) == 0 and len(self._chunks) > 0:
                chunks.append(
                    self._chunks[0][(slice(0, 0), ) +
                                    tail])  # so that sliced.type == self.type

            return self.copy(chunks=chunks)

        else:
            head = self.numpy.array(head, copy=False)
            if len(head.shape) == 1 and self._util_isintegertype(
                    head.dtype.type):
                if len(head) == 0 and len(self._chunks) == 0:
                    return self.copy(chunks=[])[tail]
                elif len(head) == 0:
                    return self.copy(
                        chunks=[self._chunks[0][(slice(0, 0), ) + tail]])

                chunkid, head = self.global2chunkid(head,
                                                    return_normalized=True)

                diff = (chunkid[1:] - chunkid[:-1])
                if (diff >= 0).all():
                    diff2 = self.numpy.empty(len(chunkid),
                                             dtype=self.INDEXTYPE)
                    diff2[0] = 1
                    diff2[1:] = diff
                    mask = (diff2 > 0)
                    offsets = list(
                        self.numpy.nonzero(mask)[0]) + [len(chunkid)]
                    chunks = []
                    for i, cid in enumerate(chunkid[mask]):
                        localindex = head[
                            offsets[i]:offsets[i + 1]] - self.offsets[cid]
                        chunks.append(self._chunks[cid][localindex])
                    return self.copy(chunks=chunks)

                elif self._util_isnumpy(self.type):
                    out = self.numpy.empty((len(head), ) + self.type.shape[1:],
                                           dtype=self.type.dtype)
                    self.knowchunksizes(chunkid.max() + 1)
                    offsets = self.offsets

                    for cid in self.numpy.unique(chunkid):
                        mask = (chunkid == cid)
                        out[mask] = self._chunks[cid][head[mask] -
                                                      offsets[cid]]

                    if tail == ():
                        return out
                    else:
                        return out[(slice(None), ) + tail]

                elif tail == ():
                    return self.IndexedArray(head, self)

                else:
                    raise NotImplementedError

            elif len(head.shape) == 1 and issubclass(head.dtype.type,
                                                     (bool, self.numpy.bool_)):
                if len(self) != len(head):
                    raise IndexError(
                        "boolean index did not match indexed array along dimension 0; dimension is {0} but corresponding boolean dimension is {1}"
                        .format(len(self), len(head)))

                chunks = []
                for chunk, slc in zip(self._chunks, self._slices()):
                    x = chunk[head[slc]]
                    if len(x) > 0:
                        x = x[(slice(None), ) + tail]
                        if len(x) > 0:
                            chunks.append(x)

                return self.copy(chunks=chunks)

            else:
                raise TypeError(
                    "cannot interpret shape {0}, dtype {1} as a fancy index or mask"
                    .format(head.shape, head.dtype))
Exemplo n.º 28
0
 def __init__(self, *args, **kwds):
     raise TypeError(
         "{0} is an abstract base class; do not instantiate".format(
             type(self)))
Exemplo n.º 29
0
def toparquet(where, obj, **options):
    """Write ``obj`` to a Parquet file via ``pyarrow.parquet.ParquetWriter``.

    Args:
        where: Destination, forwarded to ``ParquetWriter`` as ``where``.
        obj: What to write. One of:

            * a ``ChunkedArray`` -- every element of ``obj.chunks`` is
              converted and written with its own ``write_table`` call;
            * a single awkward array or ``numpy.ndarray`` -- converted and
              written with one ``write_table`` call;
            * any other iterable -- every item is converted and written
              with its own ``write_table`` call.
        **options: Extra keyword arguments forwarded to ``ParquetWriter``.
            For chunked and iterable input a ``schema`` option is used when
            given, otherwise the schema of the first converted table is
            used. For a single array the schema is always taken from the
            converted array (a caller-supplied ``schema`` is overwritten).

    Raises:
        TypeError: if ``obj`` is neither an array nor iterable, or if a
            chunk/item is not an awkward array or ``numpy.ndarray``.
        ValueError: if the chunk list or iterable yields no items.
    """
    import pyarrow.parquet

    options["where"] = where

    def convert(obj, message):
        # Turn one array into a pyarrow.Table; ``message`` is the TypeError
        # text used when ``obj`` is not an array type that can be converted.
        if not isinstance(obj,
                          (awkward0.array.base.AwkwardArray, numpy.ndarray)):
            raise TypeError(message)
        out = toarrow(obj)
        if isinstance(out, pyarrow.Table):
            return out
        # Anything that is not already a Table is wrapped as a single
        # column with an empty name.
        return pyarrow.Table.from_batches(
            [pyarrow.RecordBatch.from_arrays([out], [""])])

    plain_message = "cannot write {0} to Parquet file"
    item_message = "cannot write iterator of {0} to Parquet file"

    # Normalize all three kinds of input to "an iterator of arrays" so the
    # writer is opened, fed, and closed in exactly one place.
    keep_given_schema = True
    if isinstance(obj, awkward0.array.chunked.ChunkedArray):
        # Checked before the generic array case so chunks are written
        # one at a time.
        items, message = iter(obj.chunks), plain_message
    elif isinstance(obj, (awkward0.array.base.AwkwardArray, numpy.ndarray)):
        items, message = iter((obj, )), plain_message
        # Preserved behavior: a single array always dictates the schema.
        keep_given_schema = False
    else:
        try:
            items = iter(obj)
        except TypeError:
            raise TypeError(plain_message.format(type(obj)))
        message = item_message

    try:
        first = next(items)
    except StopIteration:
        raise ValueError("iterable is empty")

    # The first table is converted before the writer exists because its
    # schema may be needed to construct the writer.
    table = convert(first, message.format(type(first)))
    if not (keep_given_schema and "schema" in options):
        options["schema"] = table.schema

    writer = pyarrow.parquet.ParquetWriter(**options)
    try:
        # The first write is inside the try block too, so the writer is
        # closed even if that very first write_table call fails.
        writer.write_table(table)
        for item in items:
            writer.write_table(convert(item, message.format(type(item))))
    finally:
        writer.close()
Exemplo n.º 30
0
 def check_2_tuple_contents(two_tuple, one, two):
     """Assert that ``two_tuple`` is exactly a 2-tuple equal to ``(one, two)``.

     Each element is compared with ``==`` and the comparison result is
     flattened, so every entry of the comparison must be true.
     """
     assert type(two_tuple) is tuple
     assert len(two_tuple) == 2
     first, second = two_tuple
     for actual, expected in ((first, one), (second, two)):
         assert all((actual == expected).flatten())