Exemplo n.º 1
0
def index_from_optional_constructor(
        value: IndexInitializer,
        *,
        default_constructor: IndexConstructor,
        explicit_constructor: tp.Optional[IndexConstructor] = None,
        ) -> IndexBase:
    '''
    Resolve an index initializer into an index, reusing, converting or copying an existing index where possible.

    Args:
        value: labels, or an existing index, from which an index is produced.
        default_constructor: constructor used when no explicit constructor is given and ``value`` cannot simply be reused, converted or copied.
        explicit_constructor: if given (and truthy), it is always called with ``value`` and its result returned; no other handling is applied.

    Returns:
        The resolved index. This is ``value`` itself (no copy) when ``value`` is a static index and ``is_static`` reports the default constructor as static.
    '''
    # NOTE: this might return an own_index flag to show callers when a new index has been created

    if explicit_constructor:
        return explicit_constructor(value)

    if not isinstance(value, IndexBase):
        # static status cannot always be determined from constructors; fall back on the default constructor
        return default_constructor(value)

    # from here, value is an index; the default constructor could be a function with a STATIC attribute
    if is_static(default_constructor): # type: ignore
        if value.STATIC:
            # value: static, default: static; both immutable, so the index can be shared
            return value
        # value: mutable, default: static; use the immutable alternative
        return value._IMMUTABLE_CONSTRUCTOR(value)

    if value.STATIC:
        # value: static, default: mutable; build a mutable version from the immutable index
        return default_constructor(value)
    # value: mutable, default: mutable; copy so the two do not share state
    return value.copy()
Exemplo n.º 2
0
    def __init__(self,
                 labels: IndexInitializer,
                 *,
                 loc_is_iloc: bool = False,
                 name: tp.Hashable = None,
                 dtype: DtypeSpecifier = None) -> None:
        '''
        Initialize from an iterable of labels or from another ``Index``.

        Args:
            labels: the labels, or an ``Index`` instance whose labels, positions and ``loc_is_iloc`` flag are reused (its map is reused as well when it is static).
            loc_is_iloc: stored as ``_loc_is_iloc``; replaced by the source's flag when ``labels`` is an ``Index``.
            name: optional name, passed through ``name_filter``; when ``None``, the name of a source ``Index`` is used.
            dtype: optional dtype for the labels; must equal the class-level ``_DTYPE`` when that is not ``None``.

        Raises:
            RuntimeError: if ``dtype`` conflicts with ``_DTYPE``, or the extracted labels fail the ``_DTYPE`` check.
            KeyError: if the map and the labels differ in length, meaning the labels are not unique.
        '''
        self._recache = False
        self._map = None
        positions = None

        # NOTE: this will not, and should not, catch IndexHierarchy
        if isinstance(labels, Index):
            source = labels
            if source.STATIC:
                # the map of a static index can be shared
                self._map = source._map
            if source._recache:
                source._update_array_cache()

            # take references to the cached arrays; after the update above they are current, even for an IndexGO
            positions = source._positions
            loc_is_iloc = source._loc_is_iloc

            if name is None:
                # names are immutable, so no copy is necessary
                name = source.name

            labels = source._labels

        self._name = None if name is None else name_filter(name)

        if self._map is None:
            self._map = self._get_map(labels, positions)

        # resolve the dtype used for label extraction from the argument and the class-level _DTYPE
        if dtype is not None and self._DTYPE is not None and dtype != self._DTYPE:
            raise RuntimeError('invalid dtype argument for this Index',
                               dtype, self._DTYPE)
        target_dtype = self._DTYPE if dtype is None else dtype

        # labels might be an NP array, or a list, depending on if static or grow only
        self._labels = self._extract_labels(self._map, labels, target_dtype)
        self._positions = self._extract_positions(self._map, positions)

        # NOTE(review): this is a truthiness test on _DTYPE, unlike the `is not None` test above; confirm that is intended
        if self._DTYPE:
            if self._labels.dtype != self._DTYPE:
                raise RuntimeError('invalid label dtype for this Index',
                                   self._labels.dtype, self._DTYPE)
        if len(self._map) != len(self._labels):
            raise KeyError('labels have non-unique values')

        # NOTE: automatic discovery is possible, but not yet implemented
        self._loc_is_iloc = loc_is_iloc
Exemplo n.º 3
0
    def __init__(self,
            labels: IndexInitializer,
            loc_is_iloc: bool=False,
            dtype: DtypeSpecifier=None
            ) -> None:

        '''
        Args:
            labels: Initializer of the Index. If an Index or IndexGO object is provided, its cached labels and positions are reused (and its map as well, when it is static). An Index cannot be initialized from an IndexHierarchy directly (instead, pass the index's labels via .values).
            loc_is_iloc: stored as ``_loc_is_iloc``; replaced by the source's flag when ``labels`` is an Index.
            dtype: passed to ``_extract_labels`` when the stored labels are derived.

        Raises:
            RuntimeError: if the extracted labels fail the class-level ``_DTYPE`` check.
            KeyError: if the map and the labels differ in length, meaning the labels are not unique.
        '''
        self._recache = False
        self._map = None

        positions = None

        # NOTE: this will not, and should not, catch IndexHierarchy
        if isinstance(labels, Index):
            # get a reference to the immutable arrays
            # even if this is an IndexGO index, we can take the cached arrays, assuming they are up to date
            if labels.STATIC: # can take the map
                self._map = labels._map
            if labels._recache:
                labels._update_array_cache()

            positions = labels._positions
            loc_is_iloc = labels._loc_is_iloc
            labels = labels._labels

        if self._map is None:
            self._map = self._get_map(labels, positions)

        # this might be NP array, or a list, depending on if static or grow only
        self._labels = self._extract_labels(self._map, labels, dtype)
        self._positions = self._extract_positions(self._map, positions)

        if self._DTYPE and self._labels.dtype != self._DTYPE:
            # RuntimeError is still caught by callers handling Exception
            raise RuntimeError('invalid label dtype for this Index',
                    self._labels.dtype, self._DTYPE)
        if len(self._map) != len(self._labels):
            raise KeyError('labels have non-unique values')

        # NOTE: automatic discovery is possible, but not yet implemented
        self._loc_is_iloc = loc_is_iloc
Exemplo n.º 4
0
    def __init__(self,
            labels: IndexInitializer,
            *,
            loc_is_iloc: bool = False,
            name: tp.Hashable = None,
            dtype: DtypeSpecifier = None
            ) -> None:
        '''
        Initialize from an iterable of labels, another index, or a container exposing array ``values``.

        Args:
            labels: the labels. An ``IndexBase`` of depth one contributes its cached labels and positions (and its map, when it is static and no label coercion will follow); a deeper index is converted to tuples. An object whose ``values`` attribute exposes ``ndim`` (a Series or similar) contributes that array, converted to tuples if not one-dimensional. Anything else is used as an iterable of labels.
            loc_is_iloc: stored as ``_loc_is_iloc``; replaced by the source's flag when ``labels`` is a depth-one index.
            name: optional name, passed through ``name_filter``; when ``None``, a non-``None`` name of a source index is used.
            dtype: optional dtype for the labels; must equal the class-level ``_DTYPE`` when that is not ``None``.

        Raises:
            RuntimeError: if ``dtype`` conflicts with ``_DTYPE``, or the extracted labels fail the ``_DTYPE`` check.
            KeyError: if the map and the labels differ in length, meaning the labels are not unique.
        '''
        self._recache = False
        self._map = None
        positions = None

        # resolve the targeted labels dtype, by looking at the class attr _DTYPE and/or the passed dtype argument
        if dtype is None:
            dtype_extract = self._DTYPE # set in some specialized Index classes
        else: # passed dtype is not None
            if self._DTYPE is not None and dtype != self._DTYPE:
                raise RuntimeError('invalid dtype argument for this Index',
                        dtype, self._DTYPE)
            # self._DTYPE is None, or equal to the passed dtype; use dtype
            dtype_extract = dtype

        # handle all Index subclasses
        if isinstance(labels, IndexBase):
            if labels._recache:
                labels._update_array_cache()
            if name is None and labels.name is not None:
                name = labels.name # immutable, so no copy necessary
            if labels.depth == 1: # not an IndexHierarchy
                source = labels
                # get a reference to the immutable arrays, even if this is an IndexGO index, we can take the cached arrays, assuming they are up to date
                positions = source._positions
                loc_is_iloc = source._loc_is_iloc
                labels = source._labels
                # the map of a static index can be shared only if the labels stay as they are; when _DTYPE is set the labels are coerced below, so the source map is valid only if the labels already have that dtype
                if source.STATIC and (
                        self._DTYPE is None
                        or (isinstance(labels, np.ndarray)
                                and labels.dtype == self._DTYPE)
                        ):
                    self._map = source._map
            else: # IndexHierarchy
                # will be a generator of tuples; already updated caches
                labels = array2d_to_tuples(labels._labels)
        else:
            array = getattr(labels, 'values', None)
            # only array-like values qualify: a dict also has `values`, but as a method, and is left to be used as a plain iterable
            if hasattr(array, 'ndim'):
                # it is a Series or similar
                if array.ndim == 1:
                    labels = array
                else:
                    labels = array2d_to_tuples(array)

        if self._DTYPE is not None:
            # arrays are also checked against dtype_extract in _extract_labels
            if not isinstance(labels, np.ndarray):
                # for now, assume that if _DTYPE is defined, we have a date
                labels = (to_datetime64(v, dtype_extract) for v in labels)
            else: # coerce to target type
                labels = labels.astype(dtype_extract)

        self._name = name if name is None else name_filter(name)

        if self._map is None:
            self._map = self._get_map(labels, positions)

        # this might be NP array, or a list, depending on if static or grow only; if an array, dtype will be compared with passed dtype_extract
        self._labels = self._extract_labels(self._map, labels, dtype_extract)
        self._positions = self._extract_positions(self._map, positions)

        if self._DTYPE and self._labels.dtype != self._DTYPE:
            raise RuntimeError('invalid label dtype for this Index',
                    self._labels.dtype, self._DTYPE)
        if len(self._map) != len(self._labels):
            raise KeyError(f'labels ({len(self._labels)}) have non-unique values ({len(self._map)})')

        # NOTE: automatic discovery is possible, but not yet implemented
        self._loc_is_iloc = loc_is_iloc
Exemplo n.º 5
0
    def __init__(self,
                 values: SeriesInitializer,
                 *,
                 index: IndexInitializer = None,
                 dtype: DtypeSpecifier = None,
                 own_index: bool = False) -> None:
        '''
        Initialize the values array and the index of a Series.

        Args:
            values: a dict (keys become the index, in the order given by ``_dict_to_sorted_items``), a NumPy array, a sized iterable, an iterator, or a single element. A single element (or zero-dimensional array) is repeated to the length of the index; with no index, or an empty one, it yields a Series of length one.
            index: optional index initializer. If ``None`` or empty, an integer index over the values is created. Must not be given together with dict ``values``.
            dtype: optional dtype for the values. For array ``values`` it must agree with the dtype of the array.
            own_index: if True, a given non-empty ``index`` is stored as is.

        Raises:
            Exception: if an index is given with dict values, if ``dtype`` disagrees with an array's dtype, if a non-static index is given without ``own_index``, or if values and index differ in length.
        '''
        #-----------------------------------------------------------------------
        # values assignment

        values_constructor = None  # set when values creation is deferred

        # expose .values directly as it is immutable
        if not isinstance(values, np.ndarray):
            if isinstance(values, dict):
                # not sure if we should sort; not sure what to do if index is provided
                if index is not None:
                    raise Exception(
                        'cannot create a Series from a dictionary when an index is defined'
                    )
                index = []

                def values_gen():
                    for k, v in _dict_to_sorted_items(values):
                        # populate index as side effect of iterating values
                        index.append(k)
                        yield v

                if dtype and dtype != object:
                    # fromiter does not work with object types
                    self.values = np.fromiter(values_gen(),
                                              dtype=dtype,
                                              count=len(values))
                else:
                    self.values = np.array(tuple(values_gen()), dtype=dtype)
                self.values.flags.writeable = False

            # NOTE: not sure if we need to check __iter__ here
            elif (dtype and dtype != object and dtype != str
                  and hasattr(values, '__iter__')
                  and hasattr(values, '__len__')):
                self.values = np.fromiter(values,
                                          dtype=dtype,
                                          count=len(values))
                self.values.flags.writeable = False
            elif hasattr(values, '__len__') and not isinstance(values, str):
                self.values = np.array(values, dtype=dtype)
                self.values.flags.writeable = False
            elif hasattr(values, '__next__'):  # a generator-like
                self.values = np.array(tuple(values), dtype=dtype)
                self.values.flags.writeable = False
            else:  # it must be a single item
                # we cannot create the values until we realize the index, which might be hierarchical and not have final size equal to length
                def values_constructor(shape):
                    self.values = np.full(shape, values, dtype=dtype)
                    self.values.flags.writeable = False
        else:  # is numpy
            if dtype is not None and dtype != values.dtype:
                # what to do here?
                raise Exception(
                    'when supplying values via array, the dtype argument is not necessary; if provided, it must agree with the dtype of the array'
                )
            if values.shape == ():  # handle special case of NP element

                def values_constructor(shape):
                    self.values = np.repeat(values, shape)
                    self.values.flags.writeable = False
            else:
                self.values = immutable_filter(values)

        #-----------------------------------------------------------------------
        # index assignment
        # NOTE: this generally must be done after values assignment, as from_items needs a values generator to be exhausted before looking to values

        if index is None or (hasattr(index, '__len__') and len(index) == 0):
            # create an integer index; when values creation is deferred, self.values does not exist yet, and a single element with no index to broadcast over has length one
            size = 1 if values_constructor else len(self.values)
            self._index = Index(range(size), loc_is_iloc=True)
        elif own_index:
            self._index = index
        elif hasattr(index, 'STATIC'):
            if index.STATIC:
                self._index = index
            else:
                raise Exception(
                    'non-static index cannot be assigned to Series')
        else:  # let index handle instantiation
            if isinstance(index, (Index, IndexHierarchy)):
                # call with the class of the passed-in index, in case it is hierarchical
                self._index = index.__class__(index)
            else:
                self._index = Index(index)

        shape = self._index.__len__()

        if values_constructor:
            values_constructor(shape)  # updates self.values

        if len(self.values) != shape:
            raise Exception('values and index do not match length')
Exemplo n.º 6
0
    def __init__(self,
                 labels: IndexInitializer,
                 *,
                 loc_is_iloc: bool = False,
                 name: NameType = NAME_DEFAULT,
                 dtype: DtypeSpecifier = None) -> None:
        '''
        Initialize from an iterable of labels, another index, or a ``ContainerOperand``.

        Args:
            labels: the labels. An ``Index`` contributes its cached labels and positions (and its map, when both indices are static, no ``dtype`` is given, and the label types agree); any other ``IndexBase`` is converted to tuples. A ``ContainerOperand`` contributes its ``values`` array, converted to tuples if not one-dimensional. Anything else is used as an iterable of labels.
            loc_is_iloc: if True, no map is built and the labels are assumed to be unique and sized; replaced by the source's state when ``labels`` is an ``Index``.
            name: optional name, passed through ``name_filter``; when left as ``NAME_DEFAULT``, the name of a source index is used, otherwise ``None``.
            dtype: optional dtype for the labels; must equal the class-level ``_DTYPE`` when that is not ``None``.

        Raises:
            ErrorInitIndex: if ``dtype`` conflicts with ``_DTYPE``, if the labels are not unique, or if the extracted labels fail the ``_DTYPE`` check.
        '''

        self._recache: bool = False
        self._map: tp.Optional[FrozenAutoMap] = None

        positions = None
        is_typed = self._DTYPE is not None

        # resolve the targeted labels dtype, by looking at the class attr _DTYPE and/or the passed dtype argument
        if dtype is None:
            dtype_extract = self._DTYPE  # set in some specialized Index classes
        else:  # passed dtype is not None
            if is_typed and dtype != self._DTYPE:
                # NOTE: should never get to this branch, as derived Index classes that set _DTYPE remove dtype from __init__
                raise ErrorInitIndex('invalid dtype argument for this Index',
                                     dtype, self._DTYPE)  #pragma: no cover
            # self._DTYPE is None, or equal to the passed dtype; use dtype
            dtype_extract = dtype

        #-----------------------------------------------------------------------
        # handle all Index subclasses
        if isinstance(labels, IndexBase):
            if labels._recache:
                labels._update_array_cache()
            if name is NAME_DEFAULT:
                name = labels.name  # immutable, so no copy necessary
            if isinstance(labels, Index):  # not an IndexHierarchy
                if (labels.STATIC and self.STATIC and dtype is None):
                    if not is_typed or self._DTYPE == labels.dtype:
                        # can take the map if static and if types in the dict are the same as those in the labels (or to become the labels after conversion)
                        self._map = labels._map
                # get a reference to the immutable arrays, even if this is an IndexGO index, we can take the cached arrays, assuming they are up to date; for datetime64 indices, we might need to translate to a different type
                positions = labels._positions
                loc_is_iloc = labels._map is None
                labels = labels._labels
            else:  # IndexHierarchy
                # will be a generator of tuples; already updated caches
                labels = array2d_to_tuples(labels.__iter__())
        elif isinstance(labels, ContainerOperand):
            # it is a Series or similar
            array = labels.values
            if array.ndim == 1:
                labels = array
            else:
                labels = array2d_to_tuples(array)
        # else: assume an iterable suitable for labels usage

        #-----------------------------------------------------------------------
        if is_typed:
            # arrays are also checked against dtype_extract in _extract_labels
            if not isinstance(labels, np.ndarray):
                # for now, assume that if _DTYPE is defined, we have a date
                labels = (to_datetime64(v, dtype_extract) for v in labels)
            # coerce to target type
            elif labels.dtype != dtype_extract:
                labels = labels.astype(dtype_extract)
                labels.flags.writeable = False  #type: ignore

        self._name = None if name is NAME_DEFAULT else name_filter(name)

        if self._map is None:  # if _map not shared from another Index
            if not loc_is_iloc:
                try:
                    self._map = FrozenAutoMap(
                        labels) if self.STATIC else AutoMap(labels)
                except ValueError:  # AutoMap will raise ValueError if non-unique values are encountered
                    pass
                if self._map is None:
                    if hasattr(labels, '__len__'):
                        raise ErrorInitIndex(
                            f'labels ({len(labels)}) have non-unique values ({len(set(labels))})'
                        )
                    # labels is an iterator already consumed (at least in part) by the failed map construction; counts taken from it now would be wrong
                    raise ErrorInitIndex('labels have non-unique values')
                size = len(self._map)
            else:  # must assume labels are unique
                # labels must not be a generator, but we assume that internal clients that provided loc_is_iloc will not give a generator
                size = len(labels)  #type: ignore
                if positions is None:
                    positions = PositionsAllocator.get(size)
        else:  # map shared from another Index
            size = len(self._map)

        # this might be NP array, or a list, depending on if static or grow only; if an array, dtype will be compared with passed dtype_extract
        self._labels = self._extract_labels(self._map, labels, dtype_extract)
        self._positions = self._extract_positions(size, positions)

        if self._DTYPE and self._labels.dtype != self._DTYPE:
            raise ErrorInitIndex(
                'invalid label dtype for this Index',  #pragma: no cover
                self._labels.dtype,
                self._DTYPE)