Exemplos de Index.set_names em Python

Linguagem de programação: Python

Espaço para nome / nome do pacote: pandas

Classe / Tipo: Index

Método / Função: set_names

Exemplos em hotexamples.com: 2

Index.set_names em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de pandas.Index.set_names em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

Index(30)

get_loc(30)

intersection(27)

union(21)

name(14)

join(12)

get_level_values(11)

sort_values(11)

astype(10)

difference(10)

get_indexer(9)

slice_locs(9)

get_indexer_non_unique(8)

duplicated(8)

symmetric_difference(7)

append(7)

isin(6)

tolist(6)

identical(6)

equals(6)

tz_localize(6)

get_slice_bound(5)

map(5)

rename(4)

argmax(4)

copy(4)

argmin(4)

to_timestamp(3)

capitalize(3)

format(3)

fillna(3)

_with_infer(3)

to_series(3)

get_indexer_for(3)

set_levels(2)

delete(2)

to_frame(2)

insert(2)

remove_unused_levels(2)

reindex(2)

max(2)

min(2)

tz_convert(1)

take(1)

take_nd(1)

to_csv(1)

to_numpy(1)

to_datetime(1)

_summary(1)

set_names(1)

Métodos Frequentes

Index (30)

get_loc (30)

intersection (27)

union (21)

name (14)

join (12)

get_level_values (11)

sort_values (11)

astype (10)

difference (10)

Métodos Frequentes

get_indexer (9)

slice_locs (9)

get_indexer_non_unique (8)

duplicated (8)

symmetric_difference (7)

append (7)

isin (6)

tolist (6)

identical (6)

equals (6)

tz_localize (6)

get_slice_bound (5)

map (5)

rename (4)

argmax (4)

copy (4)

argmin (4)

to_timestamp (3)

capitalize (3)

format (3)

Métodos Frequentes

tz_localize (6)

get_slice_bound (5)

map (5)

rename (4)

argmax (4)

copy (4)

argmin (4)

to_timestamp (3)

capitalize (3)

format (3)

fillna (3)

_with_infer (3)

to_series (3)

get_indexer_for (3)

set_levels (2)

delete (2)

to_frame (2)

insert (2)

remove_unused_levels (2)

reindex (2)

max (2)

min (2)

tz_convert (1)

take (1)

take_nd (1)

to_csv (1)

to_numpy (1)

to_datetime (1)

_summary (1)

set_names (1)

Métodos Frequentes

fillna (3)

_with_infer (3)

to_series (3)

get_indexer_for (3)

set_levels (2)

delete (2)

to_frame (2)

insert (2)

remove_unused_levels (2)

reindex (2)

max (2)

min (2)

tz_convert (1)

take (1)

take_nd (1)

to_csv (1)

to_numpy (1)

to_datetime (1)

_summary (1)

set_names (1)

get_locs (1)

contains (1)

drop (1)

drop_duplicates (1)

factorize (1)

_simple_new (1)

get_loc_level (1)

index (1)

repeat (1)

_set_names (1)

_union (1)

isna (1)

isnull (1)

notna (1)

remove_unused_categories (1)

unique (1)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: grouputils.py Projeto: cisco00/Sentimental-Analysis-on-threat

class Grouping(object): def __init__(self, index, names=None): """ index : index-like Can be pandas MultiIndex or Index or array-like. If array-like and is a MultipleIndex (more than one grouping variable), groups are expected to be in each row. E.g., [('red', 1), ('red', 2), ('green', 1), ('green', 2)] names : list or str, optional The names to use for the groups. Should be a str if only one grouping variable is used. Notes ----- If index is already a pandas Index then there is no copy. """ if isinstance(index, (Index, MultiIndex)): if names is not None: if hasattr(index, 'set_names'): # newer pandas index.set_names(names, inplace=True) else: index.names = names self.index = index else: # array_like if _is_hierarchical(index): self.index = _make_hierarchical_index(index, names) else: self.index = Index(index, name=names) if names is None: names = _make_generic_names(self.index) if hasattr(self.index, 'set_names'): self.index.set_names(names, inplace=True) else: self.index.names = names self.nobs = len(self.index) self.nlevels = len(self.index.names) self.slices = None @property def index_shape(self): if hasattr(self.index, 'levshape'): return self.index.levshape else: return self.index.shape @property def levels(self): if hasattr(self.index, 'levels'): return self.index.levels else: return pd.Categorical(self.index).levels @property def labels(self): # this was index_int, but that's not a very good name... codes = getattr(self.index, 'codes', None) if codes is None: if hasattr(self.index, 'labels'): codes = self.index.labels else: codes = pd.Categorical(self.index).codes[None] return codes @property def group_names(self): return self.index.names def reindex(self, index=None, names=None): """ Resets the index in-place. """ # NOTE: this is not of much use if the rest of the data does not change # This needs to reset cache if names is None: names = self.group_names self = Grouping(index, names) def get_slices(self, level=0): """ Sets the slices attribute to be a list of indices of the sorted groups for the first index level. I.e., self.slices[0] is the index where each observation is in the first (sorted) group. """ # TODO: refactor this groups = self.index.get_level_values(level).unique() groups = np.array(groups) groups.sort() if isinstance(self.index, MultiIndex): self.slices = [ self.index.get_loc_level(x, level=level)[0] for x in groups ] else: self.slices = [self.index.get_loc(x) for x in groups] def count_categories(self, level=0): """ Sets the attribute counts to equal the bincount of the (integer-valued) labels. """ # TODO: refactor this not to set an attribute. Why would we do this? self.counts = np.bincount(self.labels[level]) def check_index(self, is_sorted=True, unique=True, index=None): """Sanity checks""" if not index: index = self.index if is_sorted: test = pd.DataFrame(lrange(len(index)), index=index) test_sorted = test.sort() if not test.index.equals(test_sorted.index): raise Exception('Data is not be sorted') if unique: if len(index) != len(index.unique()): raise Exception('Duplicate index entries') def sort(self, data, index=None): """Applies a (potentially hierarchical) sort operation on a numpy array or pandas series/dataframe based on the grouping index or a user-supplied index. Returns an object of the same type as the original data as well as the matching (sorted) Pandas index. """ if index is None: index = self.index if data_util._is_using_ndarray_type(data, None): if data.ndim == 1: out = pd.Series(data, index=index, copy=True) out = out.sort_index() else: out = pd.DataFrame(data, index=index) out = out.sort_index(inplace=False) # copies return np.array(out), out.index elif data_util._is_using_pandas(data, None): out = data out = out.reindex(index) # copies? out = out.sort_index() return out, out.index else: msg = 'data must be a Numpy array or a Pandas Series/DataFrame' raise ValueError(msg) def transform_dataframe(self, dataframe, function, level=0, **kwargs): """Apply function to each column, by group Assumes that the dataframe already has a proper index""" if dataframe.shape[0] != self.nobs: raise Exception('dataframe does not have the same shape as index') out = dataframe.groupby(level=level).apply(function, **kwargs) if 1 in out.shape: return np.ravel(out) else: return np.array(out) def transform_array(self, array, function, level=0, **kwargs): """Apply function to each column, by group """ if array.shape[0] != self.nobs: raise Exception('array does not have the same shape as index') dataframe = pd.DataFrame(array, index=self.index) return self.transform_dataframe(dataframe, function, level=level, **kwargs) def transform_slices(self, array, function, level=0, **kwargs): """Apply function to each group. Similar to transform_array but does not coerce array to a DataFrame and back and only works on a 1D or 2D numpy array. function is called function(group, group_idx, **kwargs). """ array = np.asarray(array) if array.shape[0] != self.nobs: raise Exception('array does not have the same shape as index') # always reset because level is given. need to refactor this. self.get_slices(level=level) processed = [] for s in self.slices: if array.ndim == 2: subset = array[s, :] elif array.ndim == 1: subset = array[s] processed.append(function(subset, s, **kwargs)) processed = np.array(processed) return processed.reshape(-1, processed.shape[-1]) # TODO: this is not general needs to be a PanelGrouping object def dummies_time(self): self.dummy_sparse(level=1) return self._dummies def dummies_groups(self, level=0): self.dummy_sparse(level=level) return self._dummies def dummy_sparse(self, level=0): """create a sparse indicator from a group array with integer labels Parameters ---------- groups : ndarray, int, 1d (nobs,) An array of group indicators for each observation. Group levels are assumed to be defined as consecutive integers, i.e. range(n_groups) where n_groups is the number of group levels. A group level with no observations for it will still produce a column of zeros. Returns ------- indi : ndarray, int8, 2d (nobs, n_groups) an indicator array with one row per observation, that has 1 in the column of the group level for that observation Examples -------- >>> g = np.array([0, 0, 2, 1, 1, 2, 0]) >>> indi = dummy_sparse(g) >>> indi <7x3 sparse matrix of type '<type 'numpy.int8'>' with 7 stored elements in Compressed Sparse Row format> >>> indi.todense() matrix([[1, 0, 0], [1, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0]], dtype=int8) current behavior with missing groups >>> g = np.array([0, 0, 2, 0, 2, 0]) >>> indi = dummy_sparse(g) >>> indi.todense() matrix([[1, 0, 0], [1, 0, 0], [0, 0, 1], [1, 0, 0], [0, 0, 1], [1, 0, 0]], dtype=int8) """ indi = dummy_sparse(self.labels[level]) self._dummies = indi

Exemplo n.º 2

0

Exibir arquivo

Arquivo: grouputils.py Projeto: cong1989/statsmodels

class Grouping(object): def __init__(self, index, names=None): """ index : index-like Can be pandas MultiIndex or Index or array-like. If array-like and is a MultipleIndex (more than one grouping variable), groups are expected to be in each row. E.g., [('red', 1), ('red', 2), ('green', 1), ('green', 2)] names : list or str, optional The names to use for the groups. Should be a str if only one grouping variable is used. Notes ----- If index is already a pandas Index then there is no copy. """ if isinstance(index, (Index, MultiIndex)): if names is not None: if hasattr(index, 'set_names'): # newer pandas index.set_names(names, inplace=True) else: index.names = names self.index = index else: # array-like if _is_hierarchical(index): self.index = _make_hierarchical_index(index, names) else: self.index = Index(index, name=names) if names is None: names = _make_generic_names(self.index) if hasattr(self.index, 'set_names'): self.index.set_names(names, inplace=True) else: self.index.names = names self.nobs = len(self.index) self.nlevels = len(self.index.names) self.slices = None @property def index_shape(self): if hasattr(self.index, 'levshape'): return self.index.levshape else: return self.index.shape @property def levels(self): if hasattr(self.index, 'levels'): return self.index.levels else: return pd.Categorical(self.index).levels @property def labels(self): # this was index_int, but that's not a very good name... if hasattr(self.index, 'labels'): return self.index.labels else: # pandas version issue here # Compat code for the labels -> codes change in pandas 0.15 # FIXME: use .codes directly when we don't want to support # pandas < 0.15 tmp = pd.Categorical(self.index) try: labl = tmp.codes except AttributeError: labl = tmp.labels # Old pandsd return labl[None] @property def group_names(self): return self.index.names def reindex(self, index=None, names=None): """ Resets the index in-place. """ # NOTE: this isn't of much use if the rest of the data doesn't change # This needs to reset cache if names is None: names = self.group_names self = Grouping(index, names) def get_slices(self, level=0): """ Sets the slices attribute to be a list of indices of the sorted groups for the first index level. I.e., self.slices[0] is the index where each observation is in the first (sorted) group. """ # TODO: refactor this groups = self.index.get_level_values(level).unique() groups = np.array(groups) groups.sort() if isinstance(self.index, MultiIndex): self.slices = [self.index.get_loc_level(x, level=level)[0] for x in groups] else: self.slices = [self.index.get_loc(x) for x in groups] def count_categories(self, level=0): """ Sets the attribute counts to equal the bincount of the (integer-valued) labels. """ # TODO: refactor this not to set an attribute. Why would we do this? self.counts = np.bincount(self.labels[level]) def check_index(self, is_sorted=True, unique=True, index=None): """Sanity checks""" if not index: index = self.index if is_sorted: test = pd.DataFrame(lrange(len(index)), index=index) test_sorted = test.sort() if not test.index.equals(test_sorted.index): raise Exception('Data is not be sorted') if unique: if len(index) != len(index.unique()): raise Exception('Duplicate index entries') def sort(self, data, index=None): """Applies a (potentially hierarchical) sort operation on a numpy array or pandas series/dataframe based on the grouping index or a user-supplied index. Returns an object of the same type as the original data as well as the matching (sorted) Pandas index. """ if index is None: index = self.index if data_util._is_using_ndarray_type(data, None): if data.ndim == 1: out = pd.Series(data, index=index, copy=True) out = out.sort_index() else: out = pd.DataFrame(data, index=index) out = out.sort_index(inplace=False) # copies return np.array(out), out.index elif data_util._is_using_pandas(data, None): out = data out = out.reindex(index) # copies? out = out.sort_index() return out, out.index else: msg = 'data must be a Numpy array or a Pandas Series/DataFrame' raise ValueError(msg) def transform_dataframe(self, dataframe, function, level=0, **kwargs): """Apply function to each column, by group Assumes that the dataframe already has a proper index""" if dataframe.shape[0] != self.nobs: raise Exception('dataframe does not have the same shape as index') out = dataframe.groupby(level=level).apply(function, **kwargs) if 1 in out.shape: return np.ravel(out) else: return np.array(out) def transform_array(self, array, function, level=0, **kwargs): """Apply function to each column, by group """ if array.shape[0] != self.nobs: raise Exception('array does not have the same shape as index') dataframe = pd.DataFrame(array, index=self.index) return self.transform_dataframe(dataframe, function, level=level, **kwargs) def transform_slices(self, array, function, level=0, **kwargs): """Apply function to each group. Similar to transform_array but does not coerce array to a DataFrame and back and only works on a 1D or 2D numpy array. function is called function(group, group_idx, **kwargs). """ array = np.asarray(array) if array.shape[0] != self.nobs: raise Exception('array does not have the same shape as index') # always reset because level is given. need to refactor this. self.get_slices(level=level) processed = [] for s in self.slices: if array.ndim == 2: subset = array[s, :] elif array.ndim == 1: subset = array[s] processed.append(function(subset, s, **kwargs)) processed = np.array(processed) return processed.reshape(-1, processed.shape[-1]) # TODO: this isn't general needs to be a PanelGrouping object def dummies_time(self): self.dummy_sparse(level=1) return self._dummies def dummies_groups(self, level=0): self.dummy_sparse(level=level) return self._dummies def dummy_sparse(self, level=0): """create a sparse indicator from a group array with integer labels Parameters ---------- groups: ndarray, int, 1d (nobs,) an array of group indicators for each observation. Group levels are assumed to be defined as consecutive integers, i.e. range(n_groups) where n_groups is the number of group levels. A group level with no observations for it will still produce a column of zeros. Returns ------- indi : ndarray, int8, 2d (nobs, n_groups) an indicator array with one row per observation, that has 1 in the column of the group level for that observation Examples -------- >>> g = np.array([0, 0, 2, 1, 1, 2, 0]) >>> indi = dummy_sparse(g) >>> indi <7x3 sparse matrix of type '<type 'numpy.int8'>' with 7 stored elements in Compressed Sparse Row format> >>> indi.todense() matrix([[1, 0, 0], [1, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0]], dtype=int8) current behavior with missing groups >>> g = np.array([0, 0, 2, 0, 2, 0]) >>> indi = dummy_sparse(g) >>> indi.todense() matrix([[1, 0, 0], [1, 0, 0], [0, 0, 1], [1, 0, 0], [0, 0, 1], [1, 0, 0]], dtype=int8) """ from scipy import sparse groups = self.labels[level] indptr = np.arange(len(groups)+1) data = np.ones(len(groups), dtype=np.int8) self._dummies = sparse.csr_matrix((data, groups, indptr))