Exemplo n.º 1
0
def make_index_unique(index: pd.Index, join: str = '-'):
    """Makes the index unique by appending '1', '2', etc.

    The first occurrence of a non-unique value is ignored.

    Parameters
    ----------
    join
         The connecting string between name and integer.

    Examples
    --------
    >>> adata1 = sc.AnnData(np.ones((3, 2)), {'obs_names': ['a', 'b', 'c']})
    >>> adata2 = sc.AnnData(np.zeros((3, 2)), {'obs_names': ['d', 'b', 'b']})
    >>> adata = adata1.concatenate(adata2)
    >>> adata.obs_names
    Index(['a', 'b', 'c', 'd', 'b', 'b'], dtype='object')
    >>> adata.obs_names_make_unique()
    >>> adata.obs_names
    Index(['a', 'b', 'c', 'd', 'b-1', 'b-2'], dtype='object')
    """
    if index.is_unique:
        return index
    from collections import defaultdict

    # Copy: ``index.values`` may be a view of the index's own buffer, and
    # writing through it would silently mutate the caller's index.
    values = index.values.copy()
    indices_dup = index.duplicated(keep='first')
    values_dup = values[indices_dup]
    # Track every value already present so a generated name such as 'b-1'
    # cannot collide with an existing 'b-1' and leave the index non-unique.
    values_set = set(values)
    counter = defaultdict(int)
    for i, v in enumerate(values_dup):
        while True:
            counter[v] += 1
            candidate = v + join + str(counter[v])
            if candidate not in values_set:
                values_set.add(candidate)
                values_dup[i] = candidate
                break
    values[indices_dup] = values_dup
    return pd.Index(values)
Exemplo n.º 2
0
def make_index_unique(index: pd.Index, join: str = "-"):
    """
    Makes the index unique by appending a number string to each duplicate index element:
    '1', '2', etc.

    If a tentative name created by the algorithm already exists in the index, it tries
    the next integer in the sequence.

    The first occurrence of a non-unique value is ignored.

    Parameters
    ----------
    join
         The connecting string between name and integer.

    Examples
    --------
    >>> from anndata import AnnData
    >>> adata = AnnData(np.ones((2, 3)), var=pd.DataFrame(index=["a", "a", "b"]))
    >>> adata.var_names
    Index(['a', 'a', 'b'], dtype='object')
    >>> adata.var_names_make_unique()
    >>> adata.var_names
    Index(['a', 'a-1', 'b'], dtype='object')
    """
    if index.is_unique:
        return index
    from collections import Counter

    # Copy so writes below never reach the caller's index buffer.
    values = index.values.copy()
    indices_dup = index.duplicated(keep="first")
    values_dup = values[indices_dup]
    values_set = set(values)
    counter = Counter()
    issue_interpretation_warning = False
    example_colliding_values = []
    for i, v in enumerate(values_dup):
        while True:
            counter[v] += 1
            tentative_new_name = v + join + str(counter[v])
            if tentative_new_name not in values_set:
                values_set.add(tentative_new_name)
                values_dup[i] = tentative_new_name
                break
            # A generated name collided with a value already in the index;
            # remember a few examples for the warning below.
            issue_interpretation_warning = True
            if len(example_colliding_values) < 5:
                example_colliding_values.append(tentative_new_name)

    if issue_interpretation_warning:
        # Fixed message: the original had a garbled sentence ("There values
        # with a similar suffixes in") and ran two sentences together.
        warnings.warn(
            f"Suffix used ({join}[0-9]+) to deduplicate index values may make "
            "index values difficult to interpret. There are values with "
            "similar suffixes in the index. Consider using a different "
            "delimiter by passing `join={delimiter}`. "
            "Example key collisions generated by the make_index_unique "
            f"algorithm: {example_colliding_values}"
        )
    values[indices_dup] = values_dup
    index = pd.Index(values, name=index.name)
    return index
Exemplo n.º 3
0
def assert_index(obj: pd.Index):
    r"""Check if index is conform to audformat."""

    if not (isinstance(obj, pd.MultiIndex) and len(obj.levels) == 2):
        # Anything that is not a two-level MultiIndex is delegated to the
        # generic audformat check.
        audformat.assert_index(obj)
        return

    if obj.has_duplicates:
        max_display = 10
        duplicates = obj[obj.duplicated()]
        msg_tail = '\n...' if len(duplicates) > max_display else ''
        msg_duplicates = '\n'.join(
            str(duplicate)
            for duplicate in duplicates[:max_display].tolist()
        )
        raise ValueError('Found duplicates:\n'
                         f'{msg_duplicates}{msg_tail}')

    expected_names = [
        audformat.define.IndexField.START,
        audformat.define.IndexField.END,
    ]
    if list(obj.names) != expected_names:
        raise ValueError('Found two levels with names '
                         f'{obj.names}, '
                         f'but expected names '
                         f'{expected_names}.')

    # Both levels must hold timedelta64[ns] values.
    if not pd.api.types.is_timedelta64_dtype(obj.levels[0].dtype):
        raise ValueError(
            "Level 'start' must contain values of type 'timedelta64[ns]'.")
    if not pd.api.types.is_timedelta64_dtype(obj.levels[1].dtype):
        raise ValueError(
            "Level 'end' must contain values of type 'timedelta64[ns]'.")
Exemplo n.º 4
0
 def _maybe_check_integrity(self, concat_index: Index):
     # Optional sanity check: when ``verify_integrity`` was requested,
     # reject a concatenated index that contains duplicate labels.
     if not self.verify_integrity:
         return
     if concat_index.is_unique:
         return
     overlap = concat_index[concat_index.duplicated()].unique()
     raise ValueError(f"Indexes have overlapping values: {overlap!s}")
Exemplo n.º 5
0
def make_index_unique(index: pd.Index, join: str = "-"):
    """
    Makes the index unique by appending a number string to each duplicate index element: '1', '2', etc.

    If a tentative name created by the algorithm already exists in the index, it tries the next integer in the sequence.

    The first occurrence of a non-unique value is ignored.
    Parameters
    ----------
    join
         The connecting string between name and integer.
    Examples
    --------
    >>> from anndata import AnnData
    >>> adata1 = AnnData(np.ones((3, 2)), dict(obs_names=['a', 'b', 'c']))
    >>> adata2 = AnnData(np.zeros((3, 2)), dict(obs_names=['d', 'b', 'b']))
    >>> adata = adata1.concatenate(adata2)
    >>> adata.obs_names
    Index(['a', 'b', 'c', 'd', 'b', 'b'], dtype='object')
    >>> adata.obs_names_make_unique()
    >>> adata.obs_names
    Index(['a', 'b', 'c', 'd', 'b-1', 'b-2'], dtype='object')
    """
    if index.is_unique:
        return index
    from collections import defaultdict

    # Work on a copy: ``index.values`` may be a view of the index's own
    # buffer, and writing through it would mutate the caller's index.
    values = index.values.copy()
    values_set = set(values)
    indices_dup = index.duplicated(keep="first")
    values_dup = values[indices_dup]
    counter = defaultdict(int)
    for i, v in enumerate(values_dup):
        # Keep trying v-1, v-2, ... until the name is not already taken.
        while True:
            counter[v] += 1
            tentative_new_name = v + join + str(counter[v])
            if tentative_new_name not in values_set:
                values_set.add(tentative_new_name)
                values_dup[i] = tentative_new_name
                break

    values[indices_dup] = values_dup
    # Preserve the original index name on the rebuilt index.
    return pd.Index(values, name=index.name)
Exemplo n.º 6
0
def check_no_dupes(idx: pd.Index, name: str) -> bool:
    """Return ``True`` when ``idx`` contains no duplicate labels.

    When duplicates exist, a warning listing every duplicated entry
    (sorted) is emitted and ``False`` is returned.
    """
    has_dupes = idx.duplicated().any()
    if has_dupes:
        warn(f"Duplicated {name}: {idx[idx.duplicated(False)].sort_values()}")
    return not has_dupes
Exemplo n.º 7
0
def index_has_duplicates(index: Index) -> bool:
    """Indicates whether a DataFrame's Index contains any duplicates."""

    duplicate_mask = index.duplicated()
    return duplicate_mask.any()
Exemplo n.º 8
0
 def test_index_has_no_duplicates(self, index: Index):
     """Assert that ``index`` holds no duplicated values."""
     self.assertEqual(
         first=index.duplicated().any(),
         second=False,
         msg='The index contains duplicate values.',
     )