def test_intersect2d_a(self):
    '''
    Check intersect2d and union2d on two 2D string arrays that share two of their three rows, both with the native string dtype and after casting to object.
    '''
    a = np.array([('a', 'b'), ('c', 'd'), ('e', 'f')])
    b = np.array([('a', 'g'), ('c', 'd'), ('e', 'f')])

    expected_intersection = [['c', 'd'], ['e', 'f']]
    expected_union = [['a', 'b'], ['a', 'g'], ['c', 'd'], ['e', 'f']]

    # intersection: native dtype, then object dtype
    self.assertEqual(
            intersect2d(a, b).tolist(),
            expected_intersection)
    self.assertEqual(
            intersect2d(a.astype(object), b.astype(object)).tolist(),
            expected_intersection)

    # union: native dtype, then object dtype
    self.assertEqual(
            union2d(a, b).tolist(),
            expected_union)
    self.assertEqual(
            union2d(a.astype(object), b.astype(object)).tolist(),
            expected_union)
def test_intersect2d(self, arrays: tp.Sequence[np.ndarray]) -> None:
    '''
    Check that util.intersect2d, applied to the first two arrays, returns a 2D result whose length equals the number of items shared by the two inputs, as determined by set intersection over util.array2d_to_tuples.

    Args:
        arrays: a sequence of arrays; only the first two are used.
    '''
    left = arrays[0]
    right = arrays[1]

    post = util.intersect2d(left, right, assume_unique=False)
    # assertEqual (rather than assertTrue on a comparison) reports both values on failure
    self.assertEqual(post.ndim, 2)

    # independent reference result computed with Python sets
    expected = set(util.array2d_to_tuples(left)) & set(util.array2d_to_tuples(right))
    self.assertEqual(len(post), len(expected))
def from_correspondence(cls,
        src_index: 'Index',
        dst_index: 'Index') -> 'IndexCorrespondence':
    '''
    Return an IndexCorrespondence instance from the correspondence of two Index or IndexHierarchy objects.

    The returned instance is built with:
        has_common: True if at least one label is found in both indices.
        is_subset: True if the number of common labels equals the length of ``dst_index``.
        iloc_src, iloc_dst: positions of the matched labels in each index, or None if there are no common labels.
        size: the length of ``dst_index.values``.
    '''
    src_depth = src_index.depth
    dst_depth = dst_index.depth

    # When depths differ, a match is only possible if the depth-1 index has object dtype (so that it can hold tuples); otherwise there can be no matches, which is signalled with a depth of 0.
    mixed_depth = False
    if src_depth == dst_depth:
        depth = src_depth
    elif src_depth == 1 and src_index.values.dtype.kind == 'O':
        depth = dst_depth
        mixed_depth = True
    elif dst_depth == 1 and dst_index.values.dtype.kind == 'O':
        depth = src_depth
        mixed_depth = True
    else:
        depth = 0

    # Lower-level array functions are used to get the intersection, rather than Index methods, as arrays (not Index objects) are needed.
    if depth == 1:
        # NOTE: this can fail in some cases: comparing two object arrays with NaNs and strings.
        common_labels = intersect1d(
                src_index.values,
                dst_index.values,
                assume_unique=True,
                )
        assert not mixed_depth
    elif depth > 1:
        common_labels = intersect2d(
                src_index.values,
                dst_index.values,
                assume_unique=True,
                )
        if mixed_depth:
            # when mixed, the depth-1 index must be given tuples in loc_to_iloc
            common_labels = list(array2d_to_tuples(common_labels))
    else:
        common_labels = ()

    has_common = len(common_labels) > 0
    size = len(dst_index.values)

    if not has_common:
        return cls(has_common=has_common,
                is_subset=False,
                iloc_src=None,
                iloc_dst=None,
                size=size,
                )

    if len(common_labels) == len(dst_index):
        # All destination labels were found: select with the destination values to retain their order.
        values_dst = dst_index.values
        # With a Boolean array, loc_to_iloc will try to do a Boolean selection, which is incorrect; passing a list avoids this.
        key = values_dst.tolist() if values_dst.dtype == DTYPE_BOOL else values_dst
        return cls(has_common=has_common,
                is_subset=True,
                iloc_src=src_index.loc_to_iloc(key),
                iloc_dst=np.arange(size),
                size=size,
                )

    # Only some labels are shared; both selections use the same labels, so they will be equal sized.
    return cls(has_common=has_common,
            is_subset=False,
            iloc_src=src_index.loc_to_iloc(common_labels),
            iloc_dst=dst_index.loc_to_iloc(common_labels),
            size=size,
            )