def get_compressed_ids(labels, sizes):
    """Combine per-level group labels into one compressed index.

    Parameters
    ----------
    labels : sequence
        One label container per grouping level.  Sliced with ``labels[:i]``
        and passed to ``get_group_index``.
        (presumably integer ndarrays of equal length -- TODO confirm)
    sizes : sequence of int
        One size per level, aligned with ``labels``; their product decides
        which branch is taken.

    Returns
    -------
    tuple
        ``(comp_index, obs_ids)`` exactly as produced by
        ``_compress_group_index``.

    Notes
    -----
    When the product of ``sizes`` is below ``2**63`` the group index is built
    in one step.  Otherwise the longest leading run of levels whose product
    still fits is compressed on its own and replaced by a single level, and
    this repeats until the remaining product fits.
    """
    # Presumably the signed 64-bit bound for the flat group index.
    limit = 2**63

    # no overflow
    if com._long_prod(sizes) < limit:
        group_index = get_group_index(labels, sizes)
        comp_index, obs_ids = _compress_group_index(group_index)
        return comp_index, obs_ids

    while com._long_prod(sizes) >= limit:
        # Longest prefix of levels whose combined size still fits.
        i = len(sizes)
        while com._long_prod(sizes[:i]) >= limit:
            i -= 1

        # Collapse that prefix into one level: its size becomes the number
        # of ids returned for the prefix, its labels the compressed index.
        rem_index, rem_ids = get_compressed_ids(labels[:i], sizes[:i])
        # list() keeps ``+`` a concatenation when a tuple is passed in.
        sizes = [len(rem_ids)] + list(sizes[i:])
        labels = [rem_index] + list(labels[i:])
        # NOTE(review): if a pass does not shrink the product (e.g. i == 1
        # with an already-compressed first level) this loop would not
        # terminate -- confirm callers cannot reach that state.

    return get_compressed_ids(labels, sizes)
def get_compressed_ids(labels, sizes):
    """Return a compressed group index and ids for multi-level labels.

    Parameters
    ----------
    labels : sequence
        Label containers, one per level; forwarded to ``get_group_index``
        together with ``sizes``.
        (presumably equal-length integer arrays -- TODO confirm)
    sizes : sequence of int
        Size of each level, in the same order as ``labels``.

    Returns
    -------
    tuple
        The ``(comp_index, obs_ids)`` pair returned by
        ``_compress_group_index``.

    Notes
    -----
    If the product of ``sizes`` is at least ``2 ** 63`` the levels are
    reduced first: the longest leading group of levels whose product is
    below the bound is compressed recursively and substituted by a single
    level, until the total product is below the bound.
    """
    # Presumably the int64 bound that the flat group index must stay under.
    max_product = 2 ** 63

    # no overflow
    if com._long_prod(sizes) < max_product:
        group_index = get_group_index(labels, sizes)
        comp_index, obs_ids = _compress_group_index(group_index)
        return comp_index, obs_ids

    while com._long_prod(sizes) >= max_product:
        # Shrink the prefix until its product fits under the bound.
        i = len(sizes)
        while com._long_prod(sizes[:i]) >= max_product:
            i -= 1

        # Replace the first ``i`` levels by one level built from their
        # compressed result.
        rem_index, rem_ids = get_compressed_ids(labels[:i], sizes[:i])
        # list() so that tuple inputs concatenate instead of raising.
        sizes = [len(rem_ids)] + list(sizes[i:])
        labels = [rem_index] + list(labels[i:])
        # NOTE(review): termination relies on each pass reducing the
        # product; verify that inputs where it cannot shrink are impossible.

    return get_compressed_ids(labels, sizes)