def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False, precision=3, name=None, include_lowest=False): if name is None and isinstance(x, Series): name = x.name x = np.asarray(x) side = 'left' if right else 'right' ids = bins.searchsorted(x, side=side) if len(algos.unique(bins)) < len(bins): raise Exception('Bin edges must be unique: %s' % repr(bins)) if include_lowest: ids[x == bins[0]] = 1 na_mask = com.isnull(x) | (ids == len(bins)) | (ids == 0) has_nas = na_mask.any() if labels is not False: if labels is None: increases = 0 while True: try: levels = _format_levels(bins, precision, right=right, include_lowest=include_lowest) except ValueError: increases += 1 precision += 1 if increases >= 20: raise else: break else: if len(labels) != len(bins) - 1: raise ValueError('Bin labels must be one fewer than ' 'the number of bin edges') levels = labels levels = np.asarray(levels, dtype=object) np.putmask(ids, na_mask, 0) fac = Categorical(ids - 1, levels, name=name) else: fac = ids - 1 if has_nas: fac = fac.astype(np.float64) np.putmask(fac, na_mask, np.nan) if not retbins: return fac return fac, bins
def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False, precision=3, name=None, include_lowest=False): x_is_series = isinstance(x, Series) series_index = None if x_is_series: series_index = x.index if name is None: name = x.name x = np.asarray(x) side = "left" if right else "right" ids = bins.searchsorted(x, side=side) if len(algos.unique(bins)) < len(bins): raise ValueError("Bin edges must be unique: %s" % repr(bins)) if include_lowest: ids[x == bins[0]] = 1 na_mask = isnull(x) | (ids == len(bins)) | (ids == 0) has_nas = na_mask.any() if labels is not False: if labels is None: increases = 0 while True: try: levels = _format_levels(bins, precision, right=right, include_lowest=include_lowest) except ValueError: increases += 1 precision += 1 if increases >= 20: raise else: break else: if len(labels) != len(bins) - 1: raise ValueError("Bin labels must be one fewer than " "the number of bin edges") levels = labels levels = np.asarray(levels, dtype=object) np.putmask(ids, na_mask, 0) fac = Categorical(ids - 1, levels, ordered=True, fastpath=True) else: fac = ids - 1 if has_nas: fac = fac.astype(np.float64) np.putmask(fac, na_mask, np.nan) if x_is_series: fac = Series(fac, index=series_index, name=name) if not retbins: return fac return fac, bins
def _bins_to_cuts(x, bins, right=True, labels=None, precision=3, include_lowest=False, dtype=None, duplicates='raise'): if duplicates not in ['raise', 'drop']: raise ValueError("invalid value for 'duplicates' parameter, " "valid options are: raise, drop") unique_bins = algos.unique(bins) if len(unique_bins) < len(bins): if duplicates == 'raise': raise ValueError("Bin edges must be unique: {}.\nYou " "can drop duplicate edges by setting " "the 'duplicates' kwarg".format(repr(bins))) else: bins = unique_bins side = 'left' if right else 'right' ids = bins.searchsorted(x, side=side) if include_lowest: ids[x == bins[0]] = 1 na_mask = isnull(x) | (ids == len(bins)) | (ids == 0) has_nas = na_mask.any() if labels is not False: if labels is None: increases = 0 while True: try: levels = _format_levels(bins, precision, right=right, include_lowest=include_lowest, dtype=dtype) except ValueError: increases += 1 precision += 1 if increases >= 20: raise else: break else: if len(labels) != len(bins) - 1: raise ValueError('Bin labels must be one fewer than ' 'the number of bin edges') levels = labels levels = np.asarray(levels, dtype=object) np.putmask(ids, na_mask, 0) fac = Categorical(ids - 1, levels, ordered=True, fastpath=True) else: fac = ids - 1 if has_nas: fac = fac.astype(np.float64) np.putmask(fac, na_mask, np.nan) return fac, bins
def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False, precision=3, name=None, include_lowest=False): if name is None and isinstance(x, Series): name = x.name x = np.asarray(x) side = 'left' if right else 'right' ids = bins.searchsorted(x, side=side) if include_lowest: ids[x == bins[0]] = 1 na_mask = com.isnull(x) | (ids == len(bins)) | (ids == 0) has_nas = na_mask.any() if labels is not False: if labels is None: fmt = lambda v: _format_label(v, precision=precision) if right: levels = [ '(%s, %s]' % (fmt(a), fmt(b)) for a, b in zip(bins, bins[1:]) ] if include_lowest: levels[0] = '[' + levels[0][1:] else: levels = [ '[%s, %s)' % (fmt(a), fmt(b)) for a, b in zip(bins, bins[1:]) ] else: if len(labels) != len(bins) - 1: raise ValueError('Bin labels must be one fewer than ' 'the number of bin edges') levels = labels levels = np.asarray(levels, dtype=object) np.putmask(ids, na_mask, 0) fac = Categorical(ids - 1, levels, name=name) else: fac = ids - 1 if has_nas: fac = fac.astype(np.float64) np.putmask(fac, na_mask, np.nan) if not retbins: return fac return fac, bins
def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False, precision=3, name=None, include_lowest=False): if name is None and isinstance(x, Series): name = x.name x = np.asarray(x) side = 'left' if right else 'right' ids = bins.searchsorted(x, side=side) if include_lowest: ids[x == bins[0]] = 1 na_mask = com.isnull(x) | (ids == len(bins)) | (ids == 0) has_nas = na_mask.any() if labels is not False: if labels is None: increases = 0 while True: try: levels = _format_levels(bins, precision, right=right, include_lowest=include_lowest) except ValueError: increases += 1 precision += 1 if increases >= 20: raise else: break else: if len(labels) != len(bins) - 1: raise ValueError('Bin labels must be one fewer than ' 'the number of bin edges') levels = labels levels = np.asarray(levels, dtype=object) np.putmask(ids, na_mask, 0) fac = Categorical(ids - 1, levels, name=name) else: fac = ids - 1 if has_nas: fac = fac.astype(np.float64) np.putmask(fac, na_mask, np.nan) if not retbins: return fac return fac, bins
def _bins_to_cuts(x, bins, right=True, labels=None, precision=3, include_lowest=False, dtype=None): side = 'left' if right else 'right' ids = bins.searchsorted(x, side=side) if len(algos.unique(bins)) < len(bins): raise ValueError('Bin edges must be unique: %s' % repr(bins)) if include_lowest: ids[x == bins[0]] = 1 na_mask = isnull(x) | (ids == len(bins)) | (ids == 0) has_nas = na_mask.any() if labels is not False: if labels is None: increases = 0 while True: try: levels = _format_levels(bins, precision, right=right, include_lowest=include_lowest, dtype=dtype) except ValueError: increases += 1 precision += 1 if increases >= 20: raise else: break else: if len(labels) != len(bins) - 1: raise ValueError('Bin labels must be one fewer than ' 'the number of bin edges') levels = labels levels = np.asarray(levels, dtype=object) np.putmask(ids, na_mask, 0) fac = Categorical(ids - 1, levels, ordered=True, fastpath=True) else: fac = ids - 1 if has_nas: fac = fac.astype(np.float64) np.putmask(fac, na_mask, np.nan) return fac, bins
def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False, precision=3, name=None, include_lowest=False): if name is None and isinstance(x, Series): name = x.name x = np.asarray(x) side = 'left' if right else 'right' ids = bins.searchsorted(x, side=side) if include_lowest: ids[x == bins[0]] = 1 na_mask = com.isnull(x) | (ids == len(bins)) | (ids == 0) has_nas = na_mask.any() if labels is not False: if labels is None: fmt = lambda v: _format_label(v, precision=precision) if right: levels = ['(%s, %s]' % (fmt(a), fmt(b)) for a, b in zip(bins, bins[1:])] if include_lowest: levels[0] = '[' + levels[0][1:] else: levels = ['[%s, %s)' % (fmt(a), fmt(b)) for a, b in zip(bins, bins[1:])] else: if len(labels) != len(bins) - 1: raise ValueError('Bin labels must be one fewer than ' 'the number of bin edges') levels = labels levels = np.asarray(levels, dtype=object) np.putmask(ids, na_mask, 0) fac = Categorical(ids - 1, levels, name=name) else: fac = ids - 1 if has_nas: fac = fac.astype(np.float64) np.putmask(fac, na_mask, np.nan) if not retbins: return fac return fac, bins
def _bins_to_cuts_new( x, bins, right=True, labels=None, retbins=False, precision=3, name=None, include_lowest=False, ): x_is_series = isinstance(x, Series) series_index = None # Added this line to the original code bins = np.array(sorted(list(set(bins)))) if x_is_series: series_index = x.index if name is None: name = x.name x = np.asarray(x) side = "left" if right else "right" ids = bins.searchsorted(x, side=side) if len(algos.unique(bins)) < len(bins): raise ValueError("Bin edges must be unique: %s" % repr(bins)) if include_lowest: ids[x == bins[0]] = 1 na_mask = com.isnull(x) | (ids == len(bins)) | (ids == 0) has_nas = na_mask.any() if labels is not False: if labels is None: increases = 0 while True: try: levels = _format_levels(bins, precision, right=right, include_lowest=include_lowest) except ValueError: increases += 1 precision += 1 if increases >= 20: raise else: break else: if len(labels) != len(bins) - 1: raise ValueError("Bin labels must be one fewer than " "the number of bin edges") levels = labels levels = np.asarray(levels, dtype=object) np.putmask(ids, na_mask, 0) fac = Categorical(ids - 1, levels, ordered=True, fastpath=True) else: fac = ids - 1 if has_nas: fac = fac.astype(np.float64) np.putmask(fac, na_mask, np.nan) if x_is_series: fac = Series(fac, index=series_index, name=name) if not retbins: return fac return fac, bins