def format_array(values, formatter, float_format=None, na_rep='NaN',
                 digits=None, space=None, justify='right'):
    """
    Format ``values`` using the array formatter that matches its dtype.

    Float, integer and datetime64 dtypes each get a dedicated formatter
    class; anything else uses ``GenericArrayFormatter``.  ``space``,
    ``float_format`` and ``digits`` left as None are read from the
    ``print.column_space``, ``print.float_format`` and ``print.precision``
    options.

    Returns
    -------
    The result of the chosen formatter's ``get_result()``.
    """
    dtype = values.dtype
    if com.is_float_dtype(dtype):
        formatter_cls = FloatArrayFormatter
    elif com.is_integer_dtype(dtype):
        formatter_cls = IntArrayFormatter
    elif com.is_datetime64_dtype(dtype):
        formatter_cls = Datetime64Formatter
    else:
        formatter_cls = GenericArrayFormatter

    # Fill unset display settings from the global options.
    space = get_option("print.column_space") if space is None else space
    if float_format is None:
        float_format = get_option("print.float_format")
    digits = get_option("print.precision") if digits is None else digits

    options = dict(na_rep=na_rep, float_format=float_format,
                   formatter=formatter, space=space, justify=justify)
    return formatter_cls(values, digits, **options).get_result()
def __init__(self, frame, buf=None, columns=None, col_space=None,
             header=True, index=True, na_rep='NaN', formatters=None,
             justify=None, float_format=None, sparsify=None,
             index_names=True, line_width=None, **kwds):
    """
    Store the frame and the display settings used to render it.

    ``buf`` defaults to a fresh ``StringIO``; ``formatters`` to an empty
    dict; ``sparsify`` and ``justify`` to the ``print.multi_sparse`` and
    ``print.colheader_justify`` options.  When ``columns`` is given, the
    stored frame is restricted to those columns.
    """
    # Settings stored exactly as passed.
    self.frame = frame
    self.show_index_names = index_names
    self.float_format = float_format
    self.na_rep = na_rep
    self.col_space = col_space
    self.header = header
    self.index = index
    self.line_width = line_width
    self.kwds = kwds

    # Settings with a fallback when the caller passed None.
    self.buf = StringIO() if buf is None else buf
    self.formatters = {} if formatters is None else formatters
    self.sparsify = (get_option("print.multi_sparse")
                     if sparsify is None else sparsify)
    self.justify = (get_option("print.colheader_justify")
                    if justify is None else justify)

    if columns is None:
        self.columns = frame.columns
    else:
        self.columns = _ensure_index(columns)
        self.frame = self.frame[self.columns]
def _format_strings(self):
    """
    Return ``self.values`` rendered as a list of strings.

    Non-null float elements are rendered with the float format
    (``self.float_format``, else the ``print.float_format`` option, else a
    ``'% .<precision>g'`` template built from ``print.precision``).  Every
    other element is rendered through ``self.formatter`` (default
    ``com.pprint_thing``) and prefixed with one space; nulls become
    ``self.na_rep``, except ``None`` which is shown as ``'None'``.
    """
    if self.float_format is None:
        float_format = get_option("print.float_format")
        if float_format is None:
            fmt_str = '%% .%dg' % get_option("print.precision")
            float_format = lambda x: fmt_str % x
    else:
        float_format = self.float_format

    formatter = com.pprint_thing if self.formatter is None else self.formatter

    def _format(x):
        if self.na_rep is not None and lib.checknull(x):
            if x is None:
                return 'None'
            return self.na_rep
        # object dtype
        return '%s' % formatter(x)

    vals = self.values
    is_float = lib.map_infer(vals, com.is_float) & notnull(vals)

    # The previous three-way branch produced ' %s' for non-floats whether
    # or not any float was present, so a single conditional is equivalent.
    # NOTE(review): non-floats always get the leading space, even when the
    # array holds no floats -- confirm that is the intended alignment.
    return [float_format(v) if is_float[i] else ' %s' % _format(v)
            for i, v in enumerate(vals)]
def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):
    """
    Parse a datetime string by delegating to
    ``tslib.parse_datetime_string_with_reso``.

    Parameters
    ----------
    arg : compat.string_types
        Anything that is not a string is returned unchanged.
    freq : str or DateOffset, default None
        A DateOffset is replaced by its ``rule_code`` before delegating.
    dayfirst : bool, default None
        If None, read from the ``display.date_dayfirst`` option.
    yearfirst : bool, default None
        If None, read from the ``display.date_yearfirst`` option.

    Returns
    -------
    ``arg`` itself for non-string input, otherwise whatever
    ``tslib.parse_datetime_string_with_reso`` returns.
    """
    from pandas.core.config import get_option

    if not isinstance(arg, compat.string_types):
        return arg

    from pandas.tseries.offsets import DateOffset

    if isinstance(freq, DateOffset):
        freq = freq.rule_code

    dayfirst = (get_option("display.date_dayfirst")
                if dayfirst is None else dayfirst)
    yearfirst = (get_option("display.date_yearfirst")
                 if yearfirst is None else yearfirst)

    return tslib.parse_datetime_string_with_reso(
        arg, freq=freq, dayfirst=dayfirst, yearfirst=yearfirst)
def pprint_thing(thing, _nest_lvl=0, escape_chars=None):
    """
    Convert ``thing`` to its unicode representation.

    Handles nested sequences and dicts containing unicode strings, which
    a plain ``unicode(object)`` call does not.

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only
        Current nesting depth; dicts and sequences are only expanded
        while it is below the ``display.pprint_nest_depth`` option.
    escape_chars : sequence of characters, optional
        Each character is replaced by its escaped spelling.  Only tab,
        newline and carriage return have an entry in the lookup table.

    Returns
    -------
    result - unicode object on py2, str on py3. Always Unicode.
    """
    if thing is not None and (
            (py3compat.PY3 and hasattr(thing, '__next__')) or
            hasattr(thing, 'next')):
        # Iterators are returned as-is, without escaping.
        return unicode(thing)

    if thing is None:
        result = ''
    elif (isinstance(thing, dict) and
            _nest_lvl < get_option("display.pprint_nest_depth")):
        result = _pprint_dict(thing, _nest_lvl)
    elif (_is_sequence(thing) and
            _nest_lvl < get_option("display.pprint_nest_depth")):
        result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars)
    else:
        # Internally everything should already be unicode text; as a
        # transition aid utf-8 encoded byte strings are accepted too, and
        # undecodable bytes are replaced rather than raising.
        try:
            result = unicode(thing)
        except UnicodeDecodeError:
            result = str(thing).decode('utf-8', "replace")

    translate = {'\t': r'\t',
                 '\n': r'\n',
                 '\r': r'\r',
                 }
    for c in (escape_chars or tuple()):
        result = result.replace(c, translate[c])

    return unicode(result)  # always unicode
def __unicode__(self):
    """
    Return the unicode representation of this categorical.

    A shortened ("tidy") repr is used when there are more labels than the
    row limit; the limit is ``display.max_rows``, the terminal height when
    that option is 0, or 1000 when neither yields a usable value.  An
    empty categorical is rendered as ``Categorical([], <footer>``.
    """
    _, height = get_terminal_size()
    max_rows = get_option("display.max_rows")
    if max_rows == 0:
        max_rows = height

    # ``display.max_rows`` may be None (no limit).  Resolve the fallback
    # once so the same number drives both the threshold and the tidy repr;
    # previously ``min(30, None)`` failed for long categoricals.
    row_limit = max_rows or 1000

    n_labels = len(self.labels)
    if n_labels > row_limit:
        result = self._tidy_repr(min(30, row_limit) - 4)
    elif n_labels > 0:
        result = self._get_repr(length=len(self) > 50, name=True)
    else:
        result = "Categorical([], %s" % self._get_repr(name=True,
                                                       length=False,
                                                       footer=True)
    return result
def in_interactive_session():
    """
    Check whether we are running in an interactive shell.

    Returns True when ``__main__`` has no ``__file__`` attribute;
    otherwise returns the value of the ``mode.sim_interactive`` option.
    """
    import __main__ as main

    if not hasattr(main, '__file__'):
        return True
    return get_option('mode.sim_interactive')
def test_to_string_repr_unicode(self):
    """
    Smoke-test ``to_string``/``repr`` with non-ASCII data.

    Checks that every line of a Series repr (except the ``Dtype:`` line)
    has the same length as the first, and that ``repr(df)`` still works
    while ``sys.stdin`` is None.
    """
    buf = StringIO()

    unicode_values = [u'\u03c3'] * 10
    unicode_values = np.array(unicode_values, dtype=object)
    df = DataFrame({'unicode': unicode_values})
    df.to_string(col_space=10, buf=buf)

    # it works!
    repr(df)

    idx = Index(['abc', u'\u03c3a', 'aegdvg'])
    ser = Series(np.random.randn(len(idx)), idx)
    rs = repr(ser).split('\n')
    line_len = len(rs[0])
    for line in rs[1:]:
        try:
            line = line.decode(get_option("display.encoding"))
        except (AttributeError, UnicodeError):
            # Text without ``.decode`` (py3 str) or bytes that cannot be
            # decoded are measured as they are.  Any other failure is a
            # real error and is no longer swallowed.
            pass
        if not line.startswith('Dtype:'):
            self.assert_(len(line) == line_len)

    # it works even if sys.stdin is None
    _stdin = sys.stdin
    try:
        sys.stdin = None
        repr(df)
    finally:
        sys.stdin = _stdin
def __init__(self, data, precision=None, table_styles=None, uuid=None,
             caption=None, table_attributes=None):
    """
    Set up styling state for a Series or DataFrame.

    A Series is converted to a one-column frame.  ``precision`` defaults
    to the ``display.precision`` option.

    Raises
    ------
    TypeError
        If ``data`` is neither a Series nor a DataFrame.
    ValueError
        If the index or the columns contain duplicates.
    """
    self.ctx = defaultdict(list)
    self._todo = []

    if not isinstance(data, (pd.Series, pd.DataFrame)):
        raise TypeError("``data`` must be a Series or DataFrame")
    if data.ndim == 1:
        data = data.to_frame()
    if not (data.index.is_unique and data.columns.is_unique):
        raise ValueError("style is not supported for non-unique indices.")

    self.data = data
    self.index = data.index
    self.columns = data.columns

    self.uuid = uuid
    self.table_styles = table_styles
    self.caption = caption
    self.precision = (get_option('display.precision')
                      if precision is None else precision)
    self.table_attributes = table_attributes
    self.hidden_index = False
    self.hidden_columns = []

    # display_funcs maps (row, col) -> formatting function
    def default_display_func(x):
        # Floats use the current precision; other values pass through.
        if not is_float(x):
            return x
        return '{:>.{precision}g}'.format(x, precision=self.precision)

    self._display_funcs = defaultdict(lambda: default_display_func)
def _make_fixed_width(strings, justify='right', minimum=None):
    """
    Pad (and, where needed, truncate) ``strings`` to one common width.

    The width is the longest measured string, raised to ``minimum`` if
    given and capped at the ``print.max_colwidth`` option.  Strings longer
    than the width are cut and end in ``'...'`` when the cap exceeds 3.
    ``justify='left'`` pads on the right; any other value pads on the
    left.  An empty input is returned unchanged.
    """
    if len(strings) == 0:
        return strings

    measure = _strlen_func()
    byte_surplus = _encode_diff_func()

    width = np.max([measure(s) for s in strings])
    if minimum is not None:
        width = max(minimum, width)

    cap = get_option("print.max_colwidth")
    if cap is not None and width > cap:
        width = cap

    pad_left_aligned = (justify == 'left')

    def just(x):
        try:
            eff_len = width + byte_surplus(x)
        except UnicodeError:
            eff_len = width

        if cap is not None:
            too_long = measure(x) > width
            if (cap > 3) and too_long:
                x = x[:eff_len - 3] + '...'

        if pad_left_aligned:
            return x.ljust(eff_len)
        return x.rjust(eff_len)

    return [just(s) for s in strings]
def _get_level_lengths(index):
    """
    Given an index, find the level length for each element.

    Result is a dictionary of (level, initial_position): span.  For a
    single-level index every span is 1.  For a multi-level index a label
    spans the sentinel (sparsified) entries that follow it, unless the
    ``display.multi_sparse`` option is off, in which case every span is 1.
    """
    sentinel = com.sentinel_factory()
    levels = index.format(sparsify=sentinel, adjoin=False, names=False)

    if index.nlevels == 1:
        return {(0, i): 1 for i, _ in enumerate(levels)}

    # The option cannot change while we iterate; read it once instead of
    # once per cell.
    sparsify = get_option('display.multi_sparse')

    lengths = {}
    for i, lvl in enumerate(levels):
        for j, row in enumerate(lvl):
            if not sparsify:
                lengths[(i, j)] = 1
            elif row != sentinel:
                last_label = j
                lengths[(i, last_label)] = 1
            else:
                # Sentinel entry: extend the span of the last real label.
                lengths[(i, last_label)] += 1

    return lengths
def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds):
    """
    internal. pprinter for iterables. you should probably use
    pprint_thing() rather than calling this directly.

    Sets are wrapped in braces, objects with ``__setitem__`` in square
    brackets, everything else in parentheses.  At most ``max_seq_items``
    elements are printed (falling back to the ``max_seq_items`` option,
    then to the full length); ``max_seq_items=False`` disables the bound.
    Truncated output ends in ``", ..."``.
    """
    if isinstance(seq, set):
        fmt = u("{{{body}}}")
    elif hasattr(seq, '__setitem__'):
        fmt = u("[{body}]")
    else:
        fmt = u("({body})")

    if max_seq_items is False:
        nitems = len(seq)
    else:
        nitems = max_seq_items or get_option("max_seq_items") or len(seq)

    # Pull items through an iterator: sets cannot be sliced.
    source = iter(seq)
    rendered = [pprint_thing(next(source), _nest_lvl + 1,
                             max_seq_items=max_seq_items, **kwds)
                for _ in range(min(nitems, len(seq)))]
    body = ", ".join(rendered)

    if nitems < len(seq):
        body += ", ..."
    elif isinstance(seq, tuple) and len(seq) == 1:
        # one-element tuples keep their trailing comma
        body += ','

    return fmt.format(body=body)
def mpl_style_cb(key):
    """
    Option callback that applies or restores the matplotlib stylesheet.

    Emits the deprecation FutureWarning, then reads the option.  With the
    value ``"default"`` the current rcParams for the stylesheet keys are
    saved in the module-level ``style_backup`` and the stylesheet is
    applied; with a falsy value a saved backup (if any) is restored.

    Returns the option value.  Raises ``Exception`` when the option is
    truthy but matplotlib has not been imported.
    """
    warnings.warn(pc_mpl_style_deprecation_warning, FutureWarning,
                  stacklevel=5)

    import sys
    from pandas.tools.plotting import mpl_stylesheet
    global style_backup

    val = cf.get_option(key)

    if "matplotlib" not in sys.modules:
        if val:
            raise Exception("matplotlib has not been imported. aborting")
        # starting up, we get reset to None
        return val

    import matplotlib.pyplot as plt

    if val == "default":
        style_backup = {k: plt.rcParams[k] for k in mpl_stylesheet}
        plt.rcParams.update(mpl_stylesheet)
    elif not val:
        if style_backup:
            plt.rcParams.update(style_backup)

    return val
def _write_table(self, indent=0):
    """
    Write the ``<table>`` element: opening tag, header, body, closing tag.

    The class list starts with ``dataframe``; ``tex2jax_ignore`` is added
    when the ``display.html.use_mathjax`` option is off, followed by
    ``self.classes``.  A string ``self.classes`` is split on whitespace
    and stored back on the instance.

    Raises
    ------
    AssertionError
        If ``self.classes`` is not None, a str, a list or a tuple.
    """
    css_classes = ['dataframe']  # Default class.
    if not get_option("display.html.use_mathjax"):
        css_classes.append('tex2jax_ignore')

    extra = self.classes
    if extra is not None:
        if isinstance(extra, str):
            extra = self.classes = extra.split()
        if not isinstance(extra, (list, tuple)):
            raise AssertionError('classes must be list or tuple, not {typ}'
                                 .format(typ=type(extra)))
        css_classes.extend(extra)

    id_section = ("" if self.table_id is None
                  else ' id="{table_id}"'.format(table_id=self.table_id))

    opening_tag = ('<table border="{border}" class="{cls}"{id_section}>'
                   .format(border=self.border, cls=' '.join(css_classes),
                           id_section=id_section))
    self.write(opening_tag, indent)

    inner_indent = indent + self.indent_delta
    if self.fmt.header or self.show_row_idx_names:
        self._write_header(inner_indent)
    self._write_body(inner_indent)

    self.write('</table>', indent)
def _use_inf_as_na(key):
    """Option change callback for na/inf behaviour

    Choose which replacement for numpy.isnan / -numpy.isfinite is used.

    Parameters
    ----------
    key : str
        Name of the option to read.  A truthy option value means treat
        None, NaN, INF, -INF as null (old way); a falsy one means None and
        NaN are null, but INF, -INF are not null (new way).

    Notes
    -----
    This approach to setting global module values is discussed and
    approved here:

    * http://stackoverflow.com/questions/4859217/
      programmatically-creating-variables-in-python/4859312#4859312
    """
    from pandas.core.config import get_option

    checker = _isna_old if get_option(key) else _isna_new
    globals()['_isna'] = checker
def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds):
    """
    internal. pprinter for dicts. you should probably use pprint_thing()
    rather than calling this directly.

    Renders ``{key: val, ...}``, printing at most ``max_seq_items`` pairs
    (falling back to the ``max_seq_items`` option, then to the full
    length); ``max_seq_items=False`` disables the bound.  Truncated
    output ends in ``", ..."``.
    """
    fmt = u("{{{things}}}")
    pfmt = u("{key}: {val}")

    if max_seq_items is False:
        nitems = len(seq)
    else:
        nitems = max_seq_items or get_option("max_seq_items") or len(seq)

    def render(obj):
        # Keys and values are printed one nesting level deeper.
        return pprint_thing(obj, _nest_lvl + 1,
                            max_seq_items=max_seq_items, **kwds)

    pairs = [pfmt.format(key=render(k), val=render(v))
             for k, v in list(seq.items())[:nitems]]

    body = ", ".join(pairs)
    if nitems < len(seq):
        body = body + ", ..."
    return fmt.format(things=body)
def _format_data(self, name=None):
    """
    Return a bracketed, possibly abbreviated, summary of the elements.

    Up to two elements are always shown in full.  Longer sequences are
    shown in full unless their length exceeds a limit derived from the
    ``display.max_seq_items`` option (one tenth of it, at most 10); then
    only a head and a tail are shown, joined by ``' ... '``.

    Parameters
    ----------
    name : unused
        Present only for compatibility with base / categorical.

    Returns
    -------
    str
        The summary followed by ``self._format_space()``.
    """
    # TODO: integrate with categorical and make generic
    n = len(self)
    max_seq_items = min((get_option(
        'display.max_seq_items') or n) // 10, 10)

    formatter = str

    if n == 0:
        summary = '[]'
    elif n == 1:
        summary = '[{}]'.format(formatter(self[0]))
    elif n == 2:
        summary = '[{}, {}]'.format(formatter(self[0]),
                                    formatter(self[-1]))
    elif n > max_seq_items:
        # Show at least one element per side.  With a count of 0 the tail
        # slice ``self[-0:]`` is the *whole* sequence, so a small limit
        # used to print every element after the ellipsis.
        n_show = max(1, min(max_seq_items // 2, 10))
        head = [formatter(x) for x in self[:n_show]]
        tail = [formatter(x) for x in self[-n_show:]]
        summary = '[{} ... {}]'.format(', '.join(head), ', '.join(tail))
    else:
        summary = '[{}]'.format(', '.join([formatter(x) for x in self]))

    return summary + self._format_space()
def register_converter_cb(key):
    """
    Option callback: register the matplotlib converters when the option
    named ``key`` is truthy, deregister them otherwise.
    """
    from pandas.plotting import register_matplotlib_converters
    from pandas.plotting import deregister_matplotlib_converters

    action = (register_matplotlib_converters if cf.get_option(key)
              else deregister_matplotlib_converters)
    action()
def console_encode(object):
    """
    Prepare ``object`` for output *to the console*.

    Delegates to ``pprint_thing_encoded`` using the encoding stored in the
    ``print.encoding`` option.  Use this everywhere you write to the
    console.
    """
    console_encoding = get_option("print.encoding")
    return pprint_thing_encoded(object, console_encoding)
def _encode_diff_func():
    """
    Return a function giving ``len(x) - len(x.decode(encoding))``.

    On Python 3 the returned function always yields 0; otherwise the
    encoding is read once from the ``print.encoding`` option.
    """
    if py3compat.PY3:  # pragma: no cover
        return lambda x: 0

    encoding = get_option("print.encoding")

    def _encode_diff(x):
        decoded = x.decode(encoding)
        return len(x) - len(decoded)

    return _encode_diff
def _format_attrs(self):
    """
    Return a list of tuples of the (attr, formatted_value).

    Always includes ``categories``, ``ordered`` and ``dtype``; ``name`` is
    added when set, and ``length`` when the object is longer than the
    ``display.max_seq_items`` option.
    """
    max_categories = get_option("display.max_categories")
    if max_categories == 0:
        max_categories = 10

    categories_repr = ibase.default_pprint(self.categories,
                                           max_seq_items=max_categories)
    attrs = [('categories', categories_repr),
             ('ordered', self.ordered)]

    if self.name is not None:
        attrs.append(('name', ibase.default_pprint(self.name)))

    attrs.append(('dtype', "'%s'" % self.dtype.name))

    # A falsy option value means "no limit": compare against own length.
    limit = get_option('display.max_seq_items') or len(self)
    if len(self) > limit:
        attrs.append(('length', len(self)))

    return attrs
def __unicode__(self):
    """
    Unicode representation.

    A shortened ("tidy") repr is used when there are more codes than the
    row limit; the limit is ``display.max_rows``, the terminal height when
    that option is 0, or 1000 when neither yields a usable value.  An
    empty categorical is rendered on one line as
    ``Categorical([], <footer>``.
    """
    _, height = get_terminal_size()
    max_rows = get_option("display.max_rows")
    if max_rows == 0:
        max_rows = height

    # ``display.max_rows`` may be None (no limit).  Resolve the fallback
    # once so the same number drives both the threshold and the tidy repr;
    # previously ``min(30, None)`` failed for long categoricals.
    row_limit = max_rows or 1000

    n_codes = len(self._codes)
    if n_codes > row_limit:
        result = self._tidy_repr(min(30, row_limit) - 4)
    elif n_codes > 0:
        result = self._get_repr(length=len(self) > 50, name=True)
    else:
        footer = self._get_repr(name=True, length=False, footer=True)
        result = 'Categorical([], %s' % footer.replace("\n", ", ")

    return result
def __bytes__(self):
    """
    Return a string representation for a particular object.

    Invoked by bytes(obj) in py3 only.
    Yields a bytestring in both py2/py3: the unicode repr encoded with the
    ``display.encoding`` option, replacing unencodable characters.
    """
    from pandas.core.config import get_option

    target_encoding = get_option("display.encoding")
    text = self.__unicode__()
    return text.encode(target_encoding, 'replace')
def _val(x, threshold):
    """
    Format one value with ``fmt_str``, chopping small magnitudes.

    Nulls become ``self.na_rep``.  Values whose absolute value does not
    exceed ``threshold`` are rendered as zero (``"0"`` when the format
    ends in ``"e"``); ``threshold=None`` disables chopping.
    """
    if not notnull(x):
        return self.na_rep

    # Compare against the ``threshold`` argument.  The previous code
    # checked it for None but then re-read ``display.chop_threshold`` from
    # the options for every value, ignoring the argument it was given.
    if threshold is None or abs(x) > threshold:
        return fmt_str % x

    if fmt_str.endswith("e"):  # engineering format
        return "0"
    return fmt_str % 0
def __init__(self, series, buf=None, header=True, length=True,
             na_rep='NaN', name=False, float_format=None):
    """
    Store the series and the settings used to render it.

    ``buf`` defaults to an empty unicode ``StringIO``; ``float_format``
    defaults to the ``print.float_format`` option.
    """
    self.series = series
    self.buf = StringIO(u"") if buf is None else buf
    self.name = name
    self.na_rep = na_rep
    self.length = length
    self.header = header
    self.float_format = (get_option("print.float_format")
                         if float_format is None else float_format)
def _strlen_func():
    """
    Return a function that measures the displayed length of a string.

    On Python 3 this is plain ``len``.  Otherwise the string is decoded
    with the ``print.encoding`` option first, falling back to the raw
    length when decoding raises ``UnicodeError``.
    """
    if py3compat.PY3:  # pragma: no cover
        return len

    encoding = get_option("print.encoding")

    def _strlen(x):
        try:
            decoded = x.decode(encoding)
        except UnicodeError:
            return len(x)
        return len(decoded)

    return _strlen
def write_result(self, buf):
    """
    Render the frame as an HTML table and hand the lines to ``buf``.

    Output is accumulated through ``self.write`` and finally passed, as
    ``self.elements``, to ``buffer_put_lines``.  In notebook mode the
    table is wrapped in a ``<div>``; when ``self.should_show_dimensions``
    is set, a ``<p>`` with the row and column counts follows the table.

    Raises
    ------
    AssertionError
        If ``self.classes`` is not None, a str, a list or a tuple.
    """
    indent = 0
    id_section = ""
    frame = self.frame

    _classes = ['dataframe']  # Default class.
    use_mathjax = get_option("display.html.use_mathjax")
    if not use_mathjax:
        # extra class added only when the mathjax option is off
        _classes.append('tex2jax_ignore')
    if self.classes is not None:
        if isinstance(self.classes, str):
            # a string is split on whitespace and stored back on self
            self.classes = self.classes.split()
        if not isinstance(self.classes, (list, tuple)):
            raise AssertionError('classes must be list or tuple, not {typ}'
                                 .format(typ=type(self.classes)))
        _classes.extend(self.classes)

    if self.notebook:
        div_style = ''
        try:
            import IPython
            # inline style is only emitted for IPython versions below 3.0.0
            if IPython.__version__ < LooseVersion('3.0.0'):
                div_style = ' style="max-width:1500px;overflow:auto;"'
        except (ImportError, AttributeError):
            # IPython missing or without __version__: keep the plain div
            pass

        self.write('<div{style}>'.format(style=div_style))

    self.write_style()

    if self.table_id is not None:
        id_section = ' id="{table_id}"'.format(table_id=self.table_id)
    self.write('<table border="{border}" class="{cls}"{id_section}>'
               .format(border=self.border, cls=' '.join(_classes),
                       id_section=id_section), indent)

    # header and body writers each return the indent to continue with
    indent += self.indent_delta
    indent = self._write_header(indent)
    indent = self._write_body(indent)

    self.write('</table>', indent)
    if self.should_show_dimensions:
        by = chr(215) if compat.PY3 else unichr(215)  # multiplication sign
        self.write(u('<p>{rows} rows {by} {cols} columns</p>')
                   .format(rows=len(frame),
                           by=by,
                           cols=len(frame.columns)))

    if self.notebook:
        self.write('</div>')

    buffer_put_lines(buf, self.elements)
def test_repr_binary_type():
    """
    ``printing.pprint_thing`` returns ``repr(text)`` when quoting strings
    and the text itself otherwise, for text round-tripped through the
    binary type.
    """
    import string

    letters = string.ascii_letters
    make_bytes = compat.binary_type
    try:
        raw = make_bytes(letters,
                         encoding=cf.get_option('display.encoding'))
    except TypeError:
        # binary types that take no ``encoding`` keyword
        raw = make_bytes(letters)

    text = compat.text_type(compat.bytes_to_str(raw))

    quoted = printing.pprint_thing(text, quote_strings=True)
    assert quoted == repr(text)

    unquoted = printing.pprint_thing(text, quote_strings=False)
    assert unquoted == text
def __new__(cls, path, engine=None, **kwargs):
    """
    Create a writer instance, switching to an engine-specific class when
    called on the generic ``ExcelWriter``.

    With no ``engine`` given, it is looked up from the
    ``io.excel.<ext>.writer`` option, where ``<ext>`` is the extension of
    ``path``.

    Raises
    ------
    ValueError
        If no such option exists for the extension.
    """
    # only switch class if generic(ExcelWriter)
    if cls == ExcelWriter:
        if engine is None:
            _, suffix = os.path.splitext(path)
            ext = suffix[1:]
            try:
                engine = config.get_option('io.excel.%s.writer' % ext)
            except KeyError:
                raise ValueError("No engine for filetype: '%s'" % ext)
        cls = get_writer(engine)

    return object.__new__(cls)
def test_repr_binary_type():
    """
    ``com.pprint_thing`` returns ``repr(text)`` when quoting strings and
    the text itself otherwise, for text round-tripped through the binary
    type.
    """
    import string

    letters = string.ascii_letters
    make_bytes = compat.binary_type
    try:
        raw = make_bytes(letters,
                         encoding=cf.get_option('display.encoding'))
    except TypeError:
        # binary types that take no ``encoding`` keyword
        raw = make_bytes(letters)

    text = compat.text_type(compat.bytes_to_str(raw))

    quoted = com.pprint_thing(text, quote_strings=True)
    assert_equal(quoted, repr(text))

    unquoted = com.pprint_thing(text, quote_strings=False)
    assert_equal(unquoted, text)
def _strlen_func():
    """
    Return a function that measures the displayed length of a string.

    On Python 3 this is plain ``len``.  Otherwise the string is decoded
    with the ``display.encoding`` option first, falling back to the raw
    length when decoding raises ``UnicodeError``.
    """
    if py3compat.PY3:  # pragma: no cover
        return len

    encoding = get_option("display.encoding")

    def _strlen(x):
        try:
            decoded = x.decode(encoding)
        except UnicodeError:
            return len(x)
        return len(decoded)

    return _strlen
def table_schema_cb(key):
    """
    Option callback that attaches or removes ``NDFrame._ipython_display_``.

    Having ``_ipython_display_`` defined messes with the return value from
    cells, so the Out[x] dictionary breaks.  Table schema is currently the
    only user, so the method is monkey patched onto NDFrame only while the
    option is set.

    See https://github.com/pandas-dev/pandas/issues/16168
    """
    from pandas.core.generic import NDFrame, _ipython_display_

    if cf.get_option(key):
        NDFrame._ipython_display_ = _ipython_display_
        return

    if getattr(NDFrame, '_ipython_display_', None):
        del NDFrame._ipython_display_
def test_to_string_float_formatting(self):
    """
    Check ``DataFrame.to_string`` float output under explicit print
    options (precision 6, column_space 12) and again after the options
    have been reset.
    """
    # NOTE(review): the expected strings below contain only single spaces;
    # confirm the column padding against real ``to_string`` output.
    fmt.reset_printoptions()
    fmt.set_printoptions(precision=6, column_space=12,
                         notebook_repr_html=False)

    df = DataFrame({
        'x': [
            0, 0.25, 3456.000, 12e+45, 1.64e+6, 1.7e+8, 1.253456, np.pi, -1e6
        ]
    })

    df_s = df.to_string()

    # Python 2.5 just wants me to be sad. And debian 32-bit
    # sys.version_info[0] == 2 and sys.version_info[1] < 6:
    # Two expectations: platforms printing three exponent digits vs two.
    if _three_digit_exp():
        expected = (' x\n0 0.00000e+000\n1 2.50000e-001\n'
                    '2 3.45600e+003\n3 1.20000e+046\n4 1.64000e+006\n'
                    '5 1.70000e+008\n6 1.25346e+000\n7 3.14159e+000\n'
                    '8 -1.00000e+006')
    else:
        expected = (' x\n0 0.00000e+00\n1 2.50000e-01\n'
                    '2 3.45600e+03\n3 1.20000e+46\n4 1.64000e+06\n'
                    '5 1.70000e+08\n6 1.25346e+00\n7 3.14159e+00\n'
                    '8 -1.00000e+06')
    assert (df_s == expected)

    # Values of moderate magnitude are printed in fixed notation.
    df = DataFrame({'x': [3234, 0.253]})
    df_s = df.to_string()

    expected = (' x\n' '0 3234.000\n' '1 0.253')
    assert (df_s == expected)

    # After a reset the precision option is expected to be 7.
    fmt.reset_printoptions()
    self.assertEqual(get_option("display.precision"), 7)

    df = DataFrame({'x': [1e9, 0.2512]})
    df_s = df.to_string()
    # Python 2.5 just wants me to be sad. And debian 32-bit
    # sys.version_info[0] == 2 and sys.version_info[1] < 6:
    if _three_digit_exp():
        expected = (' x\n'
                    '0 1.000000e+009\n'
                    '1 2.512000e-001')
    else:
        expected = (' x\n'
                    '0 1.000000e+09\n'
                    '1 2.512000e-01')
    assert (df_s == expected)
def _repr_categories_info(self):
    """
    Return a string representation of the footer.

    The footer reads ``Categories (<count>, <dtype>): [<items>]``.  When
    there are more categories than the ``display.max_categories`` option
    (10 if that option is 0), only a head and a tail are listed around
    ``...``.  Items are joined by ``" < "`` for ordered categoricals and
    ``", "`` otherwise, and wrapped onto continuation lines according to
    ``display.width`` (terminal width if that option is 0).
    """

    max_categories = (10 if get_option("display.max_categories") == 0
                      else get_option("display.max_categories"))
    category_strs = fmt.format_array(self.categories.get_values(), None)
    if len(category_strs) > max_categories:
        # keep the first half and the remaining count from the end
        num = max_categories // 2
        head = category_strs[:num]
        tail = category_strs[-(max_categories - num):]
        category_strs = head + ["..."] + tail
    # Strip all leading spaces, which format_array adds for columns...
    category_strs = [x.strip() for x in category_strs]
    levheader = "Categories (%d, %s): " % (len(self.categories),
                                           self.categories.dtype)
    width, height = get_terminal_size()
    max_width = (width if get_option("display.width") == 0
                 else get_option("display.width"))
    if com.in_ipython_frontend():
        # 0 = no breaks
        max_width = 0
    levstring = ""
    start = True
    cur_col_len = len(levheader)
    sep_len, sep = (3, " < ") if self.ordered else (2, ", ")
    for val in category_strs:
        if max_width != 0 and cur_col_len + sep_len + len(val) > max_width:
            # wrap: continuation lines are indented to the header width
            levstring += "\n" + (" "* len(levheader))
            cur_col_len = len(levheader)
        if not start:
            levstring += sep
            # NOTE(review): the running width grows by len(val) only (not
            # sep_len) and only for non-first items -- confirm intended.
            cur_col_len += len(val)
        levstring += val
        start = False
    # shorten the separator around the ellipsis to save space
    return levheader + "["+levstring.replace(" < ... < ", " ... ")+"]"
def __init__(self, formatter, classes=None, border=None):
    """
    Capture the rendering settings from a frame formatter.

    ``border`` defaults to the ``display.html.border`` option.
    ``bold_rows`` (default False) and ``escape`` (default True) are read
    from the formatter's extra keyword arguments.
    """
    self.fmt = formatter
    self.classes = classes

    source = self.fmt
    self.frame = source.frame
    self.columns = source.tr_frame.columns
    self.elements = []

    extra = source.kwds
    self.bold_rows = extra.get('bold_rows', False)
    self.escape = extra.get('escape', True)
    self.show_dimensions = source.show_dimensions

    self.border = (get_option('display.html.border')
                   if border is None else border)
    self.table_id = source.table_id
    self.render_links = source.render_links
def _format_strings(self):
    """
    Return ``self.values`` rendered as a list of strings.

    Non-null float elements are rendered with the float format
    (``self.float_format``, else the ``display.float_format`` option, else
    a ``'% .<precision>g'`` template built from ``display.precision``).
    Every other element is rendered through ``self.formatter`` (default:
    ``com.pprint_thing`` with tab, CR and newline escaped) and prefixed
    with one space; nulls become ``self.na_rep``, except ``None`` which is
    shown as ``'None'``.
    """
    if self.float_format is None:
        float_format = get_option("display.float_format")
        if float_format is None:
            fmt_str = '%% .%dg' % get_option("display.precision")
            float_format = lambda x: fmt_str % x
    else:
        float_format = self.float_format

    formatter = (
        (lambda x: com.pprint_thing(x, escape_chars=('\t', '\r', '\n')))
        if self.formatter is None else self.formatter)

    def _format(x):
        if self.na_rep is not None and lib.checknull(x):
            if x is None:
                return 'None'
            return self.na_rep
        # object dtype
        return '%s' % formatter(x)

    vals = self.values
    is_float = lib.map_infer(vals, com.is_float) & notnull(vals)

    # The previous three-way branch produced ' %s' for non-floats whether
    # or not any float was present, so a single conditional is equivalent.
    # NOTE(review): non-floats always get the leading space, even when the
    # array holds no floats -- confirm that is the intended alignment.
    return [float_format(v) if is_float[i] else ' %s' % _format(v)
            for i, v in enumerate(vals)]
def __call__(cls, path, **kwargs):
    """
    Build and initialise a writer, dispatching on engine.

    ``engine`` is popped from ``kwargs``.  If ``cls`` has no string
    ``engine`` attribute, the concrete class comes from ``get_writer``;
    with no engine given, it is looked up from the
    ``io.excel.<ext>.writer`` option for the extension of ``path``.

    Raises
    ------
    ValueError
        If no such option exists for the extension.
    """
    engine = kwargs.pop('engine', None)

    # if it's not an ExcelWriter baseclass, don't do anything (you've
    # probably made an explicit choice here)
    declared_engine = getattr(cls, 'engine', None)
    if not isinstance(declared_engine, compat.string_types):
        if engine is None:
            _, suffix = os.path.splitext(path)
            ext = suffix[1:]
            try:
                engine = config.get_option('io.excel.%s.writer' % ext)
            except KeyError:
                raise ValueError("No engine for filetype: '%s'" % ext)
        cls = get_writer(engine)

    writer = cls.__new__(cls, path, **kwargs)
    writer.__init__(path, **kwargs)
    return writer
def __init__(self, series, buf=None, header=True, length=True,
             na_rep='NaN', name=False, float_format=None):
    """
    Store the series and the settings used to render it.

    ``buf`` defaults to an empty unicode ``StringIO``; ``float_format``
    defaults to the ``print_config.float_format`` option.
    """
    self.series = series
    self.buf = StringIO(u"") if buf is None else buf
    self.name = name
    self.na_rep = na_rep
    self.length = length
    self.header = header
    self.float_format = (get_option("print_config.float_format")
                         if float_format is None else float_format)
def _get_level_lengths(index, hidden_elements=None):
    """
    Given an index, find the level length for each element.

    Optional argument is a list of index positions which should not be
    visible.

    Result is a dictionary of (level, initial_position): span.  Entries
    whose span ends up below 1 (labels with no visible element) are
    dropped.
    """
    sentinel = com.sentinel_factory()
    levels = index.format(sparsify=sentinel, adjoin=False, names=False)

    if hidden_elements is None:
        hidden_elements = []

    lengths = {}
    if index.nlevels == 1:
        for i, value in enumerate(levels):
            if i not in hidden_elements:
                lengths[(0, i)] = 1
        return lengths

    # The option cannot change while we iterate; read it once instead of
    # once per cell.
    sparsify = get_option('display.multi_sparse')

    for i, lvl in enumerate(levels):
        for j, row in enumerate(lvl):
            if not sparsify:
                lengths[(i, j)] = 1
            elif (row != sentinel) and (j not in hidden_elements):
                last_label = j
                lengths[(i, last_label)] = 1
            elif row != sentinel:
                # even if it's hidden, keep track of it in case
                # length > 1 and later elements are visible
                last_label = j
                lengths[(i, last_label)] = 0
            elif j not in hidden_elements:
                lengths[(i, last_label)] += 1

    non_zero_lengths = {
        element: length for element, length in lengths.items()
        if length >= 1}

    return non_zero_lengths
def __init__(self, data, precision=None, table_styles=None, uuid=None,
             caption=None, table_attributes=None, cell_ids=True):
    """
    Set up styling state for a Series or DataFrame.

    A Series is converted to a one-column frame.  ``precision`` defaults
    to the ``display.precision`` option; ``cell_ids`` is stored as given.

    Raises
    ------
    TypeError
        If ``data`` is neither a Series nor a DataFrame.
    ValueError
        If the index or the columns contain duplicates.
    """
    self.ctx = defaultdict(list)
    self._todo = []

    if not isinstance(data, (pd.Series, pd.DataFrame)):
        raise TypeError("``data`` must be a Series or DataFrame")
    if data.ndim == 1:
        data = data.to_frame()
    if not (data.index.is_unique and data.columns.is_unique):
        raise ValueError("style is not supported for non-unique indices.")

    self.data = data
    self.index = data.index
    self.columns = data.columns

    self.uuid = uuid
    self.table_styles = table_styles
    self.caption = caption
    self.precision = (get_option('display.precision')
                      if precision is None else precision)
    self.table_attributes = table_attributes
    self.hidden_index = False
    self.hidden_columns = []
    self.cell_ids = cell_ids

    # display_funcs maps (row, col) -> formatting function
    def default_display_func(x):
        # Floats use the current precision; other values pass through.
        if not is_float(x):
            return x
        return '{:>.{precision}g}'.format(x, precision=self.precision)

    self._display_funcs = defaultdict(lambda: default_display_func)
def __init__(self, formatter, classes=None, max_rows=None, max_cols=None,
             notebook=False, border=None, table_id=None):
    """
    Capture the rendering settings from a frame formatter.

    Falsy ``max_rows`` / ``max_cols`` default to the full frame length and
    column count; ``is_truncated`` records whether either limit is below
    that size.  ``border`` defaults to the ``display.html.border`` option.
    """
    self.fmt = formatter
    self.classes = classes

    source = self.fmt
    self.frame = source.frame
    self.columns = source.tr_frame.columns
    self.elements = []

    extra = source.kwds
    self.bold_rows = extra.get('bold_rows', False)
    self.escape = extra.get('escape', True)

    self.max_rows = max_rows or len(source.frame)
    self.max_cols = max_cols or len(source.columns)
    self.show_dimensions = source.show_dimensions
    self.is_truncated = (self.max_rows < len(source.frame) or
                         self.max_cols < len(source.columns))
    self.notebook = notebook

    self.border = (get_option('display.html.border')
                   if border is None else border)
    self.table_id = table_id
def mpl_style_cb(key):
    """
    Option callback that applies or restores the matplotlib stylesheet.

    With the value ``'default'`` the current rcParams for the stylesheet
    keys are saved in the module-level ``style_backup`` and the stylesheet
    is applied; with a falsy value a saved backup (if any) is restored.

    Returns the option value.  Raises ``Exception`` when the option is
    truthy but matplotlib has not been imported.
    """
    import sys
    from pandas.tools.plotting import mpl_stylesheet
    global style_backup

    val = cf.get_option(key)

    if 'matplotlib' not in sys.modules:
        if val:
            raise Exception("matplotlib has not been imported. aborting")
        # starting up, we get reset to None
        return val

    import matplotlib.pyplot as plt

    if val == 'default':
        style_backup = {k: plt.rcParams[k] for k in mpl_stylesheet}
        plt.rcParams.update(mpl_stylesheet)
    elif not val:
        if style_backup:
            plt.rcParams.update(style_backup)

    return val
def __new__(cls, path, engine=None, **kwargs):
    """
    Create a writer instance of the engine-specific class.

    When ``engine`` is None or ``'auto'`` it is looked up from the
    ``io.excel.<ext>.writer`` option, where ``<ext>`` is the extension of
    a string ``path`` (``'xlsx'`` for any other kind of path).  An option
    value of ``'auto'`` is resolved through ``_get_default_writer``.

    Raises
    ------
    ValueError
        If the lookup raises ``KeyError`` for the extension.
    """
    # only switch class if generic(ExcelWriter)
    if issubclass(cls, ExcelWriter):
        wants_auto = isinstance(engine, string_types) and engine == 'auto'
        if engine is None or wants_auto:
            if isinstance(path, string_types):
                _, suffix = os.path.splitext(path)
                ext = suffix[1:]
            else:
                ext = 'xlsx'

            try:
                engine = config.get_option(
                    'io.excel.{ext}.writer'.format(ext=ext))
                if engine == 'auto':
                    engine = _get_default_writer(ext)
            except KeyError:
                raise ValueError(
                    "No engine for filetype: '{ext}'".format(ext=ext))
        cls = get_writer(engine)

    return object.__new__(cls)
def test_register_writer(self):
    """
    Check that a registered writer class is used for dispatch.

    A dummy ``ExcelWriter`` subclass records calls to ``save`` and
    ``write_cells``; the test verifies it is chosen by file extension, by
    the ``io.excel.xlsx.writer`` option and by an explicit ``engine``.
    """
    # some awkward mocking to test out dispatch and such actually works
    called_save = []
    called_write_cells = []

    class DummyClass(ExcelWriter):
        called_save = False
        called_write_cells = False
        supported_extensions = ['test', 'xlsx', 'xls']
        engine = 'dummy'

        def save(self):
            called_save.append(True)

        def write_cells(self, *args, **kwargs):
            called_write_cells.append(True)

    def check_called(func):
        # run ``func``, require both hooks to have fired, then reset
        func()
        self.assert_(len(called_save) >= 1)
        self.assert_(len(called_write_cells) >= 1)
        del called_save[:]
        del called_write_cells[:]

    register_writer(DummyClass)
    writer = ExcelWriter('something.test')
    tm.assert_isinstance(writer, DummyClass)
    df = tm.makeCustomDataframe(1, 1)
    panel = tm.makePanel()
    func = lambda: df.to_excel('something.test')
    check_called(func)
    check_called(lambda: panel.to_excel('something.test'))

    from pandas import set_option, get_option
    val = get_option('io.excel.xlsx.writer')
    set_option('io.excel.xlsx.writer', 'dummy')
    try:
        check_called(lambda: df.to_excel('something.xlsx'))
        check_called(lambda: df.to_excel('something.xls', engine='dummy'))
    finally:
        # Restore the global option even when an assertion above fails,
        # so the dummy engine cannot leak into other tests.
        set_option('io.excel.xlsx.writer', val)
def format_object_attrs(obj):
    """
    Return a list of tuples of the (attr, formatted_value)
    for common attrs, including dtype, name, length

    Parameters
    ----------
    obj : object
        must support ``len``

    Returns
    -------
    list
        ``dtype`` is included when the attribute exists, ``name`` when it
        is not None, and ``length`` when the object is longer than the
        ``display.max_seq_items`` option.
    """
    attrs = []

    if hasattr(obj, 'dtype'):
        attrs.append(('dtype', "'{}'".format(obj.dtype)))

    if getattr(obj, 'name', None) is not None:
        attrs.append(('name', default_pprint(obj.name)))

    # A falsy option value means "no limit": compare against own length.
    limit = get_option('display.max_seq_items') or len(obj)
    if len(obj) > limit:
        attrs.append(('length', len(obj)))

    return attrs
def _use_inf_as_null(key):
    '''Option change callback for null/inf behaviour

    Choose which replacement for numpy.isnan / -numpy.isfinite is used.

    Parameters
    ----------
    key : str
        Name of the option to read.  A truthy option value means treat
        None, NaN, INF, -INF as null (old way); a falsy one means None and
        NaN are null, but INF, -INF are not null (new way).

    Notes
    -----
    This approach to setting global module values is discussed and
    approved here:

    * http://stackoverflow.com/questions/4859217/
      programmatically-creating-variables-in-python/4859312#4859312
    '''
    checker = _isnull_old if get_option(key) else _isnull_new
    globals()['_isnull'] = checker
def check_main():
    """
    Return True when ``__main__`` has no ``__file__`` attribute; otherwise
    return the value of the ``mode.sim_interactive`` option.
    """
    import __main__ as main

    if not hasattr(main, '__file__'):
        return True
    return get_option('mode.sim_interactive')
def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):
    """
    Try hard to parse datetime string, leveraging dateutil plus some extra
    goodies like quarter recognition.

    Parsing is attempted in this order: bare year, quarter notation,
    six-character monthly string (only for monthly `freq`), the
    ``_attempt_monthly`` helper, and finally ``dateutil_parse``.

    Parameters
    ----------
    arg : basestring
        Anything that is not a ``str`` is returned unchanged.
    freq : str or DateOffset, default None
        Helps with interpreting time string if supplied
    dayfirst : bool, default None
        If None uses the ``display.date_dayfirst`` option
    yearfirst : bool, default None
        If None uses the ``display.date_yearfirst`` option

    Returns
    -------
    datetime, datetime/dateutil.parser._result, str
        The first two elements are the same object in every branch visible
        here; the third is the resolution ('year', 'quarter', 'month', or
        whatever ``dateutil_parse`` reports). Non-string input is returned
        as-is instead of a tuple.

    Raises
    ------
    DateParseError
        If ``dateutil_parse`` raises, or returns ``None`` for the parsed
        value.
    """
    from pandas.core.config import get_option
    from pandas.tseries.offsets import DateOffset
    from pandas.tseries.frequencies import (_get_rule_month, _month_numbers,
                                            _get_freq_str)

    # Non-string input is passed straight through.
    if not isinstance(arg, str):
        return arg

    arg = arg.upper()

    # Baseline datetime (0001-01-01 00:00:00); the special-case branches
    # below overwrite only year/month on it.
    default = datetime(1, 1, 1).replace(hour=0, minute=0,
                                        second=0, microsecond=0)

    # special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1
    if len(arg) in [4, 6]:
        # Bare year pattern takes precedence over quarter patterns.
        m = ypat.match(arg)
        if m:
            ret = default.replace(year=int(m.group(1)))
            return ret, ret, 'year'

        # Four-character inputs get 2000 added to the parsed year below and
        # use the short patterns; six-character inputs use the "full" ones.
        add_century = False
        if len(arg) == 4:
            add_century = True
            qpats = [(qpat1, 1), (qpat2, 0)]
        else:
            qpats = [(qpat1full, 1), (qpat2full, 0)]

        for pat, yfirst in qpats:
            qparse = pat.match(arg)
            if qparse is not None:
                # The flag paired with each pattern selects which regex
                # group feeds ``q`` and which feeds ``y``.
                # NOTE(review): the yi/qi names read as swapped relative to
                # how they are used (group(yi) -> q, group(qi) -> y) --
                # confirm against the pattern definitions.
                if yfirst:
                    yi, qi = 1, 2
                else:
                    yi, qi = 2, 1
                q = int(qparse.group(yi))
                y_str = qparse.group(qi)
                y = int(y_str)
                if add_century:
                    y += 2000

                if freq is not None:
                    # hack attack, #1228
                    # Shift the quarter's month by the rule month derived
                    # from ``freq``; the year is decremented when the
                    # resulting month is greater than ``mnum``.
                    mnum = _month_numbers[_get_rule_month(freq)] + 1
                    month = (mnum + (q - 1) * 3) % 12 + 1
                    if month > mnum:
                        y -= 1
                else:
                    # No frequency given: q=1 -> month 1, q=2 -> month 4,
                    # and so on.
                    month = (q - 1) * 3 + 1

                ret = default.replace(year=y, month=month)
                return ret, ret, 'quarter'

        # ``freq`` counts as monthly when it equals 'M' or exposes
        # rule_code == 'M'.
        is_mo_str = freq is not None and freq == 'M'
        is_mo_off = getattr(freq, 'rule_code', None) == 'M'
        is_monthly = is_mo_str or is_mo_off
        if len(arg) == 6 and is_monthly:
            # Best effort: any failure here falls through to the generic
            # parsers below.
            try:
                ret = _try_parse_monthly(arg)
                if ret is not None:
                    return ret, ret, 'month'
            except Exception:
                pass

    # montly f7u12
    # Whatever truthy value the helper returns is handed back unchanged.
    mresult = _attempt_monthly(arg)
    if mresult:
        return mresult

    # Fall back to the configured options for ambiguous date ordering.
    if dayfirst is None:
        dayfirst = get_option("display.date_dayfirst")
    if yearfirst is None:
        yearfirst = get_option("display.date_yearfirst")

    # Any failure in the generic parser is re-raised as DateParseError.
    try:
        parsed, reso = dateutil_parse(arg, default, dayfirst=dayfirst,
                                      yearfirst=yearfirst)
    except Exception as e:
        raise DateParseError(e)

    if parsed is None:
        raise DateParseError("Could not parse %s" % arg)

    return parsed, parsed, reso  # datetime, resolution
def use_numexpr_cb(key):
    """Option callback: forward the current value of option `key` to
    ``expressions.set_use_numexpr``."""
    # Imported inside the function, as in the original.
    from pandas.core.computation import expressions

    enabled = cf.get_option(key)
    expressions.set_use_numexpr(enabled)
def table_schema_cb(key):
    """Option callback: forward the current value of option `key` to
    ``_enable_data_resource_formatter``."""
    # Imported inside the function, as in the original.
    from pandas.io.formats.printing import (
        _enable_data_resource_formatter as apply_setting)

    enabled = cf.get_option(key)
    apply_setting(enabled)
def use_bottleneck_cb(key):
    """Option callback: forward the current value of option `key` to
    ``nanops.set_use_bottleneck``."""
    # Imported inside the function, as in the original.
    from pandas.core import nanops

    enabled = cf.get_option(key)
    nanops.set_use_bottleneck(enabled)
def setUp(self):
    """
    Prepare the writer-engine option for this test case.

    Runs the skip check and the parent ``setUp``, then remembers the
    current value of ``io.excel.<ext>.writer`` in ``self.prev_engine`` and
    sets the option to ``self.engine_name``.
    """
    self.check_skip()
    super(ExcelWriterBase, self).setUp()

    # The option name uses the extension with surrounding dots stripped.
    bare_ext = self.ext.strip('.')
    self.option_name = 'io.excel.%s.writer' % bare_ext

    # Remember the current engine before overriding it.
    current_engine = get_option(self.option_name)
    self.prev_engine = current_engine
    set_option(self.option_name, self.engine_name)
casting='safe') except ValueError as detail: if 'unknown type object' in str(detail): pass except Exception as detail: if raise_on_error: raise TypeError(str(detail)) if result is None: result = _where_standard(cond, a, b, raise_on_error) return result # turn myself on set_use_numexpr(get_option('compute.use_numexpr')) def _has_bool_dtype(x): try: return x.dtype == bool except AttributeError: try: return 'bool' in x.blocks except AttributeError: return isinstance(x, (bool, np.bool_)) def _bool_arith_check(op_str, a, b,
def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False,
                 quote_strings=False, max_seq_items=None):
    """
    Convert an arbitrary object to its unicode representation.

    This is the sanctioned entry point for that conversion; unlike
    ``unicode(object)`` it copes with nested sequences that contain
    unicode strings.

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
        with pprint_sequence, this argument is used to keep track of the
        current nesting level, and limit it.
    escape_chars : list or dict, optional
        Characters to escape. If a dict is passed the values are the
        replacements
    default_escapes : bool, default False
        Whether the input escape characters replaces or adds to the defaults
    quote_strings : bool, default False
        If True, a string `thing` is wrapped in single quotes (with a ``u``
        prefix on py2)
    max_seq_items : False, int, default None
        Pass thru to other pretty printers to limit sequence printing

    Returns
    -------
    result - unicode object on py2, str on py3. Always Unicode.
    """

    def as_escaped_unicode(thing, escape_chars=escape_chars):
        # Try the plain text conversion first; if that cannot decode, fall
        # back to utf-8 with replacement of undecodable bytes. Beyond that
        # the encoding is unknowable and left to the caller.
        try:
            text = compat.text_type(thing)
        except UnicodeDecodeError:
            text = str(thing).decode('utf-8', "replace")

        replacements = {'\t': r'\t',
                        '\n': r'\n',
                        '\r': r'\r'}
        if not isinstance(escape_chars, dict):
            escape_chars = escape_chars or tuple()
        else:
            # A dict either extends the defaults or replaces them outright.
            if default_escapes:
                replacements.update(escape_chars)
            else:
                replacements = escape_chars
            escape_chars = list(escape_chars.keys())

        for char in escape_chars:
            text = text.replace(char, replacements[char])

        return compat.text_type(text)

    # Iterators are rendered as-is.
    looks_like_iterator = ((compat.PY3 and hasattr(thing, '__next__')) or
                           hasattr(thing, 'next'))
    if looks_like_iterator:
        return compat.text_type(thing)

    # Dicts and sequences recurse, but only while below the nesting limit.
    if (isinstance(thing, dict) and
            _nest_lvl < get_option("display.pprint_nest_depth")):
        rendered = _pprint_dict(thing, _nest_lvl, quote_strings=True,
                                max_seq_items=max_seq_items)
        return compat.text_type(rendered)  # always unicode

    if (is_sequence(thing) and
            _nest_lvl < get_option("display.pprint_nest_depth")):
        rendered = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars,
                               quote_strings=quote_strings,
                               max_seq_items=max_seq_items)
        return compat.text_type(rendered)  # always unicode

    if isinstance(thing, compat.string_types) and quote_strings:
        template = "'%s'" if compat.PY3 else "u'%s'"
        return compat.text_type(template % as_escaped_unicode(thing))

    return compat.text_type(as_escaped_unicode(thing))  # always unicode
def _translate(self):
    """
    Convert the DataFrame in `self.data` and the attrs from `_build_styles`
    into a dictionary describing the table to render.

    Returns
    -------
    dict
        With keys ``head``, ``cellstyle``, ``body``, ``uuid``,
        ``precision``, ``table_styles``, ``caption`` and
        ``table_attributes``. ``head`` and ``body`` are lists of rows, each
        row a list of cell dicts; ``cellstyle`` is a list of
        ``{'props', 'selector'}`` dicts, one per data cell.
    """
    # Snapshot the styling attributes used throughout.
    table_styles = self.table_styles or []
    caption = self.caption
    ctx = self.ctx
    precision = self.precision
    hidden_index = self.hidden_index
    hidden_columns = self.hidden_columns
    # Fall back to a fresh uuid1 (dashes replaced by underscores) when no
    # uuid was set.
    uuid = self.uuid or str(uuid1()).replace("-", "_")
    # Class names attached to the generated cells.
    ROW_HEADING_CLASS = "row_heading"
    COL_HEADING_CLASS = "col_heading"
    INDEX_NAME_CLASS = "index_name"

    DATA_CLASS = "data"
    BLANK_CLASS = "blank"
    BLANK_VALUE = ""

    def format_attr(pair):
        # Render {"key": k, "value": v} as the string "k=v".
        return "{key}={value}".format(**pair)

    # for sparsifying a MultiIndex
    idx_lengths = _get_level_lengths(self.index)
    col_lengths = _get_level_lengths(self.columns, hidden_columns)

    # Never populated in this method, so every lookup on it below yields
    # the empty default.
    cell_context = dict()

    n_rlvls = self.data.index.nlevels
    n_clvls = self.data.columns.nlevels
    rlabels = self.data.index.tolist()
    clabels = self.data.columns.tolist()

    # Wrap single-level labels so rows/columns are always lists of
    # per-level values.
    if n_rlvls == 1:
        rlabels = [[x] for x in rlabels]
    if n_clvls == 1:
        clabels = [[x] for x in clabels]
    # Transpose: clabels[r] becomes the labels of column level r.
    clabels = list(zip(*clabels))

    cellstyle = []
    head = []

    # One header row per column level.
    for r in range(n_clvls):
        # Blank for Index columns...
        # (the same dict object is repeated; it is not mutated afterwards
        # in this method)
        row_es = [{
            "type": "th",
            "value": BLANK_VALUE,
            "display_value": BLANK_VALUE,
            "is_visible": not hidden_index,
            "class": " ".join([BLANK_CLASS])
        }] * (n_rlvls - 1)

        # ... except maybe the last for columns.names
        name = self.data.columns.names[r]
        cs = [
            BLANK_CLASS if name is None else INDEX_NAME_CLASS,
            "level{lvl}".format(lvl=r)
        ]
        name = BLANK_VALUE if name is None else name
        row_es.append({
            "type": "th",
            "value": name,
            "display_value": name,
            "class": " ".join(cs),
            "is_visible": not hidden_index
        })

        # Header rows are only emitted when there is at least one column.
        if clabels:
            for c, value in enumerate(clabels[r]):
                cs = [
                    COL_HEADING_CLASS, "level{lvl}".format(lvl=r),
                    "col{col}".format(col=c)
                ]
                cs.extend(
                    cell_context.get("col_headings", {}).get(r, {}).get(c, []))
                es = {
                    "type": "th",
                    "value": value,
                    "display_value": value,
                    "class": " ".join(cs),
                    "is_visible": _is_visible(c, r, col_lengths),
                }
                # Only spans greater than one get an explicit attribute.
                colspan = col_lengths.get((r, c), 0)
                if colspan > 1:
                    es["attributes"] = [
                        format_attr({
                            "key": "colspan",
                            "value": colspan
                        })
                    ]
                row_es.append(es)
            head.append(row_es)

    # Extra header row carrying the index names, added only when at least
    # one name is set and the index is not hidden.
    if (self.data.index.names and
            com._any_not_none(*self.data.index.names) and
            not hidden_index):
        index_header_row = []

        for c, name in enumerate(self.data.index.names):
            cs = [INDEX_NAME_CLASS, "level{lvl}".format(lvl=c)]
            name = '' if name is None else name
            index_header_row.append({
                "type": "th",
                "value": name,
                "class": " ".join(cs)
            })

        # Pad with one blank cell per non-hidden column.
        # NOTE(review): unlike the header loop above, clabels[0] is indexed
        # here without an `if clabels` guard -- confirm behaviour for a
        # frame with no columns.
        index_header_row.extend([{
            "type": "th",
            "value": BLANK_VALUE,
            "class": " ".join([BLANK_CLASS])
        }] * (len(clabels[0]) - len(hidden_columns)))

        head.append(index_header_row)

    body = []
    for r, idx in enumerate(self.data.index):
        row_es = []
        # Row heading cells, one per index level.
        for c, value in enumerate(rlabels[r]):
            rid = [
                ROW_HEADING_CLASS, "level{lvl}".format(lvl=c),
                "row{row}".format(row=r)
            ]
            es = {
                "type": "th",
                "is_visible": (_is_visible(r, c, idx_lengths) and
                               not hidden_index),
                "value": value,
                "display_value": value,
                # id omits the leading class name: "level<c>_row<r>"
                "id": "_".join(rid[1:]),
                "class": " ".join(rid)
            }
            rowspan = idx_lengths.get((c, r), 0)
            if rowspan > 1:
                es["attributes"] = [
                    format_attr({
                        "key": "rowspan",
                        "value": rowspan
                    })
                ]
            row_es.append(es)

        # Data cells, plus the matching cellstyle entry for each one.
        for c, col in enumerate(self.data.columns):
            cs = [
                DATA_CLASS, "row{row}".format(row=r),
                "col{col}".format(col=c)
            ]
            cs.extend(cell_context.get("data", {}).get(r, {}).get(c, []))
            # Per-cell display function, looked up by (row, col) position.
            formatter = self._display_funcs[(r, c)]
            value = self.data.iloc[r, c]
            row_es.append({
                "type": "td",
                "value": value,
                "class": " ".join(cs),
                # id omits the leading class name: "row<r>_col<c>"
                "id": "_".join(cs[1:]),
                "display_value": formatter(value),
                "is_visible": (c not in hidden_columns)
            })
            props = []
            for x in ctx[r, c]:
                # have to handle empty styles like ['']
                if x.count(":"):
                    props.append(x.split(":"))
                else:
                    props.append(['', ''])
            cellstyle.append({
                'props': props,
                'selector': "row{row}_col{col}".format(row=r, col=c)
            })
        body.append(row_es)

    table_attr = self.table_attributes
    use_mathjax = get_option("display.html.use_mathjax")
    if not use_mathjax:
        # With the option off, tag the table with the "tex2jax_ignore"
        # class: prepend it to an existing class attribute, otherwise add
        # a new one.
        table_attr = table_attr or ''
        if 'class="' in table_attr:
            table_attr = table_attr.replace('class="',
                                            'class="tex2jax_ignore ')
        else:
            table_attr += ' class="tex2jax_ignore"'

    return dict(head=head, cellstyle=cellstyle, body=body, uuid=uuid,
                precision=precision, table_styles=table_styles,
                caption=caption, table_attributes=table_attr)