def to_html(self, max_rows=None, unsupported_param=None): args = locals() pdf = pd.DataFrame({ 'a': [1, 2, 3], 'b': [4, 5, 6], }, index=[0, 1, 3]) validate_arguments_and_invoke_function(pdf, self.to_html, pd.DataFrame.to_html, args)
def to_html(self, max_rows=None, unsupported_param=None): args = locals() pdf = pd.DataFrame({ 'a': [1, 2, 3], 'b': [4, 5, 6], }, index=[0, 1, 3]) validate_arguments_and_invoke_function(pdf, self.to_html, pd.DataFrame.to_html, args)
def to_clipboard(self, sep=',', **kwargs): args = locals() pdf = pd.DataFrame({ 'a': [1, 2, 3], 'b': [4, 5, 6], }, index=[0, 1, 3]) validate_arguments_and_invoke_function(pdf, self.to_clipboard, pd.DataFrame.to_clipboard, args)
def to_clipboard(self, sep=',', **kwargs): args = locals() pdf = pd.DataFrame({ 'a': [1, 2, 3], 'b': [4, 5, 6], }, index=[0, 1, 3]) validate_arguments_and_invoke_function(pdf, self.to_clipboard, pd.DataFrame.to_clipboard, args)
def to_clipboard(self, sep=",", **kwargs): args = locals() pdf = pd.DataFrame({ "a": [1, 2, 3], "b": [4, 5, 6], }, index=[0, 1, 3]) validate_arguments_and_invoke_function(pdf, self.to_clipboard, pd.DataFrame.to_clipboard, args) # Support for **kwargs self.to_clipboard(sep=",", index=False)
def to_clipboard(self, excel=True, sep=None, **kwargs): # Docstring defined below by reusing DataFrame.to_clipboard's. args = locals() kseries = self return validate_arguments_and_invoke_function( kseries.to_pandas(), self.to_clipboard, pd.Series.to_clipboard, args)
def to_latex(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, bold_rows=False, column_format=None, longtable=None, escape=None, encoding=None, decimal='.', multicolumn=None, multicolumn_format=None, multirow=None): args = locals() kseries = self return validate_arguments_and_invoke_function(kseries.to_pandas(), self.to_latex, pd.Series.to_latex, args)
def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True, index=True, length=False, dtype=False, name=False, max_rows=None): """ Render a string representation of the Series. .. note:: This method should only be used if the resulting Pandas object is expected to be small, as all the data is loaded into the driver's memory. If the input is large, set max_rows parameter. Parameters ---------- buf : StringIO-like, optional buffer to write to na_rep : string, optional string representation of NAN to use, default 'NaN' float_format : one-parameter function, optional formatter function to apply to columns' elements if they are floats default None header : boolean, default True Add the Series header (index name) index : bool, optional Add index (row) labels, default True length : boolean, default False Add the Series length dtype : boolean, default False Add the Series dtype name : boolean, default False Add the Series name if not None max_rows : int, optional Maximum number of rows to show before truncating. If None, show all. Returns ------- formatted : string (if not buffer passed) Examples -------- >>> df = ks.DataFrame([(.2, .3), (.0, .6), (.6, .0), (.2, .1)], columns=['dogs', 'cats']) >>> print(df['dogs'].to_string()) 0 0.2 1 0.0 2 0.6 3 0.2 >>> print(df['dogs'].to_string(max_rows=2)) 0 0.2 1 0.0 """ # Make sure locals() call is at the top of the function so we don't capture local variables. args = locals() if max_rows is not None: kseries = self.head(max_rows) else: kseries = self return validate_arguments_and_invoke_function( kseries.to_pandas(), self.to_string, pd.Series.to_string, args)
def to_dict(self, into=dict): """ Convert Series to {label -> value} dict or dict-like object. .. note:: This method should only be used if the resulting Pandas DataFrame is expected to be small, as all the data is loaded into the driver's memory. Parameters ---------- into : class, default dict The collections.abc.Mapping subclass to use as the return object. Can be the actual class or an empty instance of the mapping type you want. If you want a collections.defaultdict, you must pass it initialized. Returns ------- collections.abc.Mapping Key-value representation of Series. Examples -------- >>> s = ks.Series([1, 2, 3, 4]) >>> s_dict = s.to_dict() >>> sorted(s_dict.items()) [(0, 1), (1, 2), (2, 3), (3, 4)] >>> from collections import OrderedDict, defaultdict >>> s.to_dict(OrderedDict) OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) >>> dd = defaultdict(list) >>> s.to_dict(dd) # doctest: +ELLIPSIS defaultdict(<class 'list'>, {...}) """ # Make sure locals() call is at the top of the function so we don't capture local variables. args = locals() kseries = self return validate_arguments_and_invoke_function(kseries.to_pandas(), self.to_dict, pd.Series.to_dict, args)
def to_excel(self, excel_writer, sheet_name="Sheet1", na_rep="", float_format=None, columns=None, header=True, index=True, index_label=None, startrow=0, startcol=0, engine=None, merge_cells=True, encoding=None, inf_rep="inf", verbose=True, freeze_panes=None): """ Write object to an Excel sheet. .. note:: This method should only be used if the resulting DataFrame is expected to be small, as all the data is loaded into the driver's memory. To write a single object to an Excel .xlsx file it is only necessary to specify a target file name. To write to multiple sheets it is necessary to create an `ExcelWriter` object with a target file name, and specify a sheet in the file to write to. Multiple sheets may be written to by specifying unique `sheet_name`. With all data written to the file it is necessary to save the changes. Note that creating an `ExcelWriter` object with a file name that already exists will result in the contents of the existing file being erased. Parameters ---------- excel_writer : str or ExcelWriter object File path or existing ExcelWriter. sheet_name : str, default 'Sheet1' Name of sheet which will contain DataFrame. na_rep : str, default '' Missing data representation. float_format : str, optional Format string for floating point numbers. For example ``float_format="%%.2f"`` will format 0.1234 to 0.12. columns : sequence or list of str, optional Columns to write. header : bool or list of str, default True Write out the column names. If a list of string is given it is assumed to be aliases for the column names. index : bool, default True Write row names (index). index_label : str or sequence, optional Column label for index column(s) if desired. If not specified, and `header` and `index` are True, then the index names are used. A sequence should be given if the DataFrame uses MultiIndex. startrow : int, default 0 Upper left cell row to dump data frame. startcol : int, default 0 Upper left cell column to dump data frame. engine : str, optional Write engine to use, 'openpyxl' or 'xlsxwriter'. You can also set this via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and ``io.excel.xlsm.writer``. merge_cells : bool, default True Write MultiIndex and Hierarchical Rows as merged cells. encoding : str, optional Encoding of the resulting excel file. Only necessary for xlwt, other writers support unicode natively. inf_rep : str, default 'inf' Representation for infinity (there is no native representation for infinity in Excel). verbose : bool, default True Display more information in the error logs. freeze_panes : tuple of int (length 2), optional Specifies the one-based bottommost row and rightmost column that is to be frozen. Notes ----- Once a workbook has been saved it is not possible write further data without rewriting the whole workbook. See Also -------- read_excel : Read Excel file. Examples -------- Create, write to and save a workbook: >>> df1 = ks.DataFrame([['a', 'b'], ['c', 'd']], ... index=['row 1', 'row 2'], ... columns=['col 1', 'col 2']) >>> df1.to_excel("output.xlsx") # doctest: +SKIP To specify the sheet name: >>> df1.to_excel("output.xlsx") # doctest: +SKIP >>> df1.to_excel("output.xlsx", ... sheet_name='Sheet_name_1') # doctest: +SKIP If you wish to write to more than one sheet in the workbook, it is necessary to specify an ExcelWriter object: >>> with pd.ExcelWriter('output.xlsx') as writer: # doctest: +SKIP ... df1.to_excel(writer, sheet_name='Sheet_name_1') ... df2.to_excel(writer, sheet_name='Sheet_name_2') To set the library that is used to write the Excel file, you can pass the `engine` keyword (the default engine is automatically chosen depending on the file extension): >>> df1.to_excel('output1.xlsx', engine='xlsxwriter') # doctest: +SKIP """ # Make sure locals() call is at the top of the function so we don't capture local variables. args = locals() kdf = self if isinstance(self, ks.DataFrame): f = pd.DataFrame.to_excel elif isinstance(self, ks.Series): f = pd.Series.to_excel else: raise TypeError('Constructor expects DataFrame or Series; however, ' 'got [%s]' % (self,)) return validate_arguments_and_invoke_function( kdf.to_pandas(), self.to_excel, f, args)
def to_json(self, path_or_buf=None, orient=None, date_format=None, double_precision=10, force_ascii=True, date_unit='ms', default_handler=None, lines=False, compression='infer', index=True): """ Convert the object to a JSON string. Note NaN's and None will be converted to null and datetime objects will be converted to UNIX timestamps. .. note:: This method should only be used if the resulting JSON is expected to be small, as all the data is loaded into the driver's memory. Parameters ---------- path_or_buf : string or file handle, optional File path or object. If not specified, the result is returned as a string. orient : string Indication of expected JSON string format. * Series - default is 'index' - allowed values are: {'split','records','index','table'} * DataFrame - default is 'columns' - allowed values are: {'split','records','index','columns','values','table'} * The format of the JSON string - 'split' : dict like {'index' -> [index], 'columns' -> [columns], 'data' -> [values]} - 'records' : list like [{column -> value}, ... , {column -> value}] - 'index' : dict like {index -> {column -> value}} - 'columns' : dict like {column -> {index -> value}} - 'values' : just the values array - 'table' : dict like {'schema': {schema}, 'data': {data}} describing the data, and the data component is like ``orient='records'``. date_format : {None, 'epoch', 'iso'} Type of date conversion. 'epoch' = epoch milliseconds, 'iso' = ISO8601. The default depends on the `orient`. For ``orient='table'``, the default is 'iso'. For all other orients, the default is 'epoch'. double_precision : int, default 10 The number of decimal places to use when encoding floating point values. force_ascii : bool, default True Force encoded string to be ASCII. date_unit : string, default 'ms' (milliseconds) The time unit to encode to, governs timestamp and ISO8601 precision. One of 's', 'ms', 'us', 'ns' for second, millisecond, microsecond, and nanosecond respectively. default_handler : callable, default None Handler to call if object cannot otherwise be converted to a suitable format for JSON. Should receive a single argument which is the object to convert and return a serialisable object. lines : bool, default False If 'orient' is 'records' write out line delimited json format. Will throw ValueError if incorrect 'orient' since others are not list like. compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None} A string representing the compression to use in the output file, only used when the first argument is a filename. By default, the compression is inferred from the filename. index : bool, default True Whether to include the index values in the JSON string. Not including the index (``index=False``) is only supported when orient is 'split' or 'table'. Examples -------- >>> df = ks.DataFrame([['a', 'b'], ['c', 'd']], ... index=['row 1', 'row 2'], ... columns=['col 1', 'col 2']) >>> df.to_json(orient='split') '{"columns":["col 1","col 2"],\ "index":["row 1","row 2"],\ "data":[["a","b"],["c","d"]]}' >>> df['col 1'].to_json(orient='split') '{"name":"col 1","index":["row 1","row 2"],"data":["a","c"]}' Encoding/decoding a Dataframe using ``'records'`` formatted JSON. Note that index labels are not preserved with this encoding. >>> df.to_json(orient='records') '[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]' >>> df['col 1'].to_json(orient='records') '["a","c"]' Encoding/decoding a Dataframe using ``'index'`` formatted JSON: >>> df.to_json(orient='index') '{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}' >>> df['col 1'].to_json(orient='index') '{"row 1":"a","row 2":"c"}' Encoding/decoding a Dataframe using ``'columns'`` formatted JSON: >>> df.to_json(orient='columns') '{"col 1":{"row 1":"a","row 2":"c"},"col 2":{"row 1":"b","row 2":"d"}}' >>> df['col 1'].to_json(orient='columns') '{"row 1":"a","row 2":"c"}' Encoding/decoding a Dataframe using ``'values'`` formatted JSON: >>> df.to_json(orient='values') '[["a","b"],["c","d"]]' >>> df['col 1'].to_json(orient='values') '["a","c"]' Encoding with Table Schema >>> df.to_json(orient='table') # doctest: +SKIP '{"schema": {"fields":[{"name":"index","type":"string"},\ {"name":"col 1","type":"string"},\ {"name":"col 2","type":"string"}],\ "primaryKey":["index"],\ "pandas_version":"0.20.0"}, \ "data": [{"index":"row 1","col 1":"a","col 2":"b"},\ {"index":"row 2","col 1":"c","col 2":"d"}]}' >>> df['col 1'].to_json(orient='table') # doctest: +SKIP '{"schema": {"fields":[{"name":"index","type":"string"},\ {"name":"col 1","type":"string"}],"primaryKey":["index"],"pandas_version":"0.20.0"}, \ "data": [{"index":"row 1","col 1":"a"},{"index":"row 2","col 1":"c"}]}' """ # Make sure locals() call is at the top of the function so we don't capture local variables. args = locals() kdf = self if isinstance(self, ks.DataFrame): f = pd.DataFrame.to_json elif isinstance(self, ks.Series): f = pd.Series.to_json else: raise TypeError('Constructor expects DataFrame or Series; however, ' 'got [%s]' % (self,)) return validate_arguments_and_invoke_function( kdf.to_pandas(), self.to_json, f, args)
def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, mode='w', encoding=None, compression='infer', quoting=None, quotechar='"', line_terminator="\n", chunksize=None, tupleize_cols=None, date_format=None, doublequote=True, escapechar=None, decimal='.'): """ Write object to a comma-separated values (csv) file. .. note:: This method should only be used if the resulting CSV is expected to be small, as all the data is loaded into the driver's memory. Parameters ---------- path_or_buf : str or file handle, default None File path or object, if None is provided the result is returned as a string. If a file object is passed it should be opened with `newline=''`, disabling universal newlines. sep : str, default ',' String of length 1. Field delimiter for the output file. na_rep : str, default '' Missing data representation. float_format : str, default None Format string for floating point numbers. columns : sequence, optional Columns to write. header : bool or list of str, default True Write out the column names. If a list of strings is given it is assumed to be aliases for the column names. index : bool, default True Write row names (index). index_label : str or sequence, or False, default None Column label for index column(s) if desired. If None is given, and `header` and `index` are True, then the index names are used. A sequence should be given if the object uses MultiIndex. If False do not print fields for index names. Use index_label=False for easier importing in R. mode : str Python write mode, default 'w'. encoding : str, optional A string representing the encoding to use in the output file, defaults to 'ascii' on Python 2 and 'utf-8' on Python 3. compression : str, default 'infer' Compression mode among the following possible values: {'infer', 'gzip', 'bz2', 'zip', 'xz', None}. If 'infer' and `path_or_buf` is path-like, then detect compression from the following extensions: '.gz', '.bz2', '.zip' or '.xz'. (otherwise no compression). quoting : optional constant from csv module Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format` then floats are converted to strings and thus csv.QUOTE_NONNUMERIC will treat them as non-numeric. quotechar : str, default '\"' String of length 1. Character used to quote fields. line_terminator : string, default '\\n' The newline character or character sequence to use in the output file. Defaults to `os.linesep`, which depends on the OS in which this method is called ('\n' for linux, '\r\n' for Windows, i.e.). chunksize : int or None Rows to write at a time. tupleize_cols : bool, default False Write MultiIndex columns as a list of tuples (if True) or in the new, expanded format, where each MultiIndex column is a row in the CSV (if False). date_format : str, default None Format string for datetime objects. doublequote : bool, default True Control quoting of `quotechar` inside a field. escapechar : str, default None String of length 1. Character used to escape `sep` and `quotechar` when appropriate. decimal : str, default '.' Character recognized as decimal separator. E.g. use ',' for European data. Returns ------- None or str If path_or_buf is None, returns the resulting csv format as a string. Otherwise returns None. See Also -------- read_csv : Reading CSV files. Examples -------- >>> df = ks.DataFrame({'name': ['Raphael', 'Donatello'], ... 'mask': ['red', 'purple'], ... 'weapon': ['sai', 'bo staff']}, ... columns=['name', 'mask', 'weapon']) >>> df.to_csv(index=False) 'name,mask,weapon\\nRaphael,red,sai\\nDonatello,purple,bo staff\\n' >>> df.name.to_csv() # doctest: +ELLIPSIS '...Raphael\\n1,Donatello\\n' """ # Make sure locals() call is at the top of the function so we don't capture local variables. args = locals() kdf = self if isinstance(self, ks.DataFrame): f = pd.DataFrame.to_csv elif isinstance(self, ks.Series): f = pd.Series.to_csv else: raise TypeError('Constructor expects DataFrame or Series; however, ' 'got [%s]' % (self,)) return validate_arguments_and_invoke_function( kdf.to_pandas(), self.to_csv, f, args)
def to_string(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, justify=None, max_rows=None, max_cols=None, show_dimensions=False, decimal='.', line_width=None): """ Render a DataFrame to a console-friendly tabular output. .. note:: This method should only be used if the resulting Pandas object is expected to be small, as all the data is loaded into the driver's memory. If the input is large, set max_rows parameter. Parameters ---------- buf : StringIO-like, optional Buffer to write to. columns : sequence, optional, default None The subset of columns to write. Writes all columns by default. col_space : int, optional The minimum width of each column. header : bool, optional Write out the column names. If a list of strings is given, it is assumed to be aliases for the column names index : bool, optional, default True Whether to print index (row) labels. na_rep : str, optional, default 'NaN' String representation of NAN to use. formatters : list or dict of one-param. functions, optional Formatter functions to apply to columns' elements by position or name. The result of each function must be a unicode string. List must be of length equal to the number of columns. float_format : one-parameter function, optional, default None Formatter function to apply to columns' elements if they are floats. The result of this function must be a unicode string. sparsify : bool, optional, default True Set to False for a DataFrame with a hierarchical index to print every multiindex key at each row. index_names : bool, optional, default True Prints the names of the indexes. justify : str, default None How to justify the column labels. If None uses the option from the print configuration (controlled by set_option), 'right' out of the box. Valid values are * left * right * center * justify * justify-all * start * end * inherit * match-parent * initial * unset. max_rows : int, optional Maximum number of rows to display in the console. max_cols : int, optional Maximum number of columns to display in the console. show_dimensions : bool, default False Display DataFrame dimensions (number of rows by number of columns). decimal : str, default '.' Character recognized as decimal separator, e.g. ',' in Europe. line_width : int, optional Width to wrap a line in characters. Returns ------- str (or unicode, depending on data and options) String representation of the dataframe. See Also -------- to_html : Convert DataFrame to HTML. Examples -------- >>> df = ks.DataFrame({'col1': [1, 2, 3], 'col2': [4, 5, 6]}) >>> print(df.to_string()) col1 col2 0 1 4 1 2 5 2 3 6 >>> print(df.to_string(max_rows=2)) col1 col2 0 1 4 1 2 5 """ # Make sure locals() call is at the top of the function so we don't capture local variables. args = locals() if max_rows is not None: kdf = self.head(max_rows) else: kdf = self return validate_arguments_and_invoke_function(kdf.to_pandas(), self.to_string, pd.DataFrame.to_string, args)
def to_html(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, float_format=None, sparsify=None, index_names=True, justify=None, max_rows=None, max_cols=None, show_dimensions=False, decimal='.', bold_rows=True, classes=None, escape=True, notebook=False, border=None, table_id=None, render_links=False): """ Render a DataFrame as an HTML table. .. note:: This method should only be used if the resulting Pandas object is expected to be small, as all the data is loaded into the driver's memory. If the input is large, set max_rows parameter. Parameters ---------- buf : StringIO-like, optional Buffer to write to. columns : sequence, optional, default None The subset of columns to write. Writes all columns by default. col_space : int, optional The minimum width of each column. header : bool, optional Write out the column names. If a list of strings is given, it is assumed to be aliases for the column names index : bool, optional, default True Whether to print index (row) labels. na_rep : str, optional, default 'NaN' String representation of NAN to use. formatters : list or dict of one-param. functions, optional Formatter functions to apply to columns' elements by position or name. The result of each function must be a unicode string. List must be of length equal to the number of columns. float_format : one-parameter function, optional, default None Formatter function to apply to columns' elements if they are floats. The result of this function must be a unicode string. sparsify : bool, optional, default True Set to False for a DataFrame with a hierarchical index to print every multiindex key at each row. index_names : bool, optional, default True Prints the names of the indexes. justify : str, default None How to justify the column labels. If None uses the option from the print configuration (controlled by set_option), 'right' out of the box. Valid values are * left * right * center * justify * justify-all * start * end * inherit * match-parent * initial * unset. max_rows : int, optional Maximum number of rows to display in the console. max_cols : int, optional Maximum number of columns to display in the console. show_dimensions : bool, default False Display DataFrame dimensions (number of rows by number of columns). decimal : str, default '.' Character recognized as decimal separator, e.g. ',' in Europe. bold_rows : bool, default True Make the row labels bold in the output. classes : str or list or tuple, default None CSS class(es) to apply to the resulting html table. escape : bool, default True Convert the characters <, >, and & to HTML-safe sequences. notebook : {True, False}, default False Whether the generated HTML is for IPython Notebook. border : int A ``border=border`` attribute is included in the opening `<table>` tag. Default ``pd.options.html.border``. table_id : str, optional A css id is included in the opening `<table>` tag if specified. render_links : bool, default False Convert URLs to HTML links (only works with Pandas 0.24+). Returns ------- str (or unicode, depending on data and options) String representation of the dataframe. See Also -------- to_string : Convert DataFrame to a string. """ # Make sure locals() call is at the top of the function so we don't capture local variables. args = locals() if max_rows is not None: kdf = self.head(max_rows) else: kdf = self return validate_arguments_and_invoke_function(kdf.to_pandas(), self.to_html, pd.DataFrame.to_html, args)
def to_clipboard(self, excel=True, sep=None, **kwargs): """ Copy object to the system clipboard. Write a text representation of object to the system clipboard. This can be pasted into Excel, for example. .. note:: This method should only be used if the resulting DataFrame is expected to be small, as all the data is loaded into the driver's memory. Parameters ---------- excel : bool, default True - True, use the provided separator, writing in a csv format for allowing easy pasting into excel. - False, write a string representation of the object to the clipboard. sep : str, default ``'\\t'`` Field delimiter. **kwargs These parameters will be passed to DataFrame.to_csv. Notes ----- Requirements for your platform. - Linux : `xclip`, or `xsel` (with `gtk` or `PyQt4` modules) - Windows : none - OS X : none Examples -------- Copy the contents of a DataFrame to the clipboard. >>> df = ks.Series([1, 2, 3, 4, 5, 6, 7], name='x') >>> df.to_clipboard(sep=',') ... # Wrote the following to the system clipboard: ... # 0, 1 ... # 1, 2 ... # 2, 3 ... # 3, 4 ... # 4, 5 ... # 5, 6 ... # 6, 7 We can omit the the index by passing the keyword `index` and setting it to false. >>> df.to_clipboard(sep=',', index=False) ... # Wrote the following to the system clipboard: ... # 1 ... # 2 ... # 3 ... # 4 ... # 5 ... # 6 ... # 7 """ args = locals() kseries = self return validate_arguments_and_invoke_function(kseries.to_pandas(), self.to_clipboard, pd.Series.to_clipboard, args)