示例#1
0
    def sheet_to_df(self, index=1, header_rows=1, start_row=1, sheet=None):
        """Read the currently open worksheet into a DataFrame.

        Parameters
        ----------
        index : int
            col number of index column, 0 or None for no index (default 1)
        header_rows : int
            number of rows that represent headers (default 1)
        start_row : int
            row number for first row of headers or data (default 1)
        sheet : str,int
            optional, if you want to open a different sheet first,
            see :meth:`open_sheet <gspread_pandas.client.Spread.open_sheet>`
            (default None)

        Returns
        -------
        DataFrame
            DataFrame with the data from the Worksheet

        Raises
        ------
        NoWorksheetException
            if there is no open worksheet
        MissMatchException
            if there is data and the number of parsed column headers differs
            from the number of data columns

        """
        if sheet is not None:
            self.open_sheet(sheet)

        if not self.sheet:
            raise NoWorksheetException("No open worksheet")

        raw_values = self._retry_func(self.sheet.get_all_values)
        rows = self._fix_merge_values(raw_values)[start_row - 1 :]

        headers = parse_sheet_headers(rows, header_rows)

        # Everything after the header rows is data; a falsy header_rows means
        # the data starts at the very first remaining row.
        first_data_row = header_rows or 0
        frame = pd.DataFrame(rows[first_data_row:])

        # Blank cells become NaN so fully blank rows can be dropped, then the
        # remaining NaNs are turned back into empty strings.
        frame = frame.replace("", np.nan)
        frame = frame.dropna(how="all")
        frame = frame.fillna("")

        if headers is not None:
            widths_match = len(frame.columns) == len(headers)

            if not widths_match and len(frame) != 0:
                raise MissMatchException(
                    "Column headers don't match number of data columns"
                )

            if widths_match:
                frame.columns = headers
            else:
                # headers but no data rows: put the headers on the empty frame
                frame = frame.reindex(columns=headers)

        return parse_sheet_index(frame, index)
示例#2
0
    def sheet_to_df(self,
                    index=1,
                    headers=1,
                    header_rows=1,
                    start_row=1,
                    sheet=None):
        """
        Pull a worksheet into a DataFrame.

        :param int index: col number of index column, 0 or None for no index (default 1)
        :param int headers: (DEPRECATED - use `header_rows`) number of rows that represent
            headers (default 1)
        :param int header_rows: number of rows that represent headers (default 1)
        :param int start_row: row number for first row of headers or data (default 1)
        :param str,int sheet: optional, if you want to open a different sheet first,
            see :meth:`open_sheet <gspread_pandas.client.Spread.open_sheet>` (default None)

        :returns: a DataFrame with the data from the Worksheet
        :raises Exception: if no worksheet is open, or if there is data and the
            number of column headers doesn't match the number of data columns
        """
        # Compare against None rather than truthiness so that a falsy sheet
        # identifier, such as the integer 0, is still forwarded to open_sheet.
        if sheet is not None:
            self.open_sheet(sheet)

        if not self.sheet:
            raise Exception("No open worksheet")

        # Any non-default value of the deprecated argument overrides header_rows
        if headers != 1:
            deprecate("headers has been deprecated, use header_rows instead")
            header_rows = headers

        vals = self._retry_get_all_values()
        vals = self._fix_merge_values(vals)[start_row - 1:]

        col_names = parse_sheet_headers(vals, header_rows)

        # remove rows where everything is null, then replace nulls with ''
        df = (
            pd.DataFrame(vals[header_rows or 0:])
            .replace('', np.nan)
            .dropna(how='all')
            .fillna('')
        )

        if col_names is not None:
            if len(df.columns) == len(col_names):
                df.columns = col_names
            elif len(df) == 0:
                # if we have headers but no data, set column headers on empty DF
                df = df.reindex(columns=col_names)
            else:
                raise Exception(
                    "Column headers don't match number of data columns")

        return parse_sheet_index(df, index)
示例#3
0
 def test_multiheader3(self, data_multiheader):
     """Three header rows: 'test_index' and 1 should be shifted up."""
     header_levels = [
         ["test_index", "col1", "col1"],
         [1, "subcol1", "subcol2"],
         ["", 2, 3],
     ]
     expected = pd.MultiIndex.from_arrays(header_levels)
     parsed = util.parse_sheet_headers(data_multiheader, 3)
     assert parsed.equals(expected)
示例#4
0
 def test_normal(self, data_multiheader):
     """One header row should be parsed into a flat Index."""
     parsed = util.parse_sheet_headers(data_multiheader, 1)
     expected = pd.Index(["", "col1", "col1"])
     assert parsed.equals(expected)
示例#5
0
 def test_empty(self, data_empty):
     """Asking for zero header rows on the empty fixture should give None."""
     parsed = util.parse_sheet_headers(data_empty, 0)
     assert parsed is None
示例#6
0
    def sheet_to_df(
        self,
        index=1,
        header_rows=1,
        start_row=1,
        unformatted_columns=None,
        formula_columns=None,
        sheet=None,
    ):
        """
        Pull a worksheet into a DataFrame.

        Parameters
        ----------
        index : int
            col number of index column, 0 or None for no index (default 1)
        header_rows : int
            number of rows that represent headers (default 1)
        start_row : int
            row number for first row of headers or data (default 1)
        unformatted_columns : list
            column numbers or names for columns you'd like to pull in as
            unformatted values (default None)
        formula_columns : list
            column numbers or names for columns you'd like to pull in as
            actual formulas (default None)
        sheet : str,int
            optional, if you want to open a different sheet first,
            see :meth:`open_sheet <gspread_pandas.spread.Spread.open_sheet>`
            (default None)

        Returns
        -------
        DataFrame
            DataFrame with the data from the Worksheet
        """
        self._ensure_sheet(sheet)

        vals = self.sheet.get_all_values()
        vals = self._fix_merge_values(vals)[start_row - 1:]

        col_names = parse_sheet_headers(vals, header_rows)

        # The data slice already tolerated a falsy header_rows (None or 0);
        # normalise it once so the row offset below can't fail with a
        # TypeError on `None + int`.
        n_header_rows = header_rows or 0

        # remove rows where everything is null, then replace nulls with ''
        df = (
            pd.DataFrame(vals[n_header_rows:])
            .replace("", np.nan)
            .dropna(how="all")
            .fillna("")
        )

        # number of sheet rows that precede the first data row: the rows
        # skipped via start_row plus the header rows
        rows_before_data = n_header_rows + start_row - 1

        # replace values with a different value render option before we set the
        # index in set_col_names
        if unformatted_columns:
            self._fix_value_render(
                df,
                rows_before_data,
                col_names,
                unformatted_columns,
                "UNFORMATTED_VALUE",
            )

        if formula_columns:
            self._fix_value_render(
                df,
                rows_before_data,
                col_names,
                formula_columns,
                "FORMULA",
            )

        df = set_col_names(df, col_names)

        return parse_sheet_index(df, index)
示例#7
0
def test_parse_sheet_headers_multiheader3(data_multiheader):
    """Three header rows: 'test_index' and 1 should be shifted up."""
    header_levels = [
        ['test_index', 'col1', 'col1'],
        [1, 'subcol1', 'subcol2'],
        ['', 2, 3],
    ]
    expected = pd.MultiIndex.from_arrays(header_levels)
    parsed = util.parse_sheet_headers(data_multiheader, 3)
    assert parsed.equals(expected)
示例#8
0
def test_parse_sheet_headers_normal(data_multiheader):
    """One header row should be parsed into a flat Index."""
    parsed = util.parse_sheet_headers(data_multiheader, 1)
    expected = pd.Index(['', 'col1', 'col1'])
    assert parsed.equals(expected)
示例#9
0
def test_parse_sheet_headers_multiheader(data_multiheader):
    """Two header rows: 'test_index' should be shifted up."""
    top_level = ["test_index", "col1", "col1"]
    sub_level = ["", "subcol1", "subcol2"]
    expected = pd.MultiIndex.from_arrays([top_level, sub_level])
    parsed = util.parse_sheet_headers(data_multiheader, 2)
    assert parsed.equals(expected)