Пример #1
0
    def parse_to_df(self,
                    filelike,
                    mode='csv',
                    header_names=None,
                    sheet_names=None,
                    **kwargs):
        """
        Parse delimited or excel content and return a DataFrame without NA rows.

        Any extra kwargs are passed to the appropriate pandas parser;
        read the pandas docs for details.
        Recommended kwargs: skiprows, usecols (``parse_cols`` in older
        pandas releases), header.

        :param filelike: For mode 'csv', a str/bytes payload or a filelike
            object containing delimited data. For mode 'xls', an object
            exposing ``parse(sheet, **kwargs)`` and ``sheet_names`` (the
            interface offered by ``pandas.ExcelFile``).
        :type filelike: string or file
        :param string mode: Choose from 'csv' or 'xls'. Default 'csv'.
            If 'csv', kwargs are passed to pandas.read_csv.
            If 'xls', kwargs are passed to ``filelike.parse`` for each sheet.
        :param list header_names: List of strings to use as column names.
            If provided, this will override the header extracted by pandas.
        :param list sheet_names: List of strings for excel sheet names to read.
            Default (None) is to concatenate all sheets listed in
            ``filelike.sheet_names``. Ignored in 'csv' mode.
        :returns: The parsed data with every row containing an NA value dropped.
        :rtype: pandas.DataFrame
        :raises ValueError: If ``mode`` is not 'csv' or 'xls'.
        """
        # check mode
        if mode not in ('csv', 'xls'):
            raise ValueError('Invalid mode %s' % mode)

        # do csv/tsv
        if mode == 'csv':
            # Anything without a ``closed`` attribute is treated as raw
            # content (str, bytes, ...) and wrapped in an in-memory buffer.
            if not hasattr(filelike, 'closed'):
                try:
                    # bytes-like payloads
                    filelike = BytesIO(filelike)
                except TypeError:
                    # Python 3 text cannot be wrapped by BytesIO
                    filelike = StringIO(filelike)

            # read csv
            df = pd.read_csv(filelike, **kwargs)

        # do xls
        else:
            # Honour the documented default: with no explicit selection,
            # read every sheet the workbook reports instead of failing on
            # iteration over None.
            if sheet_names is None:
                sheet_names = filelike.sheet_names
            pieces = [filelike.parse(sheet, **kwargs) for sheet in sheet_names]
            df = pd.concat(pieces)

        # set names
        if header_names is not None:
            df.columns = header_names

        # drop every row that contains at least one NA value
        return df.dropna()
Пример #2
0
    def parse_to_df(self, filelike, mode='csv', header_names=None, sheet_names=None, **kwargs):
        """
        Parse a delimited or excel source into a DataFrame and drop NA rows.

        Any extra kwargs are passed to the appropriate pandas parser;
        read the pandas docs for details.
        Recommended kwargs: skiprows, usecols (``parse_cols`` in older
        pandas releases), header.

        :param filelike: In 'csv' mode, raw str/bytes content or a filelike
            object holding delimited data. In 'xls' mode, an object that
            provides ``parse(sheet, **kwargs)`` and ``sheet_names`` (as
            ``pandas.ExcelFile`` does).
        :type filelike: string or file
        :param string mode: Choose from 'csv' or 'xls'. Default 'csv'.
            If 'csv', kwargs are passed to pandas.read_csv.
            If 'xls', kwargs are passed to ``filelike.parse`` per sheet.
        :param list header_names: List of strings to use as column names.
            If provided, this will override the header extracted by pandas.
        :param list sheet_names: List of strings for excel sheet names to read.
            When None (the default) all sheets in ``filelike.sheet_names``
            are read and concatenated. Not used in 'csv' mode.
        :returns: Parsed data, minus any row containing an NA value.
        :rtype: pandas.DataFrame
        :raises ValueError: If ``mode`` is neither 'csv' nor 'xls'.
        """
        # check mode
        allowed_modes = ('csv', 'xls')
        if mode not in allowed_modes:
            raise ValueError('Invalid mode %s' % mode)

        if mode == 'xls':
            # The default of None means "all sheets"; previously this path
            # crashed with a TypeError when iterating over None.
            if sheet_names is None:
                sheet_names = filelike.sheet_names
            df = pd.concat([filelike.parse(sheet, **kwargs) for sheet in sheet_names])
        else:
            # csv/tsv: objects lacking ``closed`` are assumed to be raw
            # content and are wrapped in an in-memory buffer first.
            if not hasattr(filelike, 'closed'):
                try:
                    filelike = BytesIO(filelike)  # bytes-like content
                except TypeError:
                    filelike = StringIO(filelike)  # Python 3 text content
            df = pd.read_csv(filelike, **kwargs)

        # set names
        if header_names is not None:
            df.columns = header_names

        # drop rows with any NA value
        return df.dropna()