def parse_to_df(self, filelike, mode='csv', header_names=None, sheet_names=None, **kwargs):
    """
    Parse a delimited or excel file from the provided content and return a DataFrame.

    Any extra kwargs are passed to the appropriate pandas parser; read the
    pandas docs for details. Recommended kwargs: skiprows, usecols (called
    ``parse_cols`` in old pandas releases), header.

    :param filelike: string-like or filelike object containing formatted data.
        In 'xls' mode it must expose ``parse(sheet, **kwargs)`` and, when
        ``sheet_names`` is omitted, a ``sheet_names`` attribute
        (e.g. a ``pandas.ExcelFile``).
    :paramtype: string or file
    :param string mode: Choose from 'csv' or 'xls'. Default 'csv'.
        If 'csv', kwargs are passed to pandas.read_csv.
        If 'xls', kwargs are passed to ``filelike.parse`` for every sheet.
    :param list header_names: List of strings to use as column names.
        If provided, this will override the header extracted by pandas.
    :param list sheet_names: List of strings for excel sheet names to read.
        Default is to concatenate all sheets.
    :returns: DataFrame of the parsed content with every row that contains
        a missing value dropped.
    :raises ValueError: if ``mode`` is neither 'csv' nor 'xls'.
    """
    # check mode
    if mode not in ('csv', 'xls'):
        # tuple-wrap so an odd ``mode`` value (e.g. a tuple) cannot break the formatting
        raise ValueError('Invalid mode %s' % (mode,))

    if mode == 'csv':
        # Anything without a ``closed`` attribute is treated as raw content
        # (str, bytes, unicode, ...) and wrapped in an in-memory buffer.
        if not hasattr(filelike, 'closed'):
            try:
                filelike = BytesIO(filelike)
            except TypeError:
                # text (not bytes) content on Python 3
                filelike = StringIO(filelike)
        df = pd.read_csv(filelike, **kwargs)
    else:  # mode == 'xls'
        # Honour the documented default: no explicit selection means every
        # sheet of the workbook. Iterating ``None`` used to raise TypeError.
        if sheet_names is None:
            sheet_names = filelike.sheet_names
        df = pd.concat([filelike.parse(sheet, **kwargs) for sheet in sheet_names])

    # set names
    if header_names is not None:
        df.columns = header_names

    # drop rows that contain any missing value
    return df.dropna()
def parse_to_df(self, filelike, mode='csv', header_names=None, sheet_names=None, **kwargs):
    """
    Parse a delimited or excel file from the provided content and return a DataFrame.

    Any extra kwargs are passed to the appropriate pandas parser; read the
    pandas docs for details. Recommended kwargs: skiprows, usecols (called
    ``parse_cols`` in old pandas releases), header.

    :param filelike: string-like or filelike object containing formatted data.
        In 'xls' mode it must expose ``parse(sheet, **kwargs)`` and, when
        ``sheet_names`` is omitted, a ``sheet_names`` attribute
        (e.g. a ``pandas.ExcelFile``).
    :paramtype: string or file
    :param string mode: Choose from 'csv' or 'xls'. Default 'csv'.
        If 'csv', kwargs are passed to pandas.read_csv.
        If 'xls', kwargs are passed to ``filelike.parse`` for every sheet.
    :param list header_names: List of strings to use as column names.
        If provided, this will override the header extracted by pandas.
    :param list sheet_names: List of strings for excel sheet names to read.
        Default is to concatenate all sheets.
    :returns: DataFrame of the parsed content with every row that contains
        a missing value dropped.
    :raises ValueError: if ``mode`` is neither 'csv' nor 'xls'.
    """
    # check mode
    if mode not in ('csv', 'xls'):
        # tuple-wrap so an odd ``mode`` value (e.g. a tuple) cannot break the formatting
        raise ValueError('Invalid mode %s' % (mode,))

    if mode == 'csv':
        # Anything without a ``closed`` attribute is treated as raw content
        # (str, bytes, unicode, ...) and wrapped in an in-memory buffer.
        if not hasattr(filelike, 'closed'):
            try:
                filelike = BytesIO(filelike)
            except TypeError:
                # text (not bytes) content on Python 3
                filelike = StringIO(filelike)
        df = pd.read_csv(filelike, **kwargs)
    else:  # mode == 'xls'
        # Honour the documented default: no explicit selection means every
        # sheet of the workbook. Iterating ``None`` used to raise TypeError.
        if sheet_names is None:
            sheet_names = filelike.sheet_names
        df = pd.concat([filelike.parse(sheet, **kwargs) for sheet in sheet_names])

    # set names
    if header_names is not None:
        df.columns = header_names

    # drop rows that contain any missing value
    return df.dropna()