def test_excel_roundtrip_datetime(self):
    """Round-trip a frame whose index holds ``datetime.date`` objects.

    A copy of ``self.tsframe`` gets its index replaced by plain dates, is
    written to sheet ``'test1'`` and read back; the result is compared with
    the original ``self.tsframe``.
    """
    _skip_if_no_xlrd()

    # datetime.date, not sure what to test here exactly
    tsf = self.tsframe.copy()
    with ensure_clean(self.ext) as path:
        tsf.index = [x.date() for x in self.tsframe.index]
        tsf.to_excel(path, 'test1', merge_cells=self.merge_cells)

        # Release the workbook handle as soon as the sheet has been parsed.
        with ExcelFile(path) as reader:
            recons = reader.parse('test1')
        tm.assert_frame_equal(self.tsframe, recons)
def test_to_excel_periodindex(self):
    """Round-trip a monthly period-indexed frame through sheet ``'sht1'``.

    ``self.tsframe`` is resampled to monthly periods, written out, read back
    with date parsing on the first column, and converted to monthly periods
    again before the comparison.
    """
    _skip_if_no_xlrd()

    frame = self.tsframe
    xp = frame.resample('M', kind='period')

    with ensure_clean(self.ext) as path:
        xp.to_excel(path, 'sht1')

        # Release the workbook handle as soon as the sheet has been parsed.
        with ExcelFile(path) as reader:
            rs = reader.parse('sht1', index_col=0, parse_dates=True)
        tm.assert_frame_equal(xp, rs.to_period('M'))
def test_to_excel_multiindex_dates(self, merge_cells, tsframe, path):
    """Round-trip a frame indexed by a (datetime, integer) ``MultiIndex``.

    The ``tsframe`` fixture is re-indexed in place with levels named
    ``"time"`` and ``"foo"``, written to sheet ``"test1"`` and read back
    using the first two columns as the index.
    """
    # try multiindex with dates
    new_index = [tsframe.index, np.arange(len(tsframe.index))]
    tsframe.index = MultiIndex.from_arrays(new_index)
    tsframe.index.names = ["time", "foo"]

    tsframe.to_excel(path, "test1", merge_cells=merge_cells)

    # Release the workbook handle as soon as the sheet has been read.
    with ExcelFile(path) as reader:
        recons = pd.read_excel(reader, "test1", index_col=[0, 1])

    tm.assert_frame_equal(tsframe, recons)
    assert recons.index.names == ("time", "foo")
def test_excel_sheet_by_name_raise(self):
    """Check that sheet index ``0`` parses but sheet name ``'0'`` raises.

    A random 10x2 frame is written with the default sheet name; parsing by
    position must return it, while parsing the string ``'0'`` is expected
    to raise ``xlrd.XLRDError``.
    """
    _skip_if_no_xlrd()
    import xlrd

    with ensure_clean(self.ext) as pth:
        gt = DataFrame(np.random.randn(10, 2))
        gt.to_excel(pth)

        # Keep the workbook open for both lookups, then release it.
        with ExcelFile(pth) as xl:
            df = xl.parse(0)
            tm.assert_frame_equal(gt, df)

            self.assertRaises(xlrd.XLRDError, xl.parse, '0')
def test_to_excel_empty_multiindex(self, path):
    """Write an empty frame with an empty two-level index and read it back.

    The frame read from sheet ``"test1"`` is compared with an empty frame
    whose columns are ``[0, 1, 2]``, ignoring index type and dtypes.
    """
    # GH 19543.
    expected = DataFrame([], columns=[0, 1, 2])

    empty_index = MultiIndex.from_tuples([], names=[0, 1])
    empty_frame = DataFrame([], index=empty_index, columns=[2])
    empty_frame.to_excel(path, "test1")

    with ExcelFile(path) as workbook:
        result = pd.read_excel(workbook, sheet_name="test1")

    tm.assert_frame_equal(
        result,
        expected,
        check_index_type=False,
        check_dtype=False,
    )
def test_excel_sheet_by_name_raise(self, path):
    """Sheet position ``0`` must load; sheet name ``"0"`` must raise.

    The expected failure is a ``ValueError`` whose message matches
    ``"Worksheet named '0' not found"``.
    """
    written = DataFrame(np.random.randn(10, 2))
    written.to_excel(path)

    with ExcelFile(path) as workbook:
        loaded = pd.read_excel(workbook, sheet_name=0, index_col=0)
        tm.assert_frame_equal(written, loaded)

        expected_message = "Worksheet named '0' not found"
        with pytest.raises(ValueError, match=expected_message):
            pd.read_excel(workbook, "0")
def test_ts_frame(self, tsframe, path):
    """Round-trip the time-series fixture through sheet ``"test1"``.

    The index is rebuilt as a ``DatetimeIndex`` with ``freq=None`` before
    writing, so both sides of the comparison carry no frequency.
    """
    df = tsframe

    # freq doesnt round-trip
    index = pd.DatetimeIndex(np.asarray(df.index), freq=None)
    df.index = index

    df.to_excel(path, "test1")

    # Release the workbook handle as soon as the sheet has been read.
    with ExcelFile(path) as reader:
        recons = pd.read_excel(reader, "test1", index_col=0)

    tm.assert_frame_equal(df, recons)
def check_excel_sheet_by_name_raise(self, ext):
    """Helper: sheet index ``0`` parses, sheet name ``'0'`` raises.

    Parameters
    ----------
    ext : str
        File extension used to build the ``testit.<ext>`` file name under
        ``self.dirpath``.
    """
    from xlrd import XLRDError

    target = os.path.join(self.dirpath, 'testit.{0}'.format(ext))
    with ensure_clean(target) as target:
        written = DataFrame(np.random.randn(10, 2))
        written.to_excel(target)

        workbook = ExcelFile(target)
        loaded = workbook.parse(0)
        tm.assert_frame_equal(written, loaded)

        # The string '0' is looked up as a sheet name, not a position.
        self.assertRaises(XLRDError, workbook.parse, '0')
def test_float_types(self):
    """Round-trip float16/float32/float64 columns through sheet ``'test1'``.

    The parsed frame is cast back to the written dtype and compared with
    ``check_dtype=False``.
    """
    _skip_if_no_xlrd()

    target = '__tmp_to_excel_from_excel_float_types__.' + self.ext
    float_dtypes = (np.float16, np.float32, np.float64)

    for float_dtype in float_dtypes:
        # The name is rebound to whatever ensure_clean yields, so the next
        # iteration receives that value.
        with ensure_clean(target) as target:
            # Test np.float values read come back as float.
            written = DataFrame(np.random.random_sample(10), dtype=float_dtype)
            written.to_excel(target, 'test1')

            workbook = ExcelFile(target)
            parsed = workbook.parse('test1')
            restored = parsed.astype(float_dtype)
            tm.assert_frame_equal(written, restored, check_dtype=False)
def test_bool_types(self):
    """Round-trip boolean columns (``np.bool8``, ``np.bool_``) via ``'test1'``.

    The parsed frame is cast back to the written dtype before comparison.
    """
    _skip_if_no_xlrd()

    target = '__tmp_to_excel_from_excel_bool_types__.' + self.ext
    bool_dtypes = (np.bool8, np.bool_)

    for bool_dtype in bool_dtypes:
        # The name is rebound to whatever ensure_clean yields, so the next
        # iteration receives that value.
        with ensure_clean(target) as target:
            # Test np.bool values read come back as float.
            written = DataFrame([1, 0, True, False], dtype=bool_dtype)
            written.to_excel(target, 'test1')

            workbook = ExcelFile(target)
            parsed = workbook.parse('test1')
            restored = parsed.astype(bool_dtype)
            tm.assert_frame_equal(written, restored)
def test_excelwriter_contextmanager(self):
    """Write two sheets through ``ExcelWriter`` used as a context manager.

    ``self.frame`` and ``self.frame2`` go to sheets ``'Data1'`` and
    ``'Data2'``; both are then read back and compared with the originals.
    """
    _skip_if_no_xlrd()

    with ensure_clean(self.ext) as target:
        with ExcelWriter(target) as out:
            self.frame.to_excel(out, 'Data1')
            self.frame2.to_excel(out, 'Data2')

        with ExcelFile(target) as workbook:
            first_sheet = workbook.parse('Data1')
            second_sheet = workbook.parse('Data2')
            tm.assert_frame_equal(first_sheet, self.frame)
            tm.assert_frame_equal(second_sheet, self.frame2)
def test_to_excel_float_format(self, engine, ext):
    """Check that ``float_format="%.2f"`` rounds the values written.

    A 2x3 frame is written to ``self.path`` with two-decimal formatting and
    the frame read back is compared with the explicitly rounded values.
    """
    df = DataFrame(
        [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
        index=["A", "B"],
        columns=["X", "Y", "Z"],
    )
    df.to_excel(self.path, "test1", float_format="%.2f")

    # Release the workbook handle as soon as the sheet has been read.
    with ExcelFile(self.path) as reader:
        result = pd.read_excel(reader, "test1", index_col=0)

    expected = DataFrame(
        [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
        index=["A", "B"],
        columns=["X", "Y", "Z"],
    )
    tm.assert_frame_equal(result, expected)
def test_excel_writer_context_manager(self, frame, path):
    """Write two sheets inside one ``ExcelWriter`` context and read them back.

    Sheet ``"Data1"`` holds ``frame``; sheet ``"Data2"`` holds a copy whose
    column labels are reversed.
    """
    # Second frame: same data, column labels in reverse order.
    reversed_frame = frame.copy()
    reversed_frame.columns = frame.columns[::-1]

    with ExcelWriter(path) as out:
        frame.to_excel(out, "Data1")
        reversed_frame.to_excel(out, "Data2")

    with ExcelFile(path) as workbook:
        first_sheet = pd.read_excel(workbook, "Data1", index_col=0)
        second_sheet = pd.read_excel(workbook, "Data2", index_col=0)

        tm.assert_frame_equal(first_sheet, frame)
        tm.assert_frame_equal(second_sheet, reversed_frame)
def test_excel_sheet_by_name_raise(self, path):
    """Sheet position ``0`` must load; sheet name ``"0"`` must raise.

    The expected failure is ``xlrd.XLRDError``.
    """
    import xlrd

    gt = DataFrame(np.random.randn(10, 2))
    gt.to_excel(path)

    # Keep the workbook open for both lookups, then release it.
    with ExcelFile(path) as xl:
        df = pd.read_excel(xl, 0, index_col=0)
        tm.assert_frame_equal(gt, df)

        with pytest.raises(xlrd.XLRDError):
            pd.read_excel(xl, "0")
def test_to_excel_multiindex(self, merge_cells, engine, ext, frame):
    """Round-trip a frame with a two-level integer ``MultiIndex``.

    The ``frame`` fixture is re-indexed in place with levels named
    ``'first'`` and ``'second'``. It is written with ``header=False`` and
    with a column subset first; the final write with ``merge_cells`` is the
    one read back and compared.
    """
    arrays = np.arange(len(frame.index) * 2).reshape(2, -1)
    new_index = MultiIndex.from_arrays(arrays, names=['first', 'second'])
    frame.index = new_index

    frame.to_excel(self.path, 'test1', header=False)
    frame.to_excel(self.path, 'test1', columns=['A', 'B'])

    # round trip
    frame.to_excel(self.path, 'test1', merge_cells=merge_cells)

    # Release the workbook handle as soon as the sheet has been read.
    with ExcelFile(self.path) as reader:
        df = pd.read_excel(reader, 'test1', index_col=[0, 1])

    tm.assert_frame_equal(frame, df)
def test_to_excel_multiindex_dates(self, merge_cells, engine, ext, tsframe):
    """Round-trip a frame indexed by a (datetime, integer) ``MultiIndex``.

    The ``tsframe`` fixture is re-indexed in place with levels named
    ``'time'`` and ``'foo'``, written to ``self.path`` and read back using
    the first two columns as the index.
    """
    # try multiindex with dates
    new_index = [tsframe.index, np.arange(len(tsframe.index))]
    tsframe.index = MultiIndex.from_arrays(new_index)
    tsframe.index.names = ['time', 'foo']

    tsframe.to_excel(self.path, 'test1', merge_cells=merge_cells)

    # Release the workbook handle as soon as the sheet has been read.
    with ExcelFile(self.path) as reader:
        recons = pd.read_excel(reader, 'test1', index_col=[0, 1])

    tm.assert_frame_equal(tsframe, recons)
    assert recons.index.names == ('time', 'foo')
def test_to_excel_multiindex(self, merge_cells, frame, path):
    """Round-trip a frame with a two-level integer ``MultiIndex``.

    The ``frame`` fixture is re-indexed in place with levels named
    ``"first"`` and ``"second"``. It is written with ``header=False`` and
    with a column subset first; the final write with ``merge_cells`` is the
    one read back and compared.
    """
    arrays = np.arange(len(frame.index) * 2).reshape(2, -1)
    new_index = MultiIndex.from_arrays(arrays, names=["first", "second"])
    frame.index = new_index

    frame.to_excel(path, "test1", header=False)
    frame.to_excel(path, "test1", columns=["A", "B"])

    # round trip
    frame.to_excel(path, "test1", merge_cells=merge_cells)

    # Release the workbook handle as soon as the sheet has been read.
    with ExcelFile(path) as reader:
        df = pd.read_excel(reader, "test1", index_col=[0, 1])

    tm.assert_frame_equal(frame, df)
def test_excel_passes_na(self, read_ext):
    """Check ``keep_default_na`` / ``na_values`` handling in ``read_excel``.

    Two workbooks (``test4`` and ``test5`` plus ``read_ext``) are each read
    twice from ``'Sheet1'``: once with ``keep_default_na=False`` and once
    with ``keep_default_na=True``, always with ``na_values=['apple']``.
    """
    # Each workbook is opened in a context manager so its handle is released
    # once both reads are done.
    with ExcelFile('test4' + read_ext) as excel:
        parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False,
                               na_values=['apple'])
        expected = DataFrame([['NA'], [1], ['NA'], [np.nan], ['rabbit']],
                             columns=['Test'])
        tm.assert_frame_equal(parsed, expected)

        parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True,
                               na_values=['apple'])
        expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']],
                             columns=['Test'])
        tm.assert_frame_equal(parsed, expected)

    # 13967
    with ExcelFile('test5' + read_ext) as excel:
        parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False,
                               na_values=['apple'])
        expected = DataFrame([['1.#QNAN'], [1], ['nan'], [np.nan], ['rabbit']],
                             columns=['Test'])
        tm.assert_frame_equal(parsed, expected)

        parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True,
                               na_values=['apple'])
        expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']],
                             columns=['Test'])
        tm.assert_frame_equal(parsed, expected)
def test_excel_date_datetime_format(self, engine, ext):
    """Check that custom date/datetime output formats do not change values.

    The same frame is written once with default formats (to ``self.path``)
    and once with ``date_format="DD.MM.YYYY"`` and
    ``datetime_format="DD.MM.YYYY HH-MM-SS"``; both files must read back
    identically and match ``df_expected``.
    """
    # see gh-4133
    #
    # Excel output format strings
    df = DataFrame(
        [
            [date(2014, 1, 31), date(1999, 9, 24)],
            [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)],
        ],
        index=["DATE", "DATETIME"],
        columns=["X", "Y"],
    )
    df_expected = DataFrame(
        [
            [datetime(2014, 1, 31), datetime(1999, 9, 24)],
            [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)],
        ],
        index=["DATE", "DATETIME"],
        columns=["X", "Y"],
    )

    with ensure_clean(ext) as filename2:
        # Context managers close the writers even if a write raises.
        with ExcelWriter(self.path) as writer1:
            df.to_excel(writer1, "test1")
        with ExcelWriter(
            filename2,
            date_format="DD.MM.YYYY",
            datetime_format="DD.MM.YYYY HH-MM-SS",
        ) as writer2:
            df.to_excel(writer2, "test1")

        # Readers are released as soon as their sheet has been loaded.
        with ExcelFile(self.path) as reader1:
            rs1 = pd.read_excel(reader1, "test1", index_col=0)
        with ExcelFile(filename2) as reader2:
            rs2 = pd.read_excel(reader2, "test1", index_col=0)

        tm.assert_frame_equal(rs1, rs2)

        # Since the reader returns a datetime object for dates,
        # we need to use df_expected to check the result.
        tm.assert_frame_equal(rs2, df_expected)
def test_to_excel_interval_no_labels(self, path):
    """Write a column produced by ``pd.cut`` and read it back.

    The expected frame holds the same binning cast to ``str``.
    """
    # see gh-19242
    #
    # Test writing Interval without labels.
    values = np.random.randint(-10, 10, size=(20, 1))
    source = DataFrame(values, dtype=np.int64)
    expected = source.copy()

    source["new"] = pd.cut(source[0], 10)
    expected_bins = pd.cut(expected[0], 10)
    expected["new"] = expected_bins.astype(str)

    source.to_excel(path, "test1")
    with ExcelFile(path) as workbook:
        roundtripped = pd.read_excel(workbook, sheet_name="test1", index_col=0)

    tm.assert_frame_equal(expected, roundtripped)
def test_excel_sheet_by_name_raise(self, path):
    """Sheet position ``0`` must load; sheet name ``"0"`` must raise.

    The expected failure is ``xlrd.XLRDError`` with a message matching
    ``"No sheet named <'0'>"``.
    """
    import xlrd

    gt = DataFrame(np.random.randn(10, 2))
    gt.to_excel(path)

    # Keep the workbook open for both lookups, then release it.
    with ExcelFile(path) as xl:
        df = pd.read_excel(xl, sheet_name=0, index_col=0)
        tm.assert_frame_equal(gt, df)

        msg = "No sheet named <'0'>"
        with pytest.raises(xlrd.XLRDError, match=msg):
            pd.read_excel(xl, sheet_name="0")
def test_to_excel_periodindex(self):
    """Round-trip a monthly period-indexed frame for ``xls`` and ``xlsx``.

    For each extension, ``self.tsframe`` is resampled to monthly periods,
    written to sheet ``'sht1'``, read back with date parsing and converted
    to monthly periods again before comparison.
    """
    _skip_if_no_excelsuite()

    for extension in ['xls', 'xlsx']:
        target = '__tmp_to_excel_periodindex__.' + extension

        source = self.tsframe
        monthly = source.resample('M', kind='period')

        with ensure_clean(target) as target:
            monthly.to_excel(target, 'sht1')

            workbook = ExcelFile(target)
            parsed = workbook.parse('sht1', index_col=0, parse_dates=True)
            tm.assert_frame_equal(monthly, parsed.to_period('M'))
def test_excel_file_warning_with_xlsx_file(datapath):
    """Check the warning behaviour when opening an xlsx file with ``engine=None``.

    With openpyxl importable, ``read_excel`` must emit no warning. Without
    it, ``ExcelFile`` must emit a ``FutureWarning`` matching
    "The xlrd engine is no longer maintained".
    """
    # GH 29375
    path = datapath("io", "data", "excel", "test1.xlsx")
    openpyxl_module = import_optional_dependency("openpyxl", errors="ignore")

    if openpyxl_module is not None:
        with tm.assert_produces_warning(None):
            pd.read_excel(path, "Sheet1", engine=None)
        return

    expected_warning = dict(
        raise_on_extra_warnings=False,
        match="The xlrd engine is no longer maintained",
    )
    with tm.assert_produces_warning(FutureWarning, **expected_warning):
        ExcelFile(path, engine=None)
def test_roundtrip_indexlabels(self):
    """Exercise ``to_excel`` options and check ``index_label`` round-trips.

    ``self.frame`` is first written with several option combinations. Then
    a random boolean frame is written three times with different
    ``index_label`` values, and the index names read back are compared
    with ``['test']`` each time.
    """
    _skip_if_no_xlrd()

    target = '__tmp_to_excel_from_excel_indexlabels__.' + self.ext

    with ensure_clean(target) as target:
        self.frame['A'][:5] = nan

        # Smoke-write the shared frame with different keyword options.
        for options in ({}, {'cols': ['A', 'B']}, {'header': False},
                        {'index': False}):
            self.frame.to_excel(target, 'test1', **options)

        # test index_label
        # Each case: the label passed to to_excel and the assertion method
        # used to compare the index names.
        cases = (
            (['test'], self.assertEqual),
            (['test', 'dummy', 'dummy2'], self.assertEqual),
            ('test', self.assertAlmostEqual),
        )
        for label, compare in cases:
            bool_frame = DataFrame(np.random.randn(10, 2)) >= 0
            bool_frame.to_excel(target, 'test1', index_label=label)

            workbook = ExcelFile(target)
            parsed = workbook.parse('test1', index_col=0)
            restored = parsed.astype(np.int64)

            bool_frame.index.names = ['test']
            compare(bool_frame.index.names, restored.index.names)
def inputData(self):
    """Ask the user for an ``.xlsx`` file and load graph data from it.

    The first three sheets of the workbook are read as the graph, node and
    edge tables. The workbook is opened once and closed after the sheets
    have been loaded.

    Returns
    -------
    list or None
        ``[graphName, mode, nodeDataList, edgeDataList]`` on success.
        ``None`` when no file name is returned by the dialog, when a node
        cell is NaN, or when a node row has fewer than two values (a
        warning box is shown in the latter two cases).
    """
    curPath = self.dir.currentPath()
    title = self._tr("OperatorFile", "打开文件")
    filt = self._tr("OperatorFile", "*.xlsx")
    fileName, _ = QFileDialog.getOpenFileName(self, title, curPath, filt)
    if fileName == "":
        # Empty file name — presumably the dialog was cancelled.
        return

    # Open the workbook once, reuse the handle for every sheet, and make
    # sure it is closed afterwards.
    with ExcelFile(fileName) as graphData:
        sheetNames = graphData.sheet_names
        graphFrame = read_excel(graphData, sheetNames[0])
        nodeFrame = read_excel(graphData, sheetNames[1])
        edgeFrame = read_excel(graphData, sheetNames[2])

    mode = int(graphFrame.iloc[0, 3])
    graphName = str(graphFrame.iloc[0, 0])

    nodeDataList = []
    edgeDataList = []

    # Node rows: every cell must be a number; any NaN aborts the import.
    for i in range(nodeFrame.shape[0]):
        data = []
        for j in range(nodeFrame.shape[1]):
            if not isnan(nodeFrame.iloc[i, j]):
                data.append(int(nodeFrame.iloc[i, j]))
            else:
                QMessageBox.warning(
                    self,
                    QCoreApplication.translate("OperatorFile", "警告!"),
                    QCoreApplication.translate(
                        "OperatorFile", "对不起,您的数据有误,系统无法识别!"))
                return None
        if len(data) >= 2:
            nodeDataList.append(data)
        else:
            # A node row needs at least two values.
            QMessageBox.warning(
                self,
                QCoreApplication.translate("OperatorFile", "警告!"),
                QCoreApplication.translate(
                    "OperatorFile",
                    "顶点ID和权重为必须信息,您的信息不完整,请补充信息后,再次尝试。"))
            return None

    # Edge rows: NaN cells are skipped; rows with fewer than two values
    # are dropped silently.
    for i in range(edgeFrame.shape[0]):
        data = []
        for j in range(edgeFrame.shape[1]):
            if not isnan(edgeFrame.iloc[i, j]):
                data.append(int(edgeFrame.iloc[i, j]))
        if len(data) >= 2:
            edgeDataList.append(data)

    return [graphName, mode, nodeDataList, edgeDataList]
def test_excelwriter_contextmanager(self):
    """Write two sheets through ``ExcelWriter`` used as a context manager.

    The file is ``testit.<ext>`` under ``self.dirpath``; ``self.frame`` and
    ``self.frame2`` go to sheets ``'Data1'`` and ``'Data2'`` and are read
    back for comparison.
    """
    _skip_if_no_xlrd()

    file_name = 'testit.{0}'.format(self.ext)
    target = os.path.join(self.dirpath, file_name)

    with ensure_clean(target) as target:
        with ExcelWriter(target) as out:
            self.frame.to_excel(out, 'Data1')
            self.frame2.to_excel(out, 'Data2')

        with ExcelFile(target) as workbook:
            first_sheet = workbook.parse('Data1')
            second_sheet = workbook.parse('Data2')
            tm.assert_frame_equal(first_sheet, self.frame)
            tm.assert_frame_equal(second_sheet, self.frame2)
def test_excel_table(self):
    """Compare sheets of ``test.xls`` with the CSV read from ``self.csv1``.

    ``Sheet1``, and ``Sheet2`` with row 1 skipped, must both equal the CSV
    frame (names ignored). ``skipfooter=1`` and ``skip_footer=1`` must give
    the same result, equal to ``Sheet1`` without its last row.
    """
    _skip_if_no_xlrd()

    workbook = ExcelFile(os.path.join(self.dirpath, 'test.xls'))

    # Keyword arguments shared by every read below.
    dated = dict(index_col=0, parse_dates=True)

    sheet1 = workbook.parse('Sheet1', **dated)
    from_csv = self.read_csv(self.csv1, **dated)
    sheet2 = workbook.parse('Sheet2', skiprows=[1], **dated)
    tm.assert_frame_equal(sheet1, from_csv, check_names=False)
    tm.assert_frame_equal(sheet2, from_csv, check_names=False)

    trimmed = workbook.parse('Sheet1', skipfooter=1, **dated)
    trimmed_alias = workbook.parse('Sheet1', skip_footer=1, **dated)
    tm.assert_frame_equal(trimmed, sheet1.ix[:-1])
    tm.assert_frame_equal(trimmed, trimmed_alias)
def test_excel_passes_na(self):
    """Check ``keep_default_na`` / ``na_values`` handling when parsing.

    ``Sheet1`` of ``test2.xlsx`` is parsed with ``keep_default_na`` off and
    then on, always with ``na_values=['apple']``.
    """
    _skip_if_no_xlrd()

    workbook = ExcelFile(os.path.join(self.dirpath, 'test2.xlsx'))

    # (keep_default_na, expected rows of the single 'Test' column)
    cases = (
        (False, [['NA'], [1], ['NA'], [np.nan], ['rabbit']]),
        (True, [[np.nan], [1], [np.nan], [np.nan], ['rabbit']]),
    )
    for keep_default, rows in cases:
        parsed = workbook.parse('Sheet1', keep_default_na=keep_default,
                                na_values=['apple'])
        expected = DataFrame(rows, columns=['Test'])
        tm.assert_frame_equal(parsed, expected)
def _check_excel_multiindex_dates(self, ext):
    """Helper: round-trip ``self.tsframe`` with a (datetime, int) MultiIndex.

    The frame is written once with explicit ``index_label`` values and read
    back with ``index_col=[0, 1]``, then written again without labels and
    read back with the index inferred.

    Parameters
    ----------
    ext : str
        File extension used to build the temporary file name.

    Notes
    -----
    ``self.tsframe`` is mutated in place; its original index is restored in
    a ``finally`` clause so a failing assertion does not leave the shared
    frame modified.
    """
    path = '__tmp_to_excel_multiindex_dates__' + ext + '__.' + ext

    # try multiindex with dates
    tsframe = self.tsframe
    old_index = tsframe.index
    new_index = [old_index, np.arange(len(old_index))]
    tsframe.index = MultiIndex.from_arrays(new_index)

    try:
        with ensure_clean(path) as path:
            tsframe.to_excel(path, 'test1', index_label=['time', 'foo'])
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=[0, 1])

            tm.assert_frame_equal(tsframe, recons, check_names=False)
            self.assertEquals(recons.index.names, ['time', 'foo'])

            # infer index
            tsframe.to_excel(path, 'test1')
            reader = ExcelFile(path)
            recons = reader.parse('test1')
            tm.assert_frame_equal(tsframe, recons)
    finally:
        self.tsframe.index = old_index  # needed if setUP becomes classmethod
def openExcel(self, fileName):
    """Load graph, node, edge and text tables from an Excel workbook.

    The first four sheets are read in that order. The workbook is opened
    once and closed after the sheets have been loaded.

    Parameters
    ----------
    fileName : str
        Path of the workbook, passed to ``ExcelFile``.

    Returns
    -------
    list or None
        ``[graphName, mode, nodeDataList, edgeDataList, textDataList]`` on
        success; ``None`` (after a warning box) when a node cell is NaN.
    """
    # Open the workbook once, reuse the handle for every sheet, and make
    # sure it is closed afterwards.
    with ExcelFile(fileName) as graphData:
        sheetNames = graphData.sheet_names
        graphFrame = read_excel(graphData, sheetNames[0])
        nodeFrame = read_excel(graphData, sheetNames[1])
        edgeFrame = read_excel(graphData, sheetNames[2])
        textFrame = read_excel(graphData, sheetNames[3])

    mode = int(graphFrame.iloc[0, 4])
    graphName = str(graphFrame.iloc[0, 0])

    nodeDataList = []
    edgeDataList = []
    textDataList = []

    # Node rows: every cell must be a number; any NaN aborts the import.
    for i in range(nodeFrame.shape[0]):
        data = []
        for j in range(nodeFrame.shape[1]):
            if not isnan(nodeFrame.iloc[i, j]):
                data.append(int(nodeFrame.iloc[i, j]))
            else:
                QMessageBox.warning(
                    self,
                    QCoreApplication.translate("OperatorFile", "警告!"),
                    QCoreApplication.translate(
                        "OperatorFile", "对不起,您的数据有误,系统无法识别!"))
                return None
        if len(data) >= 2:
            nodeDataList.append(data)

    # Edge rows: NaN cells are skipped; rows with fewer than two values
    # are dropped.
    for i in range(edgeFrame.shape[0]):
        data = []
        for j in range(edgeFrame.shape[1]):
            if not isnan(edgeFrame.iloc[i, j]):
                data.append(int(edgeFrame.iloc[i, j]))
        if len(data) >= 2:
            edgeDataList.append(data)

    # Text rows: strings are kept as-is, everything else is cast to int.
    for i in range(textFrame.shape[0]):
        data = []
        for j in range(textFrame.shape[1]):
            cell = textFrame.iloc[i, j]
            if isinstance(cell, str):
                data.append(cell)
            else:
                data.append(int(cell))
        if len(data) >= 2:
            textDataList.append(data)

    return [graphName, mode, nodeDataList, edgeDataList, textDataList]