示例#1
0
文件: test_excel.py 项目: t1c1/pandas
    def test_excel_roundtrip_datetime(self):
        """Write a frame indexed by ``datetime.date`` objects and read it back.

        The parsed frame is compared against the original ``self.tsframe``.
        """
        _skip_if_no_xlrd()

        # datetime.date, not sure what to test here exactly
        tsf = self.tsframe.copy()
        with ensure_clean(self.ext) as path:

            tsf.index = [x.date() for x in self.tsframe.index]
            tsf.to_excel(path, 'test1', merge_cells=self.merge_cells)
            # Release the reader's file handle as soon as parsing is done.
            with ExcelFile(path) as reader:
                recons = reader.parse('test1')
            tm.assert_frame_equal(self.tsframe, recons)
示例#2
0
文件: test_excel.py 项目: t1c1/pandas
    def test_to_excel_periodindex(self):
        """Round-trip a monthly period-resampled frame through Excel.

        The sheet is parsed with date parsing on and converted back to
        monthly periods before being compared with the written frame.
        """
        _skip_if_no_xlrd()

        frame = self.tsframe
        xp = frame.resample('M', kind='period')

        with ensure_clean(self.ext) as path:
            xp.to_excel(path, 'sht1')

            # Release the reader's file handle as soon as parsing is done.
            with ExcelFile(path) as reader:
                rs = reader.parse('sht1', index_col=0, parse_dates=True)
            tm.assert_frame_equal(xp, rs.to_period('M'))
示例#3
0
    def test_to_excel_multiindex_dates(self, merge_cells, tsframe, path):
        """Round-trip a frame with a (datetime, int) MultiIndex through Excel.

        Both the frame contents and the index level names must survive.
        """
        # try multiindex with dates
        new_index = [tsframe.index, np.arange(len(tsframe.index))]
        tsframe.index = MultiIndex.from_arrays(new_index)

        tsframe.index.names = ["time", "foo"]
        tsframe.to_excel(path, "test1", merge_cells=merge_cells)
        # Release the reader's file handle as soon as parsing is done.
        with ExcelFile(path) as reader:
            recons = pd.read_excel(reader, "test1", index_col=[0, 1])

        tm.assert_frame_equal(tsframe, recons)
        assert recons.index.names == ("time", "foo")
示例#4
0
文件: test_excel.py 项目: t1c1/pandas
    def test_excel_sheet_by_name_raise(self):
        """Parsing sheet index 0 works, while sheet name '0' raises XLRDError."""
        _skip_if_no_xlrd()
        import xlrd

        with ensure_clean(self.ext) as pth:
            gt = DataFrame(np.random.randn(10, 2))
            gt.to_excel(pth)
            # Keep the reader open for both lookups and close it afterwards.
            with ExcelFile(pth) as xl:
                df = xl.parse(0)
                tm.assert_frame_equal(gt, df)

                self.assertRaises(xlrd.XLRDError, xl.parse, '0')
示例#5
0
    def test_to_excel_empty_multiindex(self, path):
        """Write an empty frame with an empty two-level MultiIndex and read it back.

        The result is compared with an empty three-column frame, ignoring
        index type and dtypes.
        """
        # GH 19543.
        empty_index = MultiIndex.from_tuples([], names=[0, 1])
        written = DataFrame([], index=empty_index, columns=[2])
        written.to_excel(path, "test1")

        with ExcelFile(path) as workbook:
            result = pd.read_excel(workbook, sheet_name="test1")

        expected = DataFrame([], columns=[0, 1, 2])
        tm.assert_frame_equal(
            result, expected, check_index_type=False, check_dtype=False
        )
示例#6
0
    def test_excel_sheet_by_name_raise(self, path):
        """Reading sheet index 0 works, while sheet name "0" raises ValueError."""
        gt = DataFrame(np.random.randn(10, 2))
        gt.to_excel(path)

        msg = "Worksheet named '0' not found"
        with ExcelFile(path) as xl:
            df = pd.read_excel(xl, sheet_name=0, index_col=0)

            # The failing lookup must run while the reader is still open;
            # doing it after the ``with`` block would use a closed workbook.
            with pytest.raises(ValueError, match=msg):
                pd.read_excel(xl, "0")

        tm.assert_frame_equal(gt, df)
    def test_ts_frame(self, tsframe, path):
        """Round-trip a datetime-indexed frame through Excel."""
        df = tsframe

        # freq doesnt round-trip
        index = pd.DatetimeIndex(np.asarray(df.index), freq=None)
        df.index = index

        df.to_excel(path, "test1")
        # Release the reader's file handle as soon as parsing is done.
        with ExcelFile(path) as reader:
            recons = pd.read_excel(reader, "test1", index_col=0)

        tm.assert_frame_equal(df, recons)
示例#8
0
    def check_excel_sheet_by_name_raise(self, ext):
        """Check sheet lookup by position versus by name for extension ``ext``.

        A random frame is written to a temporary workbook; parsing sheet 0
        must return it, and parsing sheet name '0' must raise XLRDError.
        """
        import xlrd

        target = os.path.join(self.dirpath, 'testit.{0}'.format(ext))
        with ensure_clean(target) as pth:
            written = DataFrame(np.random.randn(10, 2))
            written.to_excel(pth)

            workbook = ExcelFile(pth)
            parsed = workbook.parse(0)
            tm.assert_frame_equal(written, parsed)

            # Position 0 parsed fine above; the string '0' is expected to fail.
            self.assertRaises(xlrd.XLRDError, workbook.parse, '0')
示例#9
0
    def test_float_types(self):
        """Round-trip float16/float32/float64 frames through Excel.

        The parsed frame is cast back to the written dtype and compared with
        ``check_dtype=False``.
        """
        _skip_if_no_xlrd()
        ext = self.ext
        path = '__tmp_to_excel_from_excel_float_types__.' + ext

        for np_type in (np.float16, np.float32, np.float64):
            with ensure_clean(path) as path:
                # Test np.float values read come back as float.
                frame = DataFrame(np.random.random_sample(10), dtype=np_type)
                frame.to_excel(path, 'test1')
                # Close the reader on every iteration rather than leaking it.
                with ExcelFile(path) as reader:
                    recons = reader.parse('test1').astype(np_type)
                tm.assert_frame_equal(frame, recons, check_dtype=False)
示例#10
0
    def test_bool_types(self):
        """Round-trip a boolean frame through Excel.

        The parsed frame is cast back to ``np.bool_`` before comparison.
        """
        _skip_if_no_xlrd()
        ext = self.ext
        path = '__tmp_to_excel_from_excel_bool_types__.' + ext

        # np.bool8 was only an alias of np.bool_ (and no longer exists in
        # NumPy 2.0), so a single dtype covers what the old loop exercised.
        np_type = np.bool_
        with ensure_clean(path) as path:
            # Test np.bool values read come back as bool after the cast.
            frame = (DataFrame([1, 0, True, False], dtype=np_type))
            frame.to_excel(path, 'test1')
            # Release the reader's file handle as soon as parsing is done.
            with ExcelFile(path) as reader:
                recons = reader.parse('test1').astype(np_type)
            tm.assert_frame_equal(frame, recons)
示例#11
0
    def test_excelwriter_contextmanager(self):
        """Write two sheets via ``ExcelWriter`` as a context manager, then read both back."""
        _skip_if_no_xlrd()

        with ensure_clean(self.ext) as target:
            with ExcelWriter(target) as xl_writer:
                self.frame.to_excel(xl_writer, 'Data1')
                self.frame2.to_excel(xl_writer, 'Data2')

            with ExcelFile(target) as xl_reader:
                first_sheet = xl_reader.parse('Data1')
                second_sheet = xl_reader.parse('Data2')
                tm.assert_frame_equal(first_sheet, self.frame)
                tm.assert_frame_equal(second_sheet, self.frame2)
示例#12
0
    def test_to_excel_float_format(self, engine, ext):
        """Check that ``float_format="%.2f"`` rounds values written to Excel."""
        df = DataFrame([[0.123456, 0.234567, 0.567567],
                        [12.32112, 123123.2, 321321.2]],
                       index=["A", "B"], columns=["X", "Y", "Z"])
        df.to_excel(self.path, "test1", float_format="%.2f")

        # Release the reader's file handle as soon as parsing is done.
        with ExcelFile(self.path) as reader:
            result = pd.read_excel(reader, "test1", index_col=0)

        expected = DataFrame([[0.12, 0.23, 0.57],
                              [12.32, 123123.20, 321321.20]],
                             index=["A", "B"], columns=["X", "Y", "Z"])
        tm.assert_frame_equal(result, expected)
示例#13
0
    def test_excel_writer_context_manager(self, frame, path):
        """Write two sheets inside an ``ExcelWriter`` block and read both back.

        The second sheet holds a copy of ``frame`` with its column labels
        reversed.
        """
        with ExcelWriter(path) as xl_writer:
            frame.to_excel(xl_writer, "Data1")
            reversed_cols = frame.copy()
            reversed_cols.columns = frame.columns[::-1]
            reversed_cols.to_excel(xl_writer, "Data2")

        with ExcelFile(path) as xl_reader:
            first_sheet = pd.read_excel(xl_reader, "Data1", index_col=0)
            second_sheet = pd.read_excel(xl_reader, "Data2", index_col=0)

            tm.assert_frame_equal(first_sheet, frame)
            tm.assert_frame_equal(second_sheet, reversed_cols)
示例#14
0
    def test_excel_sheet_by_name_raise(self, path):
        """Reading sheet index 0 works, while sheet name "0" raises XLRDError."""
        import xlrd

        gt = DataFrame(np.random.randn(10, 2))
        gt.to_excel(path)

        # Keep the reader open for both lookups and close it afterwards.
        with ExcelFile(path) as xl:
            df = pd.read_excel(xl, 0, index_col=0)

            tm.assert_frame_equal(gt, df)

            with pytest.raises(xlrd.XLRDError):
                pd.read_excel(xl, "0")
示例#15
0
    def test_to_excel_multiindex(self, merge_cells, engine, ext, frame):
        """Round-trip a frame with a two-level integer MultiIndex through Excel."""
        arrays = np.arange(len(frame.index) * 2).reshape(2, -1)
        new_index = MultiIndex.from_arrays(arrays, names=['first', 'second'])
        frame.index = new_index

        # These two writes only check that the options are accepted.
        frame.to_excel(self.path, 'test1', header=False)
        frame.to_excel(self.path, 'test1', columns=['A', 'B'])

        # round trip
        frame.to_excel(self.path, 'test1', merge_cells=merge_cells)
        # Release the reader's file handle as soon as parsing is done.
        with ExcelFile(self.path) as reader:
            df = pd.read_excel(reader, 'test1', index_col=[0, 1])
        tm.assert_frame_equal(frame, df)
示例#16
0
    def test_to_excel_multiindex_dates(self, merge_cells, engine, ext,
                                       tsframe):
        """Round-trip a frame with a (datetime, int) MultiIndex through Excel.

        Both the frame contents and the index level names must survive.
        """
        # try multiindex with dates
        new_index = [tsframe.index, np.arange(len(tsframe.index))]
        tsframe.index = MultiIndex.from_arrays(new_index)

        tsframe.index.names = ['time', 'foo']
        tsframe.to_excel(self.path, 'test1', merge_cells=merge_cells)
        # Release the reader's file handle as soon as parsing is done.
        with ExcelFile(self.path) as reader:
            recons = pd.read_excel(reader, 'test1', index_col=[0, 1])

        tm.assert_frame_equal(tsframe, recons)
        assert recons.index.names == ('time', 'foo')
示例#17
0
    def test_to_excel_multiindex(self, merge_cells, frame, path):
        """Round-trip a frame with a two-level integer MultiIndex through Excel."""
        arrays = np.arange(len(frame.index) * 2).reshape(2, -1)
        new_index = MultiIndex.from_arrays(arrays, names=["first", "second"])
        frame.index = new_index

        # These two writes only check that the options are accepted.
        frame.to_excel(path, "test1", header=False)
        frame.to_excel(path, "test1", columns=["A", "B"])

        # round trip
        frame.to_excel(path, "test1", merge_cells=merge_cells)
        # Release the reader's file handle as soon as parsing is done.
        with ExcelFile(path) as reader:
            df = pd.read_excel(reader, "test1", index_col=[0, 1])
        tm.assert_frame_equal(frame, df)
示例#18
0
    def test_excel_passes_na(self, read_ext):
        """Check ``keep_default_na`` / ``na_values`` handling when reading Excel.

        Each of the two workbooks is parsed with ``keep_default_na`` off and
        on, always with ``'apple'`` as an extra NA value, and compared against
        the expected single-column frame.
        """
        # Each workbook is opened in a ``with`` block so it is closed again.
        with ExcelFile('test4' + read_ext) as excel:
            parsed = pd.read_excel(excel,
                                   'Sheet1',
                                   keep_default_na=False,
                                   na_values=['apple'])
            expected = DataFrame([['NA'], [1], ['NA'], [np.nan], ['rabbit']],
                                 columns=['Test'])
            tm.assert_frame_equal(parsed, expected)

            parsed = pd.read_excel(excel,
                                   'Sheet1',
                                   keep_default_na=True,
                                   na_values=['apple'])
            expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']],
                                 columns=['Test'])
            tm.assert_frame_equal(parsed, expected)

        # 13967
        with ExcelFile('test5' + read_ext) as excel:
            parsed = pd.read_excel(excel,
                                   'Sheet1',
                                   keep_default_na=False,
                                   na_values=['apple'])
            expected = DataFrame([['1.#QNAN'], [1], ['nan'], [np.nan], ['rabbit']],
                                 columns=['Test'])
            tm.assert_frame_equal(parsed, expected)

            parsed = pd.read_excel(excel,
                                   'Sheet1',
                                   keep_default_na=True,
                                   na_values=['apple'])
            expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']],
                                 columns=['Test'])
            tm.assert_frame_equal(parsed, expected)
示例#19
0
    def test_excel_date_datetime_format(self, engine, ext):
        """Check that custom date/datetime formats do not change the values read.

        The same frame is written once with default formats and once with
        explicit ``date_format`` / ``datetime_format`` strings; both files
        must read back identically and match ``df_expected``.
        """
        # see gh-4133
        #
        # Excel output format strings
        df = DataFrame([[date(2014, 1, 31),
                         date(1999, 9, 24)],
                        [datetime(1998, 5, 26, 23, 33, 4),
                         datetime(2014, 2, 28, 13, 5, 13)]],
                       index=["DATE", "DATETIME"], columns=["X", "Y"])
        df_expected = DataFrame([[datetime(2014, 1, 31),
                                  datetime(1999, 9, 24)],
                                 [datetime(1998, 5, 26, 23, 33, 4),
                                  datetime(2014, 2, 28, 13, 5, 13)]],
                                index=["DATE", "DATETIME"], columns=["X", "Y"])

        with ensure_clean(ext) as filename2:
            # Context managers close each writer even if to_excel raises.
            with ExcelWriter(self.path) as writer1:
                df.to_excel(writer1, "test1")

            with ExcelWriter(filename2,
                             date_format="DD.MM.YYYY",
                             datetime_format="DD.MM.YYYY HH-MM-SS") as writer2:
                df.to_excel(writer2, "test1")

            # Readers are closed as soon as their sheet has been parsed.
            with ExcelFile(self.path) as reader1:
                rs1 = pd.read_excel(reader1, "test1", index_col=0)

            with ExcelFile(filename2) as reader2:
                rs2 = pd.read_excel(reader2, "test1", index_col=0)

            tm.assert_frame_equal(rs1, rs2)

            # Since the reader returns a datetime object for dates,
            # we need to use df_expected to check the result.
            tm.assert_frame_equal(rs2, df_expected)
示例#20
0
    def test_to_excel_interval_no_labels(self, path):
        """Write a column of unlabeled ``pd.cut`` intervals and read it back.

        The expected frame holds the same intervals converted to strings.
        """
        # see gh-19242
        #
        # Test writing Interval without labels.
        written = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64)
        expected = written.copy()

        written["new"] = pd.cut(written[0], 10)
        expected["new"] = pd.cut(expected[0], 10).astype(str)

        written.to_excel(path, "test1")
        with ExcelFile(path) as workbook:
            roundtripped = pd.read_excel(workbook, sheet_name="test1", index_col=0)
        tm.assert_frame_equal(expected, roundtripped)
示例#21
0
    def test_excel_sheet_by_name_raise(self, path):
        """Reading sheet index 0 works, while sheet name "0" raises XLRDError."""
        import xlrd

        gt = DataFrame(np.random.randn(10, 2))
        gt.to_excel(path)

        msg = "No sheet named <'0'>"
        # Keep the reader open for both lookups and close it afterwards.
        with ExcelFile(path) as xl:
            df = pd.read_excel(xl, sheet_name=0, index_col=0)

            tm.assert_frame_equal(gt, df)

            with pytest.raises(xlrd.XLRDError, match=msg):
                pd.read_excel(xl, sheet_name="0")
示例#22
0
    def test_to_excel_periodindex(self):
        """Round-trip a monthly period-resampled frame for 'xls' and 'xlsx'.

        The sheet is parsed with date parsing on and converted back to
        monthly periods before being compared with the written frame.
        """
        _skip_if_no_excelsuite()

        for extension in ['xls', 'xlsx']:
            tmp_name = '__tmp_to_excel_periodindex__.' + extension
            source = self.tsframe
            expected = source.resample('M', kind='period')

            with ensure_clean(tmp_name) as tmp_path:
                expected.to_excel(tmp_path, 'sht1')

                workbook = ExcelFile(tmp_path)
                parsed = workbook.parse('sht1', index_col=0, parse_dates=True)
                tm.assert_frame_equal(expected, parsed.to_period('M'))
示例#23
0
def test_excel_file_warning_with_xlsx_file(datapath):
    """Check which warning appears when an .xlsx file is opened with ``engine=None``.

    With openpyxl importable, reading must produce no warning. Without it,
    constructing an ``ExcelFile`` must emit a FutureWarning whose message
    matches "The xlrd engine is no longer maintained".
    """
    # GH 29375
    path = datapath("io", "data", "excel", "test1.xlsx")
    openpyxl = import_optional_dependency("openpyxl", errors="ignore")

    if openpyxl is not None:
        with tm.assert_produces_warning(None):
            pd.read_excel(path, "Sheet1", engine=None)
        return

    with tm.assert_produces_warning(
        FutureWarning,
        raise_on_extra_warnings=False,
        match="The xlrd engine is no longer maintained",
    ):
        ExcelFile(path, engine=None)
示例#24
0
    def test_roundtrip_indexlabels(self):
        """Check that ``index_label`` written by ``to_excel`` is read back.

        Three forms of ``index_label`` are exercised: a one-element list, a
        list longer than the index depth, and a plain string. In every case
        the parsed index is expected to be named ``'test'``.
        """
        _skip_if_no_xlrd()
        ext = self.ext
        path = '__tmp_to_excel_from_excel_indexlabels__.' + ext

        with ensure_clean(path) as path:

            self.frame['A'][:5] = nan

            # These writes only check that the options are accepted.
            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # test index_label
            index_labels = (['test'], ['test', 'dummy', 'dummy2'], 'test')
            for index_label in index_labels:
                frame = (DataFrame(np.random.randn(10, 2)) >= 0)
                frame.to_excel(path, 'test1', index_label=index_label)
                # Close the reader before the file is overwritten again.
                with ExcelFile(path) as reader:
                    recons = reader.parse('test1',
                                          index_col=0).astype(np.int64)
                frame.index.names = ['test']
                # assertEqual throughout: names are compared exactly.
                self.assertEqual(frame.index.names, recons.index.names)
示例#25
0
    def inputData(self):
        """Ask the user for an ``.xlsx`` file and load graph data from it.

        The first three sheets of the workbook are read, in order, as the
        graph, node and edge tables.

        Returns:
            ``[graphName, mode, nodeDataList, edgeDataList]`` on success, or
            ``None`` when the dialog returns an empty file name or the node
            sheet fails validation (a warning box is shown in that case).
        """
        curPath = self.dir.currentPath()
        title = self._tr("OperatorFile", "打开文件")
        filt = self._tr("OperatorFile", "*.xlsx")
        fileName, _ = QFileDialog.getOpenFileName(self, title, curPath, filt)
        if not fileName:
            return None

        # Open the workbook once and close it as soon as the sheets are
        # loaded, instead of re-opening the file for every sheet.
        with ExcelFile(fileName) as graphData:
            sheetNames = graphData.sheet_names
            graphFrame = read_excel(graphData, sheetNames[0])
            nodeFrame = read_excel(graphData, sheetNames[1])
            edgeFrame = read_excel(graphData, sheetNames[2])

        mode = int(graphFrame.iloc[0, 3])
        graphName = str(graphFrame.iloc[0, 0])
        nodeDataList = []
        edgeDataList = []

        for i in range(nodeFrame.shape[0]):
            data = []
            for j in range(nodeFrame.shape[1]):
                cell = nodeFrame.iloc[i, j]
                if not isnan(cell):
                    data.append(int(cell))
                else:
                    # Any empty cell in the node sheet aborts the import.
                    QMessageBox.warning(
                        self,
                        QCoreApplication.translate("OperatorFile", "警告!"),
                        QCoreApplication.translate("OperatorFile",
                                                   "对不起,您的数据有误,系统无法识别!"))
                    return None

            if len(data) >= 2:
                nodeDataList.append(data)
            else:
                # Reached only when the node sheet has fewer than two columns.
                QMessageBox.warning(
                    self, QCoreApplication.translate("OperatorFile", "警告!"),
                    QCoreApplication.translate(
                        "OperatorFile", "顶点ID和权重为必须信息,您的信息不完整,请补充信息后,再次尝试。"))
                return None

        for i in range(edgeFrame.shape[0]):
            data = []
            for j in range(edgeFrame.shape[1]):
                cell = edgeFrame.iloc[i, j]
                # Empty cells in the edge sheet are skipped, not rejected.
                if not isnan(cell):
                    data.append(int(cell))
            if len(data) >= 2:
                edgeDataList.append(data)

        return [graphName, mode, nodeDataList, edgeDataList]
示例#26
0
    def test_excelwriter_contextmanager(self):
        """Write two sheets via ``ExcelWriter`` as a context manager, then read both back."""
        _skip_if_no_xlrd()
        file_name = 'testit.{0}'.format(self.ext)
        target = os.path.join(self.dirpath, file_name)

        with ensure_clean(target) as pth:
            with ExcelWriter(pth) as xl_writer:
                self.frame.to_excel(xl_writer, 'Data1')
                self.frame2.to_excel(xl_writer, 'Data2')

            with ExcelFile(pth) as xl_reader:
                first_sheet = xl_reader.parse('Data1')
                second_sheet = xl_reader.parse('Data2')
                tm.assert_frame_equal(first_sheet, self.frame)
                tm.assert_frame_equal(second_sheet, self.frame2)
示例#27
0
    def test_excel_table(self):
        """Parse sheets of ``test.xls`` and compare them with the CSV reference.

        Also checks that ``skipfooter=1`` and ``skip_footer=1`` give the same
        result, namely the full sheet without its last row.
        """
        _skip_if_no_xlrd()

        pth = os.path.join(self.dirpath, 'test.xls')
        # Do all parsing inside the block so the workbook is closed afterwards.
        with ExcelFile(pth) as xls:
            df = xls.parse('Sheet1', index_col=0, parse_dates=True)
            df3 = xls.parse('Sheet2', skiprows=[1], index_col=0,
                            parse_dates=True)
            df4 = xls.parse('Sheet1', index_col=0, parse_dates=True,
                            skipfooter=1)
            df5 = xls.parse('Sheet1', index_col=0, parse_dates=True,
                            skip_footer=1)

        df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
        tm.assert_frame_equal(df, df2, check_names=False)
        tm.assert_frame_equal(df3, df2, check_names=False)

        # Positional slice: every row except the last (.ix is gone from pandas).
        tm.assert_frame_equal(df4, df.iloc[:-1])
        tm.assert_frame_equal(df4, df5)
示例#28
0
    def test_excel_passes_na(self):
        """Check ``keep_default_na`` / ``na_values`` handling for ``test2.xlsx``.

        The sheet is parsed with ``keep_default_na`` off and on, both times
        with ``'apple'`` as an extra NA value.
        """
        _skip_if_no_xlrd()

        # Parse inside a ``with`` block so the workbook is closed afterwards.
        with ExcelFile(os.path.join(self.dirpath, 'test2.xlsx')) as excel_data:
            parsed = excel_data.parse('Sheet1', keep_default_na=False,
                                      na_values=['apple'])
            expected = DataFrame([['NA'], [1], ['NA'], [np.nan], ['rabbit']],
                                 columns=['Test'])
            tm.assert_frame_equal(parsed, expected)

            parsed = excel_data.parse('Sheet1', keep_default_na=True,
                                      na_values=['apple'])
            expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']],
                                 columns=['Test'])
            tm.assert_frame_equal(parsed, expected)
示例#29
0
    def _check_excel_multiindex_dates(self, ext):
        """Round-trip a frame with a (datetime, int) MultiIndex for ``ext``.

        ``self.tsframe`` has its index replaced for the duration of the check
        and restored afterwards, even when an assertion fails.
        """
        path = '__tmp_to_excel_multiindex_dates__' + ext + '__.' + ext

        # try multiindex with dates
        tsframe = self.tsframe
        old_index = tsframe.index
        new_index = [old_index, np.arange(len(old_index))]
        tsframe.index = MultiIndex.from_arrays(new_index)

        try:
            with ensure_clean(path) as path:
                tsframe.to_excel(path, 'test1', index_label=['time', 'foo'])
                reader = ExcelFile(path)
                recons = reader.parse('test1', index_col=[0, 1])

                tm.assert_frame_equal(tsframe, recons, check_names=False)
                # assertEqual replaces the deprecated assertEquals alias.
                self.assertEqual(recons.index.names, ['time', 'foo'])

                # infer index
                tsframe.to_excel(path, 'test1')
                reader = ExcelFile(path)
                recons = reader.parse('test1')
                tm.assert_frame_equal(tsframe, recons)
        finally:
            # needed if setUP becomes classmethod; done in ``finally`` so a
            # failing assertion cannot leave the shared frame modified.
            self.tsframe.index = old_index
示例#30
0
    def openExcel(self, fileName):
        """Load graph, node, edge and text data from the workbook ``fileName``.

        The first four sheets are read, in order, as the graph, node, edge
        and text tables.

        Returns:
            ``[graphName, mode, nodeDataList, edgeDataList, textDataList]``
            on success, or ``None`` when the node sheet contains an empty
            cell (a warning box is shown in that case).
        """
        # Open the workbook once and close it as soon as the sheets are
        # loaded, instead of re-opening the file for every sheet.
        with ExcelFile(fileName) as graphData:
            sheetNames = graphData.sheet_names
            graphFrame = read_excel(graphData, sheetNames[0])
            nodeFrame = read_excel(graphData, sheetNames[1])
            edgeFrame = read_excel(graphData, sheetNames[2])
            textFrame = read_excel(graphData, sheetNames[3])

        mode = int(graphFrame.iloc[0, 4])
        graphName = str(graphFrame.iloc[0, 0])
        nodeDataList = []
        edgeDataList = []
        textDataList = []

        for i in range(nodeFrame.shape[0]):
            data = []
            for j in range(nodeFrame.shape[1]):
                cell = nodeFrame.iloc[i, j]
                if not isnan(cell):
                    data.append(int(cell))
                else:
                    # Any empty cell in the node sheet aborts the import.
                    QMessageBox.warning(
                        self,
                        QCoreApplication.translate("OperatorFile", "警告!"),
                        QCoreApplication.translate("OperatorFile",
                                                   "对不起,您的数据有误,系统无法识别!"))
                    return None

            if len(data) >= 2:
                nodeDataList.append(data)

        for i in range(edgeFrame.shape[0]):
            data = []
            for j in range(edgeFrame.shape[1]):
                cell = edgeFrame.iloc[i, j]
                # Empty cells in the edge sheet are skipped, not rejected.
                if not isnan(cell):
                    data.append(int(cell))
            if len(data) >= 2:
                edgeDataList.append(data)

        for i in range(textFrame.shape[0]):
            data = []
            for j in range(textFrame.shape[1]):
                cell = textFrame.iloc[i, j]
                # Strings are kept as they are; everything else becomes int.
                if isinstance(cell, str):
                    data.append(cell)
                else:
                    data.append(int(cell))
            if len(data) >= 2:
                textDataList.append(data)

        return [graphName, mode, nodeDataList, edgeDataList, textDataList]