示例#1
0
    def _check_extension_sheets(self, ext):
        """Round-trip two frames through separate sheets of one workbook.

        ``ext`` is the file extension (without the dot) of the temporary
        workbook.  The temporary file is deleted even if a check fails.
        """
        path = '__tmp_to_excel_from_excel_sheets__.' + ext

        try:
            self.frame['A'][:5] = nan

            # exercise the writer options; the output is not inspected
            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # Test writing to separate sheets
            writer = ExcelWriter(path)
            self.frame.to_excel(writer, 'test1')
            self.tsframe.to_excel(writer, 'test2')
            writer.save()
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=0)
            tm.assert_frame_equal(self.frame, recons)
            recons = reader.parse('test2', index_col=0)
            tm.assert_frame_equal(self.tsframe, recons)
            np.testing.assert_equal(2, len(reader.sheet_names))
            np.testing.assert_equal('test1', reader.sheet_names[0])
            np.testing.assert_equal('test2', reader.sheet_names[1])
        finally:
            # a failed check must not leave the workbook on disk
            if os.path.exists(path):
                os.remove(path)
    def _check_extension(self, ext):
        """Write ``self.frame`` to an Excel file of type ``ext`` and read it back.

        Covers the default round trip, a round trip without the index, and
        a round trip in which missing values are written as ``'NA'``.
        """
        tmp_name = '__tmp_to_excel_from_excel__.' + ext

        with ensure_clean(tmp_name) as path:
            self.frame['A'][:5] = nan

            # the writer has to accept each of these option sets
            for options in ({}, {'cols': ['A', 'B']}, {'header': False},
                            {'index': False}):
                self.frame.to_excel(path, 'test1', **options)

            # test roundtrip
            self.frame.to_excel(path, 'test1')
            workbook = ExcelFile(path)
            result = workbook.parse('test1', index_col=0)
            tm.assert_frame_equal(self.frame, result)

            # written without an index: re-attach the original one
            self.frame.to_excel(path, 'test1', index=False)
            workbook = ExcelFile(path)
            result = workbook.parse('test1', index_col=None)
            result.index = self.frame.index
            tm.assert_frame_equal(self.frame, result)

            # missing values written as 'NA' and declared as NA on read
            self.frame.to_excel(path, 'test1', na_rep='NA')
            workbook = ExcelFile(path)
            result = workbook.parse('test1', index_col=0, na_values=['NA'])
            tm.assert_frame_equal(self.frame, result)
示例#3
0
    def _check_extension_sheets(self, ext):
        """Write ``self.frame`` and ``self.tsframe`` to two sheets and read both back.

        ``ext`` is the extension (without the dot) of the temporary
        workbook.  Cleanup happens in a ``finally`` clause so the file is
        removed even when one of the checks raises.
        """
        path = "__tmp_to_excel_from_excel_sheets__." + ext

        try:
            self.frame["A"][:5] = nan

            # smoke-test the writer options; results are overwritten below
            self.frame.to_excel(path, "test1")
            self.frame.to_excel(path, "test1", cols=["A", "B"])
            self.frame.to_excel(path, "test1", header=False)
            self.frame.to_excel(path, "test1", index=False)

            # Test writing to separate sheets
            writer = ExcelWriter(path)
            self.frame.to_excel(writer, "test1")
            self.tsframe.to_excel(writer, "test2")
            writer.save()
            reader = ExcelFile(path)
            recons = reader.parse("test1", index_col=0)
            tm.assert_frame_equal(self.frame, recons)
            recons = reader.parse("test2", index_col=0)
            tm.assert_frame_equal(self.tsframe, recons)
            np.testing.assert_equal(2, len(reader.sheet_names))
            np.testing.assert_equal("test1", reader.sheet_names[0])
            np.testing.assert_equal("test2", reader.sheet_names[1])
        finally:
            # do not leak the temporary workbook on failure
            if os.path.exists(path):
                os.remove(path)
示例#4
0
    def _check_excel_multiindex_dates(self, ext):
        """Round-trip ``self.tsframe`` with a two-level index through Excel.

        The frame's index is temporarily replaced by a MultiIndex built
        from the original index and a running integer.  The original
        index is restored and the temporary file removed even if a check
        fails, so later tests see an unmodified fixture.
        """
        path = '__tmp_to_excel_multiindex_dates__' + ext + '__.' + ext

        # try multiindex with dates
        tsframe = self.tsframe
        old_index = tsframe.index
        new_index = [old_index, np.arange(len(old_index))]
        tsframe.index = MultiIndex.from_arrays(new_index)

        try:
            tsframe.to_excel(path, 'test1', index_label=['time', 'foo'])
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=[0, 1])

            # names are compared separately, right below
            tm.assert_frame_equal(tsframe, recons, check_names=False)
            self.assertEqual(recons.index.names, ['time', 'foo'])

            # infer index
            tsframe.to_excel(path, 'test1')
            reader = ExcelFile(path)
            recons = reader.parse('test1')
            tm.assert_frame_equal(tsframe, recons)
        finally:
            self.tsframe.index = old_index  # needed if setUP becomes classmethod

            if os.path.exists(path):
                os.remove(path)
示例#5
0
    def _check_extension(self, ext):
        """Round-trip ``self.frame`` through an Excel file with extension ``ext``.

        Checks the default round trip, a round trip without the index and
        a round trip with missing values written as ``"NA"``.  The
        temporary file is removed even when a check fails.
        """
        path = "__tmp_to_excel_from_excel__." + ext

        try:
            self.frame["A"][:5] = nan

            # smoke-test the writer options
            self.frame.to_excel(path, "test1")
            self.frame.to_excel(path, "test1", cols=["A", "B"])
            self.frame.to_excel(path, "test1", header=False)
            self.frame.to_excel(path, "test1", index=False)

            # test roundtrip
            self.frame.to_excel(path, "test1")
            reader = ExcelFile(path)
            recons = reader.parse("test1", index_col=0)
            tm.assert_frame_equal(self.frame, recons)

            # no index in the file: restore it before comparing
            self.frame.to_excel(path, "test1", index=False)
            reader = ExcelFile(path)
            recons = reader.parse("test1", index_col=None)
            recons.index = self.frame.index
            tm.assert_frame_equal(self.frame, recons)

            self.frame.to_excel(path, "test1", na_rep="NA")
            reader = ExcelFile(path)
            recons = reader.parse("test1", index_col=0, na_values=["NA"])
            tm.assert_frame_equal(self.frame, recons)
        finally:
            # a failed check must not leave the file behind
            if os.path.exists(path):
                os.remove(path)
示例#6
0
 def test_excel_table(self):
     """Both sheets of test.xls must equal the frame read from ``self.csv1``."""
     workbook = ExcelFile(os.path.join(self.dirpath, 'test.xls'))
     from_sheet1 = workbook.parse('Sheet1')
     from_csv = read_csv(self.csv1)
     # the row at position 1 is skipped when reading Sheet2
     from_sheet2 = workbook.parse('Sheet2', skiprows=[1])
     assert_frame_equal(from_sheet1, from_csv)
     assert_frame_equal(from_sheet2, from_csv)
示例#7
0
    def test_xlsx_table(self):
        """Both sheets of test.xlsx must equal the frame read from ``self.csv1``."""
        _skip_if_no_openpyxl()

        # every reader uses the first column as a date-parsed index
        read_opts = dict(index_col=0, parse_dates=True)

        workbook = ExcelFile(os.path.join(self.dirpath, 'test.xlsx'))
        sheet1 = workbook.parse('Sheet1', **read_opts)
        reference = read_csv(self.csv1, **read_opts)
        sheet2 = workbook.parse('Sheet2', skiprows=[1], **read_opts)
        assert_frame_equal(sheet1, reference)
        assert_frame_equal(sheet2, reference)
示例#8
0
    def test_xlsx_table(self):
        """Compare both sheets of test.xlsx with the frame read from ``self.csv1``.

        The test is skipped when openpyxl cannot be imported.
        """
        try:
            import openpyxl  # imported only to probe availability
        except ImportError:
            raise nose.SkipTest('openpyxl not installed, skipping')

        read_opts = dict(index_col=0, parse_dates=True)

        workbook = ExcelFile(os.path.join(self.dirpath, 'test.xlsx'))
        sheet1 = workbook.parse('Sheet1', **read_opts)
        reference = read_csv(self.csv1, **read_opts)
        sheet2 = workbook.parse('Sheet2', skiprows=[1], **read_opts)
        assert_frame_equal(sheet1, reference)
        assert_frame_equal(sheet2, reference)
示例#9
0
    def test_excel_table(self):
        """Compare both sheets of test.xls with the frame read from ``self.csv1``.

        The test is skipped when xlrd cannot be imported.
        """
        try:
            import xlrd  # imported only to probe availability
        except ImportError:
            raise nose.SkipTest("xlrd not installed, skipping")

        read_opts = dict(index_col=0, parse_dates=True)

        workbook = ExcelFile(os.path.join(self.dirpath, "test.xls"))
        sheet1 = workbook.parse("Sheet1", **read_opts)
        reference = read_csv(self.csv1, **read_opts)
        sheet2 = workbook.parse("Sheet2", skiprows=[1], **read_opts)
        assert_frame_equal(sheet1, reference)
        assert_frame_equal(sheet2, reference)
示例#10
0
    def test_excel_read_buffer(self):
        """``ExcelFile`` must accept an open binary file object (xls and xlsx).

        Only checks that construction and parsing do not raise; the parsed
        frames are not inspected.  Each file handle is closed once its
        workbook has been parsed.
        """
        _skip_if_no_xlrd()
        _skip_if_no_openpyxl()

        pth = os.path.join(self.dirpath, 'test.xls')
        with open(pth, 'rb') as f:
            xls = ExcelFile(f)
            # it works
            xls.parse('Sheet1', index_col=0, parse_dates=True)

        pth = os.path.join(self.dirpath, 'test.xlsx')
        with open(pth, 'rb') as f:
            xl = ExcelFile(f)
            xl.parse('Sheet1', index_col=0, parse_dates=True)
    def _check_extension_indexlabels(self, ext):
        """Check how ``index_label`` survives an Excel round trip.

        First part: a boolean frame is written with three different
        ``index_label`` values and the index name read back is compared
        with ``['test']``.  Second part: ``self.frame`` is written without
        its index and read back using the first two columns as the index.
        """
        tmp_name = '__tmp_to_excel_from_excel_indexlabels__.' + ext

        with ensure_clean(tmp_name) as path:

            self.frame['A'][:5] = nan

            # the writer has to accept each of these option sets
            for options in ({}, {'cols': ['A', 'B']}, {'header': False},
                            {'index': False}):
                self.frame.to_excel(path, 'test1', **options)

            # test index_label
            labels = (['test'], ['test', 'dummy', 'dummy2'], 'test')
            for label in labels:
                frame = (DataFrame(np.random.randn(10, 2)) >= 0)
                frame.to_excel(path, 'test1', index_label=label)
                reader = ExcelFile(path)
                recons = reader.parse('test1', index_col=0).astype(np.int64)
                frame.index.names = ['test']
                self.assertEqual(frame.index.names, recons.index.names)

        # test index_labels in same row as column names
        tmp_name = '%s.xls' % tm.rands(10)

        with ensure_clean(tmp_name) as path:

            self.frame.to_excel(path, 'test1',
                                cols=['A', 'B', 'C', 'D'], index=False)
            # take 'A' and 'B' as indexes (they are in same row as cols 'C',
            # 'D')
            expected = self.frame.copy().set_index(['A', 'B'])

            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=[0, 1])
            tm.assert_frame_equal(expected, recons)
示例#12
0
    def test_parse_cols_int(self):
        """``parse_cols=3`` must give the csv frame reindexed to columns A, B, C.

        Runs once for test.xls and once for test.xlsx.
        """
        _skip_if_no_openpyxl()
        _skip_if_no_xlrd()

        read_opts = dict(index_col=0, parse_dates=True)

        for s in ["", "x"]:
            workbook = ExcelFile(os.path.join(self.dirpath, "test.xls%s" % s))
            sheet1 = workbook.parse("Sheet1", parse_cols=3, **read_opts)
            reference = self.read_csv(self.csv1, **read_opts)
            reference = reference.reindex(columns=["A", "B", "C"])
            sheet2 = workbook.parse("Sheet2", skiprows=[1], parse_cols=3,
                                    **read_opts)
            tm.assert_frame_equal(sheet1, reference)
            tm.assert_frame_equal(sheet2, reference)
示例#13
0
    def test_excel_table(self):
        """Compare the sheets of test.xls with ``self.csv1`` and check footer skipping.

        ``skipfooter`` and ``skip_footer`` must both drop the last row.
        """
        _skip_if_no_xlrd()

        read_opts = dict(index_col=0, parse_dates=True)

        workbook = ExcelFile(os.path.join(self.dirpath, "test.xls"))
        sheet1 = workbook.parse("Sheet1", **read_opts)
        reference = self.read_csv(self.csv1, **read_opts)
        sheet2 = workbook.parse("Sheet2", skiprows=[1], **read_opts)
        tm.assert_frame_equal(sheet1, reference, check_names=False)
        tm.assert_frame_equal(sheet2, reference, check_names=False)

        # both spellings of the keyword have to behave the same
        trimmed = workbook.parse("Sheet1", skipfooter=1, **read_opts)
        trimmed_alias = workbook.parse("Sheet1", skip_footer=1, **read_opts)
        tm.assert_frame_equal(trimmed, sheet1.ix[:-1])
        tm.assert_frame_equal(trimmed, trimmed_alias)
示例#14
0
    def test_xlsx_table(self):
        """Compare the sheets of test.xlsx with ``self.csv1`` and check footer skipping.

        ``skipfooter`` and ``skip_footer`` must both drop the last row.
        """
        _skip_if_no_openpyxl()

        read_opts = dict(index_col=0, parse_dates=True)

        workbook = ExcelFile(os.path.join(self.dirpath, "test.xlsx"))
        sheet1 = workbook.parse("Sheet1", **read_opts)
        reference = self.read_csv(self.csv1, **read_opts)
        sheet2 = workbook.parse("Sheet2", skiprows=[1], **read_opts)
        tm.assert_frame_equal(sheet1, reference)
        tm.assert_frame_equal(sheet2, reference)

        # both spellings of the keyword have to behave the same
        trimmed = workbook.parse("Sheet1", skipfooter=1, **read_opts)
        trimmed_alias = workbook.parse("Sheet1", skip_footer=1, **read_opts)
        tm.assert_frame_equal(trimmed, sheet1.ix[:-1])
        tm.assert_frame_equal(trimmed, trimmed_alias)
示例#15
0
    def _check_extension_indexlabels(self, ext):
        """Check how ``index_label`` survives an Excel round trip.

        First part: a boolean frame is written with three different
        ``index_label`` values and the index name read back is compared
        with ``["test"]``.  Second part: ``self.frame`` is written without
        its index and read back using the first two columns as the index.
        Each temporary file is removed in a ``finally`` clause.
        """
        path = "__tmp_to_excel_from_excel_indexlabels__." + ext
        try:
            self.frame["A"][:5] = nan

            # the writer has to accept each of these option sets
            for options in ({}, {"cols": ["A", "B"]}, {"header": False},
                            {"index": False}):
                self.frame.to_excel(path, "test1", **options)

            # test index_label
            for label in (["test"], ["test", "dummy", "dummy2"], "test"):
                frame = DataFrame(np.random.randn(10, 2)) >= 0
                frame.to_excel(path, "test1", index_label=label)
                reader = ExcelFile(path)
                recons = reader.parse("test1", index_col=0).astype(np.int64)
                frame.index.names = ["test"]
                self.assertEqual(frame.index.names, recons.index.names)
        finally:
            os.remove(path)

        # test index_labels in same row as column names
        path = "%s.xls" % tm.rands(10)
        try:
            self.frame.to_excel(path, "test1", cols=["A", "B", "C", "D"], index=False)
            # take 'A' and 'B' as indexes (they are in same row as cols 'C',
            # 'D')
            expected = self.frame.copy().set_index(["A", "B"])

            reader = ExcelFile(path)
            recons = reader.parse("test1", index_col=[0, 1])
            tm.assert_frame_equal(expected, recons)
        finally:
            os.remove(path)
示例#16
0
文件: xlutil.py 项目: fagan2888/pyxl
def getExcelChunck(file, ws, drange, rid=-1, cid=-1):
    """Return a rectangular chunk of an Excel worksheet as a DataFrame.

    Parameters
    ----------
    file : path or file object handed to ``ExcelFile``.
    ws : name of the worksheet to parse.
    drange : cell range written as two concatenated cell references,
        e.g. ``'B4H4'`` (column letters and a row number, twice).
    rid : row number of the row holding the column labels; ignored when
        negative (the default).
    cid : column reference holding the row labels, converted with
        ``sord``; ignored when negative (the default).

    Returns
    -------
    The selected slice of the parsed sheet, with its columns and/or index
    replaced by the cleaned labels when ``rid`` / ``cid`` are given.

    Raises
    ------
    ValueError
        If ``drange`` does not contain a range of the expected form.
    """
    def _label(value):
        # Drop only a trailing '.0' (integers read back as floats); a
        # blanket replace would also mangle labels such as '1.05'.
        return re.sub(r'\.0$', '', str(value).strip())

    xls = ExcelFile(file)
    df = xls.parse(ws)

    # get the range from expression
    # B4H4
    m = re.search(r'([A-Z]+)([0-9]+)([A-Z]+)([0-9]+)', drange)
    if m is None:
        raise ValueError('cannot parse cell range %r' % (drange,))

    # NOTE(review): sord presumably maps column letters to a position and
    # the -2 presumably accounts for 1-based rows plus the header row
    # consumed by parse() -- confirm against sord and the callers.
    c1 = sord(m.group(1))
    c2 = sord(m.group(3)) + 1
    r1 = int(m.group(2)) - 2
    r2 = int(m.group(4)) - 2

    df2 = df.ix[r1:r2, c1:c2]

    if rid >= 0:
        rh = int(rid) - 2
        df2.columns = df.ix[rh, c1:c2]
        df2.columns = df2.columns.map(_label)

    if cid >= 0:
        ch = sord(cid)
        df2.index = df.ix[r1:r2, ch]
        df2.index = df2.index.map(_label)
    return df2
示例#17
0
    def test_excel_cell_error_na(self):
        """Sheet1 of test3.xls must parse to a single NaN in column 'Test'."""
        _skip_if_no_xlrd()

        workbook_path = os.path.join(self.dirpath, 'test3.xls')
        actual = ExcelFile(workbook_path).parse('Sheet1')
        wanted = DataFrame([[np.nan]], columns=['Test'])
        tm.assert_frame_equal(actual, wanted)
示例#18
0
    def test_to_excel_unicode_filename(self):
        """Round-trip a frame through xls and xlsx files with a non-ASCII name.

        The test is skipped when the file name cannot be encoded for the
        file system.  Values are written with ``float_format='%.2f'`` and
        compared with the rounded frame.  The file is removed even if a
        check fails.
        """
        _skip_if_no_excelsuite()

        for ext in ['xls', 'xlsx']:
            filename = u'\u0192u.' + ext

            # probe whether the file system accepts the name at all
            try:
                f = open(filename, 'wb')
            except UnicodeEncodeError:
                raise nose.SkipTest('no unicode file names on this system')
            else:
                f.close()

            try:
                df = DataFrame([[0.123456, 0.234567, 0.567567],
                                [12.32112, 123123.2, 321321.2]],
                               index=['A', 'B'],
                               columns=['X', 'Y', 'Z'])
                df.to_excel(filename, 'test1', float_format='%.2f')

                reader = ExcelFile(filename)
                rs = reader.parse('test1', index_col=None)
                xp = DataFrame([[0.12, 0.23, 0.57],
                                [12.32, 123123.20, 321321.20]],
                               index=['A', 'B'],
                               columns=['X', 'Y', 'Z'])
                tm.assert_frame_equal(rs, xp)
            finally:
                # the probe above already created the file
                if os.path.exists(filename):
                    os.remove(filename)
示例#19
0
    def test_excel_stop_iterator(self):
        """Sheet1 of test2.xls must parse to one row under 'Test' and 'Test1'."""
        _skip_if_no_xlrd()

        workbook_path = os.path.join(self.dirpath, 'test2.xls')
        actual = ExcelFile(workbook_path).parse('Sheet1')
        wanted = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])
        tm.assert_frame_equal(actual, wanted)
示例#20
0
    def test_parse_cols_int(self):
        """``parse_cols=3`` must give the csv frame reindexed to columns A, B, C.

        Runs once for test.xls and once for test.xlsx.
        """
        _skip_if_no_openpyxl()

        read_opts = dict(index_col=0, parse_dates=True)

        for s in ['', 'x']:
            workbook = ExcelFile(os.path.join(self.dirpath, 'test.xls%s' % s))
            sheet1 = workbook.parse('Sheet1', parse_cols=3, **read_opts)
            reference = read_csv(self.csv1, **read_opts)
            reference = reference.reindex(columns=['A', 'B', 'C'])
            sheet2 = workbook.parse('Sheet2', skiprows=[1], parse_cols=3,
                                    **read_opts)
            assert_frame_equal(sheet1, reference)
            assert_frame_equal(sheet2, reference)
示例#21
0
    def test_excel_cell_error_na(self):
        """Sheet1 of test3.xls must parse to a single NaN in column "Test"."""
        _skip_if_no_xlrd()

        workbook = ExcelFile(os.path.join(self.dirpath, "test3.xls"))
        wanted = DataFrame([[np.nan]], columns=["Test"])
        tm.assert_frame_equal(workbook.parse("Sheet1"), wanted)
示例#22
0
    def test_to_excel_unicode_filename(self):
        """Round-trip a frame through xls and xlsx files with a non-ASCII name.

        The test is skipped when the file name cannot be encoded for the
        file system.  Values are written with ``float_format="%.2f"`` and
        compared with the rounded frame.  The file is removed even if a
        check fails.
        """
        _skip_if_no_excelsuite()

        for ext in ["xls", "xlsx"]:
            filename = u"\u0192u." + ext

            # probe whether the file system accepts the name at all
            try:
                f = open(filename, "wb")
            except UnicodeEncodeError:
                raise nose.SkipTest("no unicode file names on this system")
            else:
                f.close()

            try:
                df = DataFrame(
                    [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
                    index=["A", "B"],
                    columns=["X", "Y", "Z"],
                )
                df.to_excel(filename, "test1", float_format="%.2f")

                reader = ExcelFile(filename)
                rs = reader.parse("test1", index_col=None)
                xp = DataFrame(
                    [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
                    index=["A", "B"],
                    columns=["X", "Y", "Z"],
                )
                tm.assert_frame_equal(rs, xp)
            finally:
                # the probe above already created the file
                if os.path.exists(filename):
                    os.remove(filename)
示例#23
0
    def test_excel_stop_iterator(self):
        """Sheet1 of test2.xls must parse to one row under 'Test' and 'Test1'."""
        _skip_if_no_xlrd()

        workbook = ExcelFile(os.path.join(self.dirpath, 'test2.xls'))
        wanted = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])
        assert_frame_equal(workbook.parse('Sheet1'), wanted)
示例#24
0
    def test_excel_cell_error_na(self):
        """Sheet1 of test3.xls must parse to a single NaN in column 'Test'."""
        _skip_if_no_xlrd()

        workbook_path = os.path.join(self.dirpath, 'test3.xls')
        actual = ExcelFile(workbook_path).parse('Sheet1')
        wanted = DataFrame([[np.nan]], columns=['Test'])
        assert_frame_equal(actual, wanted)
示例#25
0
    def test_excel_stop_iterator(self):
        """Sheet1 of test2.xls must parse to one row under "Test" and "Test1"."""
        _skip_if_no_xlrd()

        workbook_path = os.path.join(self.dirpath, "test2.xls")
        actual = ExcelFile(workbook_path).parse("Sheet1")
        wanted = DataFrame([["aaaa", "bbbbb"]], columns=["Test", "Test1"])
        tm.assert_frame_equal(actual, wanted)
    def test_to_excel_unicode_filename(self):
        """Round-trip a frame through xls and xlsx files with a non-ASCII name.

        The test is skipped when the file name cannot be encoded for the
        file system.  Values are written with ``float_format='%.2f'`` and
        compared with the rounded frame.
        """
        _skip_if_no_excelsuite()

        labels = dict(index=['A', 'B'], columns=['X', 'Y', 'Z'])

        for ext in ['xls', 'xlsx']:
            filename = '\u0192u.' + ext

            # probe whether the file system accepts the name at all
            try:
                probe = open(filename, 'wb')
            except UnicodeEncodeError:
                raise nose.SkipTest('no unicode file names on this system')
            else:
                probe.close()

            source = DataFrame([[0.123456, 0.234567, 0.567567],
                                [12.32112, 123123.2, 321321.2]], **labels)

            with ensure_clean(filename) as filename:
                source.to_excel(filename, 'test1', float_format='%.2f')

                workbook = ExcelFile(filename)
                result = workbook.parse('test1', index_col=None)
                expected = DataFrame([[0.12, 0.23, 0.57],
                                      [12.32, 123123.20, 321321.20]],
                                     **labels)
                tm.assert_frame_equal(result, expected)
    def _check_extension_mixed(self, ext):
        """Round-trip ``self.mixed_frame`` through an Excel file of type ``ext``."""
        tmp_name = '__tmp_to_excel_from_excel_mixed__.' + ext

        with ensure_clean(tmp_name) as path:
            self.mixed_frame.to_excel(path, 'test1')
            workbook = ExcelFile(path)
            result = workbook.parse('test1', index_col=0)
            tm.assert_frame_equal(self.mixed_frame, result)
    def test_parse_cols_int(self):
        """``parse_cols=3`` must give the csv frame reindexed to columns A, B, C.

        Runs once for test.xls and once for test.xlsx; index names are
        not compared.
        """
        _skip_if_no_openpyxl()
        _skip_if_no_xlrd()

        read_opts = dict(index_col=0, parse_dates=True)

        for s in ['', 'x']:
            workbook = ExcelFile(os.path.join(self.dirpath, 'test.xls%s' % s))
            sheet1 = workbook.parse('Sheet1', parse_cols=3, **read_opts)
            reference = self.read_csv(self.csv1, **read_opts)
            reference = reference.reindex(columns=['A', 'B', 'C'])
            sheet2 = workbook.parse('Sheet2', skiprows=[1], parse_cols=3,
                                    **read_opts)
            # TODO add index to xls file
            tm.assert_frame_equal(sheet1, reference, check_names=False)
            tm.assert_frame_equal(sheet2, reference, check_names=False)
    def test_excel_table(self):
        """Compare the sheets of test.xls with ``self.csv1`` and check footer skipping.

        ``skipfooter`` and ``skip_footer`` must both drop the last row.
        """
        _skip_if_no_xlrd()

        read_opts = dict(index_col=0, parse_dates=True)

        workbook = ExcelFile(os.path.join(self.dirpath, 'test.xls'))
        sheet1 = workbook.parse('Sheet1', **read_opts)
        reference = self.read_csv(self.csv1, **read_opts)
        sheet2 = workbook.parse('Sheet2', skiprows=[1], **read_opts)
        tm.assert_frame_equal(sheet1, reference, check_names=False)
        tm.assert_frame_equal(sheet2, reference, check_names=False)

        # both spellings of the keyword have to behave the same
        trimmed = workbook.parse('Sheet1', skipfooter=1, **read_opts)
        trimmed_alias = workbook.parse('Sheet1', skip_footer=1, **read_opts)
        tm.assert_frame_equal(trimmed, sheet1.ix[:-1])
        tm.assert_frame_equal(trimmed, trimmed_alias)
    def test_xlsx_table(self):
        """Compare the sheets of test.xlsx with ``self.csv1`` and check footer skipping.

        Index names are not compared for the sheet/csv checks.
        ``skipfooter`` and ``skip_footer`` must both drop the last row.
        """
        _skip_if_no_openpyxl()

        read_opts = dict(index_col=0, parse_dates=True)

        workbook = ExcelFile(os.path.join(self.dirpath, 'test.xlsx'))
        sheet1 = workbook.parse('Sheet1', **read_opts)
        reference = self.read_csv(self.csv1, **read_opts)
        sheet2 = workbook.parse('Sheet2', skiprows=[1], **read_opts)

        # TODO add index to xlsx file
        tm.assert_frame_equal(sheet1, reference, check_names=False)
        tm.assert_frame_equal(sheet2, reference, check_names=False)

        # both spellings of the keyword have to behave the same
        trimmed = workbook.parse('Sheet1', skipfooter=1, **read_opts)
        trimmed_alias = workbook.parse('Sheet1', skip_footer=1, **read_opts)
        tm.assert_frame_equal(trimmed, sheet1.ix[:-1])
        tm.assert_frame_equal(trimmed, trimmed_alias)
示例#31
0
    def _check_extension_mixed(self, ext):
        """Round-trip ``self.mixed_frame`` through an Excel file of type ``ext``.

        The temporary file is removed even when the comparison fails.
        """
        path = '__tmp_to_excel_from_excel_mixed__.' + ext

        try:
            self.mixed_frame.to_excel(path, 'test1')
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=0, has_index_names=True)
            tm.assert_frame_equal(self.mixed_frame, recons)
        finally:
            # do not leave the temp file behind on failure
            if os.path.exists(path):
                os.remove(path)
示例#32
0
    def test_xlsx_table(self):
        """Compare the sheets of test.xlsx with ``self.csv1`` and check footer skipping.

        ``skipfooter`` and ``skip_footer`` must both drop the last row.
        """
        _skip_if_no_openpyxl()

        read_opts = dict(index_col=0, parse_dates=True)

        workbook = ExcelFile(os.path.join(self.dirpath, 'test.xlsx'))
        sheet1 = workbook.parse('Sheet1', **read_opts)
        reference = self.read_csv(self.csv1, **read_opts)
        sheet2 = workbook.parse('Sheet2', skiprows=[1], **read_opts)
        tm.assert_frame_equal(sheet1, reference)
        tm.assert_frame_equal(sheet2, reference)

        # both spellings of the keyword have to behave the same
        trimmed = workbook.parse('Sheet1', skipfooter=1, **read_opts)
        trimmed_alias = workbook.parse('Sheet1', skip_footer=1, **read_opts)
        tm.assert_frame_equal(trimmed, sheet1.ix[:-1])
        tm.assert_frame_equal(trimmed, trimmed_alias)
示例#33
0
    def _check_extension_mixed(self, ext):
        """Write ``self.mixed_frame`` to an Excel file of type ``ext`` and read it back.

        Cleanup runs in a ``finally`` clause so the temporary file is
        removed even when the comparison fails.
        """
        path = '__tmp_to_excel_from_excel_mixed__.' + ext

        try:
            self.mixed_frame.to_excel(path, 'test1')
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=0, has_index_names=True)
            tm.assert_frame_equal(self.mixed_frame, recons)
        finally:
            if os.path.exists(path):
                os.remove(path)
示例#34
0
    def _check_extension_mixed(self, ext):
        """Round-trip ``self.mixed_frame`` through an Excel file of type ``ext``.

        The temporary file is removed even when the comparison fails.
        """
        path = "__tmp_to_excel_from_excel_mixed__." + ext

        try:
            self.mixed_frame.to_excel(path, "test1")
            reader = ExcelFile(path)
            recons = reader.parse("test1", index_col=0)
            tm.assert_frame_equal(self.mixed_frame, recons)
        finally:
            # do not leave the temp file behind on failure
            if os.path.exists(path):
                os.remove(path)
    def test_parse_cols_list(self):
        """``parse_cols=[0, 2, 3]`` must give the csv frame reindexed to B and C.

        Runs once for test.xls and once for test.xlsx; index names are
        not compared.
        """
        _skip_if_no_openpyxl()
        _skip_if_no_xlrd()

        read_opts = dict(index_col=0, parse_dates=True)

        for s in ['', 'x']:
            workbook = ExcelFile(os.path.join(self.dirpath, 'test.xls%s' % s))
            sheet1 = workbook.parse('Sheet1', parse_cols=[0, 2, 3],
                                    **read_opts)
            reference = self.read_csv(self.csv1, **read_opts)
            reference = reference.reindex(columns=['B', 'C'])
            sheet2 = workbook.parse('Sheet2', skiprows=[1],
                                    parse_cols=[0, 2, 3], **read_opts)
            # TODO add index to xls file
            tm.assert_frame_equal(sheet1, reference, check_names=False)
            tm.assert_frame_equal(sheet2, reference, check_names=False)
示例#36
0
    def test_excel_cell_error_na(self):
        """Sheet1 of test3.xls must parse to a single NaN in column 'Test'.

        The test is skipped when xlrd cannot be imported.
        """
        try:
            import xlrd  # imported only to probe availability
        except ImportError:
            raise nose.SkipTest('xlrd not installed, skipping')

        workbook_path = os.path.join(self.dirpath, 'test3.xls')
        actual = ExcelFile(workbook_path).parse('Sheet1')
        wanted = DataFrame([[np.nan]], columns=['Test'])
        assert_frame_equal(actual, wanted)
示例#37
0
    def test_excel_stop_iterator(self):
        """Sheet1 of test2.xls must parse to one row under "Test" and "Test1".

        The test is skipped when xlrd cannot be imported.
        """
        try:
            import xlrd  # imported only to probe availability
        except ImportError:
            raise nose.SkipTest("xlrd not installed, skipping")

        workbook_path = os.path.join(self.dirpath, "test2.xls")
        actual = ExcelFile(workbook_path).parse("Sheet1")
        wanted = DataFrame([["aaaa", "bbbbb"]], columns=["Test", "Test1"])
        assert_frame_equal(actual, wanted)
    def _check_extension_tsframe(self, ext):
        """Round-trip the first five rows of ``tm.makeTimeDataFrame()`` through Excel."""
        tmp_name = '__tmp_to_excel_from_excel_tsframe__.' + ext

        source = tm.makeTimeDataFrame()[:5]

        with ensure_clean(tmp_name) as path:
            source.to_excel(path, 'test1')
            workbook = ExcelFile(path)
            result = workbook.parse('test1')
            tm.assert_frame_equal(source, result)
示例#39
0
    def test_excel_stop_iterator(self):
        """Sheet1 of test2.xls must parse to one row under 'Test' and 'Test1'.

        The test is skipped when xlrd cannot be imported.
        """
        try:
            import xlrd  # imported only to probe availability
        except ImportError:
            raise nose.SkipTest('xlrd not installed, skipping')

        workbook = ExcelFile(os.path.join(self.dirpath, 'test2.xls'))
        wanted = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])
        assert_frame_equal(workbook.parse('Sheet1'), wanted)
示例#40
0
    def test_excel_stop_iterator(self):
        """Sheet1 of test2.xls must parse to one row under 'Test' and 'Test1'.

        The test is skipped when xlrd cannot be imported.
        """
        try:
            import xlrd  # imported only to probe availability
        except ImportError:
            raise nose.SkipTest('xlrd not installed, skipping')

        workbook_path = os.path.join(self.dirpath, 'test2.xls')
        actual = ExcelFile(workbook_path).parse('Sheet1')
        wanted = DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1'])
        assert_frame_equal(actual, wanted)
示例#41
0
    def test_excel_roundtrip_bool(self):
        """Round-trip a random boolean frame through an xlsx file.

        The temporary file is removed even when the comparison fails.
        """
        _skip_if_no_openpyxl()

        # Test roundtrip np.bool8, does not seem to work for xls
        path = '__tmp_excel_roundtrip_bool__.xlsx'
        try:
            frame = (DataFrame(np.random.randn(10, 2)) >= 0)
            frame.to_excel(path, 'test1')
            reader = ExcelFile(path)
            recons = reader.parse('test1')
            tm.assert_frame_equal(frame, recons)
        finally:
            # do not leave the temp file behind on failure
            if os.path.exists(path):
                os.remove(path)
示例#42
0
    def test_to_excel_periodindex(self):
        """Round-trip a monthly period-resampled ``self.tsframe`` through xls and xlsx.

        The frame read back is converted with ``to_period('M')`` before
        the comparison.  Each temporary file is removed even when a check
        fails.
        """
        _skip_if_no_excelsuite()
        for ext in ['xls', 'xlsx']:
            path = '__tmp_to_excel_periodindex__.' + ext
            try:
                frame = self.tsframe
                xp = frame.resample('M', kind='period')
                xp.to_excel(path, 'sht1')

                reader = ExcelFile(path)
                rs = reader.parse('sht1', index_col=0, parse_dates=True)
                tm.assert_frame_equal(xp, rs.to_period('M'))
            finally:
                # do not leave the temp file behind on failure
                if os.path.exists(path):
                    os.remove(path)
示例#43
0
 def test_excel_roundtrip_datetime(self):
     """Round-trip a copy of ``self.tsframe`` indexed by ``datetime.date`` values.

     The frame read back is compared with the original ``self.tsframe``.
     The temporary file is removed even when the comparison fails.
     """
     _skip_if_no_xlrd()
     _skip_if_no_xlwt()
     # datetime.date, not sure what to test here exactly
     path = '__tmp_excel_roundtrip_datetime__.xls'
     try:
         tsf = self.tsframe.copy()
         tsf.index = [x.date() for x in self.tsframe.index]
         tsf.to_excel(path, 'test1')
         reader = ExcelFile(path)
         recons = reader.parse('test1')
         tm.assert_frame_equal(self.tsframe, recons)
     finally:
         # do not leave the temp file behind on failure
         if os.path.exists(path):
             os.remove(path)
示例#44
0
    def _check_excel_multiindex_dates(self, ext):
        """Round-trip ``self.tsframe`` with a two-level index through Excel.

        The frame's index is temporarily replaced by a MultiIndex built
        from the original index and a running integer.  The original
        index is restored and the temporary file removed even if a check
        fails, so later tests see an unmodified fixture.
        """
        path = '__tmp_to_excel_multiindex_dates__' + ext + '__.' + ext

        # try multiindex with dates
        tsframe = self.tsframe
        old_index = tsframe.index
        new_index = [old_index, np.arange(len(old_index))]
        tsframe.index = MultiIndex.from_arrays(new_index)

        try:
            tsframe.to_excel(path, 'test1', index_label=['time', 'foo'])
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=[0, 1])
            tm.assert_frame_equal(tsframe, recons)

            # infer index
            tsframe.to_excel(path, 'test1')
            reader = ExcelFile(path)
            recons = reader.parse('test1')
            tm.assert_frame_equal(tsframe, recons)
        finally:
            self.tsframe.index = old_index  # needed if setUP becomes classmethod

            if os.path.exists(path):
                os.remove(path)
示例#45
0
    def test_to_excel(self):
        """Write ``self.panel`` to an xlsx workbook and compare each item's sheet.

        Every item is read back from the sheet named ``str(item)``.  The
        test is skipped when xlwt, xlrd or openpyxl cannot be imported.
        The temporary workbook is removed when the test finishes, whether
        or not it passed.
        """
        try:
            # imported only to probe availability
            import xlwt
            import xlrd
            import openpyxl
        except ImportError:
            raise nose.SkipTest

        import os  # local import: only needed for the cleanup below

        path = '__tmp__.xlsx'
        try:
            self.panel.to_excel(path)
            reader = ExcelFile(path)
            for item, df in self.panel.iteritems():
                recdf = reader.parse(str(item), index_col=0)
                assert_frame_equal(df, recdf)
        finally:
            # the original never deleted the workbook
            if os.path.exists(path):
                os.remove(path)
    def _check_extension_sheets(self, ext):
        """Round-trip two frames through separate sheets of one workbook.

        ``self.frame`` goes to sheet 'test1' and ``self.tsframe`` to sheet
        'test2'; both are read back and the sheet names and their order
        are checked.
        """
        tmp_name = '__tmp_to_excel_from_excel_sheets__.' + ext

        with ensure_clean(tmp_name) as path:
            self.frame['A'][:5] = nan

            # the writer has to accept each of these option sets
            for options in ({}, {'cols': ['A', 'B']}, {'header': False},
                            {'index': False}):
                self.frame.to_excel(path, 'test1', **options)

            # Test writing to separate sheets
            writer = ExcelWriter(path)
            self.frame.to_excel(writer, 'test1')
            self.tsframe.to_excel(writer, 'test2')
            writer.save()

            workbook = ExcelFile(path)
            result = workbook.parse('test1', index_col=0)
            tm.assert_frame_equal(self.frame, result)
            result = workbook.parse('test2', index_col=0)
            tm.assert_frame_equal(self.tsframe, result)

            np.testing.assert_equal(2, len(workbook.sheet_names))
            for position, sheet in enumerate(['test1', 'test2']):
                np.testing.assert_equal(sheet, workbook.sheet_names[position])
示例#47
0
    def test_parse_cols_str(self):
        """String forms of ``parse_cols`` must select the expected columns.

        For test.xls and test.xlsx, each column specification is applied
        to both sheets and the result is compared with the csv frame
        reindexed to the listed columns.
        """
        _skip_if_no_openpyxl()
        _skip_if_no_xlrd()

        # (parse_cols specification, columns expected besides the index)
        cases = [('A:D', ['A', 'B', 'C']),
                 ('A,C,D', ['B', 'C']),
                 ('A,C:D', ['B', 'C'])]

        for s in ['', 'x']:

            workbook = ExcelFile(os.path.join(self.dirpath, 'test.xls%s' % s))

            for spec, kept in cases:
                sheet1 = workbook.parse('Sheet1',
                                        index_col=0,
                                        parse_dates=True,
                                        parse_cols=spec)
                reference = read_csv(self.csv1, index_col=0, parse_dates=True)
                reference = reference.reindex(columns=kept)
                sheet2 = workbook.parse('Sheet2',
                                        skiprows=[1],
                                        index_col=0,
                                        parse_dates=True,
                                        parse_cols=spec)
                tm.assert_frame_equal(sheet1, reference)
                tm.assert_frame_equal(sheet2, reference)
示例#48
0
    def test_to_excel_float_format(self):
        """Round-trip a float frame written with ``float_format='%.2f'``.

        For each of the 'xls' and 'xlsx' extensions a small frame is written
        to a scratch workbook, read back, and compared with the same values
        rounded to two decimals. The scratch file is removed even when the
        write, the read or the comparison fails.
        """
        _skip_if_no_excelsuite()
        for ext in ['xls', 'xlsx']:
            filename = '__tmp_to_excel_float_format__.' + ext
            df = DataFrame([[0.123456, 0.234567, 0.567567],
                            [12.32112, 123123.2, 321321.2]],
                           index=['A', 'B'],
                           columns=['X', 'Y', 'Z'])
            try:
                df.to_excel(filename, 'test1', float_format='%.2f')

                reader = ExcelFile(filename)
                rs = reader.parse('test1', index_col=None)
                xp = DataFrame([[0.12, 0.23, 0.57],
                                [12.32, 123123.20, 321321.20]],
                               index=['A', 'B'],
                               columns=['X', 'Y', 'Z'])
                tm.assert_frame_equal(rs, xp)
            finally:
                # Guard with exists(): if to_excel failed before creating the
                # file, a bare remove() would mask the real error.
                if os.path.exists(filename):
                    os.remove(filename)
    def _check_extension_int64(self, ext):
        """Round-trip an ``np.int64`` frame through a workbook of type *ext*.

        ext : file extension (without the dot) appended to the scratch name.
        """
        filename = '__tmp_to_excel_from_excel_int64__.' + ext

        with ensure_clean(filename) as path:
            self.frame['A'][:5] = nan

            # Warm-up writes covering the basic to_excel options.
            for options in ({}, {'cols': ['A', 'B']},
                            {'header': False}, {'index': False}):
                self.frame.to_excel(path, 'test1', **options)

            # Test np.int64, values read come back as float
            ints = DataFrame(np.random.randint(-10, 10, size=(10, 2)),
                             dtype=np.int64)
            ints.to_excel(path, 'test1')
            workbook = ExcelFile(path)
            roundtripped = workbook.parse('test1').astype(np.int64)
            tm.assert_frame_equal(ints, roundtripped, check_dtype=False)
    def test_excel_roundtrip_indexname(self):
        """A named index ('foo') must survive a write/read cycle through .xls."""
        _skip_if_no_xlrd()
        _skip_if_no_xlwt()

        filename = '%s.xls' % tm.rands(10)

        expected = DataFrame(np.random.randn(10, 4))
        expected.index.name = 'foo'

        with ensure_clean(filename) as path:
            expected.to_excel(path)

            workbook = ExcelFile(path)
            first_sheet = workbook.sheet_names[0]
            result = workbook.parse(first_sheet, index_col=0)

            tm.assert_frame_equal(result, expected)
            self.assertEqual(result.index.name, 'foo')
    def _check_extension_bool(self, ext):
        """Round-trip a boolean frame through a workbook of type *ext*.

        ext : file extension (without the dot) appended to the scratch name.
        """
        path = '__tmp_to_excel_from_excel_bool__.' + ext

        with ensure_clean(path) as path:
            self.frame['A'][:5] = nan

            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # Test reading/writing boolean data, roundtrip only works for xlsx
            frame = (DataFrame(np.random.randn(10, 2)) >= 0)
            frame.to_excel(path, 'test1')
            reader = ExcelFile(path)
            # np.bool_ is the canonical scalar type; the np.bool8 alias is
            # deprecated and no longer exists in NumPy 2.0.
            recons = reader.parse('test1').astype(np.bool_)
            tm.assert_frame_equal(frame, recons)
示例#52
0
    def test_to_excel(self):
        """Write ``self.panel`` to xls/xlsx and compare every item's sheet.

        The test is skipped when any of the Excel libraries is missing. The
        scratch workbook is deleted even if the write, the read or a frame
        comparison fails.
        """
        try:
            # xlwt/xlrd/openpyxl are imported only to detect availability.
            import os
            import xlwt
            import xlrd
            import openpyxl
            from pandas.io.parsers import ExcelFile
        except ImportError:
            raise nose.SkipTest

        for ext in ['xls', 'xlsx']:
            path = '__tmp__.' + ext
            try:
                self.panel.to_excel(path)
                reader = ExcelFile(path)
                for item, df in self.panel.iteritems():
                    recdf = reader.parse(str(item), index_col=0)
                    assert_frame_equal(df, recdf)
            finally:
                # Guard with exists(): if to_excel failed before creating the
                # file, a bare remove() would mask the real error.
                if os.path.exists(path):
                    os.remove(path)
    def _check_extension_colaliases(self, ext):
        """Check that ``header=<aliases>`` renames the columns written to *ext*.

        ext : file extension (without the dot) appended to the scratch name.
        """
        filename = '__tmp_to_excel_from_excel_aliases__.' + ext

        with ensure_clean(filename) as path:
            self.frame['A'][:5] = nan

            # Warm-up writes covering the basic to_excel options.
            for options in ({}, {'cols': ['A', 'B']},
                            {'header': False}, {'index': False}):
                self.frame.to_excel(path, 'test1', **options)

            # column aliases
            aliases = Index(['AA', 'X', 'Y', 'Z'])
            self.frame2.to_excel(path, 'test1', header=aliases)
            workbook = ExcelFile(path)
            actual = workbook.parse('test1', index_col=0)
            expected = self.frame2.copy()
            expected.columns = aliases
            tm.assert_frame_equal(expected, actual)
示例#54
0
    def _check_excel_multiindex(self, ext):
        """Round-trip ``self.frame`` with a two-level index through *ext*.

        ext : file extension (without the dot) used for the scratch workbook.

        ``self.frame`` is temporarily given a MultiIndex named
        ('first', 'second'). The original index is restored and the scratch
        file removed in a ``finally`` block, so a failing assertion cannot
        leave the shared fixture or the working directory in a modified state.
        """
        path = '__tmp_to_excel_multiindex__' + ext + '__.' + ext

        frame = self.frame
        old_index = frame.index
        arrays = np.arange(len(old_index) * 2).reshape(2, -1)
        new_index = MultiIndex.from_arrays(arrays, names=['first', 'second'])
        frame.index = new_index
        try:
            frame.to_excel(path, 'test1', header=False)
            frame.to_excel(path, 'test1', cols=['A', 'B'])

            # round trip
            frame.to_excel(path, 'test1')
            reader = ExcelFile(path)
            df = reader.parse('test1', index_col=[0, 1], parse_dates=False)
            tm.assert_frame_equal(frame, df)
            self.assertEqual(frame.index.names, df.index.names)
        finally:
            self.frame.index = old_index  # needed if setUP becomes a classmethod
            # The file may not exist if the very first write failed.
            if os.path.exists(path):
                os.remove(path)
示例#55
0
    def test_excel_roundtrip_indexname(self):
        """A named index ('foo') must survive a write/read cycle through .xls.

        The randomly named scratch workbook is removed on a best-effort basis
        whether or not the assertions pass.
        """
        _skip_if_no_xlrd()
        _skip_if_no_xlwt()

        path = '%s.xls' % tm.rands(10)

        df = DataFrame(np.random.randn(10, 4))
        df.index.name = 'foo'

        try:
            df.to_excel(path)

            xf = ExcelFile(path)
            result = xf.parse(xf.sheet_names[0], index_col=0)

            tm.assert_frame_equal(result, df)
            self.assertEqual(result.index.name, 'foo')
        finally:
            # Cleanup stays best-effort: a missing or locked file must not
            # hide the outcome of the test itself.
            try:
                os.remove(path)
            except os.error:
                pass
示例#56
0
"""
Origin: QE by John Stachurski and Thomas J. Sargent
Filename: wb_download.py
Authors: John Stachurski, Tomohito Okabe
LastModified: 29/08/2013

Downloads data from the World Bank site on government debt (as a percentage
of GDP) and plots the result for a subset of countries.
"""
import pandas as pd
import matplotlib.pyplot as plt
from pandas.io.parsers import ExcelFile
import urllib

# == Fetch the World Bank workbook and store it locally as gd.xls == #
wb_data_file_dir = "http://api.worldbank.org/datafiles/"
file_name = "GC.DOD.TOTL.GD.ZS_Indicator_MetaData_en_EXCEL.xls"
url = "%s%s" % (wb_data_file_dir, file_name)
urllib.urlretrieve(url, "gd.xls")

# == Load 'Sheet1' into a DataFrame indexed by the sheet's second column == #
gov_debt_xls = ExcelFile('gd.xls')
govt_debt = gov_debt_xls.parse('Sheet1', index_col=1, na_values=['NA'])

# == Transpose, keep four country columns, drop the first 36 rows == #
govt_debt = govt_debt.transpose()[['AUS', 'DEU', 'FRA', 'USA']][36:]

# == Plot the selected series == #
govt_debt.plot(lw=2)
plt.show()
示例#57
0
'''

from BoilerPlate import *

## Get unique labels the same way sklearn.metrics does (super annoying)
def unique_labels(*lists_of_labels):
    """Return a sorted array of the distinct labels found in all arguments.

    Each argument is an iterable of labels; anything exposing ``ravel``
    (e.g. a NumPy array) is flattened with it before its elements are
    collected.
    """
    seen = set()
    for candidate in lists_of_labels:
        flat = candidate.ravel() if hasattr(candidate, "ravel") else candidate
        seen.update(flat)
    return np.asarray(sorted(seen))

# Import data
# NOTE(review): '~' looks like a redacted/placeholder path - confirm.
# The frame read here is replaced a few lines below by the Excel parse.
rawdata = pd.read_csv('~')

from pandas.io.parsers import  ExcelFile
xls = ExcelFile('~')
# Parse one sheet without an index column; cells equal to 'NA' become missing.
rawdata = xls.parse('~', index_col=None, na_values=['NA'])

# Drop rows whose 'Actual text' value is missing
rawdata = rawdata.dropna(subset=['Actual text'])

# Extract features
text = rawdata['Actual text']

# Build bag-of-words features with sklearn's CountVectorizer.
# fit_transform below both fits the vocabulary and transforms the text.
from sklearn.feature_extraction.text import CountVectorizer
vectoriser_training = CountVectorizer(min_df=1,stop_words='english',strip_accents='unicode')
# Time the fit/transform step (wall-clock seconds).
t = time.time()
features = vectoriser_training.fit_transform(text) 
print "training text to word vector took", time.time()-t, "seconds"