示例#1
0
    def test_excel_table_sheet_by_index(self, read_ext, df_ref):
        # Sheets addressed by position (0 and 1) must both reproduce df_ref,
        # through pd.read_excel as well as through ExcelFile.parse.
        workbook = ExcelFile('test1' + read_ext)

        first = pd.read_excel(workbook, 0, index_col=0)
        second = pd.read_excel(workbook, 1, skiprows=[1], index_col=0)
        for parsed in (first, second):
            tm.assert_frame_equal(parsed, df_ref, check_names=False)

        first = workbook.parse(0, index_col=0)
        second = workbook.parse(1, skiprows=[1], index_col=0)
        for parsed in (first, second):
            tm.assert_frame_equal(parsed, df_ref, check_names=False)

        # skipfooter=1 is expected to drop the last row of the first sheet.
        trimmed = pd.read_excel(workbook, 0, index_col=0, skipfooter=1)
        tm.assert_frame_equal(trimmed, first.iloc[:-1])

        # The skip_footer spelling must warn and produce the same frame.
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            legacy = pd.read_excel(workbook, 0, index_col=0, skip_footer=1)
            tm.assert_frame_equal(trimmed, legacy)

        trimmed = workbook.parse(0, index_col=0, skipfooter=1)
        tm.assert_frame_equal(trimmed, first.iloc[:-1])

        import xlrd  # will move to engine-specific tests as new ones are added
        with pytest.raises(xlrd.XLRDError):
            pd.read_excel(workbook, 'asdf')
示例#2
0
    def test_sheets(self):
        # Write two frames to separate sheets of one workbook and read them
        # back, checking both the contents and the sheet names/order.
        _skip_if_no_xlrd()
        target = '__tmp_to_excel_from_excel_sheets__.' + self.ext

        with ensure_clean(target) as tmp_path:
            self.frame['A'][:5] = nan

            # Exercise the plain writer with a few option combinations first.
            for options in ({}, {'cols': ['A', 'B']},
                            {'header': False}, {'index': False}):
                self.frame.to_excel(tmp_path, 'test1', **options)

            # Test writing to separate sheets
            writer = ExcelWriter(tmp_path)
            self.frame.to_excel(writer, 'test1')
            self.tsframe.to_excel(writer, 'test2')
            writer.save()

            reader = ExcelFile(tmp_path)
            for sheet, original in (('test1', self.frame),
                                    ('test2', self.tsframe)):
                recons = reader.parse(sheet, index_col=0)
                tm.assert_frame_equal(original, recons)

            np.testing.assert_equal(2, len(reader.sheet_names))
            for position, sheet in enumerate(('test1', 'test2')):
                np.testing.assert_equal(sheet, reader.sheet_names[position])
示例#3
0
    def test_roundtrip_indexlabels(self):
        # Round-trip a boolean frame through Excel with several index_label
        # spellings (one-element list, over-long list, plain string). In
        # every case the reloaded index is expected to be named 'test'.
        _skip_if_no_xlrd()

        with ensure_clean(self.ext) as path:

            self.frame['A'][:5] = nan

            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # test index_label
            for index_label in (['test'], ['test', 'dummy', 'dummy2'], 'test'):
                frame = (DataFrame(np.random.randn(10, 2)) >= 0)
                frame.to_excel(path, 'test1', index_label=index_label)
                reader = ExcelFile(path)
                recons = reader.parse('test1', index_col=0).astype(np.int64)
                frame.index.names = ['test']
                # Index names are not numeric, so compare with assertEqual:
                # assertAlmostEqual would raise TypeError on a mismatch
                # instead of reporting an ordinary assertion failure.
                self.assertEqual(frame.index.names, recons.index.names)
示例#4
0
    def test_to_excel_multiindex_dates(self):
        # Round-trip a frame whose index is a (date, integer) MultiIndex,
        # once with explicit index labels and once letting the reader infer
        # the index.
        _skip_if_no_xlrd()
        ext = self.ext
        path = '__tmp_to_excel_multiindex_dates__' + ext + '__.' + ext

        # try multiindex with dates
        tsframe = self.tsframe
        old_index = tsframe.index
        new_index = [old_index, np.arange(len(old_index))]
        tsframe.index = MultiIndex.from_arrays(new_index)

        # tsframe is the shared self.tsframe fixture, so its index must be
        # restored even when one of the assertions below fails.
        try:
            with ensure_clean(path) as path:
                tsframe.to_excel(path, 'test1', index_label=['time', 'foo'])
                reader = ExcelFile(path)
                recons = reader.parse('test1', index_col=[0, 1])

                tm.assert_frame_equal(tsframe, recons, check_names=False)
                # assertEqual replaces the deprecated assertEquals alias.
                self.assertEqual(recons.index.names, ('time', 'foo'))

                # infer index
                tsframe.to_excel(path, 'test1')
                reader = ExcelFile(path)
                recons = reader.parse('test1')
                tm.assert_frame_equal(tsframe, recons)
        finally:
            # needed if setUP becomes classmethod
            self.tsframe.index = old_index
示例#5
0
class ExcelExtractor(Extractor):
    '''
    An extractor for excel files.
    One sheet only for now.
    Expects column names in first row, rest of rows mapped 1:1 to incoming table rows.
    Unique identifier (or unique for domain) in first column.
    '''

    def __init__(self, incoming_table_class, file_name):
        '''
        Open the workbook ``file_name`` (looked up under ``conf.INPUT_DIR``)
        and initialise the base extractor with ``incoming_table_class``.
        '''
        self._incoming_table_class = incoming_table_class
        self.file_name = file_name

        self.workbook = ExcelFile(os.path.join(conf.INPUT_DIR, file_name))

        super(ExcelExtractor, self).__init__(self._incoming_table_class)

    def _get_workbook_rowdicts(self):
        '''
        returns list of key-value dicts for all rows in sheet, with keys in first row. empty values are removed.
        '''
        # 'records' is passed positionally because the keyword name of this
        # argument differs between pandas releases (outtype vs. orient).
        rows = self.workbook.parse().to_dict('records')
        return [dict((k, v) for k, v in row.iteritems() if notnull(v))
                for row in rows]

    def _get_workbook_keys(self):
        '''
        returns the column names of the sheet (the keys found in its first row)
        '''
        return self.workbook.parse().to_dict().keys()

    def do_extract(self):
        '''
        Create one incoming-table record per sheet row.

        Columns whose name appears in ``self._get_db_cols`` are passed to
        ``create`` as keyword arguments; every other column is converted to
        unicode and collected in a dict stored under the column named by
        ``self._get_hstore_db_col``.
        '''
        # Fetch the keys once: every call re-parses the whole workbook.
        workbook_keys = self._get_workbook_keys()
        db_cols = self._get_db_cols
        db_col_keys = set(k for k in workbook_keys if k in db_cols)
        hstore_keys = set(h for h in workbook_keys if h not in db_cols)

        for row in self._get_workbook_rowdicts():
            db_col_dict = dict((k, v) for k, v in row.iteritems() if k in db_col_keys)
            hstore_col_dict = dict((k, unicode(v)) for k, v in row.iteritems() if k in hstore_keys)

            insert_dict = db_col_dict
            insert_dict[self._get_hstore_db_col] = hstore_col_dict

            self._incoming_table_class.create(**insert_dict)

    def do_cleanup(self):
        '''
        Delete every record of the incoming table and log how many were removed.
        '''
        delete_q = self._incoming_table_class.delete()
        rows = delete_q.execute()
        # Hand the arguments to the logger so formatting only happens when
        # the message is actually emitted.
        logger.info('Deleted %d records in incoming data table %s',
                    rows, self._incoming_table_class._meta.db_table)
示例#6
0
    def test_sheet_name(self, read_ext, df_ref):
        # ExcelFile.parse must accept sheet_name as a keyword, whether it is
        # given before or after the other keyword arguments.
        workbook = ExcelFile("test1" + read_ext)
        target = "Sheet1"

        keyword_first = workbook.parse(sheet_name=target, index_col=0)  # doc
        keyword_last = workbook.parse(index_col=0, sheet_name=target)

        for parsed in (keyword_first, keyword_last):
            tm.assert_frame_equal(parsed, df_ref, check_names=False)
示例#7
0
    def test_excel_passes_na(self):
        # keep_default_na and na_values must be honoured by ExcelFile.parse:
        # each case pairs a keep_default_na flag with the rows expected back.
        _skip_if_no_xlrd()

        workbook_path = os.path.join(self.dirpath, "test2.xlsx")
        excel_data = ExcelFile(workbook_path)

        cases = (
            (False, [["NA"], [1], ["NA"], [np.nan], ["rabbit"]]),
            (True, [[np.nan], [1], [np.nan], [np.nan], ["rabbit"]]),
        )
        for keep_default, rows in cases:
            parsed = excel_data.parse("Sheet1", keep_default_na=keep_default, na_values=["apple"])
            expected = DataFrame(rows, columns=["Test"])
            tm.assert_frame_equal(parsed, expected)
示例#8
0
    def test_roundtrip_indexlabels(self):
        # Part 1: index_label given as a one-element list, an over-long list
        # and a plain string; the reloaded index is expected to be named
        # 'test' each time.
        # Part 2: index labels sharing the header row with the column names.
        _skip_if_no_xlrd()
        ext = self.ext

        with ensure_clean('__tmp_to_excel_from_excel_indexlabels__.' + ext) as tmp_path:

            self.frame['A'][:5] = nan

            for options in ({}, {'cols': ['A', 'B']},
                            {'header': False}, {'index': False}):
                self.frame.to_excel(tmp_path, 'test1', **options)

            # test index_label
            for label in (['test'], ['test', 'dummy', 'dummy2'], 'test'):
                flags = (DataFrame(np.random.randn(10, 2)) >= 0)
                flags.to_excel(tmp_path, 'test1', index_label=label)
                reloaded = ExcelFile(tmp_path).parse('test1', index_col=0)
                reloaded = reloaded.astype(np.int64)
                flags.index.names = ['test']
                self.assertEqual(flags.index.names, reloaded.index.names)

        # test index_labels in same row as column names
        with ensure_clean('%s.%s' % (tm.rands(10), ext)) as tmp_path:

            self.frame.to_excel(tmp_path, 'test1',
                                cols=['A', 'B', 'C', 'D'], index=False)
            # take 'A' and 'B' as indexes (they are in same row as cols 'C',
            # 'D')
            expected = self.frame.copy().set_index(['A', 'B'])

            reloaded = ExcelFile(tmp_path).parse('test1', index_col=[0, 1])
            tm.assert_frame_equal(expected, reloaded)
示例#9
0
    def test_excel_read_buffer(self):
        # Smoke test: ExcelFile must accept an already-open binary file
        # object for both the .xls and the .xlsx fixture.
        _skip_if_no_xlrd()
        _skip_if_no_openpyxl()

        for name in ("test.xls", "test.xlsx"):
            pth = os.path.join(self.dirpath, name)
            # The context manager closes the handle even when parsing fails.
            with open(pth, "rb") as f:
                xl = ExcelFile(f)
                # it works
                xl.parse("Sheet1", index_col=0, parse_dates=True)
示例#10
0
    def test_excel_read_buffer(self):
        # Smoke test: ExcelFile must be able to read from an open binary
        # buffer, for the legacy .xls format and for .xlsx.
        _skip_if_no_xlrd()
        _skip_if_no_openpyxl()

        # Each buffer lives in a with-block so the handle is always closed.
        pth = os.path.join(self.dirpath, 'test.xls')
        with open(pth, 'rb') as f:
            xls = ExcelFile(f)
            # it works
            xls.parse('Sheet1', index_col=0, parse_dates=True)

        pth = os.path.join(self.dirpath, 'test.xlsx')
        with open(pth, 'rb') as f:
            xl = ExcelFile(f)
            xl.parse('Sheet1', index_col=0, parse_dates=True)
示例#11
0
    def test_excel_passes_na(self):
        # Parse the same sheet with and without the default NA markers;
        # 'apple' is declared as an extra NA value in both runs.
        _skip_if_no_xlrd()

        source = os.path.join(self.dirpath, 'test2.xlsx')
        excel_data = ExcelFile(source)

        def check(keep_default, rows):
            parsed = excel_data.parse('Sheet1', keep_default_na=keep_default,
                                      na_values=['apple'])
            expected = DataFrame(rows, columns=['Test'])
            tm.assert_frame_equal(parsed, expected)

        check(False, [['NA'], [1], ['NA'], [np.nan], ['rabbit']])
        check(True, [[np.nan], [1], [np.nan], [np.nan], ['rabbit']])
示例#12
0
    def test_parse_cols_int(self):
        # With an integer parse_cols=3, both sheets of the .xls and .xlsx
        # fixtures must match the CSV reference reduced to columns A-C.
        _skip_if_no_openpyxl()
        _skip_if_no_xlrd()

        for s in ("", "x"):
            workbook = ExcelFile(os.path.join(self.dirpath, "test.xls%s" % s))

            sheet1 = workbook.parse("Sheet1", index_col=0, parse_dates=True, parse_cols=3)

            reference = self.read_csv(self.csv1, index_col=0, parse_dates=True)
            reference = reference.reindex(columns=["A", "B", "C"])

            sheet2 = workbook.parse("Sheet2", skiprows=[1], index_col=0, parse_dates=True, parse_cols=3)

            # TODO add index to xls file)
            for parsed in (sheet1, sheet2):
                tm.assert_frame_equal(parsed, reference, check_names=False)
示例#13
0
    def test_excel_table(self):
        # Both sheets of test.xls must match the CSV reference, and the two
        # footer-skipping spellings must both drop the last row.
        _skip_if_no_xlrd()

        workbook = ExcelFile(os.path.join(self.dirpath, "test.xls"))
        common = dict(index_col=0, parse_dates=True)

        sheet1 = workbook.parse("Sheet1", **common)
        reference = self.read_csv(self.csv1, index_col=0, parse_dates=True)
        sheet2 = workbook.parse("Sheet2", skiprows=[1], **common)
        for parsed in (sheet1, sheet2):
            tm.assert_frame_equal(parsed, reference, check_names=False)

        via_skipfooter = workbook.parse("Sheet1", skipfooter=1, **common)
        via_skip_footer = workbook.parse("Sheet1", skip_footer=1, **common)
        tm.assert_frame_equal(via_skipfooter, sheet1.ix[:-1])
        tm.assert_frame_equal(via_skipfooter, via_skip_footer)
示例#14
0
    def _check_extension_indexlabels(self, ext):
        # Round-trip index labels through an Excel file with extension `ext`.
        #
        # Part 1 writes a boolean frame with index_label given as a
        # one-element list, an over-long list and a plain string; the
        # reloaded index is expected to be named 'test' each time.
        # Part 2 checks index labels that share the header row with the
        # column names.
        path = "__tmp_to_excel_from_excel_indexlabels__." + ext

        with ensure_clean(path) as path:

            self.frame["A"][:5] = nan

            self.frame.to_excel(path, "test1")
            self.frame.to_excel(path, "test1", cols=["A", "B"])
            self.frame.to_excel(path, "test1", header=False)
            self.frame.to_excel(path, "test1", index=False)

            # test index_label
            for index_label in (["test"], ["test", "dummy", "dummy2"], "test"):
                frame = DataFrame(np.random.randn(10, 2)) >= 0
                frame.to_excel(path, "test1", index_label=index_label)
                reader = ExcelFile(path)
                recons = reader.parse("test1", index_col=0).astype(np.int64)
                frame.index.names = ["test"]
                self.assertEqual(frame.index.names, recons.index.names)

        # test index_labels in same row as column names
        # Use the extension under test rather than a hard-coded ".xls", so
        # this part also exercises the format selected by `ext`.
        path = "%s.%s" % (tm.rands(10), ext)

        with ensure_clean(path) as path:

            self.frame.to_excel(path, "test1", cols=["A", "B", "C", "D"], index=False)
            # take 'A' and 'B' as indexes (they are in same row as cols 'C',
            # 'D')
            df = self.frame.copy()
            df = df.set_index(["A", "B"])

            reader = ExcelFile(path)
            recons = reader.parse("test1", index_col=[0, 1])
            tm.assert_frame_equal(df, recons)
示例#15
0
    def test_excel_cell_error_na(self):
        # The single 'Test' cell of test3.xls is expected to come back as NaN.
        _skip_if_no_xlrd()

        workbook = ExcelFile(os.path.join(self.dirpath, 'test3.xls'))
        tm.assert_frame_equal(workbook.parse('Sheet1'),
                              DataFrame([[np.nan]], columns=['Test']))
示例#16
0
    def test_to_excel_unicode_filename(self):
        # Write to and read back from a file with a non-ASCII name, for both
        # extensions; skip when the file system cannot create such a name.
        _skip_if_no_excelsuite()

        labels = dict(index=["A", "B"], columns=["X", "Y", "Z"])

        for ext in ["xls", "xlsx"]:
            filename = u"\u0192u." + ext

            try:
                probe = open(filename, "wb")
            except UnicodeEncodeError:
                raise nose.SkipTest("no unicode file names on this system")
            probe.close()

            df = DataFrame(
                [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
                **labels
            )

            with ensure_clean(filename) as tmp_name:
                df.to_excel(tmp_name, "test1", float_format="%.2f")

                rs = ExcelFile(tmp_name).parse("test1", index_col=None)
                # Values as they look after formatting with two decimals.
                xp = DataFrame(
                    [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
                    **labels
                )
                tm.assert_frame_equal(rs, xp)
示例#17
0
    def test_excel_stop_iterator(self):
        # test2.xls is expected to parse into one row under two columns.
        _skip_if_no_xlrd()

        workbook = ExcelFile(os.path.join(self.dirpath, 'test2.xls'))
        tm.assert_frame_equal(
            workbook.parse('Sheet1'),
            DataFrame([['aaaa', 'bbbbb']], columns=['Test', 'Test1']))
示例#18
0
    def test_excel_stop_iterator(self):
        # Parsing Sheet1 of test2.xls must give exactly one two-column row.
        _skip_if_no_xlrd()

        source = os.path.join(self.dirpath, "test2.xls")
        actual = ExcelFile(source).parse("Sheet1")
        wanted = DataFrame([["aaaa", "bbbbb"]], columns=["Test", "Test1"])
        tm.assert_frame_equal(actual, wanted)
示例#19
0
    def test_to_excel_unicode_filename(self):
        # Round-trip a frame through a file whose name contains a non-ASCII
        # character; skip when such a file cannot be created here.
        _skip_if_no_xlrd()
        filename = u('\u0192u.') + self.ext

        try:
            probe = open(filename, 'wb')
        except UnicodeEncodeError:
            raise nose.SkipTest('no unicode file names on this system')
        probe.close()

        labels = dict(index=['A', 'B'], columns=['X', 'Y', 'Z'])
        df = DataFrame([[0.123456, 0.234567, 0.567567],
                        [12.32112, 123123.2, 321321.2]], **labels)

        with ensure_clean(filename) as tmp_name:
            df.to_excel(tmp_name, 'test1', float_format='%.2f')

            rs = ExcelFile(tmp_name).parse('test1', index_col=None)
            # Values as they look after formatting with two decimals.
            xp = DataFrame([[0.12, 0.23, 0.57],
                            [12.32, 123123.20, 321321.20]], **labels)
            tm.assert_frame_equal(rs, xp)
示例#20
0
    def check_excel_table_sheet_by_index(self, filename, csvfile):
        # Compare sheets 0 and 1 of `filename` (addressed by position)
        # against the CSV reference `csvfile`, check both footer-skipping
        # spellings, and require XLRDError for an unknown sheet name.
        import xlrd

        workbook = ExcelFile(os.path.join(self.dirpath, filename))
        common = dict(index_col=0, parse_dates=True)

        sheet0 = workbook.parse(0, **common)
        reference = self.read_csv(csvfile, index_col=0, parse_dates=True)
        sheet1 = workbook.parse(1, skiprows=[1], **common)
        for parsed in (sheet0, sheet1):
            tm.assert_frame_equal(parsed, reference, check_names=False)

        via_skipfooter = workbook.parse(0, skipfooter=1, **common)
        via_skip_footer = workbook.parse(0, skip_footer=1, **common)
        tm.assert_frame_equal(via_skipfooter, sheet0.ix[:-1])
        tm.assert_frame_equal(via_skipfooter, via_skip_footer)

        with self.assertRaises(xlrd.XLRDError):
            workbook.parse('asdf')
示例#21
0
    def test_mixed(self):
        # A mixed-dtype frame must survive a write/read cycle unchanged.
        _skip_if_no_xlrd()

        with ensure_clean(self.ext) as tmp_path:
            self.mixed_frame.to_excel(tmp_path, 'test1')
            reloaded = ExcelFile(tmp_path).parse('test1', index_col=0)
            tm.assert_frame_equal(self.mixed_frame, reloaded)
示例#22
0
    def _check_extension_mixed(self, ext):
        # Write self.mixed_frame to a temporary file with extension `ext`
        # and require the reloaded frame to be equal to it.
        target = "__tmp_to_excel_from_excel_mixed__." + ext

        with ensure_clean(target) as tmp_path:
            self.mixed_frame.to_excel(tmp_path, "test1")
            reloaded = ExcelFile(tmp_path).parse("test1", index_col=0)
            tm.assert_frame_equal(self.mixed_frame, reloaded)
示例#23
0
    def test_inf_roundtrip(self):
        # Positive and negative infinity must survive a write/read cycle.
        _skip_if_no_xlrd()

        original = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)])
        with ensure_clean(self.ext) as tmp_path:
            original.to_excel(tmp_path, 'test1')
            reloaded = ExcelFile(tmp_path).parse('test1')
            tm.assert_frame_equal(original, reloaded)
示例#24
0
    def test_parse_cols_int(self):
        # With an integer parse_cols=3, both sheets of every fixture format
        # must match the CSV reference reduced to columns A-C.
        _skip_if_no_openpyxl()
        _skip_if_no_xlrd()

        for s in ('xls', 'xlsx', 'xlsm'):
            workbook = ExcelFile(os.path.join(self.dirpath, 'test.%s' % s))
            options = dict(index_col=0, parse_dates=True, parse_cols=3)

            sheet1 = workbook.parse('Sheet1', **options)

            reference = self.read_csv(self.csv1, index_col=0, parse_dates=True)
            reference = reference.reindex(columns=['A', 'B', 'C'])

            sheet2 = workbook.parse('Sheet2', skiprows=[1], **options)

            # TODO add index to xls file)
            for parsed in (sheet1, sheet2):
                tm.assert_frame_equal(parsed, reference, check_names=False)
示例#25
0
    def test_tsframe(self):
        # The first five rows of a time-indexed frame must round-trip.
        _skip_if_no_xlrd()

        original = tm.makeTimeDataFrame()[:5]

        with ensure_clean(self.ext) as tmp_path:
            original.to_excel(tmp_path, 'test1')
            reloaded = ExcelFile(tmp_path).parse('test1')
            tm.assert_frame_equal(original, reloaded)
示例#26
0
    def test_xlsx_table(self):
        # Both sheets of test.xlsx must match the CSV reference, and the two
        # footer-skipping spellings must both drop the last row.
        _skip_if_no_xlrd()
        _skip_if_no_openpyxl()

        workbook = ExcelFile(os.path.join(self.dirpath, 'test.xlsx'))
        common = dict(index_col=0, parse_dates=True)

        sheet1 = workbook.parse('Sheet1', **common)
        reference = self.read_csv(self.csv1, index_col=0, parse_dates=True)
        sheet2 = workbook.parse('Sheet2', skiprows=[1], **common)

        # TODO add index to xlsx file
        for parsed in (sheet1, sheet2):
            tm.assert_frame_equal(parsed, reference, check_names=False)

        via_skipfooter = workbook.parse('Sheet1', skipfooter=1, **common)
        via_skip_footer = workbook.parse('Sheet1', skip_footer=1, **common)
        tm.assert_frame_equal(via_skipfooter, sheet1.ix[:-1])
        tm.assert_frame_equal(via_skipfooter, via_skip_footer)
示例#27
0
    def _check_extension_tsframe(self, ext):
        # Round-trip the first five rows of a time-indexed frame through a
        # temporary file with extension `ext`.
        target = "__tmp_to_excel_from_excel_tsframe__." + ext
        original = tm.makeTimeDataFrame()[:5]

        with ensure_clean(target) as tmp_path:
            original.to_excel(tmp_path, "test1")
            reloaded = ExcelFile(tmp_path).parse("test1")
            tm.assert_frame_equal(original, reloaded)
示例#28
0
    def test_float_types(self):
        # For each float width, write random samples, read them back, cast
        # to the same width and compare (dtype check disabled).
        _skip_if_no_xlrd()

        float_types = (np.float16, np.float32, np.float64)
        for np_type in float_types:
            with ensure_clean(self.ext) as tmp_path:
                # Test np.float values read come back as float.
                original = DataFrame(np.random.random_sample(10), dtype=np_type)
                original.to_excel(tmp_path, 'test1')
                reloaded = ExcelFile(tmp_path).parse('test1')
                reloaded = reloaded.astype(np_type)
                tm.assert_frame_equal(original, reloaded, check_dtype=False)
示例#29
0
    def test_bool_types(self):
        # Boolean values written to Excel must compare equal to the original
        # frame once the reloaded data is cast back to bool.
        _skip_if_no_xlrd()

        # np.bool8 was only an alias of np.bool_ and is gone in NumPy 2.0,
        # so looping over both tested the same type twice (and would now
        # raise AttributeError). One pass over np.bool_ covers it.
        np_type = np.bool_
        with ensure_clean(self.ext) as path:
            # Test np.bool values read come back as bool after the cast.
            frame = (DataFrame([1, 0, True, False], dtype=np_type))
            frame.to_excel(path, 'test1')
            reader = ExcelFile(path)
            recons = reader.parse('test1').astype(np_type)
            tm.assert_frame_equal(frame, recons)
示例#30
0
    def test_to_excel_periodindex(self):
        # A monthly period-indexed frame is written, read back with parsed
        # dates, converted to monthly periods and compared to the original.
        _skip_if_no_xlrd()

        expected = self.tsframe.resample('M', kind='period')

        with ensure_clean(self.ext) as tmp_path:
            expected.to_excel(tmp_path, 'sht1')

            reloaded = ExcelFile(tmp_path).parse('sht1', index_col=0,
                                                 parse_dates=True)
            tm.assert_frame_equal(expected, reloaded.to_period('M'))
示例#31
0
    def test_excel_sheet_by_name_raise(self):
        # Sheet position 0 must parse, while the string '0' is treated as a
        # sheet name and is expected to raise XLRDError.
        _skip_if_no_xlrd()
        import xlrd

        target = os.path.join(self.dirpath, 'testit.{0}'.format(self.ext))

        with ensure_clean(target) as tmp_path:
            original = DataFrame(np.random.randn(10, 2))
            original.to_excel(tmp_path)

            workbook = ExcelFile(tmp_path)
            tm.assert_frame_equal(original, workbook.parse(0))

            with self.assertRaises(xlrd.XLRDError):
                workbook.parse('0')
示例#32
0
    def _check_extension_sheets(self, ext):
        # Write two frames to separate sheets of one workbook with extension
        # `ext`, then verify their contents and the sheet names/order.
        target = '__tmp_to_excel_from_excel_sheets__.' + ext

        with ensure_clean(target) as tmp_path:
            self.frame['A'][:5] = nan

            # Exercise the plain writer with a few option combinations first.
            for options in ({}, {'cols': ['A', 'B']},
                            {'header': False}, {'index': False}):
                self.frame.to_excel(tmp_path, 'test1', **options)

            # Test writing to separate sheets
            writer = ExcelWriter(tmp_path)
            self.frame.to_excel(writer, 'test1')
            self.tsframe.to_excel(writer, 'test2')
            writer.save()

            reader = ExcelFile(tmp_path)
            for sheet, original in (('test1', self.frame),
                                    ('test2', self.tsframe)):
                recons = reader.parse(sheet, index_col=0)
                tm.assert_frame_equal(original, recons)

            np.testing.assert_equal(2, len(reader.sheet_names))
            for position, sheet in enumerate(('test1', 'test2')):
                np.testing.assert_equal(sheet, reader.sheet_names[position])
示例#33
0
    def test_sheets(self):
        # Write two frames to separate sheets of one workbook and read them
        # back, checking both the contents and the sheet names/order.
        _skip_if_no_xlrd()

        with ensure_clean(self.ext) as tmp_path:
            self.frame['A'][:5] = nan

            # Exercise the plain writer with a few option combinations first.
            for options in ({}, {'columns': ['A', 'B']},
                            {'header': False}, {'index': False}):
                self.frame.to_excel(tmp_path, 'test1', **options)

            # Test writing to separate sheets
            writer = ExcelWriter(tmp_path)
            self.frame.to_excel(writer, 'test1')
            self.tsframe.to_excel(writer, 'test2')
            writer.save()

            reader = ExcelFile(tmp_path)
            for sheet, original in (('test1', self.frame),
                                    ('test2', self.tsframe)):
                recons = reader.parse(sheet, index_col=0)
                tm.assert_frame_equal(original, recons)

            np.testing.assert_equal(2, len(reader.sheet_names))
            for position, sheet in enumerate(('test1', 'test2')):
                np.testing.assert_equal(sheet, reader.sheet_names[position])
示例#34
0
    def _check_excel_multiindex_dates(self, ext):
        # Round-trip a frame whose index is a (date, integer) MultiIndex
        # through a file with extension `ext`, once with explicit index
        # labels and once letting the reader infer the index.
        path = '__tmp_to_excel_multiindex_dates__' + ext + '__.' + ext

        # try multiindex with dates
        tsframe = self.tsframe
        old_index = tsframe.index
        new_index = [old_index, np.arange(len(old_index))]
        tsframe.index = MultiIndex.from_arrays(new_index)

        # tsframe is the shared self.tsframe fixture, so its index must be
        # restored even when one of the assertions below fails.
        try:
            with ensure_clean(path) as path:
                tsframe.to_excel(path, 'test1', index_label=['time', 'foo'])
                reader = ExcelFile(path)
                recons = reader.parse('test1', index_col=[0, 1])

                tm.assert_frame_equal(tsframe, recons, check_names=False)
                # assertEqual replaces the deprecated assertEquals alias.
                self.assertEqual(recons.index.names, ['time', 'foo'])

                # infer index
                tsframe.to_excel(path, 'test1')
                reader = ExcelFile(path)
                recons = reader.parse('test1')
                tm.assert_frame_equal(tsframe, recons)
        finally:
            # needed if setUP becomes classmethod
            self.tsframe.index = old_index
示例#35
0
    def test_roundtrip_indexlabels(self):
        # Round-trip a boolean frame through Excel with several index_label
        # spellings (one-element list, over-long list, plain string). In
        # every case the reloaded index is expected to be named 'test'.
        _skip_if_no_xlrd()
        ext = self.ext
        path = '__tmp_to_excel_from_excel_indexlabels__.' + ext

        with ensure_clean(path) as path:

            self.frame['A'][:5] = nan

            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # test index_label
            for index_label in (['test'], ['test', 'dummy', 'dummy2'], 'test'):
                frame = (DataFrame(np.random.randn(10, 2)) >= 0)
                frame.to_excel(path, 'test1', index_label=index_label)
                reader = ExcelFile(path)
                recons = reader.parse('test1', index_col=0).astype(np.int64)
                frame.index.names = ['test']
                # Index names are not numeric, so compare with assertEqual:
                # assertAlmostEqual would raise TypeError on a mismatch
                # instead of reporting an ordinary assertion failure.
                self.assertEqual(frame.index.names, recons.index.names)
示例#36
0
    def test_parse_cols_str(self):
        # parse_cols given as an Excel-style column string (a range, a list,
        # and a mix of both) must select the matching reference columns on
        # both sheets of every fixture format.
        _skip_if_no_openpyxl()
        _skip_if_no_xlrd()

        # (parse_cols spec, data columns expected besides the index)
        scenarios = (
            ('A:D', ['A', 'B', 'C']),
            ('A,C,D', ['B', 'C']),
            ('A,C:D', ['B', 'C']),
        )

        for s in ['xls', 'xlsx', 'xlsm']:

            workbook = ExcelFile(os.path.join(self.dirpath, 'test.%s' % s))

            for spec, wanted_columns in scenarios:
                sheet1 = workbook.parse('Sheet1',
                                        index_col=0,
                                        parse_dates=True,
                                        parse_cols=spec)
                reference = read_csv(self.csv1, index_col=0, parse_dates=True)
                reference = reference.reindex(columns=wanted_columns)
                sheet2 = workbook.parse('Sheet2',
                                        skiprows=[1],
                                        index_col=0,
                                        parse_dates=True,
                                        parse_cols=spec)
                # TODO add index to xls, read xls ignores index name ?
                tm.assert_frame_equal(sheet1, reference, check_names=False)
                tm.assert_frame_equal(sheet2, reference, check_names=False)
示例#37
0
def read_benchmarks(excel_file: ExcelFile) -> pd.DataFrame:
    """
    Build a long-format table of benchmark price indices plus a CD91 index.

    :param excel_file: (ExcelFile) workbook holding the BENCHMARK and
        MACRO_MONTHLY sheets.

    :return melted_benchmarks: (DataFrame)
        code        | (String)
        date        | (Datetime)
        price_index | (float)
    """
    # Both sheets are parsed with their first 8 rows skipped.
    benchmark_sheet = excel_file.parse(BENCHMARK, skiprows=8)
    monthly_macro_sheet = excel_file.parse(MACRO_MONTHLY, skiprows=8)

    # Of the monthly macro items, only the CD91 yield rows are used.
    is_cd91 = monthly_macro_sheet[ITEM_NAME] == '시장금리:CD유통수익률(91)(%)'
    cd91_rows = monthly_macro_sheet.loc[is_cd91, :]

    # Drop the columns that are not needed (e.g. Symbol, Kind, Item,
    # Item Name, Frequency) and label the CD91 rows with their own symbol.
    benchmark_sheet = benchmark_sheet.drop(
        columns=BENCHMARK_UNNECESSARY_COLUMNS)
    cd91_rows = cd91_rows.drop(columns=BENCHMARK_UNNECESSARY_COLUMNS)
    cd91_rows[SYMBOL_NAME] = CD91

    # Reshape both tables with the shared helper (presumably symbol name ->
    # code and column names -> date; see _melt).
    benchmark_long = _melt(benchmark_sheet, PRICE_INDEX)
    cd91_long = _melt(cd91_rows, PRICE_INDEX)

    # Turn the percentage into a growth factor, take its 12th root and
    # compound it cumulatively to obtain a risk-free index.
    rate_fraction = cd91_long[PRICE_INDEX] / 100
    per_period_growth = (rate_fraction + 1)**(1 / 12)
    cd91_long[PRICE_INDEX] = per_period_growth.cumprod()

    combined = pd.concat([benchmark_long, cd91_long])

    # Order by code, then date, and renumber the rows.
    return combined.sort_values([CODE, DATE]).reset_index(drop=True)
示例#38
0
    def test_excel_roundtrip_indexname(self):
        """Write a frame with a named index to Excel and read it back.

        The frame read from the first sheet must equal the original and keep
        the index name ``'foo'``.
        """
        _skip_if_no_xlrd()

        expected = DataFrame(np.random.randn(10, 4))
        expected.index.name = 'foo'

        with ensure_clean(self.ext) as path:
            expected.to_excel(path, merge_cells=self.merge_cells)

            workbook = ExcelFile(path)
            first_sheet = workbook.sheet_names[0]
            roundtripped = workbook.parse(first_sheet,
                                          index_col=0,
                                          has_index_names=self.merge_cells)

            tm.assert_frame_equal(roundtripped, expected)
            self.assertEqual(roundtripped.index.name, 'foo')
示例#39
0
    def test_to_excel_float_format(self):
        """Values written with ``float_format='%.2f'`` read back as 2-decimal numbers."""
        _skip_if_no_xlrd()

        row_labels = ['A', 'B']
        col_labels = ['X', 'Y', 'Z']
        unrounded = DataFrame([[0.123456, 0.234567, 0.567567],
                               [12.32112, 123123.2, 321321.2]],
                              index=row_labels, columns=col_labels)

        with ensure_clean(self.ext) as filename:
            unrounded.to_excel(filename, 'test1', float_format='%.2f')

            result = ExcelFile(filename).parse('test1', index_col=None)
            expected = DataFrame([[0.12, 0.23, 0.57],
                                  [12.32, 123123.20, 321321.20]],
                                 index=row_labels, columns=col_labels)
            tm.assert_frame_equal(result, expected)
示例#40
0
    def test_excel_roundtrip_indexname(self):
        """Round-trip a frame with a named index through a randomly named file.

        The frame parsed from the first sheet must equal the original and its
        index must still be called ``'foo'``.
        """
        _skip_if_no_xlrd()

        # Random 10-character base name with the extension under test.
        path = '%s.%s' % (tm.rands(10), self.ext)

        expected = DataFrame(np.random.randn(10, 4))
        expected.index.name = 'foo'

        with ensure_clean(path) as path:
            expected.to_excel(path)

            workbook = ExcelFile(path)
            first_sheet = workbook.sheet_names[0]
            roundtripped = workbook.parse(first_sheet, index_col=0)

            tm.assert_frame_equal(roundtripped, expected)
            self.assertEqual(roundtripped.index.name, 'foo')
示例#41
0
    def _check_extension_bool(self, ext):
        """Round-trip a boolean frame through an Excel file with extension ``ext``.

        The parsed frame is cast back to ``np.bool_`` before it is compared
        with the frame that was written.

        :param ext: file extension (without the dot) used for the temp file.
        """
        path = '__tmp_to_excel_from_excel_bool__.' + ext

        with ensure_clean(path) as path:
            self.frame['A'][:5] = nan

            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # Test reading/writing booleans, roundtrip only works for xlsx.
            # np.bool_ is used instead of np.bool8: the latter was merely an
            # alias of np.bool_ and has been removed in NumPy 2.0.
            frame = (DataFrame(np.random.randn(10, 2)) >= 0)
            frame.to_excel(path, 'test1')
            reader = ExcelFile(path)
            recons = reader.parse('test1').astype(np.bool_)
            tm.assert_frame_equal(frame, recons)
示例#42
0
    def _check_extension_int64(self, ext):
        """Round-trip an int64 frame through an Excel file with extension ``ext``.

        The parsed values are cast back to ``np.int64`` and compared with
        ``check_dtype=False``.
        """
        path = '__tmp_to_excel_from_excel_int64__.' + ext

        with ensure_clean(path) as path:
            self.frame['A'][:5] = nan

            # Plain write, column subset, no header, no index - in that order.
            for write_kwargs in ({}, {'cols': ['A', 'B']}, {'header': False},
                                 {'index': False}):
                self.frame.to_excel(path, 'test1', **write_kwargs)

            # Test np.int64, values read come back as float
            int_values = np.random.randint(-10, 10, size=(10, 2))
            expected = DataFrame(int_values, dtype=np.int64)
            expected.to_excel(path, 'test1')
            recons = ExcelFile(path).parse('test1').astype(np.int64)
            tm.assert_frame_equal(expected, recons, check_dtype=False)
示例#43
0
    def test_to_excel_multiindex_dates(self):
        """Round-trip a frame whose MultiIndex has a date level and an int level.

        The frame read back must equal the one written and keep the index
        level names ``('time', 'foo')``.
        """
        _skip_if_no_xlrd()

        # try multiindex with dates
        tsframe = self.tsframe.copy()
        new_index = [tsframe.index, np.arange(len(tsframe.index))]
        tsframe.index = MultiIndex.from_arrays(new_index)

        with ensure_clean(self.ext) as path:
            tsframe.index.names = ['time', 'foo']
            tsframe.to_excel(path, 'test1', merge_cells=self.merge_cells)
            reader = ExcelFile(path)
            recons = reader.parse('test1',
                                  index_col=[0, 1],
                                  has_index_names=self.merge_cells)

            tm.assert_frame_equal(tsframe, recons)
            # assertEqual, not the deprecated assertEquals alias (removed in
            # Python 3.12); this also matches the sibling tests.
            self.assertEqual(recons.index.names, ('time', 'foo'))
示例#44
0
    def test_colaliases(self):
        """Passing an Index as ``header`` writes those labels as column names."""
        _skip_if_no_xlrd()

        with ensure_clean(self.ext) as path:
            self.frame['A'][:5] = nan

            # Plain write, column subset, no header, no index - in that order.
            for write_kwargs in ({}, {'cols': ['A', 'B']}, {'header': False},
                                 {'index': False}):
                self.frame.to_excel(path, 'test1', **write_kwargs)

            # column aliases
            col_aliases = Index(['AA', 'X', 'Y', 'Z'])
            self.frame2.to_excel(path, 'test1', header=col_aliases)
            result = ExcelFile(path).parse('test1', index_col=0)

            # Expected: the same data carrying the alias labels.
            expected = self.frame2.copy()
            expected.columns = col_aliases
            tm.assert_frame_equal(expected, result)
示例#45
0
    def _check_extension_colaliases(self, ext):
        """Check column aliases survive a round trip for the extension ``ext``.

        ``self.frame2`` is written with an Index passed as ``header`` and the
        parsed sheet must equal ``self.frame2`` relabelled with those aliases.
        """
        path = '__tmp_to_excel_from_excel_aliases__.' + ext

        with ensure_clean(path) as path:
            self.frame['A'][:5] = nan

            sheet = 'test1'
            self.frame.to_excel(path, sheet)
            self.frame.to_excel(path, sheet, cols=['A', 'B'])
            self.frame.to_excel(path, sheet, header=False)
            self.frame.to_excel(path, sheet, index=False)

            # column aliases
            aliases = Index(['AA', 'X', 'Y', 'Z'])
            self.frame2.to_excel(path, sheet, header=aliases)
            workbook = ExcelFile(path)
            parsed = workbook.parse(sheet, index_col=0)
            relabelled = self.frame2.copy()
            relabelled.columns = aliases
            tm.assert_frame_equal(relabelled, parsed)
示例#46
0
    def test_bool_types(self):
        """Round-trip a boolean frame through Excel for the NumPy bool type.

        ``np.bool8`` used to be listed as well, but it was only an alias of
        ``np.bool_`` (the very same object) and no longer exists in NumPy 2.0,
        so iterating over ``np.bool_`` alone covers the same case without
        raising ``AttributeError`` on newer NumPy.
        """
        _skip_if_no_xlrd()
        ext = self.ext
        path = '__tmp_to_excel_from_excel_bool_types__.' + ext

        for np_type in (np.bool_,):
            with ensure_clean(path) as path:
                self.frame['A'][:5] = nan

                self.frame.to_excel(path, 'test1')
                self.frame.to_excel(path, 'test1', cols=['A', 'B'])
                self.frame.to_excel(path, 'test1', header=False)
                self.frame.to_excel(path, 'test1', index=False)

                # Test np.bool values read come back as float; they are cast
                # back to the bool type before comparing.
                frame = (DataFrame([1, 0, True, False], dtype=np_type))
                frame.to_excel(path, 'test1')
                reader = ExcelFile(path)
                recons = reader.parse('test1').astype(np_type)
                tm.assert_frame_equal(frame, recons)
示例#47
0
    def test_float_types(self):
        """Round-trip frames of float16/float32/float64 values through Excel.

        The parsed values are cast back to the written dtype and compared
        with ``check_dtype=False``.
        """
        _skip_if_no_xlrd()
        path = '__tmp_to_excel_from_excel_float_types__.' + self.ext

        for np_type in (np.float16, np.float32, np.float64):
            with ensure_clean(path) as path:
                self.frame['A'][:5] = nan

                # Plain write, column subset, no header, no index.
                for write_kwargs in ({}, {'cols': ['A', 'B']},
                                     {'header': False}, {'index': False}):
                    self.frame.to_excel(path, 'test1', **write_kwargs)

                # Test np.float values read come back as float.
                samples = np.random.random_sample(10)
                expected = DataFrame(samples, dtype=np_type)
                expected.to_excel(path, 'test1')
                recons = ExcelFile(path).parse('test1').astype(np_type)
                tm.assert_frame_equal(expected, recons, check_dtype=False)
示例#48
0
    def _check_excel_multiindex(self, ext):
        """Round-trip ``self.frame`` with a two-level MultiIndex for ``ext``.

        ``self.frame`` temporarily receives a MultiIndex named
        ``['first', 'second']``; its original index is put back afterwards,
        even when an assertion or the Excel I/O fails.

        :param ext: file extension (without the dot) used for the temp file.
        """
        path = '__tmp_to_excel_multiindex__' + ext + '__.' + ext

        frame = self.frame
        old_index = frame.index
        arrays = np.arange(len(old_index) * 2).reshape(2, -1)
        new_index = MultiIndex.from_arrays(arrays, names=['first', 'second'])
        frame.index = new_index

        try:
            with ensure_clean(path) as path:
                frame.to_excel(path, 'test1', header=False)
                frame.to_excel(path, 'test1', cols=['A', 'B'])

                # round trip
                frame.to_excel(path, 'test1')
                reader = ExcelFile(path)
                df = reader.parse('test1', index_col=[0, 1],
                                  parse_dates=False)
                tm.assert_frame_equal(frame, df)
                self.assertEqual(frame.index.names, df.index.names)
        finally:
            # needed if setUP becomes a classmethod; done in ``finally`` so a
            # failing check cannot leave the shared fixture with the
            # temporary MultiIndex.
            self.frame.index = old_index
示例#49
0
    def test_to_excel_multiindex(self):
        """Round-trip ``self.frame`` carrying a two-level MultiIndex.

        The frame read back must equal the written one, including the index
        level names ``['first', 'second']``.
        """
        _skip_if_no_xlrd()

        frame = self.frame
        n_rows = len(frame.index)
        level_values = np.arange(n_rows * 2).reshape(2, -1)
        frame.index = MultiIndex.from_arrays(level_values,
                                             names=['first', 'second'])

        with ensure_clean(self.ext) as path:
            frame.to_excel(path, 'test1', header=False)
            frame.to_excel(path, 'test1', cols=['A', 'B'])

            # round trip
            frame.to_excel(path, 'test1', merge_cells=self.merge_cells)
            parse_kwargs = dict(index_col=[0, 1],
                                parse_dates=False,
                                has_index_names=self.merge_cells)
            roundtripped = ExcelFile(path).parse('test1', **parse_kwargs)
            tm.assert_frame_equal(frame, roundtripped)
            self.assertEqual(frame.index.names, roundtripped.index.names)
示例#50
0
    def test_to_excel_unicode_filename(self):
        """Write to and read from an Excel file whose name is non-ASCII.

        The test is skipped when the file cannot be opened because of a
        ``UnicodeEncodeError``.
        """
        _skip_if_no_xlrd()
        with ensure_clean(u('\u0192u.') + self.ext) as filename:
            # Probe whether such a file name can be opened at all.
            try:
                probe = open(filename, 'wb')
            except UnicodeEncodeError:
                raise nose.SkipTest('no unicode file names on this system')
            probe.close()

            row_labels = ['A', 'B']
            col_labels = ['X', 'Y', 'Z']
            unrounded = DataFrame([[0.123456, 0.234567, 0.567567],
                                   [12.32112, 123123.2, 321321.2]],
                                  index=row_labels, columns=col_labels)

            unrounded.to_excel(filename, 'test1', float_format='%.2f')

            result = ExcelFile(filename).parse('test1', index_col=None)
            expected = DataFrame([[0.12, 0.23, 0.57],
                                  [12.32, 123123.20, 321321.20]],
                                 index=row_labels, columns=col_labels)
            tm.assert_frame_equal(result, expected)
示例#51
0
    def test_int_types(self):
        """Integer frames of every width read back as int unless told otherwise.

        With the default settings both ``ExcelFile.parse`` and ``read_excel``
        must return the ``int``-cast frame; with ``convert_float=False`` the
        result must equal the ``float``-cast frame.
        """
        _skip_if_no_xlrd()

        for np_type in (np.int8, np.int16, np.int32, np.int64):

            with ensure_clean(self.ext) as path:
                values = np.random.randint(-10, 10, size=(10, 2))
                frame = DataFrame(values, dtype=np_type)
                frame.to_excel(path, 'test1')

                # Test np.int values read come back as int (rather than float
                # which is Excel's format).
                as_int = frame.astype(int)
                from_parse = ExcelFile(path).parse('test1')
                tm.assert_frame_equal(as_int, from_parse)
                from_read_excel = read_excel(path, 'test1')
                tm.assert_frame_equal(as_int, from_read_excel)

                # test with convert_float=False comes back as float
                as_float = frame.astype(float)
                unconverted = read_excel(path, 'test1', convert_float=False)
                tm.assert_frame_equal(unconverted, as_float)
示例#52
0
def main(argv):
  """Import the rows of the "FY14 Est Salaries" sheet into the database.

  The spreadsheet path is taken from the ``--xls`` command-line option. Every
  row whose 'Employee Name' is not the template placeholder is handed to
  ``add_person`` together with a fresh database session.

  :param argv: accepted for interface compatibility; as before, the options
      are parsed from ``sys.argv`` by ``argparse``, not from this argument.
  :raises SystemExit: when ``--xls`` is missing (previously this surfaced as
      a ``NameError`` further down, after the engine had been created).
  """
  parser = argparse.ArgumentParser(prog='importxls')
  parser.add_argument('--xls', help='The full pathname of the spread sheet to import')
  args = parser.parse_args()

  # Without a spreadsheet there is nothing to import; stop with a clear
  # message instead of failing later on the unbound ``salaries`` name.
  if not args.xls:
    raise SystemExit('importxls: the --xls option is required')

  xl = ExcelFile(args.xls)
  salaries = xl.parse("FY14 Est Salaries")
  # Columns seen in that sheet when this was written:
  #   'Employee Name', 'Position Title', 'Pay Plan', 'Appt Type',
  #   'Auth Hours', 'Estimated Salary', 'Estimated Benefits',
  #   'Salary & Benefits', 'Estimated Salary/Hr', 'Estimated Benefit/Hr',
  #   'Salary+Benefit/Hr', 'Leave Category', 'LAF'

  # Connection URL assembled from the module-level ``dbconnect`` mapping.
  db = create_engine('postgresql+psycopg2://' + dbconnect['user'] + ':' + dbconnect['pass'] + '@' + dbconnect['server'] + ':' +
                     dbconnect['port'] + '/' + dbconnect['instance'])

  Session = sessionmaker(bind=db)

  for i in salaries.index:
    # Skip the template row that only carries the placeholder text.
    if salaries['Employee Name'][i] != 'COPY EMPLOYEE NAME HERE':
      # NOTE(review): one session is opened per row and never closed here;
      # presumably add_person commits/closes it and checks whether the
      # person already exists - confirm against add_person.
      session = Session()
      add_person(salaries, i, session)
示例#53
0
    def test_to_excel_multiindex_no_write_index(self):
        """A MultiIndex frame written with ``index=False`` reads back index-free.

        Test writing and re-reading a MI without the index. GH 5616.
        """
        _skip_if_no_xlrd()

        # Initial non-MI frame; this is also what is expected back.
        plain = pd.DataFrame({'a': [10, 20], 'b': [30, 40], 'c': [50, 60]})

        # Same data, now carrying a two-level MultiIndex.
        with_mi = plain.copy()
        with_mi.index = pd.MultiIndex.from_tuples([(70, 80), (90, 100)])

        with ensure_clean(self.ext) as path:

            # Write out to Excel without the index, then read it back in.
            with_mi.to_excel(path, 'test1', index=False)
            read_back = ExcelFile(path).parse('test1')

            # Test that it is the same as the initial frame.
            tm.assert_frame_equal(plain, read_back)
示例#54
0
import sys
import matplotlib.pyplot as plt
from pandas.io.excel import ExcelFile

if sys.version_info[0] == 2:
    from urllib import urlretrieve
elif sys.version_info[0] == 3:
    from urllib.request import urlretrieve

# == Download the World Bank spreadsheet and store it locally as gd.xls == #
wb_data_query = "http://api.worldbank.org/v2/en/indicator/gc.dod.totl.gd.zs?downloadformat=excel"
urlretrieve(wb_data_query, "gd.xls")

# == Load the 'Data' sheet: skip 3 rows, index on column 1, 'NA' is missing == #
gov_debt_xls = ExcelFile('gd.xls')
govt_debt = gov_debt_xls.parse('Data', index_col=1, na_values=['NA'],
                               skiprows=3)

# == Transpose, keep four country columns, drop the first 38 rows, plot == #
govt_debt = govt_debt.transpose()[['AUS', 'DEU', 'FRA', 'USA']][38:]
govt_debt.plot(lw=2)
plt.show()
示例#55
0
def read_macro_daily(excel_file: ExcelFile):
    """Return the MACRO_DAILY sheet of ``excel_file``, parsed with the first 8 rows skipped."""
    return excel_file.parse(MACRO_DAILY, skiprows=8)
示例#56
0
def read_macro_monthly(excel_file: ExcelFile):
    """Return the MACRO_MONTHLY sheet of ``excel_file``, parsed with the first 8 rows skipped."""
    return excel_file.parse(MACRO_MONTHLY, skiprows=8)
示例#57
0
def read_companies(excel_file: ExcelFile) -> pd.DataFrame:
    """
    Read the COMPANY sheet and reshape it to one row per (code, date).

    The code assumes the sheet rows cycle through the item names in a fixed
    order: every ``item_name_num``-th row, starting at offset ``k``, belongs
    to item ``k``. Each item is melted separately and outer-joined on
    (code, date) so that it becomes a column of the result.

    :param excel_file: (ExcelFile)

    :return melted_companies: (DataFrame)
         code   | (String)
         date   | (Datetime)
         name   | (String)
         ...
    """
    # Read excel file.
    raw_companies = excel_file.parse(COMPANY, skiprows=8)

    # Rename Symbol -> code, Symbol Name -> name
    raw_companies = raw_companies.rename(columns={
        'Symbol': CODE,
        'Symbol Name': NAME,
    })

    # Save symbol names and item names.
    names = raw_companies.drop_duplicates(subset=CODE,
                                          keep='last').loc[:, [CODE, NAME]]
    names = names.set_index(CODE)
    # The distinct items are counted over the rows labelled up to 1000 only.
    # NOTE(review): presumably there are far fewer than 1000 items per
    # company - confirm.
    item_name_num = len(raw_companies.loc[:1000, ITEM_NAME].unique())
    # .loc slices by label and includes the end label, hence the "- 1".
    item_names = raw_companies.loc[:item_name_num - 1, ITEM_NAME]

    # Remove unnecessary columns, for example, Symbol, Kind, Item, Item Name, Frequency
    raw_companies = raw_companies.drop(columns=COMPANY_UNNECESSARY_COLUMNS)

    # Melt every item.
    melted_companies = pd.DataFrame(columns=[CODE, DATE])
    melted_companies = melted_companies.set_index([CODE, DATE])
    for index, item_name in enumerate(item_names):
        # Melt the rows of this item: the remaining column names become the
        # DATE column and the cell values a column named after the item.
        item_companies = pd.melt(raw_companies.iloc[index::item_name_num, :],
                                 id_vars=[CODE],
                                 var_name=DATE,
                                 value_name=item_name)
        # The format used to be '%Y-$m-%D', which is not a valid strptime
        # pattern ('$m' is literal text and '%D' is not a directive).
        item_companies[DATE] = pd.to_datetime(item_companies[DATE],
                                              format='%Y-%m-%d')
        item_companies = item_companies.set_index([CODE, DATE])
        melted_companies = melted_companies.join(item_companies, how='outer')

    melted_companies = melted_companies.rename(columns=COMPANY_RENAMES)

    # Add the names of company.
    melted_companies = melted_companies.join(names)

    melted_companies = melted_companies.reset_index()
    melted_companies = melted_companies.sort_values([CODE, DATE])

    # IS_MANAGED: '관리' -> True, IS_SUSPENDED: '정지' -> True, na -> False
    melted_companies[IS_MANAGED] = melted_companies[IS_MANAGED].replace(
        '관리', True)
    melted_companies[IS_MANAGED] = melted_companies[IS_MANAGED].fillna(False)
    melted_companies[IS_SUSPENDED] = melted_companies[IS_SUSPENDED].replace(
        '정지', True)
    melted_companies[IS_SUSPENDED] = melted_companies[IS_SUSPENDED].fillna(
        False)

    # nan -> 0
    to_zero_columns = [
        CFO, ALLOWANCE_AR_, TRADING_VOLUME, RES_EXP, AR, DIVP, AP,
        NET_PERSONAL_PURCHASE, NET_NATIONAL_PURCHASE,
        NET_FINANCIAL_INVESTMENT_PURCHASE, NET_INSTITUTIONAL_FOREIGN_PURCHASE,
        NET_INSTITUTIONAL_PURCHASE, NET_ETC_FINANCE_PURCHASE,
        NET_ETC_CORPORATION_PURCHASE, NET_ETC_FOREIGN_PURCHASE,
        NET_REGISTERED_FOREIGN_PURCHASE, NET_INSURANCE_PURCHASE,
        NET_PRIVATE_FUND_PURCHASE, NET_PENSION_PURCHASE, NET_FOREIGN_PURCHASE,
        NET_BANK_PURCHASE, NET_TRUST_PURCHASE, SHORT_SALE_BALANCE,
        FOREIGN_OWNERSHIP_RATIO
    ]
    melted_companies.loc[:, to_zero_columns] = melted_companies.replace(
        np.nan, 0.0).loc[:, to_zero_columns]

    # There are no SHORT_SALE_BALANCE before 2016-06-30, so the zeros filled
    # in above are turned back into NaN for those dates.
    melted_companies.loc[melted_companies[DATE] < '2016-06-30',
                         SHORT_SALE_BALANCE] = np.nan

    # Sort by code and date
    melted_companies = melted_companies.sort_values([CODE, DATE
                                                     ]).reset_index(drop=True)

    return melted_companies
示例#58
0
"""
Origin: QE by John Stachurski and Thomas J. Sargent
Filename: wb_download.py
Authors: John Stachurski, Tomohito Okabe
LastModified: 29/08/2013

Downloads data from the World Bank site on central government debt (% of GDP)
and plots the result for a subset of countries.
"""
import pandas as pd
import matplotlib.pyplot as plt
from pandas.io.excel import ExcelFile
import urllib

# == Get data and read into file gd.xls == #
# urlretrieve lives in urllib.request on Python 3 and directly in urllib on
# Python 2; import whichever is available so the script runs on both.
try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve

wb_data_file_dir = "http://api.worldbank.org/datafiles/"
file_name = "GC.DOD.TOTL.GD.ZS_Indicator_MetaData_en_EXCEL.xls"
url = wb_data_file_dir + file_name
urlretrieve(url, "gd.xls")

# == Parse data into a DataFrame == #
# Column 1 of 'Sheet1' becomes the index; the string 'NA' marks missing data.
gov_debt_xls = ExcelFile('gd.xls')
govt_debt = gov_debt_xls.parse('Sheet1', index_col=1, na_values=['NA'])

# == Take desired values and plot == #
# After transposing, the four selected labels are columns; the first 36 rows
# are dropped before plotting.
govt_debt = govt_debt.transpose()
govt_debt = govt_debt[['AUS', 'DEU', 'FRA', 'USA']]
govt_debt = govt_debt[36:]
govt_debt.plot(lw=2)
plt.show()
示例#59
0
from financials.tools.indicators import *
from pandas.io.excel import ExcelFile
import os

# Path of the reference workbook, resolved relative to the current working
# directory (so the script has to be started from the project root).
filename = os.path.join(os.getcwd(), "financials", "tools", "tests", "indicators.xls")

# Load the "Indicators" sheet: skip one row, use the first column as the
# index and parse it as dates.
xls = ExcelFile(filename)
data = xls.parse("Indicators", skiprows = 1, index_col = 0, parse_dates = True)
# Input columns handed to the indicator functions
# (presumably open/high/low/close/volume - confirm against the workbook).
df = data[["O", "H", "L", "C", "V"]]

# This is a dictionary. Keys are the function names as string while the values
# are 2-tuples with first element being the actual function and the second
# element is the list of column names in the Excel File. The dictionary is
# iterated and each function result is compared to the actual result in the
# Excel File
#
# NOTE(review): entries written like ("AD") are plain strings, not 1-tuples
# (a one-element tuple needs a trailing comma: ("AD",)). Only the entries
# with several names are real tuples - confirm the consuming code handles
# both shapes.

result = {
"AD": (AD, ("AD")),
"ATR": (ATR, ("ATR")),
"SMA": (SMA, ("SMA")),
"EMA": (EMA, ("EMA")),
"BB": (BB, ("CentralBand", "UpperBand", "LowerBand")),
"FS": (FS, ("K_full", "D_full")),
"MACD": (MACD, ("MACD", "SignalLine", "Hist")),
"OBV": (OBV, ("OBV")),
"RSI": (RSI, ("RSI")),
"SR": (SR, ("Support", "Resistance"))
}

def checkAlmostEqual(one, two, precision = 10):
    """
示例#60
0
    def test_roundtrip_indexlabels(self):
        """Check that ``index_label`` is written and read back as the index name.

        Three forms of ``index_label`` are exercised (a one-element list, a
        list with surplus labels, and a plain string); in each case the frame
        read back must have the index names ``['test']``. A second file then
        checks that two data columns can be read back as a two-level index.
        """
        _skip_if_no_xlrd()

        with ensure_clean(self.ext) as path:

            self.frame['A'][:5] = nan

            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # test index_label
            for index_label in (['test'], ['test', 'dummy', 'dummy2'], 'test'):
                frame = (DataFrame(np.random.randn(10, 2)) >= 0)
                frame.to_excel(path,
                               'test1',
                               index_label=index_label,
                               merge_cells=self.merge_cells)
                reader = ExcelFile(path)
                recons = reader.parse('test1',
                                      index_col=0,
                                      has_index_names=self.merge_cells).astype(
                                          np.int64)
                frame.index.names = ['test']
                # assertEqual for every form: the last stanza used
                # assertAlmostEqual, which on a mismatch tries to subtract
                # the name lists and fails with a TypeError instead of a
                # proper assertion failure.
                self.assertEqual(frame.index.names, recons.index.names)

        with ensure_clean(self.ext) as path:

            self.frame.to_excel(path,
                                'test1',
                                cols=['A', 'B', 'C', 'D'],
                                index=False,
                                merge_cells=self.merge_cells)
            # take 'A' and 'B' as indexes (same row as cols 'C', 'D')
            df = self.frame.copy()
            df = df.set_index(['A', 'B'])

            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=[0, 1])
            tm.assert_frame_equal(df, recons, check_less_precise=True)