示例#1
0
    def test_multi_index_no_level_names(self):
        data = """index1,index2,A,B,C,D
foo,one,2,3,4,5
foo,two,7,8,9,10
foo,three,12,13,14,15
bar,one,12,13,14,15
bar,two,12,13,14,15
"""

        data2 = """A,B,C,D
foo,one,2,3,4,5
foo,two,7,8,9,10
foo,three,12,13,14,15
bar,one,12,13,14,15
bar,two,12,13,14,15
"""

        lines = data.split('\n')
        no_header = '\n'.join(lines[1:])
        names = ['A', 'B', 'C', 'D']
        df = read_csv(StringIO(no_header), index_col=[0, 1], names=names)
        expected = read_csv(StringIO(data), index_col=[0, 1])
        assert_frame_equal(df, expected)

        # 2 implicit first cols
        df2 = read_csv(StringIO(data2))
        assert_frame_equal(df2, df)
示例#2
0
    def test_skiprows_bug(self):
        # GH #505
        text = """#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
1/1/2000,1.,2.,3.
1/2/2000,4,5,6
1/3/2000,7,8,9
"""
        data = read_csv(StringIO(text),
                        skiprows=range(6),
                        header=None,
                        index_col=0,
                        parse_dates=True)

        data2 = read_csv(StringIO(text),
                         skiprows=6,
                         header=None,
                         index_col=0,
                         parse_dates=True)

        expected = DataFrame(np.arange(1., 10.).reshape((3, 3)),
                             columns=['X.2', 'X.3', 'X.4'],
                             index=[
                                 datetime(2000, 1, 1),
                                 datetime(2000, 1, 2),
                                 datetime(2000, 1, 3)
                             ])
        assert_frame_equal(data, expected)
        assert_frame_equal(data, data2)
示例#3
0
    def test_parse_date_time(self):
        result = conv.parse_date_time(self.dates, self.times)
        self.assert_((result == self.expected).all())

        data = """\
date, time, a, b
2001-01-05, 10:00:00, 0.0, 10.
2001-01-05, 00:00:00, 1., 11.
"""
        datecols = {'date_time': [0, 1]}
        df = read_table(StringIO(data),
                        sep=',',
                        header=0,
                        parse_dates=datecols,
                        date_parser=conv.parse_date_time)
        self.assert_('date_time' in df)
        self.assert_(df.date_time.ix[0] == datetime(2001, 1, 5, 10, 0, 0))

        data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n"
                "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n"
                "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n"
                "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n"
                "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n"
                "KORD,19990127, 23:00:00, 22:56:00, -0.5900")

        date_spec = {'nominal': [1, 2], 'actual': [1, 3]}
        df = read_csv(StringIO(data),
                      header=None,
                      parse_dates=date_spec,
                      date_parser=conv.parse_date_time)
示例#4
0
    def test_pass_names_with_index(self):
        lines = self.data1.split('\n')
        no_header = '\n'.join(lines[1:])

        # regular index
        names = ['index', 'A', 'B', 'C', 'D']
        df = read_csv(StringIO(no_header), index_col=0, names=names)
        expected = read_csv(StringIO(self.data1), index_col=0)
        assert_frame_equal(df, expected)

        # multi index
        data = """index1,index2,A,B,C,D
foo,one,2,3,4,5
foo,two,7,8,9,10
foo,three,12,13,14,15
bar,one,12,13,14,15
bar,two,12,13,14,15
"""
        lines = data.split('\n')
        no_header = '\n'.join(lines[1:])
        names = ['index1', 'index2', 'A', 'B', 'C', 'D']
        df = read_csv(StringIO(no_header), index_col=[0, 1], names=names)
        expected = read_csv(StringIO(data), index_col=[0, 1])
        assert_frame_equal(df, expected)

        df = read_csv(StringIO(data), index_col=['index1', 'index2'])
        assert_frame_equal(df, expected)
示例#5
0
    class UnicodeWriter:
        """
        CSV writer that renders each row through an in-memory buffer and then
        writes it to the stream "f" re-encoded in the requested encoding.
        """

        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
            # The csv module renders rows into this scratch buffer first.
            self.queue = StringIO()
            self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
            # Final destination, and the encoder applied to what is sent there.
            self.stream = f
            self.encoder = codecs.getincrementalencoder(encoding)()

        def writerow(self, row):
            # Non-string cells are stringified; every cell is handed to the csv
            # writer UTF-8 encoded.
            cells = []
            for item in row:
                if not isinstance(item, basestring):
                    item = str(item)
                cells.append(item.encode("utf-8"))
            self.writer.writerow(cells)
            # Pull the rendered line back out of the buffer ...
            rendered = self.queue.getvalue().decode("utf-8")
            # ... convert it to the target encoding and emit it
            self.stream.write(self.encoder.encode(rendered))
            # reset the scratch buffer for the next row
            self.queue.truncate(0)
示例#6
0
    class UnicodeWriter:
        """
        CSV writer that renders each row through an in-memory buffer and then
        writes it to the stream "f" re-encoded in the requested encoding.
        """

        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
            # The csv module renders rows into this scratch buffer first.
            self.queue = StringIO()
            self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
            # Final destination, and the encoder applied to what is sent there.
            self.stream = f
            self.encoder = codecs.getincrementalencoder(encoding)()
            # Remember the quoting mode (None when the caller gave none).
            self.quoting = kwds.get("quoting")

        def writerow(self, row):
            # Only under QUOTE_NONNUMERIC are numbers passed through untouched.
            nonnumeric = self.quoting == csv.QUOTE_NONNUMERIC

            def _passes_through(value):
                return (nonnumeric and is_number(value)) or isinstance(value, str)

            cells = [
                value if _passes_through(value)
                else pprint_thing(value).encode('utf-8')
                for value in row
            ]

            self.writer.writerow(cells)
            # Pull the rendered line back out of the buffer ...
            rendered = self.queue.getvalue().decode("utf-8")
            # ... convert it to the target encoding and emit it
            self.stream.write(self.encoder.encode(rendered))
            # reset the scratch buffer for the next row
            self.queue.truncate(0)
示例#7
0
 def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
     # The csv module renders rows into this scratch buffer first.
     scratch = StringIO()
     self.queue = scratch
     self.writer = csv.writer(scratch, dialect=dialect, **kwds)
     # Final destination, and the encoder applied to what is sent there.
     self.stream = f
     make_encoder = codecs.getincrementalencoder(encoding)
     self.encoder = make_encoder()
     # Remember the quoting mode (None when the caller gave none).
     self.quoting = kwds.get("quoting")
示例#8
0
    class UnicodeWriter:
        """
        CSV writer that renders each row through an in-memory buffer and then
        writes it to the stream "f" re-encoded in the requested encoding.
        """
        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
            # The csv module renders rows into this scratch buffer first.
            self.queue = StringIO()
            self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
            # Final destination, and the encoder applied to what is sent there.
            self.stream = f
            self.encoder = codecs.getincrementalencoder(encoding)()
            # Remember the quoting mode (None when the caller gave none).
            self.quoting = kwds.get("quoting")

        def writerow(self, row):
            def _passes_through(value):
                # Numbers are left alone only under QUOTE_NONNUMERIC; str
                # instances are always left alone.
                return (self.quoting == csv.QUOTE_NONNUMERIC
                        and is_number(value)) or isinstance(value, str)

            cells = []
            for value in row:
                if not _passes_through(value):
                    value = pprint_thing(value).encode('utf-8')
                cells.append(value)

            self.writer.writerow(cells)
            # Pull the rendered line back out of the buffer ...
            rendered = self.queue.getvalue().decode("utf-8")
            # ... convert it to the target encoding and emit it
            self.stream.write(self.encoder.encode(rendered))
            # reset the scratch buffer for the next row
            self.queue.truncate(0)
示例#9
0
    def test_converters_corner_with_nas(self):
        import StringIO
        import numpy as np
        import pandas
        csv = """id,score,days
1,2,12
2,2-5,
3,,14+
4,6-12,2"""

        def convert_days(x):
            x = x.strip()
            if not x: return np.nan

            is_plus = x.endswith('+')
            if is_plus:
                x = int(x[:-1]) + 1
            else:
                x = int(x)
            return x

        def convert_days_sentinel(x):
            x = x.strip()
            if not x: return -1

            is_plus = x.endswith('+')
            if is_plus:
                x = int(x[:-1]) + 1
            else:
                x = int(x)
            return x

        def convert_score(x):
            x = x.strip()
            if not x: return np.nan
            if x.find('-') > 0:
                valmin, valmax = map(int, x.split('-'))
                val = 0.5 * (valmin + valmax)
            else:
                val = float(x)

            return val

        fh = StringIO.StringIO(csv)
        result = pandas.read_csv(fh,
                                 converters={
                                     'score': convert_score,
                                     'days': convert_days
                                 },
                                 na_values=[-1, '', None])
        self.assert_(isnull(result['days'][1]))

        fh = StringIO.StringIO(csv)
        result2 = pandas.read_csv(fh,
                                  converters={
                                      'score': convert_score,
                                      'days': convert_days_sentinel
                                  },
                                  na_values=[-1, '', None])
        assert_frame_equal(result, result2)
示例#10
0
    def test_skip_bad_lines(self):
        # too many lines, see #2430 for why
        data = '\n'.join(['a:b:c', 'd:e:f', 'g:h:i', 'j:k:l:m', 'l:m:n',
                          'o:p:q:r'])

        def make_reader(**extra):
            # Fresh reader over the same text; only the bad-line flags vary.
            return TextReader(StringIO(data), delimiter=':', header=None,
                              **extra)

        # Default settings: reading is expected to raise.
        self.assertRaises(parser.CParserError, make_reader().read)

        # Bad lines dropped silently: lines 4 and 6 are absent from the result.
        quiet = make_reader(error_bad_lines=False, warn_bad_lines=False)
        result = quiet.read()
        expected = {
            0: ['a', 'd', 'g', 'l'],
            1: ['b', 'e', 'h', 'm'],
            2: ['c', 'f', 'i', 'n']
        }
        assert_array_dicts_equal(result, expected)

        # Bad lines dropped with a warning: capture stderr to inspect it.
        saved_stderr = sys.stderr
        sys.stderr = StringIO()
        try:
            noisy = make_reader(error_bad_lines=False, warn_bad_lines=True)
            noisy.read()
            captured = sys.stderr.getvalue()
            self.assertTrue('Skipping line 4' in captured)
            self.assertTrue('Skipping line 6' in captured)
        finally:
            sys.stderr = saved_stderr
示例#11
0
    def test_empty_string(self):
        data = """\
One,Two,Three
a,1,one
b,2,two
,3,three
d,4,nan
e,5,five
nan,6,
g,7,seven
"""
        df = read_csv(StringIO(data))
        xp = DataFrame({
            'One': ['a', 'b', np.nan, 'd', 'e', np.nan, 'g'],
            'Two': [1, 2, 3, 4, 5, 6, 7],
            'Three': ['one', 'two', 'three', np.nan, 'five', np.nan, 'seven']
        })
        assert_frame_equal(xp.reindex(columns=df.columns), df)

        df = read_csv(StringIO(data), na_values={'One': [], 'Three': []})
        xp = DataFrame({
            'One': ['a', 'b', '', 'd', 'e', 'nan', 'g'],
            'Two': [1, 2, 3, 4, 5, 6, 7],
            'Three': ['one', 'two', 'three', 'nan', 'five', '', 'seven']
        })
        assert_frame_equal(xp.reindex(columns=df.columns), df)
示例#12
0
    def test_parse_dates_column_list(self):
        from pandas.core.datetools import to_datetime

        data = '''date;destination;ventilationcode;unitcode;units;aux_date
01/01/2010;P;P;50;1;12/1/2011
01/01/2010;P;R;50;1;13/1/2011
15/01/2010;P;P;50;1;14/1/2011
01/05/2010;P;P;50;1;15/1/2011'''

        expected = read_csv(StringIO(data), sep=";", index_col=range(4))

        lev = expected.index.levels[0]
        expected.index.levels[0] = lev.to_datetime(dayfirst=True)
        expected['aux_date'] = to_datetime(expected['aux_date'], dayfirst=True)
        expected['aux_date'] = map(Timestamp, expected['aux_date'])
        self.assert_(isinstance(expected['aux_date'][0], datetime))

        df = read_csv(StringIO(data),
                      sep=";",
                      index_col=range(4),
                      parse_dates=[0, 5],
                      dayfirst=True)
        assert_frame_equal(df, expected)

        df = read_csv(StringIO(data),
                      sep=";",
                      index_col=range(4),
                      parse_dates=['date', 'aux_date'],
                      dayfirst=True)
        assert_frame_equal(df, expected)
示例#13
0
 def test_multiple_date_col_named_components(self):
     # Components of 'nominal' given by column position ...
     by_position = read_csv(StringIO(self.ts_data), index_col='nominal',
                            parse_dates={'nominal': [1, 2]})
     # ... and by column name; both reads are compared for equality.
     by_name = read_csv(StringIO(self.ts_data), index_col='nominal',
                        parse_dates={'nominal': ['date', 'nominalTime']})
     assert_frame_equal(by_name, by_position)
示例#14
0
    def test_read_chunksize_named(self):
        # Chunked read (two rows at a time) versus a single full read.
        reader = read_csv(StringIO(self.data1), chunksize=2, index_col='index')
        whole = read_csv(StringIO(self.data1), index_col='index')

        chunks = list(reader)

        # The first three chunks must line up with consecutive slices.
        slices = [whole[:2], whole[2:4], whole[4:]]
        for position, expected in enumerate(slices):
            assert_frame_equal(chunks[position], expected)
示例#15
0
    def test_regex_separator(self):
        data = """   A   B   C   D
a   1   2   3   4
b   1   2   3   4
c   1   2   3   4
"""
        df = read_table(StringIO(data), sep='\s+')
        expected = read_csv(StringIO(re.sub('[ ]+', ',', data)), index_col=0)
        self.assert_(expected.index.name is None)
        assert_frame_equal(df, expected)
示例#16
0
    def test_csv_custom_parser(self):
        data = """A,B,C
20090101,a,1,2
20090102,b,3,4
20090103,c,4,5
"""
        df = read_csv(StringIO(data),
                      date_parser=lambda x: datetime.strptime(x, '%Y%m%d'))
        expected = read_csv(StringIO(data), parse_dates=True)
        assert_frame_equal(df, expected)
示例#17
0
    def test_parse_dates_implicit_first_col(self):
        data = """A,B,C
20090101,a,1,2
20090102,b,3,4
20090103,c,4,5
"""
        df = read_csv(StringIO(data), parse_dates=True)
        expected = read_csv(StringIO(data), index_col=0, parse_dates=True)
        self.assert_(
            isinstance(df.index[0], (datetime, np.datetime64, Timestamp)))
        assert_frame_equal(df, expected)
示例#18
0
def generate_from_template(template, ndim=1, exclude=None):
    """
    Fill `template` once per entry of the module-level `function_list` and
    return the concatenated text.

    Entries whose name appears in `exclude` are skipped. The template is
    formatted with the keys 'name', 'c_type', 'dtype' and 'raise_on_na';
    the last is the string 'False' when the entry can hold NA and 'True'
    otherwise. `ndim` is accepted but not used by this function.
    """
    output = StringIO()
    for name, c_type, dtype, can_hold_na in function_list:
        skipped = exclude is not None and name in exclude
        if skipped:
            continue

        if can_hold_na:
            raise_on_na = 'False'
        else:
            raise_on_na = 'True'

        fields = dict(name=name, c_type=c_type, dtype=dtype,
                      raise_on_na=raise_on_na)
        output.write(template % fields)
    return output.getvalue()
示例#19
0
    def test_1000_sep(self):
        data = """A|B|C
1|2,334.0|5
10|13|10.
"""
        expected = [[1, 2334., 5], [10, 13, 10]]

        df = read_csv(StringIO(data), sep='|', thousands=',')
        assert_almost_equal(df.values, expected)

        df = read_table(StringIO(data), sep='|', thousands=',')
        assert_almost_equal(df.values, expected)
示例#20
0
    def test_fwf(self):
        data_expected = """\
2011,58,360.242940,149.910199,11950.7
2011,59,444.953632,166.985655,11788.4
2011,60,364.136849,183.628767,11806.2
2011,61,413.836124,184.375703,11916.8
2011,62,502.953953,173.237159,12468.3
"""
        expected = read_csv(StringIO(data_expected), header=None)

        data1 = """\
201158    360.242940   149.910199   11950.7
201159    444.953632   166.985655   11788.4
201160    364.136849   183.628767   11806.2
201161    413.836124   184.375703   11916.8
201162    502.953953   173.237159   12468.3
"""
        colspecs = [(0, 4), (4, 8), (8, 20), (21, 33), (34, 43)]
        df = read_fwf(StringIO(data1), colspecs=colspecs, header=None)
        assert_frame_equal(df, expected)

        data2 = """\
2011 58   360.242940   149.910199   11950.7
2011 59   444.953632   166.985655   11788.4
2011 60   364.136849   183.628767   11806.2
2011 61   413.836124   184.375703   11916.8
2011 62   502.953953   173.237159   12468.3
"""
        df = read_fwf(StringIO(data2), widths=[5, 5, 13, 13, 7], header=None)
        assert_frame_equal(df, expected)

        # From Thomas Kluyver: apparently some non-space filler characters can
        # be seen, this is supported by specifying the 'delimiter' character:
        # http://publib.boulder.ibm.com/infocenter/dmndhelp/v6r1mx/index.jsp?topic=/com.ibm.wbit.612.help.config.doc/topics/rfixwidth.html
        data3 = """\
201158~~~~360.242940~~~149.910199~~~11950.7
201159~~~~444.953632~~~166.985655~~~11788.4
201160~~~~364.136849~~~183.628767~~~11806.2
201161~~~~413.836124~~~184.375703~~~11916.8
201162~~~~502.953953~~~173.237159~~~12468.3
"""
        df = read_fwf(StringIO(data3),
                      colspecs=colspecs,
                      delimiter='~',
                      header=None)
        assert_frame_equal(df, expected)

        self.assertRaises(ValueError,
                          read_fwf,
                          StringIO(data3),
                          colspecs=colspecs,
                          widths=[6, 10, 10, 7])
示例#21
0
    def test_skip_footer(self):
        data = """A,B,C
1,2,3
4,5,6
7,8,9
want to skip this
also also skip this
and this
"""
        result = read_csv(StringIO(data), skip_footer=3)
        no_footer = '\n'.join(data.split('\n')[:-4])
        expected = read_csv(StringIO(no_footer))

        assert_frame_equal(result, expected)
示例#22
0
    def test_comment(self):
        data = """A,B,C
1,2.,4.#hello world
5.,NaN,10.0
"""
        expected = [[1., 2., 4.], [5., np.nan, 10.]]
        df = read_csv(StringIO(data), comment='#')
        assert_almost_equal(df.values, expected)

        df = read_table(StringIO(data),
                        sep=',',
                        comment='#',
                        na_values=['NaN'])
        assert_almost_equal(df.values, expected)
示例#23
0
    def test_multiple_date_cols_chunked(self):
        # Full read, then a chunked read (two rows at a time) with the same
        # combined-date index.
        whole = read_csv(StringIO(self.ts_data), index_col='nominal',
                         parse_dates={'nominal': [1, 2]})
        reader = read_csv(StringIO(self.ts_data), index_col='nominal',
                          parse_dates={'nominal': [1, 2]}, chunksize=2)

        chunks = list(reader)

        # The first three chunks must line up with consecutive slices.
        slices = [whole[:2], whole[2:4], whole[4:]]
        for position, expected in enumerate(slices):
            assert_frame_equal(chunks[position], expected)
示例#24
0
    def test_no_header(self):
        data = """1,2,3,4,5
6,7,8,9,10
11,12,13,14,15
"""
        df = read_table(StringIO(data), sep=',', header=None)
        names = ['foo', 'bar', 'baz', 'quux', 'panda']
        df2 = read_table(StringIO(data), sep=',', header=None, names=names)
        expected = [[1, 2, 3, 4, 5.], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]]
        assert_almost_equal(df.values, expected)
        assert_almost_equal(df.values, df2.values)
        self.assert_(
            np.array_equal(df.columns, ['X.1', 'X.2', 'X.3', 'X.4', 'X.5']))
        self.assert_(np.array_equal(df2.columns, names))
示例#25
0
    def test_read_table_duplicate_index(self):
        data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo,12,13,14,15
bar,12,13,14,15
"""

        result = read_csv(StringIO(data), index_col=0)
        expected = read_csv(StringIO(data)).set_index('index',
                                                      verify_integrity=False)
        assert_frame_equal(result, expected)
示例#26
0
    def test_quoting(self):
        bad_line_small = """printer\tresult\tvariant_name
Klosterdruckerei\tKlosterdruckerei <Salem> (1611-1804)\tMuller, Jacob
Klosterdruckerei\tKlosterdruckerei <Salem> (1611-1804)\tMuller, Jakob
Klosterdruckerei\tKlosterdruckerei <Kempten> (1609-1805)\t"Furststiftische Hofdruckerei,  <Kempten""
Klosterdruckerei\tKlosterdruckerei <Kempten> (1609-1805)\tGaller, Alois
Klosterdruckerei\tKlosterdruckerei <Kempten> (1609-1805)\tHochfurstliche Buchhandlung <Kempten>"""
        self.assertRaises(Exception,
                          read_table,
                          StringIO(bad_line_small),
                          sep='\t')

        good_line_small = bad_line_small + '"'
        df = read_table(StringIO(good_line_small), sep='\t')
        self.assert_(len(df) == 3)
示例#27
0
    def test_uquery(self):
        # Skip when the MySQL driver is not installed.
        try:
            import MySQLdb
        except ImportError:
            raise nose.SkipTest

        # Start from a clean table populated with a generated time frame.
        frame = tm.makeTimeDataFrame()
        drop_sql = "DROP TABLE IF EXISTS test_table"
        cur = self.db.cursor()
        cur.execute(drop_sql)
        sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')

        # uquery is expected to report one affected row for a single insert.
        stmt = 'INSERT INTO test_table VALUES(2.314, -123.1, 1.234, 2.3)'
        self.assertEqual(sql.uquery(stmt, con=self.db), 1)

        # Silence stdout while the failing queries run. The previous stream is
        # saved and restored (not sys.__stdout__) so that a redirection already
        # installed by the test runner survives this test.
        saved_stdout = sys.stdout
        sys.stdout = StringIO()
        try:
            self.assertRaises(MySQLdb.ProgrammingError,
                              sql.tquery,
                              'insert into blah values (1)',
                              con=self.db)

            self.assertRaises(MySQLdb.ProgrammingError,
                              sql.tquery,
                              'insert into blah values (1)',
                              con=self.db,
                              retry=True)
        finally:
            sys.stdout = saved_stdout
示例#28
0
    def test_tquery(self):
        # Skip when the MySQL driver is not installed.
        try:
            import MySQLdb
        except ImportError:
            raise nose.SkipTest

        # Start from a clean table populated with a generated time frame.
        frame = tm.makeTimeDataFrame()
        drop_sql = "DROP TABLE IF EXISTS test_table"
        cur = self.db.cursor()
        cur.execute(drop_sql)
        sql.write_frame(frame, name='test_table', con=self.db, flavor='mysql')

        # Column A read back through tquery must match the original column.
        result = sql.tquery("select A from test_table", self.db)
        expected = frame.A
        result = Series(result, frame.index)
        tm.assert_series_equal(result, expected)

        # Silence stdout while the failing queries run. The previous stream is
        # saved and restored (not sys.__stdout__) so that a redirection already
        # installed by the test runner survives this test.
        saved_stdout = sys.stdout
        sys.stdout = StringIO()
        try:
            self.assertRaises(MySQLdb.ProgrammingError,
                              sql.tquery,
                              'select * from blah',
                              con=self.db)

            self.assertRaises(MySQLdb.ProgrammingError,
                              sql.tquery,
                              'select * from blah',
                              con=self.db,
                              retry=True)
        finally:
            sys.stdout = saved_stdout
示例#29
0
    def test_execute_closed_connection(self):
        _skip_if_no_MySQLdb()

        # Recreate the scratch table used by this test.
        drop_sql = "DROP TABLE IF EXISTS test"
        create_sql = """
        CREATE TABLE test
        (
        a TEXT,
        b TEXT,
        c REAL,
        PRIMARY KEY (a(5), b(5))
        );
        """
        cur = self.db.cursor()
        cur.execute(drop_sql)
        cur.execute(create_sql)

        # Insert one row, then close the connection before querying again.
        sql.execute('INSERT INTO test VALUES("foo", "bar", 1.234)', self.db)
        self.db.close()

        # Silence stdout while the failing query runs. The previous stream is
        # saved and restored (not sys.__stdout__) so that a redirection already
        # installed by the test runner survives this test.
        saved_stdout = sys.stdout
        sys.stdout = StringIO()
        try:
            self.assertRaises(Exception,
                              sql.tquery,
                              "select * from test",
                              con=self.db)
        finally:
            sys.stdout = saved_stdout
示例#30
0
 def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
     # The csv module renders rows into this scratch buffer first.
     scratch = StringIO()
     self.queue = scratch
     self.writer = csv.writer(scratch, dialect=dialect, **kwds)
     # Final destination, and the encoder applied to what is sent there.
     self.stream = f
     make_encoder = codecs.getincrementalencoder(encoding)
     self.encoder = make_encoder()
     # Remember the quoting mode (None when the caller gave none).
     self.quoting = kwds.get("quoting")
示例#31
0
    def test_parse_booleans(self):
        # A single headerless column of True/False literals.
        data = '\n'.join(['True', 'False', 'True', 'True'])

        columns = TextReader(StringIO(data), header=None).read()

        # Column 0 must come back with a boolean dtype.
        self.assert_(columns[0].dtype == np.bool_)
示例#32
0
文件: data.py 项目: zkluo1/pandas
def get_data_famafrench(name, start=None, end=None):
    """
    Download "<name>.zip" from the Fama/French FTP area and split the text
    file "<name>.txt" inside it into DataFrames.

    Returns a dict of DataFrames keyed by the position of each section in
    the file. `start` and `end` are passed through _sanitize_dates but are
    not used afterwards in this function.
    """
    start, end = _sanitize_dates(start, end)

    # path of zip files
    zipFileURL = "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/"

    response = urllib.urlopen(zipFileURL + name + ".zip")
    archive = ZipFile(StringIO(response.read()))
    data = archive.open(name + ".txt").readlines()

    # Lines of length 2 are treated as the boundaries between sections.
    line_lengths = np.array([len(line) for line in data])
    file_edges = np.where(line_lengths == 2)[0]

    datasets = {}
    edge_pairs = zip(file_edges[:-1], file_edges[1:])
    for i, (lower, upper) in enumerate(edge_pairs):
        rows = [line.split() for line in data[(lower + 1):upper]]
        # Only sections with more than ten lines are kept.
        if len(rows) <= 10:
            continue

        widths = np.array([len(row) for row in rows])
        ncol = np.median(widths)
        # The header is the last row with one field fewer than the median.
        header_index = np.where(widths == (ncol - 1))[0][-1]
        # to ensure the header is unique
        header = [str(pos) + " " + label
                  for pos, label in enumerate(rows[header_index], 1)]

        body = rows[(header_index + 1):]
        index = np.array([row[0] for row in body], dtype=int)
        values = np.array([row[1:] for row in body], dtype=float)
        datasets[i] = DataFrame(values, index, columns=header)

    return datasets
示例#33
0
文件: data.py 项目: zkluo1/pandas
def get_data_yahoo(name=None, start=None, end=None, retry_count=3, pause=0):
    """
    Get historical data for the given name from yahoo.
    Date format is datetime

    The request is attempted up to `retry_count` times, sleeping `pause`
    seconds after each response whose status code is not 200.

    Returns a DataFrame with the rows of the CSV response in reverse order,
    or None (after printing a message) when `name` is None.

    Raises Exception when no attempt produced a 200 response.
    """
    start, end = _sanitize_dates(start, end)

    if name is None:
        # Single-argument parenthesised form prints the same text under both
        # the print statement and the print function.
        print("Need to provide a name")
        return None

    yahoo_URL = 'http://ichart.yahoo.com/table.csv?'

    # The month fields are sent zero-based, hence the "- 1".
    url = yahoo_URL + 's=%s' % name + \
      '&a=%s' % (start.month - 1) + \
      '&b=%s' % start.day + \
      '&c=%s' % start.year + \
      '&d=%s' % (end.month - 1) + \
      '&e=%s' % end.day + \
      '&f=%s' % end.year + \
      '&g=d' + \
      '&ignore=.csv'
    for _ in range(retry_count):
        resp = urllib2.urlopen(url)
        try:
            if resp.code == 200:
                lines = resp.read()
                rs = read_csv(StringIO(bytes_to_str(lines)),
                              index_col=0,
                              parse_dates=True)
                return rs[::-1]
        finally:
            # Release the connection whether or not the attempt succeeded.
            resp.close()
        time.sleep(pause)
    # Report the number of attempts made (retry_count), not the pause length.
    raise Exception("after %d tries, Yahoo did not return a 200 for url %s" %
                    (retry_count, url))
示例#34
0
    def test_verbose_import(self):
        text = """a,b,c,d
one,1,2,3
one,1,2,3
,1,2,3
one,1,2,3
,1,2,3
,1,2,3
one,1,2,3
two,1,2,3"""

        buf = StringIO()
        sys.stdout = buf

        try:
            # it works!
            df = read_csv(StringIO(text), verbose=True)
            self.assert_(buf.getvalue() == 'Filled 3 NA values in column a\n')
        finally:
            sys.stdout = sys.__stdout__

        buf = StringIO()
        sys.stdout = buf

        text = """a,b,c,d
one,1,2,3
two,1,2,3
three,1,2,3
four,1,2,3
five,1,2,3
,1,2,3
seven,1,2,3
eight,1,2,3"""

        try:
            # it works!
            df = read_csv(StringIO(text), verbose=True, index_col=0)
            self.assert_(buf.getvalue() == 'Found 1 NA values in the index\n')
        finally:
            sys.stdout = sys.__stdout__
示例#35
0
def generate_put_functions():
    """
    Fill the module-level `put2d_template` once per (name, c_type, dest_type)
    combination listed below and return the concatenated text.

    The template receives 'name', 'c_type', 'dest_type' (the destination
    type with every '_t' removed) and 'dest_type2' (the destination type
    as listed).
    """
    combinations = (
        ('float64', 'float64_t', 'object'),
        ('float64', 'float64_t', 'float64_t'),
        ('object', 'object', 'object'),
        ('int32', 'int32_t', 'int64_t'),
        ('int32', 'int32_t', 'float64_t'),
        ('int32', 'int32_t', 'object'),
        ('int64', 'int64_t', 'int64_t'),
        ('int64', 'int64_t', 'float64_t'),
        ('int64', 'int64_t', 'object'),
        ('bool', 'uint8_t', 'uint8_t'),
        ('bool', 'uint8_t', 'object'),
    )

    output = StringIO()
    for name, c_type, dest_type in combinations:
        fields = {
            'name': name,
            'c_type': c_type,
            'dest_type': dest_type.replace('_t', ''),
            'dest_type2': dest_type,
        }
        output.write(put2d_template % fields)
    return output.getvalue()
示例#36
0
def generate_ensure_dtypes():
    """
    Fill the module-level `ensure_dtype_template` once per entry of
    `ensure_functions` and return the concatenated text.

    The template is formatted against this function's local variables, so the
    loop variable names `name`, `ctype` and `dtype` must stay as they are.
    """
    output = StringIO()
    for name, ctype, dtype in ensure_functions:
        output.write(ensure_dtype_template % locals())
    return output.getvalue()
示例#37
0
    def _coef_table(self):
        # Render the coefficient table as text: a header line, one row per
        # entry of self._cols, and a trailing note when self._nw_lags_beta is
        # set.
        out = StringIO()
        column_titles = ('Variable', 'Beta', 'Std Err', 't-stat',
                         'CI 2.5%', 'CI 97.5%')
        out.write('%13s %13s %13s %13s %13s %13s\n' % column_titles)
        row_format = '%13s %13.4f %13.4f %13.2f %13.4f %13.4f\n'

        results = self._results
        for position, name in enumerate(self._cols):
            # A blank line plus a banner precedes every fifth row after the
            # first.
            if position > 0 and position % 5 == 0:
                out.write('\n' + common.banner(''))

            mean_beta = results['mean_beta'][position]
            std_beta = results['std_beta'][position]
            t_stat = results['t_stat'][position]
            # Interval bounds: mean -/+ 1.96 standard errors.
            half_width = 1.96 * std_beta
            lower = mean_beta - half_width
            upper = mean_beta + half_width

            row = ('(%s)' % name, mean_beta, std_beta, t_stat, lower, upper)
            out.write(row_format % row)

        if self._nw_lags_beta is not None:
            out.write('\n')
            out.write('*** The Std Err, t-stat are Newey-West '
                      'adjusted with Lags %5d\n' % self._nw_lags_beta)

        return out.getvalue()