Пример #1
0
def test_stata_writer_pandas():
    buf = BytesIO()
    dta = macrodata.load_pandas().data
    dta4 = dta.copy()
    for col in ('year', 'quarter'):
        dta[col] = dta[col].astype(np.int64)
        dta4[col] = dta4[col].astype(np.int32)
    # dta is int64 'i8'  given to Stata writer
    with pytest.warns(FutureWarning):
        writer = StataWriter(buf, dta)

    with warnings.catch_warnings(record=True) as w:
        writer.write_file()
        assert len(w) == 0
    buf.seek(0)

    with pytest.warns(FutureWarning):
        dta2 = genfromdta(buf)

    dta5 = DataFrame.from_records(dta2)
    # dta2 is int32 'i4'  returned from Stata reader

    if dta5.dtypes[1] is np.dtype('int64'):
        assert_frame_equal(dta.reset_index(), dta5)
    else:
        # do not check index because it has different size, int32 versus int64
        assert_frame_equal(dta4, dta5[dta5.columns[1:]])
Пример #2
0
def test_stata_writer_pandas():
    buf = BytesIO()
    dta = macrodata.load_pandas().data
    dta4 = dta.copy()
    for col in ('year','quarter'):
        dta[col] = dta[col].astype(np.int64)
        dta4[col] = dta4[col].astype(np.int32)
    # dta is int64 'i8'  given to Stata writer
    with pytest.warns(FutureWarning):
        writer = StataWriter(buf, dta)

    with warnings.catch_warnings(record=True) as w:
        writer.write_file()
        assert len(w) == 0
    buf.seek(0)

    with pytest.warns(FutureWarning):
        dta2 = genfromdta(buf)

    dta5 = DataFrame.from_records(dta2)
    # dta2 is int32 'i4'  returned from Stata reader

    if dta5.dtypes[1] is np.dtype('int64'):
        ptesting.assert_frame_equal(dta.reset_index(), dta5)
    else:
        # don't check index because it has different size, int32 versus int64
        ptesting.assert_frame_equal(dta4, dta5[dta5.columns[1:]])
Пример #3
0
def test_stata_writer_structured():
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    dta = dta.astype(np.dtype([('year', int),
                               ('quarter', int)] + dtype.descr[2:]))
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    assert_array_equal(dta, dta2)
Пример #4
0
def test_stata_writer_structured():
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    dta = dta.astype(
        np.dtype([('year', int), ('quarter', int)] + dtype.descr[2:]))
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    assert_array_equal(dta, dta2)
Пример #5
0
def test_stata_writer_array():
    buf = BytesIO()
    dta = macrodata.load().data
    dta = DataFrame.from_records(dta)
    dta.columns = ["v%d" % i for i in range(1, 15)]
    writer = StataWriter(buf, dta.values)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    dta = dta.to_records(index=False)
    assert_array_equal(dta, dta2)
Пример #6
0
def test_stata_writer_array():
    buf = BytesIO()
    dta = macrodata.load().data
    dta = DataFrame.from_records(dta)
    dta.columns = ["v%d" % i for i in range(1,15)]
    writer = StataWriter(buf, dta.values)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    dta = dta.to_records(index=False)
    assert_array_equal(dta, dta2)
Пример #7
0
def test_stata_writer_pandas():
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    #as of 0.9.0 pandas only supports i8 and f8
    dta = dta.astype(np.dtype([('year', 'i8'),
                               ('quarter', 'i8')] + dtype.descr[2:]))
    dta = DataFrame.from_records(dta)
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    ptesting.assert_frame_equal(dta.reset_index(), DataFrame.from_records(dta2))
Пример #8
0
def test_stata_writer_structured():
    buf = BytesIO()
    dta = macrodata.load(as_pandas=False).data
    dtype = dta.dtype
    dt = [('year', int), ('quarter', int)] + dtype.descr[2:]
    if not PY3:  # Remove unicode
        dt = [(name.encode('ascii'), typ) for name, typ in dt]
    dta = dta.astype(np.dtype(dt))
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    assert_array_equal(dta, dta2)
Пример #9
0
def test_stata_writer_structured():
    buf = BytesIO()
    dta = macrodata.load(as_pandas=False).data
    dtype = dta.dtype
    dt = [('year', int), ('quarter', int)] + dtype.descr[2:]
    if not PY3:  # Remove unicode
        dt = [(name.encode('ascii'), typ) for name, typ in dt]
    dta = dta.astype(np.dtype(dt))
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    assert_array_equal(dta, dta2)
Пример #10
0
def test_datetime_roundtrip():
    dta = np.array([(1, datetime(2010, 1, 1), 2), (2, datetime(2010, 2, 1), 3),
                    (4, datetime(2010, 3, 1), 5)],
                   dtype=[('var1', float), ('var2', object), ('var3', float)])
    buf = BytesIO()

    with pytest.warns(FutureWarning):
        writer = StataWriter(buf, dta, {"var2": "tm"})

    writer.write_file()
    buf.seek(0)

    with pytest.warns(FutureWarning):
        dta2 = genfromdta(buf)

    assert_equal(dta, dta2)

    dta = DataFrame.from_records(dta)
    buf = BytesIO()

    with pytest.warns(FutureWarning):
        writer = StataWriter(buf, dta, {"var2": "tm"})

    writer.write_file()
    buf.seek(0)

    with pytest.warns(FutureWarning):
        dta2 = genfromdta(buf, pandas=True)

    assert_frame_equal(dta, dta2.drop('index', axis=1))
Пример #11
0
def test_missing_roundtrip():
    buf = StringIO()
    dta = np.array([(np.nan, np.inf, "")],
                      dtype=[("double_miss", float), ("float_miss", np.float32),
                              ("string_miss", "a1")])
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta = genfromdta(buf, missing_flt=np.nan)
    assert_(isnull(dta[0][0]))
    assert_(isnull(dta[0][1]))
    assert_(dta[0][2] == "")

    dta = genfromdta("./data_missing.dta", missing_flt=-999)
    assert_(np.all([dta[0][i] == -999 for i in range(5)]))
Пример #12
0
def test_missing_roundtrip():
    buf = BytesIO()
    dta = np.array([(np.nan, np.inf, "")],
                   dtype=[("double_miss", float), ("float_miss", np.float32),
                          ("string_miss", "a1")])
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta = genfromdta(buf, missing_flt=np.nan)
    assert_(isnull(dta[0][0]))
    assert_(isnull(dta[0][1]))
    assert_(dta[0][2] == asbytes(""))

    dta = genfromdta(os.path.join(curdir, "results/data_missing.dta"),
                     missing_flt=-999)
    assert_(np.all([dta[0][i] == -999 for i in range(5)]))
Пример #13
0
def test_stata_writer_structured():
    buf = BytesIO()
    dta = macrodata.load(as_pandas=False).data
    dtype = dta.dtype
    dt = [('year', int), ('quarter', int)] + dtype.descr[2:]
    dta = dta.astype(np.dtype(dt))

    with pytest.warns(FutureWarning):
        writer = StataWriter(buf, dta)

    writer.write_file()
    buf.seek(0)
    with pytest.warns(FutureWarning):
        dta2 = genfromdta(buf)

    assert_array_equal(dta, dta2)
Пример #14
0
def test_missing_roundtrip():
    buf = BytesIO()
    dta = np.array([(np.nan, np.inf, "")],
                      dtype=[("double_miss", float), ("float_miss", np.float32),
                              ("string_miss", "a1")])
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta = genfromdta(buf, missing_flt=np.nan)
    assert_(isnull(dta[0][0]))
    assert_(isnull(dta[0][1]))
    assert_(dta[0][2] == asbytes(""))

    dta = genfromdta(os.path.join(curdir, "results/data_missing.dta"),
            missing_flt=-999)
    assert_(np.all([dta[0][i] == -999 for i in range(5)]))
Пример #15
0
def test_datetime_roundtrip():
    dta = np.array([(1, datetime(2010, 1, 1), 2),
                    (2, datetime(2010, 2, 1), 3),
                    (4, datetime(2010, 3, 1), 5)],
                    dtype=[('var1', float), ('var2', object), ('var3', float)])
    buf = BytesIO()

    with pytest.warns(FutureWarning):
        writer = StataWriter(buf, dta, {"var2" : "tm"})

    writer.write_file()
    buf.seek(0)

    with pytest.warns(FutureWarning):
        dta2 = genfromdta(buf)

    assert_equal(dta, dta2)

    dta = DataFrame.from_records(dta)
    buf = BytesIO()

    with pytest.warns(FutureWarning):
        writer = StataWriter(buf, dta, {"var2" : "tm"})

    writer.write_file()
    buf.seek(0)

    with pytest.warns(FutureWarning):
        dta2 = genfromdta(buf, pandas=True)

    ptesting.assert_frame_equal(dta, dta2.drop('index', axis=1))
Пример #16
0
def test_stata_writer_pandas():
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    #as of 0.9.0 pandas only supports i8 and f8
    dta = dta.astype(
        np.dtype([('year', 'i8'), ('quarter', 'i8')] + dtype.descr[2:]))
    dta4 = dta.astype(
        np.dtype([('year', 'i4'), ('quarter', 'i4')] + dtype.descr[2:]))
    dta = DataFrame.from_records(dta)
    dta4 = DataFrame.from_records(dta4)
    # dta is int64 'i8'  given to Stata writer
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    dta5 = DataFrame.from_records(dta2)
    # dta2 is int32 'i4'  returned from Stata reader

    if dta5.dtypes[1] is np.dtype('int64'):
        ptesting.assert_frame_equal(dta.reset_index(), dta5)
    else:
        # don't check index because it has different size, int32 versus int64
        ptesting.assert_frame_equal(dta4, dta5[dta5.columns[1:]])
Пример #17
0
def test_stata_writer_pandas():
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    #as of 0.9.0 pandas only supports i8 and f8
    dta = dta.astype(np.dtype([('year', 'i8'),
                               ('quarter', 'i8')] + dtype.descr[2:]))
    dta4 = dta.astype(np.dtype([('year', 'i4'),
                               ('quarter', 'i4')] + dtype.descr[2:]))
    dta = DataFrame.from_records(dta)
    dta4 = DataFrame.from_records(dta4)
    # dta is int64 'i8'  given to Stata writer
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    dta5 = DataFrame.from_records(dta2)
    # dta2 is int32 'i4'  returned from Stata reader

    if dta5.dtypes[1] is np.dtype('int64'):
        ptesting.assert_frame_equal(dta.reset_index(), dta5)
    else:
        # don't check index because it has different size, int32 versus int64
        ptesting.assert_frame_equal(dta4, dta5[dta5.columns[1:]])