示例#1
0
def test_encode_df():
    """Check that ``pbutils.df2msg`` encodes columns, labels and an index.

    The frame is loaded from ``weather.csv`` and extended with a categorical
    column and a float column before encoding. The first half encodes the
    frame with labels and the default index; the second half moves the
    ``DATE`` column into the index and encodes again without labels.
    """
    labels = {
        'int': 7,
        'str': 'wassup?',
    }

    df = pd.read_csv('{}/weather.csv'.format(here))
    df['STATION_CAT'] = df['STATION'].astype('category')
    # ``np.float`` was only an alias for the builtin ``float``; it was
    # deprecated in NumPy 1.20 and removed in 1.24, so use the builtin.
    df['WDF2_F'] = df['WDF2'].astype(float)
    msg = pbutils.df2msg(df, labels)

    names = [col.name for col in msg.columns]
    assert set(names) == set(df.columns), 'columns mismatch'
    assert not msg.indices, 'has index'
    assert pbutils.pb2py(msg.labels) == labels, 'lables mismatch'

    # Now with index
    index_name = 'DATE'
    df = df.set_index(index_name)
    msg = pbutils.df2msg(df, None)

    names = [col.name for col in msg.columns]
    assert set(names) == set(df.columns), 'columns mismatch'
    assert msg.indices, 'no index'
    assert msg.indices[0].name == index_name, 'bad index name'
示例#2
0
def test_index_cols():
    """Check that ``index_cols`` splits a frame into index and data columns.

    Two distinct columns of a random frame are chosen as index columns.
    The encoded message must list exactly those two under ``indices`` and
    all remaining columns under ``columns``.
    """
    cols = list('abcdef')
    size = 10
    df = pd.DataFrame({col: np.random.rand(size) for col in cols})

    # Sample without replacement: the default (replace=True) can return the
    # same column twice, so the test would only cover one index column.
    index_cols = np.random.choice(cols, size=2, replace=False)
    expected_cols = {col for col in cols if col not in index_cols}
    msg = pbutils.df2msg(df, index_cols=index_cols)
    assert set(col.name for col in msg.columns) == expected_cols, \
        'bad columns'
    assert set(col.name for col in msg.indices) == set(index_cols), \
        'bad indices'
示例#3
0
def test_multi_index():
    """Serialize a MultiIndex frame and check each decoded index length.

    The frame is encoded with ``pbutils.df2msg``, serialized to bytes and
    parsed back with ``fpb.Frame.FromString``. Every index column in the
    parsed message must hold one string per row of the frame.
    """
    # Same eight (first, second) pairs, in the same order, as a product.
    pairs = [
        (outer, inner)
        for outer in ('bar', 'baz', 'foo', 'qux')
        for inner in ('one', 'two')
    ]
    multi = pd.MultiIndex.from_tuples(pairs, names=['first', 'second'])
    df = pd.DataFrame(index=multi)
    df['x'] = range(len(df))

    encoded = pbutils.df2msg(df)
    msg = fpb.Frame.FromString(encoded.SerializeToString())

    for index_col in msg.indices:
        assert len(index_col.strings) == len(df), 'bad index length'
示例#4
0
    def _read(self, *args, **kw):
        """Return a stub response class whose ``raw`` stream holds the frames.

        Each frame in ``self.data`` is encoded with ``df2msg`` and written
        to an in-memory buffer as a header packed with ``http.header_fmt``
        (carrying the payload length) followed by the serialized payload.
        Positional and keyword arguments are accepted and ignored.
        """
        buf = BytesIO()
        for frame in self.data:
            payload = df2msg(frame, None).SerializeToString()
            header = struct.pack(http.header_fmt, len(payload))
            buf.write(header)
            buf.write(payload)

        # Rewind so the consumer reads from the first header.
        buf.seek(0, 0)

        class Response:
            raw = buf
            ok = True

        return Response
示例#5
0
def test_encode_df():
    """Check that ``pbutils.df2msg`` encodes columns, labels and an index.

    The first half encodes the frame read from ``weather.csv`` together
    with labels and expects no index columns. The second half moves the
    ``DATE`` column into the index and expects it to be reported as the
    first index of the message.
    """
    labels = {'int': 7, 'str': 'wassup?'}

    df = pd.read_csv('{}/weather.csv'.format(here))
    msg = pbutils.df2msg(df, labels)

    encoded_names = {col.name for col in msg.columns}
    assert encoded_names == set(df.columns), 'columns mismatch'
    assert not msg.indices, 'has index'
    decoded_labels = pbutils.pb2py(msg.labels)
    assert decoded_labels == labels, 'lables mismatch'

    # Now with index
    index_name = 'DATE'
    # pop() removes the column from the frame and returns it as a Series.
    date_values = df.pop(index_name)
    df.index = date_values
    msg = pbutils.df2msg(df, None)

    encoded_names = {col.name for col in msg.columns}
    assert encoded_names == set(df.columns), 'columns mismatch'
    assert msg.indices, 'no index'
    first_index = msg.indices[0]
    assert first_index.name == index_name, 'bad index name'