Exemplo n.º 1
0
def test_set_index_num():
    """SetIndex with ``selected={0: None}``, followed by ResetIndex.

    Checks the option round-trip, that no output shape is reported until an
    input shape has been added, and that the predicted shape and the shape of
    the executed result both equal the expected ``Shape`` at each step.
    """
    raw_columns = {
        'cowq': [1, 2, 3, 4.0, 10],
        'col2': pd.Categorical([3, 4, 5, 6, 0]),
        'col3': ['q', '2', 'c', '4', 'x'],
        'date': pd.Series(
            ['05-09-1988', '22-12-1994', '21-11-1995', '22-06-1994',
             '12-12-2012'],
            dtype='datetime64[ns]'),
    }
    source_frame = data.Frame(raw_columns)

    set_index_op = SetIndex()
    # A fresh operation starts with an empty selection.
    assert set_index_op.getOptions() == {'selected': {}}
    requested = {'selected': {0: None}}
    set_index_op.setOptions(**requested)

    # The options read back must equal the requested ones and pass the
    # isDictDeepCopy check against them.
    assert set_index_op.getOptions() == requested
    assert isDictDeepCopy(set_index_op.getOptions(), requested)

    # Without an input shape there is nothing to predict.
    assert set_index_op.getOutputShape() is None

    set_index_op.addInputShape(source_frame.shape, 0)
    indexed_shape = Shape()
    indexed_shape.colNames = ['col3', 'col2', 'date']
    indexed_shape.colTypes = [Types.String, Types.Nominal, Types.Datetime]
    indexed_shape.index = ['cowq']
    indexed_shape.indexTypes = [IndexType(Types.Numeric)]
    assert set_index_op.getOutputShape() == indexed_shape

    indexed_frame = set_index_op.execute(source_frame)
    assert indexed_frame.shape == indexed_shape

    # Second half: reset the index on the frame produced above.
    reset_op = ResetIndex()
    assert reset_op.getOutputShape() is None
    reset_op.addInputShape(indexed_frame.shape, 0)
    flat_shape = Shape()
    flat_shape.colNames = ['cowq', 'col2', 'date', 'col3']
    flat_shape.colTypes = [
        Types.Numeric, Types.Nominal, Types.Datetime, Types.String
    ]
    flat_shape.index = ['Unnamed']
    flat_shape.indexTypes = [IndexType(Types.Numeric)]
    assert reset_op.getOutputShape() == flat_shape
    flat_frame = reset_op.execute(indexed_frame)
    assert flat_frame.shape == flat_shape
Exemplo n.º 2
0
def test_fillnan_ffill():
    """FillNan in ``'ffill'`` mode on columns 0, 1 and 2 of an indexed frame.

    The frame is indexed by ``'col1'`` before the operation runs. The test
    asserts the stored options, the predicted and actual output shapes, and
    the filled values of every remaining column.
    """
    raw_columns = {
        'col1': [np.nan, 2, np.nan, 4, 10],
        'col2': pd.Categorical(['3', '4', np.nan, np.nan, '0'], ordered=True),
        'col3': ['q', '2', 'c', np.nan, np.nan],
        'date': pd.Series(
            ['05-09-1988', np.nan, np.nan, '22-06-1994', '12-12-2012'],
            dtype='datetime64[ns]'),
    }
    indexed_frame = data.Frame(raw_columns).setIndex('col1')

    fill_op = FillNan()
    assert fill_op.getOutputShape() is None
    fill_op.addInputShape(indexed_frame.shape, 0)
    fill_op.setOptions(selected={0: None, 1: None, 2: None}, fillMode='ffill')
    assert fill_op.getOptions() == {
        'selected': {0: None, 1: None, 2: None},
        'fillMode': 'ffill'
    }

    # The expected shape is the same as the indexed input's.
    expected_shape = Shape()
    expected_shape.colNames = ['col3', 'col2', 'date']
    expected_shape.colTypes = [Types.String, Types.Ordinal, Types.Datetime]
    expected_shape.index = ['col1']
    expected_shape.indexTypes = [IndexType(Types.Numeric)]
    assert fill_op.getOutputShape() == expected_shape

    filled_frame = fill_op.execute(indexed_frame)
    assert filled_frame.shape == expected_shape

    # Every missing date in the fixture is expected to become '1988-05-09';
    # present dates are rendered as YYYY-MM-DD.
    expected_dates = [
        '1988-05-09' if pd.isna(stamp) else stamp.strftime(format='%Y-%m-%d')
        for stamp in raw_columns['date']
    ]
    actual = mapDate(
        roundValues(nan_to_None(filled_frame.to_dict()), decimals=3))
    assert actual == {
        'col3': ['q', '2', 'c', 'c', 'c'],
        'col2': ['3', '4', '4', '4', '0'],
        'date': expected_dates
    }
Exemplo n.º 3
0
def test_fromShape_categories():
    """``Frame.fromShape`` with a three-level index that includes categoricals.

    A frame indexed by ``col2``, ``col3`` and ``col1`` is rebuilt from its own
    shape; the rebuilt shape, the hand-written expected shape and the source
    shape must all compare equal.
    """
    raw_columns = {
        'col1': [1, 2, 3, 4.0, 10],
        'col2': pd.Categorical([3, 4, 5, 6, 0]),
        'col3': pd.Categorical(['q', '2', 'c', '4', 'x'], ordered=True),
        'cold': pd.Series(
            ['05-09-1988', '22-12-1994', '21-11-1995', '22-06-1994',
             '12-12-2012'],
            dtype='datetime64[ns]'),
    }
    source = Frame(raw_columns).setIndex(['col2', 'col3', 'col1'])

    rebuilt = Frame.fromShape(source.shape)

    # fromShape is expected to keep the index levels, so they appear in the
    # expected shape alongside the single remaining column.
    expected = Shape()
    expected.colNames = ['cold']
    expected.colTypes = [Types.Datetime]
    expected.index = ['col3', 'col1', 'col2']
    expected.indexTypes = [
        IndexType(Types.Ordinal),
        IndexType(Types.Numeric),
        IndexType(Types.Nominal)
    ]
    assert rebuilt.shape == expected
    assert expected == source.shape
Exemplo n.º 4
0
def test_set_index_string():
    """SetIndex with ``selected={2: None}``, followed by ResetIndex.

    The expected result after SetIndex has ``'col3'`` as a string-typed
    index; ResetIndex is then expected to restore it as a column under an
    ``'Unnamed'`` numeric index.
    """
    raw_columns = {
        'cowq': [1, 2, 3, 4.0, 10],
        'col2': pd.Categorical(['3', 4, 5, 6, 0]),
        'col3': ['q', '2', 'c', '4', 'x'],
        'date': pd.Series(
            ['05-09-1988', '22-12-1994', '21-11-1995', '22-06-1994',
             '12-12-2012'],
            dtype='datetime64[ns]'),
    }
    source_frame = data.Frame(raw_columns)

    set_index_op = SetIndex()
    set_index_op.setOptions(selected={2: None})

    # Options alone are not enough: no input shape, no output shape.
    assert set_index_op.getOutputShape() is None

    set_index_op.addInputShape(source_frame.shape, 0)
    indexed_shape = Shape()
    indexed_shape.colNames = ['cowq', 'col2', 'date']
    indexed_shape.colTypes = [Types.Numeric, Types.Nominal, Types.Datetime]
    indexed_shape.index = ['col3']
    indexed_shape.indexTypes = [IndexType(Types.String)]
    assert set_index_op.getOutputShape() == indexed_shape

    indexed_frame = set_index_op.execute(source_frame)
    assert indexed_frame.shape == indexed_shape

    # Second half: reset the index on the frame produced above.
    reset_op = ResetIndex()
    assert reset_op.getOutputShape() is None
    reset_op.addInputShape(indexed_frame.shape, 0)
    flat_shape = Shape()
    flat_shape.colNames = ['cowq', 'col2', 'date', 'col3']
    flat_shape.colTypes = [
        Types.Numeric, Types.Nominal, Types.Datetime, Types.String
    ]
    flat_shape.index = ['Unnamed']
    flat_shape.indexTypes = [IndexType(Types.Numeric)]
    assert reset_op.getOutputShape() == flat_shape
    flat_frame = reset_op.execute(indexed_frame)
    assert flat_frame.shape == flat_shape
Exemplo n.º 5
0
def test_shape():
    """Shape and row count of a frame built from three plain list columns."""
    raw_columns = {
        'col1': [1, 2, 3, 4, 10],
        'col2': [3, 4, 5, 6, 0],
        'col3': ['q', '2', 'c', '4', 'x'],
    }
    frame = Frame(raw_columns)

    # No index was set, so the expected index is 'Unnamed' and numeric.
    expected = Shape()
    expected.index = ['Unnamed']
    expected.indexTypes = [IndexType(Types.Numeric)]
    expected.colNames = ['col1', 'col2', 'col3']
    expected.colTypes = [Types.Numeric, Types.Numeric, Types.String]

    assert frame.shape == expected
    assert frame.nRows == 5
Exemplo n.º 6
0
def test_fillnan_byVal_date_num():
    """FillNan in ``'value'`` mode on a datetime and a numeric column.

    First asserts that a selection flagged as containing wrong fill values
    raises ``OptionValidationError``. Then sets fill values for columns 2
    and 3 only and checks the options, the shapes and the resulting data.
    """
    raw_columns = {
        'col1': [np.nan, 2, np.nan, 4, 10],
        'col2': pd.Categorical(['3', '4', np.nan, np.nan, '0'], ordered=True),
        'col3': ['q', '2', 'c', np.nan, np.nan],
        'date': pd.Series(
            ['05-09-1988', np.nan, np.nan, '22-06-1994', '12-12-2012'],
            dtype='datetime64[ns]'),
        'col4': [np.nan, 2, np.nan, 4, 10],
    }
    indexed_frame = data.Frame(raw_columns).setIndex('col1')

    fill_op = FillNan()
    assert fill_op.getOutputShape() is None
    fill_op.addInputShape(indexed_frame.shape, 0)

    rejected_selection = {
        0: {'fill': 'pol'},
        1: {'fill': '23'},  # flagged as wrong
        2: {'fill': '1966-04-02 00:00:30'},
        3: {'fill': 'march'},  # flagged as wrong
    }
    with pytest.raises(OptionValidationError):
        fill_op.setOptions(selected=rejected_selection, fillMode='value')

    fill_op.setOptions(
        selected={
            2: {'fill': '1966-04-02 00:00:30'},
            3: {'fill': '0.9'},
        },
        fillMode='value')

    assert fill_op.getOptions() == {
        'selected': {
            2: {'fill': '1966-04-02 00:00:30'},
            3: {'fill': '0.9'},
        },
        'fillMode': 'value'
    }

    # The expected shape is the same as the indexed input's.
    expected_shape = Shape()
    expected_shape.colNames = ['col3', 'col2', 'date', 'col4']
    expected_shape.colTypes = [
        Types.String, Types.Ordinal, Types.Datetime, Types.Numeric
    ]
    expected_shape.index = ['col1']
    expected_shape.indexTypes = [IndexType(Types.Numeric)]
    assert fill_op.getOutputShape() == expected_shape

    filled_frame = fill_op.execute(indexed_frame)
    assert filled_frame.shape == expected_shape

    # Missing dates are expected to take the date part of the fill value;
    # present dates are rendered as YYYY-MM-DD.
    expected_dates = [
        '1966-04-02' if pd.isna(stamp) else stamp.strftime(format='%Y-%m-%d')
        for stamp in raw_columns['date']
    ]
    actual = mapDate(
        roundValues(nan_to_None(filled_frame.to_dict()), decimals=3))
    # 'col3' and 'col2' were not selected, so their gaps are expected to stay.
    assert actual == {
        'col3': ['q', '2', 'c', None, None],
        'col2': ['3', '4', None, None, '0'],
        'date': expected_dates,
        'col4': [0.9, 2.0, 0.9, 4.0, 10.0]
    }
Exemplo n.º 7
0
def test_shape_index():
    """Shape and row count of a frame whose raw frame was indexed by 'col3'."""
    raw_columns = {
        'col1': [1, 2, 3, 4.0, 10],
        'col2': [3, 4, 5, 6, 0],
        'col3': ['q', '2', 'c', '4', 'x'],
    }
    plain = Frame(raw_columns)
    # The index is set on the underlying raw frame, which is then re-wrapped.
    reindexed_raw = plain.getRawFrame().set_index('col3')
    frame = Frame(reindexed_raw)

    # Expected: 'col3' is the string-typed index, two numeric columns remain.
    expected = Shape()
    expected.index = ['col3']
    expected.indexTypes = [IndexType(Types.String)]
    expected.colNames = ['col1', 'col2']
    expected.colTypes = [Types.Numeric, Types.Numeric]

    assert frame.shape == expected
    assert frame.nRows == 5
Exemplo n.º 8
0
def test_cloneShape():
    """Mutating a clone of a ``Shape`` must leave the source shape untouched.

    The column and index dictionaries of the source are captured before
    cloning, the clone's lists are modified in place, and the source's
    dictionaries are then compared against the captured ones.
    """
    original = Shape()
    original.colNames = ['cold']
    original.colTypes = [Types.Datetime]
    original.index = ['col3', 'col1', 'col2']
    original.indexTypes = [
        IndexType(Types.Ordinal),
        IndexType(Types.Numeric),
        IndexType(Types.Nominal)
    ]
    columns_before = original.columnsDict
    index_before = original.indexDict

    duplicate = original.clone()
    # Change every list held by the clone: names and types, index and columns.
    duplicate.index.append('col4')
    duplicate.indexTypes.append(IndexType(Types.Numeric))
    duplicate.colTypes[0] = Types.Ordinal
    duplicate.colNames[0] = 'col_new'

    assert duplicate != original
    assert original.columnsDict == {'cold': Types.Datetime}
    assert duplicate.columnsDict == {'col_new': Types.Ordinal}
    assert original.columnsDict == columns_before
    assert original.indexDict == index_before
Exemplo n.º 9
0
def test_fromShape_single_index():
    """``Frame.fromShape`` applied to a frame indexed by the single 'col1'.

    The rebuilt shape, the hand-written expected shape and the source shape
    must all compare equal.
    """
    raw_columns = {
        'col1': [1, 2, 3, 4.0, 10],
        'col2': [3, 4, 5, 6, 0],
        'col3': ['q', '2', 'c', '4', 'x'],
        'cold': pd.Series(
            ['05-09-1988', '22-12-1994', '21-11-1995', '22-06-1994',
             '12-12-2012'],
            dtype='datetime64[ns]'),
    }
    source = Frame(raw_columns).setIndex('col1')

    rebuilt = Frame.fromShape(source.shape)

    # fromShape is expected to keep the index, so 'col1' is listed as index
    # rather than as a column.
    expected = Shape()
    expected.colNames = ['cold', 'col2', 'col3']
    expected.colTypes = [Types.Datetime, Types.Numeric, Types.String]
    expected.index = ['col1']
    expected.indexTypes = [IndexType(Types.Numeric)]
    assert rebuilt.shape == expected
    assert expected == source.shape