def test_array_td(self):
    """Two ``timedelta64[Y]`` arrays are held in the 'm' block as nanoseconds."""
    years_a = np.array([10, 20, 30], dtype='timedelta64[Y]')
    years_b = np.array([1, 2, 3], dtype='timedelta64[Y]')
    expected = np.column_stack((years_a, years_b)).astype('timedelta64[ns]')

    frame = dx.DataFrame({'a': years_a, 'b': years_b})

    assert_array_equal(expected, frame._data['m'])
    # Each column's info tuple is expected to be (block key, index, index).
    for position, name in enumerate(('a', 'b')):
        assert frame._column_info[name].values == ('m', position, position)
def test_array_bool(self):
    """Boolean arrays given in a dict are held in the 'b' block as int8."""
    flags_a = np.array([True, False, True])
    flags_b = np.array([False, False, False])
    expected = np.column_stack((flags_a, flags_b)).astype('int8')

    frame = dx.DataFrame({'a': flags_a, 'b': flags_b})

    assert_array_equal(expected, frame._data['b'])
    for position, name in enumerate(('a', 'b')):
        assert frame._column_info[name].values == ('b', position, position)
def test_array_dt(self):
    """Two ``datetime64[ns]`` arrays are stored side by side in the 'M' block."""
    stamps_a = np.array([10, 20, 30], dtype='datetime64[ns]')
    stamps_b = np.array([100, 200, 300], dtype='datetime64[ns]')
    expected = np.column_stack((stamps_a, stamps_b))

    frame = dx.DataFrame({'a': stamps_a, 'b': stamps_b})

    assert_array_equal(expected, frame._data['M'])
    for position, name in enumerate(('a', 'b')):
        assert frame._column_info[name].values == ('M', position, position)
def test_array_string(self):
    """String arrays end up in the 'S' block as uint32 values.

    The expected block is ``[[1, 1], [2, 2]]``: within each column the
    first string maps to 1 and the second to 2.
    """
    words_a = np.array(['asdf', 'wer'])
    words_b = np.array(['wyw', 'xcvd'])

    frame = dx.DataFrame({'a': words_a, 'b': words_b})

    expected_codes = array([[1, 1], [2, 2]], dtype='uint32')
    assert_array_equal(expected_codes, frame._data['S'])
    for position, name in enumerate(('a', 'b')):
        assert frame._column_info[name].values == ('S', position, position)
def test_array_float(self):
    """Float arrays given in a dict are stored column-wise in the 'f' block."""
    floats_a = np.array([1.1, 2, 3])
    floats_b = np.array([10, 20.2, 30])
    expected = np.column_stack((floats_a, floats_b))

    frame = dx.DataFrame({'a': floats_a, 'b': floats_b})

    assert_array_equal(expected, frame._data['f'])
    for position, name in enumerate(('a', 'b')):
        assert frame._column_info[name].values == ('f', position, position)
def test_to_dict(self):
    """Check ``self.df1.to_dict`` for the 'array' and 'list' orientations.

    'array' is expected to map each column name to a NumPy array, and
    'list' to map each column name to a plain Python list.
    """
    as_arrays = self.df1.to_dict('array')
    expected_arrays = {
        'a': np.array([1, 5, 7, 11]),
        'b': np.array([nan, 5.4, -1.1, .045]),
    }
    # Compare the key sets first: iterating only over the returned dict
    # would pass vacuously if a column were missing or the dict were empty.
    assert set(as_arrays) == set(expected_arrays)
    for key, expected in expected_arrays.items():
        assert_array_equal(as_arrays[key], expected)

    as_lists = self.df1.to_dict('list')
    expected_lists = {'a': [1, 5, 7, 11], 'b': [nan, 5.4, -1.1, .045]}
    assert_dict_list(as_lists, expected_lists)
def test_streak_group(self):
    """``streak(..., group=True)`` labels each run of equal values.

    The expected labels increase by one whenever the value changes; in
    the float column every NaN is expected to get its own label.
    """
    flights = de.DataFrame({
        'AIRLINE': ['AA', 'AA', 'AA', 'UA', 'DL', 'DL',
                    'WN', 'WN', 'AA', 'AA', None],
        'DAY_OF_WEEK': [2, 3, 6, 6, 6, 6, 4, 4, 1, 6, 6],
        'DEPARTURE_DELAY': [nan, nan, -1.0, -1.0, -1.0, 22.0,
                            3.0, 3.0, 21.0, -2.0, nan],
    })

    airline_groups = flights.streak('AIRLINE', group=True)
    assert_array_equal(airline_groups,
                       array([1, 1, 1, 2, 3, 3, 4, 4, 5, 5, 6]))

    delay_groups = flights.streak('DEPARTURE_DELAY', group=True)
    assert_array_equal(delay_groups,
                       array([1, 2, 3, 3, 3, 4, 5, 5, 6, 7, 8]))
def test_streak_value(self):
    """``streak(column, value)`` counts consecutive hits of ``value``.

    Rows that do not equal ``value`` are expected to be 0.  Passing a
    string value for the float column must raise ``TypeError``.
    """
    flights = de.DataFrame({
        'AIRLINE': ['AA', 'AA', 'AA', 'UA', 'DL', 'DL',
                    'WN', 'WN', 'WN', 'AS', None],
        'DAY_OF_WEEK': [2, 3, 6, 6, 6, 6, 4, 4, 1, 2, 2],
        'DEPARTURE_DELAY': [nan, nan, -1.0, -1.0, -1.0, 22.0,
                            3.0, 3.0, 21.0, -2.0, nan],
    })

    with pytest.raises(TypeError):
        flights.streak('DEPARTURE_DELAY', 'AA')

    delay_hits = flights.streak('DEPARTURE_DELAY', -1)
    assert_array_equal(delay_hits,
                       array([0, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0]))

    weekday_hits = flights.streak('DAY_OF_WEEK', 6)
    assert_array_equal(weekday_hits,
                       array([0, 0, 1, 2, 3, 4, 0, 0, 0, 0, 0]))
def test_get_values(self):
    """``.values`` returns the frame contents as a single 2-D array.

    Covers ``self.df1`` (numeric), a frame built from a random 100x5
    float array, and ``self.df2`` (compared against an object array).
    """
    # Numeric fixture: the expected array is written row-per-column,
    # then transposed.
    actual = self.df1.values
    expected = np.array([[1, 5, 7, 11],
                         [nan, 5.4, -1.1, .045]]).T
    assert_array_equal(actual, expected)

    # A frame built from a 2-D float array should hand the same data back.
    raw = np.random.rand(100, 5)
    frame = dx.DataFrame(raw)
    assert_array_equal(frame.values, raw)

    # Mixed fixture with a string column, compared as an object array.
    actual = self.df2.values
    expected = np.array(
        [[1, 5, 7, 11],
         [nan, 5.4, -1.1, .045],
         ['ted', 'fred', 'ted', 'fred']],
        dtype='O',
    ).T
    assert_array_equal(actual, expected)
def test_streak(self):
    """``streak(column)`` gives the running length of the current run.

    The counter is expected to restart at 1 whenever the value changes;
    in the float column consecutive NaNs are each expected to be 1.
    """
    flights = de.DataFrame({
        'AIRLINE': ['AA', 'AA', 'AA', 'UA', 'DL', 'DL',
                    'WN', 'WN', 'WN', 'AS', None],
        'DAY_OF_WEEK': [2, 3, 6, 6, 6, 6, 4, 4, 1, 2, 2],
        'DEPARTURE_DELAY': [nan, nan, -1.0, -1.0, -1.0, 22.0,
                            3.0, 3.0, 21.0, -2.0, nan],
    })

    expectations = (
        ('AIRLINE', [1, 2, 3, 1, 1, 2, 1, 2, 3, 1, 1]),
        ('DAY_OF_WEEK', [1, 1, 1, 2, 3, 4, 1, 2, 1, 1, 2]),
        ('DEPARTURE_DELAY', [1, 1, 1, 2, 3, 1, 1, 2, 1, 1, 1]),
    )
    for column, run_lengths in expectations:
        result = flights.streak(column)
        assert_array_equal(result, array(run_lengths))
def test_single_list_string(self):
    """A single list of strings is stored in the 'S' block as uint32 values."""
    letters = np.array(['a', 'b']).tolist()
    frame = dx.DataFrame({'a': letters})

    expected_codes = array([1, 2], dtype='uint32')
    stored = frame._data['S'][:, 0]
    assert_array_equal(expected_codes, stored)
    assert frame._column_info['a'].values == ('S', 0, 0)
def test_all(self):
    """``df_mix`` stores one column per dtype block, in declaration order.

    The module-level inputs ``a``, ``b``, ``d``, ``e`` and ``f`` are
    compared against the first column of the 'i', 'f', 'b', 'M' and 'm'
    blocks respectively; the 'S' block is compared against the values 1..8.
    """
    blocks = df_mix._data
    assert_array_equal(np.array(a), blocks['i'][:, 0])
    assert_array_equal(np.array(b), blocks['f'][:, 0])

    string_codes = array([1, 2, 3, 4, 5, 6, 7, 8], dtype='uint32')
    assert_array_equal(string_codes, blocks['S'][:, 0])

    assert_array_equal(np.array(d).astype('int8'), blocks['b'][:, 0])
    assert_array_equal(np.array(e, dtype='datetime64[ns]'),
                       blocks['M'][:, 0])
    assert_array_equal(np.array(f, dtype='timedelta64[ns]'),
                       blocks['m'][:, 0])

    # Every column sits at index 0 of its own block; the last field
    # follows the column order a..f.
    expected_kinds = (('a', 'i'), ('b', 'f'), ('c', 'S'),
                      ('d', 'b'), ('e', 'M'), ('f', 'm'))
    for order, (col_name, kind) in enumerate(expected_kinds):
        assert df_mix._column_info[col_name].values == (kind, 0, order)
def test_single_array_bool(self):
    """A bare boolean array becomes column 'a0' in the 'b' block as int8."""
    flags = np.array([True, False])
    frame = dx.DataFrame(flags)

    stored = frame._data['b'][:, 0]
    assert_array_equal(flags.astype('int8'), stored)
    assert frame._column_info['a0'].values == ('b', 0, 0)
def test_single_array_float(self):
    """A bare float array becomes column 'a0' in the 'f' block."""
    numbers = np.array([1, 2.5, 3.2])
    frame = dx.DataFrame(numbers)

    stored = frame._data['f'][:, 0]
    assert_array_equal(numbers, stored)
    assert frame._column_info['a0'].values == ('f', 0, 0)
def test_single_array_int(self):
    """A single integer array in a dict is stored in the 'i' block."""
    numbers = np.array([1, 2, 3])
    frame = dx.DataFrame({'a': numbers})

    stored = frame._data['i'][:, 0]
    assert_array_equal(numbers, stored)
    assert frame._column_info['a'].values == ('i', 0, 0)
def test_all(self):
    """``df_mix2`` stores one auto-named column per dtype block.

    The module-level arrays ``a1``, ``b1``, ``d1``, ``e1`` and ``f1`` are
    compared against the first column of the 'i', 'f', 'b', 'M' and 'm'
    blocks; the 'S' block is compared against the values 1..8.
    """
    blocks = df_mix2._data
    assert_array_equal(a1, blocks['i'][:, 0])
    assert_array_equal(b1, blocks['f'][:, 0])

    string_codes = array([1, 2, 3, 4, 5, 6, 7, 8], dtype='uint32')
    assert_array_equal(string_codes, blocks['S'][:, 0])

    assert_array_equal(d1.astype('int8'), blocks['b'][:, 0])
    assert_array_equal(e1, blocks['M'][:, 0])
    assert_array_equal(f1, blocks['m'][:, 0])

    # Columns are named a0..a5; each sits at index 0 of its own block.
    for order, kind in enumerate(('i', 'f', 'S', 'b', 'M', 'm')):
        col_name = 'a' + str(order)
        assert df_mix2._column_info[col_name].values == (kind, 0, order)
def test_single_array_dt(self):
    """A single ``datetime64[ns]`` array is stored unchanged in the 'M' block."""
    stamps = np.array([10, 20, 30], dtype='datetime64[ns]')
    frame = dx.DataFrame({'a': stamps})

    stored = frame._data['M'][:, 0]
    assert_array_equal(stamps, stored)
    assert frame._column_info['a'].values == ('M', 0, 0)
def test_single_array_td(self):
    """A single ``timedelta64[Y]`` array is stored in the 'm' block as ns."""
    spans = np.array([10, 20, 30], dtype='timedelta64[Y]')
    frame = dx.DataFrame({'a': spans})

    stored = frame._data['m'][:, 0]
    assert_array_equal(spans.astype('timedelta64[ns]'), stored)
    assert frame._column_info['a'].values == ('m', 0, 0)
def test_factorize(self):
    """``factorize(column)`` returns a (codes, uniques) pair.

    Checked for the int, float (with NaN), string and boolean columns.
    The expected uniques are listed in order of first appearance, and the
    expected codes index into them.
    """
    frame = de.DataFrame({
        'a': [9, 10, 9, 9, 10],
        'b': [0, nan, nan, 0, 1],
        'c': [''] + list('eeaz'),
        'd': [False, False, True, False, True],
        'e': [0, 20, 30, 4, 4],
        'f': ['a', nan, 'ad', None, 'ad'],
        'g': [np.nan] * 5,
    })

    # (column, expected codes, expected uniques)
    cases = (
        ('a', array([0, 1, 0, 0, 1]), array([9, 10])),
        ('b', array([0, 1, 1, 0, 2]), array([0., nan, 1.])),
        ('c', array([0, 1, 1, 2, 3]),
         array(['', 'e', 'a', 'z'], dtype=object)),
        ('d', array([0, 0, 1, 0, 1]), array([False, True])),
    )
    for column, expected_codes, expected_uniques in cases:
        codes, uniques = frame.factorize(column)
        assert_array_equal(codes, expected_codes)
        assert_array_equal(uniques, expected_uniques)
def test_single_list_td(self):
    """A list of ``np.timedelta64`` nanosecond scalars goes to the 'm' block."""
    spans = [np.timedelta64(amount, 'ns') for amount in (10, 20, 30)]
    frame = dx.DataFrame({'a': spans})

    stored = frame._data['m'][:, 0]
    assert_array_equal(np.array(spans), stored)
    assert frame._column_info['a'].values == ('m', 0, 0)