Exemplo n.º 1
0
 def test_flag_null(self):
     """Expect 'flag null' for a column that is almost entirely null.

     The sample has 18 missing entries followed by 2 populated rows.
     """
     values = [None] * 18 + [1] * 2
     series = pd.Series(values)
     col_types, disqualified = ops.col_type(series)
     self.assertHasColType('flag null', col_types, disqualified)
Exemplo n.º 2
0
 def test_num_accounting(self):
     """Expect 'num accounting' for values shaped like ledger totals.

     The sample mixes repeated zeros with round amounts that span
     several orders of magnitude.
     """
     amounts = [
         0, 0, 0, 100, 110, 1000, 1_000_000,
         50_000, 0, 7000,
     ]
     series = pd.Series(amounts)
     col_types, disqualified = ops.col_type(series)
     self.assertHasColType('num accounting', col_types, disqualified)
Exemplo n.º 3
0
 def test_date_reg(self):
     """Expect the regular-date label for evenly stepped dates.

     The sample is the first day of each month, January through
     June 2020.
     """
     first_of_month = [datetime.date(2020, month, 1)
                       for month in range(1, 7)]
     series = pd.Series(first_of_month)
     col_types, disqualified = ops.col_type(series)
     expected_label = ops.DateRegularColumn.label
     self.assertHasColType(expected_label, col_types, disqualified)
Exemplo n.º 4
0
 def test_text(self):
     """Detect a column with free-form text.

     The sample holds a short comment that appears twice, one longer
     comment, a missing value (None) and an empty string.
     """
     col = pd.Series([
         'this is a comment',
         None,
         'this is a comment',
         'this is a much longer comment that contains more words',
         '',
     ])
     # Unpack the result of ops.col_type into (cts, disquals), the same
     # way the other column-type tests in this suite consume it, rather
     # than treating the whole return value as the list of column types.
     (cts, disquals) = ops.col_type(col)
     # NOTE(review): switch to assertOnlyColType if 'text' must be the
     # sole detected type for this sample -- confirm intended strictness.
     self.assertHasColType('text', cts, disquals)
Exemplo n.º 5
0
 def test_date_irreg(self):
     """Expect the regular-date label to be absent for uneven dates.

     The five sample dates in 2020 are separated by gaps of very
     different lengths.
     """
     uneven_dates = [
         datetime.date(2020, 1, 15),
         datetime.date(2020, 1, 27),
         datetime.date(2020, 2, 1),
         datetime.date(2020, 9, 30),
         datetime.date(2020, 10, 31),
     ]
     series = pd.Series(uneven_dates)
     col_types, disqualified = ops.col_type(series)
     unexpected_label = ops.DateRegularColumn.label
     self.assertNotColType(unexpected_label, col_types, disqualified)
Exemplo n.º 6
0
 def test_flag(self):
     """Expect 'flag' for a two-valued column with one rare value.

     The sample is 17 rows of 'Y' followed by 3 rows of 'N'.
     """
     values = ['Y'] * 17 + ['N'] * 3
     series = pd.Series(values)
     col_types, disqualified = ops.col_type(series)
     self.assertHasColType('flag', col_types, disqualified)
Exemplo n.º 7
0
 def test_num_long_tail(self):
     """Expect 'num long tail' for a numeric column skewed to small values.

     Counts per value shrink as the value grows, ending with the
     isolated values 8 and 10.
     """
     samples = [
         1, 1, 1, 1, 1,
         2, 2, 2, 2,
         3, 3, 3,
         4, 4,
         5, 5,
         6, 6,
         8, 10,
     ]
     series = pd.Series(samples)
     col_types, disqualified = ops.col_type(series)
     self.assertHasColType('num long tail', col_types, disqualified)
Exemplo n.º 8
0
 def test_num_normal(self):
     """Expect 'num normal' for a roughly bell-shaped numeric column.

     The sample peaks at 5 and thins out towards both ends, with a
     single larger value of 11.
     """
     samples = [
         1, 2, 3, 3,
         4, 4, 4, 4,
         5, 5, 5, 5, 5, 5, 5, 5,
         6, 6, 6, 6,
         7, 7, 8, 11,
     ]
     series = pd.Series(samples)
     col_types, disqualified = ops.col_type(series)
     self.assertHasColType('num normal', col_types, disqualified)
Exemplo n.º 9
0
 def test_categorical_num(self):
     """Expect 'categorical' for a numeric column with few distinct values.

     The sample is three rows of 10 followed by one row of 20.
     """
     values = [10] * 3 + [20]
     series = pd.Series(values)
     col_types, disqualified = ops.col_type(series)
     self.assertHasColType('categorical', col_types, disqualified)
Exemplo n.º 10
0
 def test_categorical_alpha(self):
     """Expect only 'categorical' for a string column with few values.

     The sample is three rows of 'cat' followed by one row of 'dog'.
     """
     values = ['cat'] * 3 + ['dog']
     series = pd.Series(values)
     col_types, disqualified = ops.col_type(series)
     self.assertOnlyColType('categorical', col_types, disqualified)
Exemplo n.º 11
0
 def test_id(self):
     """Expect 'id' for a column of distinct consecutive integers.

     The sample is the integers 1 through 5, each appearing once.
     """
     identifiers = [1, 2, 3, 4, 5]
     series = pd.Series(identifiers)
     col_types, disqualified = ops.col_type(series)
     self.assertHasColType('id', col_types, disqualified)