Exemplo n.º 1
1
class TestDora(unittest.TestCase):
  """Unit tests for the ``Dora`` data-preparation helper.

  Every test starts from a fresh ``Dora`` configured with output column
  ``'A'`` and the ``./spec_data.csv`` fixture (resolved relative to the
  current working directory). The tests reference the fixture columns
  ``A``, ``B``, ``C``, ``D`` and ``useless_feature``; the literal expected
  values below depend on that file's contents.
  """

  def setUp(self):
    """Create and configure a fresh ``Dora`` instance for each test."""
    self.dora = Dora()
    self.dora.configure(output='A', data='./spec_data.csv')

  def test_configure(self):
    """``configure`` records the output name and exposes the CSV as data."""
    data = pd.read_csv('./spec_data.csv')
    self.assertEqual(self.dora.output, 'A')
    self.assertTrue(self.dora.data.equals(data))

  def test_remove_feature(self):
    """``remove_feature`` drops the named column from the data."""
    self.dora.remove_feature('useless_feature')
    self.assertNotIn('useless_feature', self.dora.data.columns)

  def test_extract_feature(self):
    """``extract_feature`` adds a new column derived from an existing one."""
    self.dora.extract_feature(
      'useless_feature',
      'another_useless_feature',
      lambda x: x * 2
    )

    actual_column = list(self.dora.data['another_useless_feature'])
    expected_column = [2, 2, 2]
    self.assertEqual(actual_column, expected_column)

  def test_impute_missing_values(self):
    """``impute_missing_values`` fills the gap in column ``B``."""
    # NOTE(review): 'D' is removed first, presumably because it is not
    # numeric -- confirm against the fixture.
    del self.dora.data['D']
    self.dora.impute_missing_values()

    actual_column = list(self.dora.data['B'])
    expected_column = [2.0, 5.0, 8.0]
    self.assertEqual(actual_column, expected_column)

  def test_scale_input_values(self):
    """``scale_input_values`` rescales column ``C`` to the expected values."""
    del self.dora.data['D'], self.dora.data['B']
    self.dora.scale_input_values()

    actual_column = list(self.dora.data['C'])
    expected_column = [-1.224745, 0.0, 1.224745]
    # Pairing stops at the shorter sequence, so a truncated or empty result
    # column would otherwise produce a zero difference and pass silently.
    self.assertEqual(len(actual_column), len(expected_column))
    total_diff = sum(
      abs(actual - expected)
      for actual, expected in zip(actual_column, expected_column)
    )
    self.assertAlmostEqual(total_diff, 0, places=6)

  def test_extract_ordinal_feature(self):
    """``extract_ordinal_feature`` adds ``D=left`` and ``D=right`` columns."""
    self.dora.extract_ordinal_feature('D')
    features = self.dora.data.columns
    # Separate assertions so a failure names the column that is missing.
    self.assertIn('D=left', features)
    self.assertIn('D=right', features)

  def test_input_columns(self):
    """``input_columns`` yields every data column except the output."""
    actual_input_columns = list(self.dora.input_columns())
    expected_input_columns = list(self.dora.data.columns)
    expected_input_columns.remove(self.dora.output)
    self.assertEqual(actual_input_columns, expected_input_columns)

  def test_logs(self):
    """Each transformation call is recorded, in order, in ``logs``."""
    self.dora.extract_ordinal_feature('D')
    self.dora.impute_missing_values()
    self.dora.scale_input_values()

    actual_logs = self.dora.logs
    expected_logs = [
      "self.extract_ordinal_feature('D')",
      'self.impute_missing_values()',
      'self.scale_input_values()',
    ]
    self.assertEqual(actual_logs, expected_logs)

  def test_snapshots(self):
    """``use_snapshot`` restores the data and logs saved by ``snapshot``."""
    self.dora.snapshot('start')
    self.dora.extract_ordinal_feature('D')
    self.dora.use_snapshot('start')

    self.assertEqual(self.dora.logs, [])
    self.assertTrue(self.dora.data.equals(self.dora.initial_data))
Exemplo n.º 2
0
 def setUp(self):
   """Create a fresh ``Dora`` and point it at the CSV fixture.

   The instance is stored on ``self.dora`` and configured with output
   column ``'A'`` and data file ``./spec_data.csv``.
   """
   # Bind the instance to the test case first, then configure it via a
   # short local alias.
   dora = self.dora = Dora()
   dora.configure(output = 'A', data = './spec_data.csv')