def setUp(self):
        """Build the fixtures: one task, a list of tasks and a Tasks instance."""
        self.names = ['a', 'b', 'c', 'd']
        self.dtypes = ['str', 'int', 'float', 'bool']

        # The single task is built from the first dtype/name pair.
        first_dtype, first_name = self.dtypes[0], self.names[0]
        self.task = task_factory(first_dtype, first_name)

        # One task per (dtype, name) pair, in list order.
        self.tasks = [
            task_factory(dtype, name)
            for dtype, name in zip(self.dtypes, self.names)
        ]

        self.instance = Tasks(self.tasks)
示例#2
0
    def extract_tasks_from_df(df, instructions=None):
        """
        Extract tasks from dataframe by inferring task kind from dtypes.
        Instructions may be passed as a separate list.

        Each column of ``df`` yields one task: the column label is the task
        name, and the task kind is taken from the first ``REGISTRY`` entry
        whose ``dtype`` equals the name of the column's dtype. For the
        'category' kind the categories of the dtype are passed on as well.

        Args:
            df: dataframe whose columns describe the tasks.
            instructions: optional sequence of instructions, aligned with
                ``df.columns``.

        Returns:
            A list with one ``task_factory`` result per column, in column
            order.

        Raises:
            ValueError: if the dtype of a column matches no ``REGISTRY``
                entry.

        Limitations:
        - The str and regex tasks have the same dtype (object).
        - When introducing NA the dtype may have been promoted.
            This happens to boolean tasks that are nullable.
            The incorrect inference will be made that the task is str.
            See: https://pandas.pydata.org/pandas-docs/stable/user_guide/gotchas.html#na-type-promotions
        """

        args = []
        # NOTE(review): with instructions=None the Series holds only missing
        # values, so `instruction` is presumably passed on as NaN rather
        # than None -- confirm that task_factory accepts that.
        instructions = pd.Series(instructions, index=df.columns)
        tasks = pd.concat([df.dtypes, instructions], axis=1)
        tasks.columns = ['dtype', 'instruction']
        for task in tasks.itertuples():
            kwargs = {}
            name = task.Index
            for item in REGISTRY.values():
                if item.dtype == task.dtype.name:
                    kind = item.kind
                    if kind == 'category':
                        kwargs['categories'] = task.dtype.categories
                    break
            else:
                # Without this guard `kind` would be unbound for the first
                # column, or silently reuse the kind of the previous column.
                raise ValueError(
                    f"Cannot infer a task kind for column '{name}': "
                    f"no registered task has dtype '{task.dtype.name}'."
                )
            kwargs['instruction'] = task.instruction
            args.append((kind, name, kwargs))
        return [
            task_factory(kind, name, **kwargs) for kind, name, kwargs in args
        ]
 def test_inequality(self):
     # Rotate names and dtypes one step to the right, in place, so the
     # same pairs are produced in a different order than in self.tasks.
     for values in (self.names, self.dtypes):
         values.insert(0, values.pop())
     rotated = [
         task_factory(dtype, name)
         for dtype, name in zip(self.dtypes, self.names)
     ]
     self.assertNotEqual(self.tasks, rotated)
示例#4
0
 def __setitem__(self, id, value):
     """
     Store a task under ``id``, building it with ``task_factory`` if needed.

     ``value`` may be:
     - a ``Task``: stored as is; if its name differs from ``id`` a warning
       is issued and the task name is overwritten with ``id``.
     - a tuple ``(kind, *args)``, optionally ending in a dict of keyword
       arguments: built as ``task_factory(kind, id, *args, **kwargs)``.
     - anything else: built as ``task_factory(value, id)``.

     Afterwards ``self._set_pos_in_tasks()`` is called.

     Raises:
         ValueError: if ``value`` is an empty tuple.
     """
     if isinstance(value, Task):
         if id != value.name:
             # stacklevel=2 attributes the warning to the assigning caller.
             warnings.warn(f"The task name '{value.name}' "
                           f"does not match the id '{id}'. "
                           f"Task name is set to '{id}'.",
                           stacklevel=2)
             value.name = id
     elif isinstance(value, tuple):
         if not value:
             raise ValueError(
                 "Cannot create a task from an empty tuple; "
                 "expected (kind, *args[, kwargs])."
             )
         kind, *args = value
         # A trailing dict carries keyword arguments. A one-element tuple
         # such as ('str',) has neither positional nor keyword arguments.
         kwargs = args.pop() if args and isinstance(args[-1], dict) else {}
         value = task_factory(kind, id, *args, **kwargs)
     else:
         value = task_factory(value, id)
     self.tasks[id] = value
     self._set_pos_in_tasks()
示例#5
0
 def test_dependency_from_tuple(self):
     # A (condition, value) tuple passed as `dependencies` should show up
     # as an equal Dependency in the first slot of task.dependencies.
     condition, value = "`relevant` == True", None
     expected = Dependency(condition, value)
     task = task_factory(
         'str',
         'topic',
         nullable=True,
         dependencies=(condition, value),
     )
     self.assertEqual(task.dependencies[0], expected)
    def setUp(self):
        """Build an Annotations instance from four tasks, two with instructions."""
        self.names = ['a', 'b', 'c', 'd']
        self.dtypes = ['str', 'int', 'float', 'bool']
        self.instructions = ['Enter string', 'Enter integer', None, None]

        # One task per (dtype, name, instruction) triple, in list order.
        triples = zip(self.dtypes, self.names, self.instructions)
        tasks = [
            task_factory(dtype, name, instruction=instruction)
            for dtype, name, instruction in triples
        ]
        self.instance = Annotations(tasks)
示例#7
0
 def test_created_str_task_attributes(self):
     # Expected attribute values of a 'str' task; note that the expected
     # instruction carries two trailing spaces and a newline.
     expected = {
         'kind': 'str',
         'dtype': 'object',
         'name': 'a',
         'instruction': 'eat my shorts  \n',
         'nullable': False,
     }
     task = task_factory('str', 'a', instruction='eat my shorts')
     # One subtest per attribute so every mismatch is reported.
     for attr, want in expected.items():
         with self.subTest(i=attr):
             self.assertEqual(getattr(task, attr), want)
 def test_equality(self):
     # Tasks rebuilt from the same dtype/name pairs equal the fixture list.
     rebuilt = [
         task_factory(dtype, name)
         for dtype, name in zip(self.dtypes, self.names)
     ]
     self.assertEqual(self.tasks, rebuilt)
示例#9
0
 def test_validation_valid_bool(self):
     # Take the first entry of BOOLEAN_STATES as (raw input, expected value).
     raw, expected = next(iter(BOOLEAN_STATES.items()))
     bool_task = task_factory('bool', 'a')
     result = bool_task(raw)
     self.assertEqual(result, expected)
示例#10
0
 def test_validation_invalid_regex(self):
     # The input has a trailing '!' after an otherwise matching string;
     # the task is expected to return an Invalid.
     regex_task = task_factory('regex', 'a', regex=r'[fs]\d{4}r')
     result = regex_task('f0084r!')
     self.assertIsInstance(result, Invalid)
示例#11
0
 def test_validation_valid_regex(self):
     # A matching input is expected to be returned unchanged.
     regex_task = task_factory('regex', 'a', regex=r'[fs]\d{4}r')
     result = regex_task('f0084r')
     self.assertEqual(result, 'f0084r')
示例#12
0
 def test_validation_invalid_int(self):
     # The string '1.0' is expected to be rejected by an 'int' task.
     int_task = task_factory('int', 'a')
     result = int_task('1.0')
     self.assertIsInstance(result, Invalid)
示例#13
0
 def test_none_if_nullable(self):
     # A nullable task called with KEYS.none must return None itself.
     # assertIsNone checks identity and gives a clearer failure message
     # than comparing against None with assertEqual.
     task = task_factory('str', 'a', nullable=True)
     self.assertIsNone(task(KEYS.none))
示例#14
0
 def test_equality_with_task_from_iterable(self):
     # Passing a list as the first argument of task_factory is expected
     # to give a task equal to the fixture in self.task.
     categories = ['x', 'y', 'z']
     from_iterable = task_factory(categories, 'a')
     self.assertEqual(from_iterable, self.task)
示例#15
0
 def setUp(self):
     """Create the 'category' task named 'a' shared by the tests."""
     categories = ['x', 'y', 'z']
     self.task = task_factory('category', 'a', categories=categories)
示例#16
0
 def test_inequality_name(self):
     # Two 'int' tasks that differ only in name must not compare equal.
     named_a, named_b = task_factory('int', 'a'), task_factory('int', 'b')
     self.assertNotEqual(named_a, named_b)
示例#17
0
 def test_quality(self):
     # Two 'int' tasks built with identical arguments must compare equal.
     first, second = task_factory('int', 'a'), task_factory('int', 'a')
     self.assertEqual(first, second)
示例#18
0
 def test_created_int_task_dtype(self):
     # An 'int' task is expected to report the dtype 'Int64'.
     int_task = task_factory('int', 'a')
     dtype = int_task.dtype
     self.assertEqual(dtype, 'Int64')
示例#19
0
 def test_contains(self):
     # A freshly built 'str' task named 'a' must be found in the instance.
     # assertIn reports both the member and the container on failure,
     # unlike assertTrue, which only reports "False is not true".
     self.assertIn(task_factory('str', 'a'), self.instance)
示例#20
0
 def test_validation_valid_category(self):
     # The input '1' is expected to yield the first category, 'x'.
     categories = ['x', 'y', 'z']
     category_task = task_factory(categories, 'a')
     result = category_task('1')
     self.assertEqual(result, 'x')
示例#21
0
 def test_none_if_not_nullable(self):
     # A non-nullable task called with KEYS.none is expected to return
     # an Invalid.
     int_task = task_factory('int', 'a', nullable=False)
     result = int_task(KEYS.none)
     self.assertIsInstance(result, Invalid)
示例#22
0
 def test_validation_invalid_category(self):
     # The input 'u' is expected to be rejected by the category task.
     categories = ['x', 'y', 'z']
     category_task = task_factory(categories, 'a')
     result = category_task('u')
     self.assertIsInstance(result, Invalid)
示例#23
0
 def test_validation_valid_int(self):
     # The string '1337' is expected to be converted to the integer 1337.
     int_task = task_factory('int', 'a')
     result = int_task('1337')
     self.assertEqual(result, 1337)
示例#24
0
 def test_validation_invalid_bool(self):
     # The input 'u' is expected to be rejected by a 'bool' task.
     bool_task = task_factory('bool', 'a')
     result = bool_task('u')
     self.assertIsInstance(result, Invalid)