Exemplo n.º 1
0
 def test_allow_different_columns(self):
     # Tab 1 only has column A and Tab 2 only has column B: the output keeps
     # both columns and fills the cells a tab cannot supply with NaN.
     this_tab = pd.DataFrame({'A': [1, 2]})
     other_tab = TabOutput(
         'tab-2',
         'Tab 2',
         {'B': RenderColumn('B', 'number', '{}')},
         pd.DataFrame({'B': [3, 4]}),
     )
     params = {
         'tabs': [other_tab],
         'add_source_column': False,
         'source_column_name': '',
     }
     result = render(
         this_tab,
         params=params,
         tab_name='Tab 1',
         input_columns={'A': RenderColumn('A', 'number', '{}')},
     )
     # Column order is checked as well: A (the input's column) precedes B.
     expected = pd.DataFrame({
         'A': [1, 2, np.nan, np.nan],
         'B': [np.nan, np.nan, 3, 4],
     })
     assert_frame_equal(result, expected)
Exemplo n.º 2
0
    def test_prevent_overwrite(self):
        # Both tabs own a column "B" and the join asks to add the right tab's
        # "B": render() is expected to return an error message.
        left = pd.DataFrame({'A': [1, 2, 3], 'B': ['x', 'y', 'z']})
        right = pd.DataFrame({'A': ['1', '2'], 'B': ['X', 'Y']})
        right_columns = {
            'A': RenderColumn('A', 'number', '{}'),
            'B': RenderColumn('B', 'text', None),
        }
        params = {
            'right_tab': TabOutput('slug', 'Tab 2', right_columns, right),
            'join_columns': {'on': ['A'], 'right': ['B']},
            'type': 'left',
        }
        input_columns = {
            'A': RenderColumn('A', 'number', '{}'),
            'B': RenderColumn('B', 'text', None),
        }

        result = render(left, params, input_columns=input_columns)

        expected_message = (
            'You tried to add "B" from Tab 2, but your table already has that '
            'column. Please rename the column in one of the tabs, or unselect '
            'the column.'
        )
        self.assertEqual(result, expected_message)
Exemplo n.º 3
0
    def test_import_columns_without_formats(self):
        # Only the number column has a format; the datetime and text columns
        # carry None and are expected to be left out of "column_formats".
        dates = pd.Series(
            ["2012-01-01", "2015-02-03", "2019-05-23"], dtype="datetime64[ns]"
        )
        dataframe = pd.DataFrame(
            {"A": [1, 2, 3], "B": dates, "C": ["a", "b", "c"]}
        )
        columns = {
            "A": RenderColumn("A", "number", "{,.2f}"),
            "B": RenderColumn("B", "datetime", None),
            "C": RenderColumn("C", "text", None),
        }
        tab = TabOutput("tab-2", "Tab 2", columns, dataframe)

        result = render(pd.DataFrame(), {"tab": tab})

        assert_frame_equal(result["dataframe"], dataframe)
        self.assertEqual(result["column_formats"], {"A": "{,.2f}"})
Exemplo n.º 4
0
 def test_add_source_column(self):
     # With 'add_source_column' enabled, each output row is labelled with the
     # name of the tab it came from, in the column named by
     # 'source_column_name'.
     other_tab = TabOutput(
         'tab-2',
         'Tab 2',
         {'A': RenderColumn('A', 'number')},
         pd.DataFrame({'A': [3, 4]}),
     )
     params = {
         'tabs': [other_tab],
         'add_source_column': True,
         'source_column_name': 'S',
     }
     result = render(
         pd.DataFrame({'A': [1, 2]}),
         params=params,
         tab_name='Tab 1',
         input_columns={'A': RenderColumn('A', 'number')},
     )
     # The source column comes _first_, and it should be categorical: no
     # need to load it with useless copied bytes.
     expected = pd.DataFrame({
         'S': ['Tab 1', 'Tab 1', 'Tab 2', 'Tab 2'],
         'A': [1, 2, 3, 4],
     }).astype({'S': 'category'})
     assert_frame_equal(result, expected)
Exemplo n.º 5
0
 def test_left_join_delete_unused_categories_in_added_columns(self):
     # Left join of categorical columns where one right-hand row (A == 'z')
     # has no partner on the left.
     left = pd.DataFrame({'A': ['a', 'b']}, dtype='category')
     right = pd.DataFrame({
         'A': pd.Series(['a', 'z'], dtype='category'),
         'B': pd.Series(['x', 'y'], dtype='category'),
     })
     right_tab = TabOutput(
         'slug',
         'Tab 2',
         {
             'A': RenderColumn('A', 'text', None),
             'B': RenderColumn('B', 'text', None),
         },
         right,
     )
     params = {
         'right_tab': right_tab,
         'join_columns': {'on': ['A'], 'right': ['B']},
         'type': 'left',
     }
     result = render(
         left,
         params,
         input_columns={'A': RenderColumn('A', 'text', None)},
     )
     # The unmatched right-hand row never reaches the output, so its 'B'
     # value ('y') must not survive as an unused category of column 'B'.
     expected = pd.DataFrame({
         'A': pd.Series(['a', 'b'], dtype='category'),
         'B': pd.Series(['x', np.nan], dtype='category'),
     })
     assert_frame_equal(result['dataframe'], expected)
Exemplo n.º 6
0
 def test_error_different_types(self):
     # Column "A" is text in Tab 1 but number in Tab 2: render() is expected
     # to return an error message instead of a table.
     other_tab = TabOutput(
         "tab-2",
         "Tab 2",
         {"A": RenderColumn("A", "number", "{}")},
         pd.DataFrame({"A": [3, 4]}),
     )
     params = {
         "tabs": [other_tab],
         "add_source_column": False,
         "source_column_name": "",
     }
     result = render(
         pd.DataFrame({"A": ["x", "y"]}),
         params=params,
         tab_name="Tab 1",
         input_columns={"A": RenderColumn("A", "text", None)},
     )
     expected_message = (
         'Cannot concatenate column "A" of type "number" in "Tab 2" to '
         'column "A" of type "text" in "Tab 1". Please convert one or the '
         "other so they are the same type."
     )
     self.assertEqual(result, expected_message)
Exemplo n.º 7
0
 def test_allow_different_columns(self):
     # The two tabs share no column names: the output holds the union of the
     # columns, with NaN wherever a tab has no value for a column.
     tab_2 = TabOutput(
         "tab-2",
         "Tab 2",
         {"B": RenderColumn("B", "number", "{}")},
         pd.DataFrame({"B": [3, 4]}),
     )
     params = {
         "tabs": [tab_2],
         "add_source_column": False,
         "source_column_name": "",
     }
     input_columns = {"A": RenderColumn("A", "number", "{}")}

     result = render(
         pd.DataFrame({"A": [1, 2]}),
         params=params,
         tab_name="Tab 1",
         input_columns=input_columns,
     )

     # The expected frame also pins the column order: A first, then B.
     expected = pd.DataFrame(
         {"A": [1, 2, np.nan, np.nan], "B": [np.nan, np.nan, 3, 4]}
     )
     assert_frame_equal(result, expected)
Exemplo n.º 8
0
 def test_inner_join_delete_unused_categories_in_all_columns(self):
     # Inner join of two all-categorical tables whose only shared key is 'a'.
     left = pd.DataFrame({
         'A': pd.Series(['a', 'b'], dtype='category'),  # join column
         'B': pd.Series(['c', 'd'], dtype='category'),  # other column
     })
     right = pd.DataFrame({
         'A': pd.Series(['a', 'x'], dtype='category'),  # join column
         'C': pd.Series(['e', 'y'], dtype='category'),  # other column
     })
     right_columns = {
         'A': RenderColumn('A', 'text', None),
         'C': RenderColumn('C', 'text', None),
     }
     params = {
         'right_tab': TabOutput('slug', 'Tab 2', right_columns, right),
         'join_columns': {'on': ['A'], 'right': ['C']},
         'type': 'inner',
     }
     input_columns = {
         'A': RenderColumn('A', 'text', None),
         'B': RenderColumn('B', 'text', None),
     }
     result = render(left, params, input_columns=input_columns)
     # The values 'b', 'd', 'x' and 'y' are absent from the joined rows, so
     # none of the output dtypes may list them as categories.
     expected = pd.DataFrame(
         {'A': ['a'], 'B': ['c'], 'C': ['e']},
         dtype='category',
     )
     assert_frame_equal(result['dataframe'], expected)
Exemplo n.º 9
0
 def test_left(self):
     # Left join on A, pulling columns C and D in from the right tab. The
     # left row with A == 3 has no match, so its C and D cells are NaN.
     left = pd.DataFrame({'A': [1, 2, 3], 'B': ['x', 'y', 'z']})
     right = pd.DataFrame({'A': [1, 2], 'C': ['X', 'Y'], 'D': [0.1, 0.2]})
     right_columns = {
         'A': RenderColumn('A', 'number', '{:,.2f}'),
         'C': RenderColumn('C', 'text', None),
         'D': RenderColumn('D', 'number', '{:,}'),
     }
     params = {
         'right_tab': TabOutput('slug', 'name', right_columns, right),
         'join_columns': {'on': ['A'], 'right': ['C', 'D']},
         'type': 'left',
     }
     input_columns = {
         'A': RenderColumn('A', 'number', '{:d}'),
         'B': RenderColumn('B', 'text', None),
     }
     result = render(left, params, input_columns=input_columns)

     expected = pd.DataFrame({
         'A': [1, 2, 3],
         'B': ['x', 'y', 'z'],
         'C': ['X', 'Y', np.nan],
         'D': [0.1, 0.2, np.nan],
     })
     assert_frame_equal(result['dataframe'], expected)
     # Formats are reported for the added columns only (C and D).
     self.assertEqual(result['column_formats'], {'C': None, 'D': '{:,}'})
Exemplo n.º 10
0
 def test_add_source_column(self):
     # "add_source_column" is on and the column is to be called "S": every
     # output row should say which tab it originated from.
     tab_2 = TabOutput(
         "tab-2",
         "Tab 2",
         {"A": RenderColumn("A", "number", "{}")},
         pd.DataFrame({"A": [3, 4]}),
     )
     result = render(
         pd.DataFrame({"A": [1, 2]}),
         params={
             "tabs": [tab_2],
             "add_source_column": True,
             "source_column_name": "S",
         },
         tab_name="Tab 1",
         input_columns={"A": RenderColumn("A", "number", "{}")},
     )
     # Source column comes _first_ ...
     expected = pd.DataFrame(
         {"S": ["Tab 1", "Tab 1", "Tab 2", "Tab 2"], "A": [1, 2, 3, 4]}
     )
     # ... and should be categorical: no need to load it with useless copied
     # bytes.
     expected = expected.assign(S=expected["S"].astype("category"))
     assert_frame_equal(result, expected)
Exemplo n.º 11
0
    def test_prevent_overwrite(self):
        # The input already has a column "B", and the join is asked to add
        # "B" from Tab 2 as well: an error message is expected.
        left = pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})
        right = pd.DataFrame({"A": ["1", "2"], "B": ["X", "Y"]})
        right_tab = TabOutput(
            "slug",
            "Tab 2",
            {
                "A": RenderColumn("A", "number", "{}"),
                "B": RenderColumn("B", "text", None),
            },
            right,
        )
        params = {
            "right_tab": right_tab,
            "join_columns": {"on": ["A"], "right": ["B"]},
            "type": "left",
        }
        input_columns = {
            "A": RenderColumn("A", "number", "{}"),
            "B": RenderColumn("B", "text", None),
        }

        result = render(left, params, input_columns=input_columns)

        expected_message = (
            'You tried to add "B" from Tab 2, but your table already has that '
            "column. Please rename the column in one of the tabs, or unselect "
            "the column."
        )
        self.assertEqual(result, expected_message)
Exemplo n.º 12
0
    def test_on_types_differ(self):
        # The join column "A" is a number in the input but text in Tab 2:
        # an error message is expected instead of a joined table.
        left = pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})
        right = pd.DataFrame({"A": ["1", "2"], "C": ["X", "Y"]})
        right_tab = TabOutput(
            "slug",
            "Tab 2",
            {
                "A": RenderColumn("A", "text", None),
                "C": RenderColumn("C", "text", None),
            },
            right,
        )
        params = {
            "right_tab": right_tab,
            "join_columns": {"on": ["A"], "right": ["C"]},
            "type": "left",
        }
        input_columns = {
            "A": RenderColumn("A", "number", "{}"),
            "B": RenderColumn("B", "text", None),
        }

        result = render(left, params, input_columns=input_columns)

        expected_message = (
            'Column "A" is *number* in this tab and *text* in Tab 2. '
            "Please convert one or the other so they are both the same type."
        )
        self.assertEqual(result, expected_message)
Exemplo n.º 13
0
 def test_happy_path(self):
     # Starting from an empty input table, the 'tab' param's table and its
     # column format are what comes out.
     source = pd.DataFrame({'A': [3, 4]})
     columns = {'A': RenderColumn('A', 'number', '{}')}
     tab = TabOutput('tab-2', 'Tab 2', columns, source)

     result = render(pd.DataFrame(), {'tab': tab})

     assert_frame_equal(result['dataframe'], pd.DataFrame({'A': [3, 4]}))
     self.assertEqual(result['column_formats'], {'A': '{}'})
Exemplo n.º 14
0
 def test_happy_path(self):
     # The input is empty; the output should be Tab 2's table, together with
     # the format of its single number column.
     tab_2 = TabOutput(
         "tab-2",
         "Tab 2",
         {"A": RenderColumn("A", "number", "{}")},
         pd.DataFrame({"A": [3, 4]}),
     )
     params = {"tab": tab_2}

     result = render(pd.DataFrame(), params)

     expected_table = pd.DataFrame({"A": [3, 4]})
     assert_frame_equal(result["dataframe"], expected_table)
     self.assertEqual(result["column_formats"], {"A": "{}"})
Exemplo n.º 15
0
def _(dtype: ParamDType.Tab, value: str, context: RenderContext) -> TabOutput:
    """
    Turn a tab-slug param value into the TabOutput of that tab.

    Return None when `value` is not a key of `context.tab_shapes`.

    Raise TabCycleError when the tab's shape is None.
    Raise TabOutputUnreachableError when the shape's status is not 'ok', or
    when the tab has no live modules.
    Raise UnneededExecution when the database disagrees with
    `context.tab_shapes` (the tab is gone, or there is no cached result).
    """
    slug = value

    try:
        shape = context.tab_shapes[slug]
    except KeyError:
        # The param refers to a tab that doesn't exist.
        return None

    if shape is None:
        # The executor reports this tab as un-rendered, which means there is
        # a tab-cycle.
        raise TabCycleError
    if shape.status != 'ok':
        raise TabOutputUnreachableError

    # Fetch the Tab from the database. Assumes we've locked the workflow.
    try:
        tab = Tab.objects.get(workflow_id=context.workflow_id,
                              is_deleted=False,
                              slug=slug)
    except Tab.DoesNotExist:
        # context.tab_shapes[slug] told us the tab was rendered, so a missing
        # Tab means someone deleted it mid-render: our param is stale.
        raise UnneededExecution

    last_module = tab.live_wf_modules.last()
    if last_module is None:
        # A tab without any modules has no output to read.
        raise TabOutputUnreachableError

    cached = last_module.cached_render_result
    if cached is None:
        # tab_shapes implies a result was just cached, yet none is here: the
        # version we are looking at must be stale.
        raise UnneededExecution

    result = cached.result  # read Parquet file from disk (slow)
    columns = {
        column.name: RenderColumn(column.name, column.type.name,
                                  getattr(column.type, 'format', None))
        for column in result.columns
    }
    return TabOutput(slug, tab.name, columns, result.dataframe)
Exemplo n.º 16
0
 def test_inner_join_delete_unused_categories_in_all_columns(self):
     # Both tables are fully categorical and only the key "a" is present in
     # both, so the inner join yields a single row.
     left = pd.DataFrame(
         {
             "A": pd.Series(["a", "b"], dtype="category"),  # join column
             "B": pd.Series(["c", "d"], dtype="category"),  # other column
         }
     )
     right = pd.DataFrame(
         {
             "A": pd.Series(["a", "x"], dtype="category"),  # join column
             "C": pd.Series(["e", "y"], dtype="category"),  # other column
         }
     )
     right_columns = {
         "A": RenderColumn("A", "text", None),
         "C": RenderColumn("C", "text", None),
     }
     params = {
         "right_tab": TabOutput("slug", "Tab 2", right_columns, right),
         "join_columns": {"on": ["A"], "right": ["C"]},
         "type": "inner",
     }
     input_columns = {
         "A": RenderColumn("A", "text", None),
         "B": RenderColumn("B", "text", None),
     }

     result = render(left, params, input_columns=input_columns)

     # 'b', 'd', 'x' and 'y' don't occur in the joined row, so the output
     # dtypes should not contain them as categories.
     expected = pd.DataFrame(
         {"A": ["a"], "B": ["c"], "C": ["e"]}, dtype="category"
     )
     assert_frame_equal(result["dataframe"], expected)
Exemplo n.º 17
0
 def test_right_join_delete_unused_categories_in_input_columns(self):
     # Right join: only the left row whose key exists on the right ("a")
     # makes it into the output.
     left = pd.DataFrame(
         {
             "A": pd.Series(["a", "b"], dtype="category"),  # join column
             "B": pd.Series(["c", "d"], dtype="category"),  # other column
         }
     )
     right = pd.DataFrame(
         {
             "A": pd.Series(["a"], dtype="category"),  # join column
             "C": ["e"],
         }
     )
     right_columns = {
         "A": RenderColumn("A", "text", None),
         "C": RenderColumn("C", "text", None),
     }
     params = {
         "right_tab": TabOutput("slug", "Tab 2", right_columns, right),
         "join_columns": {"on": ["A"], "right": ["C"]},
         "type": "right",
     }
     input_columns = {
         "A": RenderColumn("A", "text", None),
         "B": RenderColumn("B", "text", None),
     }

     result = render(left, params, input_columns=input_columns)

     # 'b' and 'd' belong to the dropped left row, so they should not remain
     # as categories of the input columns in the result.
     expected = pd.DataFrame(
         {
             "A": pd.Series(["a"], dtype="category"),
             "B": pd.Series(["c"], dtype="category"),
             "C": ["e"],
         }
     )
     assert_frame_equal(result["dataframe"], expected)
Exemplo n.º 18
0
 def test_happy_path(self):
     # Two tabs with the same single number column: Tab 2's rows are
     # appended beneath Tab 1's, and no source column is added.
     other_tab = TabOutput(
         'tab-2',
         'Tab 2',
         {'A': RenderColumn('A', 'number')},
         pd.DataFrame({'A': [3, 4]}),
     )
     params = {
         'tabs': [other_tab],
         'add_source_column': False,
         'source_column_name': '',
     }
     result = render(
         pd.DataFrame({'A': [1, 2]}),
         params=params,
         tab_name='Tab 1',
         input_columns={'A': RenderColumn('A', 'number')},
     )
     expected = pd.DataFrame({'A': [1, 2, 3, 4]})
     assert_frame_equal(result, expected)
Exemplo n.º 19
0
 def test_coerce_categories_and_str(self):
     # Tab 1 stores column A as a categorical, Tab 2 stores it as plain
     # strings; the combined column is expected to be plain strings.
     categorical_input = pd.DataFrame({'A': ['a', 'b']}, dtype='category')
     str_tab = TabOutput(
         'tab-2',
         'Tab 2',
         {'A': RenderColumn('A', 'text')},
         pd.DataFrame({'A': ['c', 'd']}),
     )
     params = {
         'tabs': [str_tab],
         'add_source_column': False,
         'source_column_name': '',
     }
     result = render(
         categorical_input,
         params=params,
         tab_name='Tab 1',
         input_columns={'A': RenderColumn('A', 'text')},
     )
     expected = pd.DataFrame({'A': ['a', 'b', 'c', 'd']})  # str
     assert_frame_equal(result, expected)
Exemplo n.º 20
0
 def test_happy_path(self):
     # Tab 1 and Tab 2 both hold one number column "A"; the output lists
     # Tab 1's rows followed by Tab 2's.
     tab_2 = TabOutput(
         "tab-2",
         "Tab 2",
         {"A": RenderColumn("A", "number", "{}")},
         pd.DataFrame({"A": [3, 4]}),
     )
     params = {
         "tabs": [tab_2],
         "add_source_column": False,
         "source_column_name": "",
     }
     input_columns = {"A": RenderColumn("A", "number", "{}")}

     result = render(
         pd.DataFrame({"A": [1, 2]}),
         params=params,
         tab_name="Tab 1",
         input_columns=input_columns,
     )

     expected = pd.DataFrame({"A": [1, 2, 3, 4]})
     assert_frame_equal(result, expected)
Exemplo n.º 21
0
    def test_import_columns_without_formats(self):
        # The datetime and text columns have no format (None); only the
        # number column's format is expected in 'column_formats'.
        dates = pd.Series(['2012-01-01', '2015-02-03', '2019-05-23'],
                          dtype='datetime64[ns]')
        dataframe = pd.DataFrame({
            'A': [1, 2, 3],
            'B': dates,
            'C': ['a', 'b', 'c'],
        })
        columns = {
            'A': RenderColumn('A', 'number', '{,.2f}'),
            'B': RenderColumn('B', 'datetime', None),
            'C': RenderColumn('C', 'text', None),
        }
        params = {'tab': TabOutput('tab-2', 'Tab 2', columns, dataframe)}

        result = render(pd.DataFrame(), params)

        assert_frame_equal(result['dataframe'], dataframe)
        self.assertEqual(result['column_formats'], {'A': '{,.2f}'})
Exemplo n.º 22
0
 def test_error_different_types(self):
     # Tab 1's column "A" is text while Tab 2's column "A" is a number:
     # an error message is expected in place of a table.
     number_tab = TabOutput(
         'tab-2',
         'Tab 2',
         {'A': RenderColumn('A', 'number')},
         pd.DataFrame({'A': [3, 4]}),
     )
     params = {
         'tabs': [number_tab],
         'add_source_column': False,
         'source_column_name': '',
     }
     result = render(
         pd.DataFrame({'A': ['x', 'y']}),
         params=params,
         tab_name='Tab 1',
         input_columns={'A': RenderColumn('A', 'text')},
     )
     expected_message = (
         'Cannot concatenate column "A" of type "number" in "Tab 2" to '
         'column "A" of type "text" in "Tab 1". Please convert one or the '
         'other so they are the same type.'
     )
     self.assertEqual(result, expected_message)
Exemplo n.º 23
0
 def test_left(self):
     # Left join on "A" that brings in "C" and "D" from the right tab; the
     # left row with A == 3 has no match and gets NaN in both new columns.
     left = pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})
     right = pd.DataFrame({"A": [1, 2], "C": ["X", "Y"], "D": [0.1, 0.2]})
     right_tab = TabOutput(
         "slug",
         "name",
         {
             "A": RenderColumn("A", "number", "{:,.2f}"),
             "C": RenderColumn("C", "text", None),
             "D": RenderColumn("D", "number", "{:,}"),
         },
         right,
     )
     params = {
         "right_tab": right_tab,
         "join_columns": {"on": ["A"], "right": ["C", "D"]},
         "type": "left",
     }
     input_columns = {
         "A": RenderColumn("A", "number", "{:d}"),
         "B": RenderColumn("B", "text", None),
     }

     result = render(left, params, input_columns=input_columns)

     expected_table = pd.DataFrame(
         {
             "A": [1, 2, 3],
             "B": ["x", "y", "z"],
             "C": ["X", "Y", np.nan],
             "D": [0.1, 0.2, np.nan],
         }
     )
     assert_frame_equal(result["dataframe"], expected_table)
     # Only the columns added by the join (C and D) get a format entry.
     self.assertEqual(result["column_formats"], {"C": None, "D": "{:,}"})
Exemplo n.º 24
0
 def test_coerce_categories_and_str(self):
     # One tab holds column "A" as a categorical and the other as plain
     # strings; the result is expected to hold plain strings.
     cat_input = pd.DataFrame({"A": ["a", "b"]}, dtype="category")  # cat
     str_table = pd.DataFrame({"A": ["c", "d"]})  # str
     tab_2 = TabOutput(
         "tab-2",
         "Tab 2",
         {"A": RenderColumn("A", "text", None)},
         str_table,
     )
     params = {
         "tabs": [tab_2],
         "add_source_column": False,
         "source_column_name": "",
     }

     result = render(
         cat_input,
         params=params,
         tab_name="Tab 1",
         input_columns={"A": RenderColumn("A", "text", None)},
     )

     expected = pd.DataFrame({"A": ["a", "b", "c", "d"]})  # str
     assert_frame_equal(result, expected)
Exemplo n.º 25
0
 def test_left_join_delete_unused_categories_in_added_columns(self):
     # Categorical left join in which the right-hand row with A == "z" has
     # nothing to match on the left.
     left = pd.DataFrame({"A": ["a", "b"]}, dtype="category")
     right = pd.DataFrame(
         {
             "A": pd.Series(["a", "z"], dtype="category"),
             "B": pd.Series(["x", "y"], dtype="category"),
         }
     )
     right_columns = {
         "A": RenderColumn("A", "text", None),
         "B": RenderColumn("B", "text", None),
     }
     params = {
         "right_tab": TabOutput("slug", "Tab 2", right_columns, right),
         "join_columns": {"on": ["A"], "right": ["B"]},
         "type": "left",
     }

     result = render(
         left,
         params,
         input_columns={"A": RenderColumn("A", "text", None)},
     )

     # The "z" row is dropped by the join, so the "B" value it carried ("y")
     # should not be a category of the output's "B" column.
     expected = pd.DataFrame(
         {
             "A": pd.Series(["a", "b"], dtype="category"),
             "B": pd.Series(["x", np.nan], dtype="category"),
         }
     )
     assert_frame_equal(result["dataframe"], expected)
Exemplo n.º 26
0
    def test_on_types_differ(self):
        # Join column "A" is declared as number on the left and text on the
        # right: render() is expected to return an error message.
        left = pd.DataFrame({'A': [1, 2, 3], 'B': ['x', 'y', 'z']})
        right = pd.DataFrame({'A': ['1', '2'], 'C': ['X', 'Y']})
        right_columns = {
            'A': RenderColumn('A', 'text', None),
            'C': RenderColumn('C', 'text', None),
        }
        params = {
            'right_tab': TabOutput('slug', 'Tab 2', right_columns, right),
            'join_columns': {'on': ['A'], 'right': ['C']},
            'type': 'left',
        }
        input_columns = {
            'A': RenderColumn('A', 'number', '{}'),
            'B': RenderColumn('B', 'text', None),
        }

        result = render(left, params, input_columns=input_columns)

        expected_message = (
            'Column "A" is *number* in this tab and *text* in Tab 2. '
            'Please convert one or the other so they are both the same type.'
        )
        self.assertEqual(result, expected_message)
Exemplo n.º 27
0
 def test_left(self):
     # Join with 'type' 0, on column A, adding column C from the right tab.
     # The left row with A == 3 has no match, so its C cell is NaN.
     left = pd.DataFrame({'A': [1, 2, 3], 'B': ['x', 'y', 'z']})
     right = pd.DataFrame({'A': [1, 2], 'C': ['X', 'Y']})
     right_columns = {
         'A': RenderColumn('A', 'number'),
         'C': RenderColumn('C', 'text'),
     }
     params = {
         'right_tab': TabOutput('slug', 'name', right_columns, right),
         'join_columns': {'on': 'A', 'right': 'C'},
         'type': 0,
     }
     input_columns = {
         'A': RenderColumn('A', 'number'),
         'B': RenderColumn('B', 'text'),
     }
     result = render(left, params, input_columns=input_columns)

     expected = pd.DataFrame({
         'A': [1, 2, 3],
         'B': ['x', 'y', 'z'],
         'C': ['X', 'Y', np.nan],
     })
     assert_frame_equal(result, expected)
Exemplo n.º 28
0
 def test_import_empty_tab(self):
     # A tab without columns or rows comes out as an empty table with no
     # column formats.
     empty_tab = TabOutput('tab-2', 'Tab 2', {}, pd.DataFrame())
     result = render(pd.DataFrame(), {'tab': empty_tab})
     assert_frame_equal(result['dataframe'], pd.DataFrame())
     self.assertEqual(result['column_formats'], {})
Exemplo n.º 29
0
 def test_import_empty_tab(self):
     # Tab 2 is completely empty: the output table should be empty too, and
     # "column_formats" should have no entries.
     params = {"tab": TabOutput("tab-2", "Tab 2", {}, pd.DataFrame())}
     result = render(pd.DataFrame(), params)
     expected_table = pd.DataFrame()
     assert_frame_equal(result["dataframe"], expected_table)
     self.assertEqual(result["column_formats"], {})