def test_clean_multicolumn_sort_in_table_order(self):
    """Expect cleaned multicolumn values in the table's column order."""
    table_columns = [
        Column("B", ColumnType.NUMBER()),
        Column("A", ColumnType.NUMBER()),
    ]
    input_shape = TableShape(3, table_columns)
    # The value lists "A" first, but the table lists "B" first.
    cleaned = clean_value(ParamDType.Multicolumn(), ["A", "B"], input_shape)
    self.assertEqual(cleaned, ["B", "A"])
def test_clean_multichartseries_non_number_is_prompting_error(self):
    """Expect one WrongColumnType error for each non-number series column."""
    shape = TableShape(3, [
        Column('A', ColumnType.TEXT()),
        Column('B', ColumnType.DATETIME()),
    ])
    context = RenderContext(None, None, shape, None, None)
    series = [
        {'column': 'A', 'color': '#aaaaaa'},
        {'column': 'B', 'color': '#cccccc'},
    ]
    expected_errors = [
        PromptingError.WrongColumnType(['A'], 'text',
                                       frozenset({'number'})),
        PromptingError.WrongColumnType(['B'], 'datetime',
                                       frozenset({'number'})),
    ]

    with self.assertRaises(PromptingError) as cm:
        clean_value(ParamDType.Multichartseries(), series, context)

    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_multichartseries_non_number_is_prompting_error(self):
    """Text and datetime chart-series columns should each be reported."""
    text_column = Column("A", ColumnType.TEXT())
    datetime_column = Column("B", ColumnType.DATETIME())
    context = RenderContext(
        None, None, TableShape(3, [text_column, datetime_column]), None, None
    )
    chart_series = [
        {"column": "A", "color": "#aaaaaa"},
        {"column": "B", "color": "#cccccc"},
    ]

    with self.assertRaises(PromptingError) as cm:
        clean_value(ParamDType.Multichartseries(), chart_series, context)

    # One error per offending column, in the order the series were given.
    want = frozenset({"number"})
    self.assertEqual(
        cm.exception.errors,
        [
            PromptingError.WrongColumnType(["A"], "text", want),
            PromptingError.WrongColumnType(["B"], "datetime", want),
        ],
    )
def test_list_prompting_error_concatenate_different_type_to_text(self):
    """
    Expect a single merged error when a list of text columns gets non-text.

    The number column "A" and datetime column "B" are reported together,
    with `None` as the found type.
    """
    shape = TableShape(
        3,
        [Column("A", ColumnType.NUMBER()), Column("B", ColumnType.DATETIME())],
    )
    context = RenderContext(None, None, shape, None, None)
    text_column_dtype = ParamDType.Column(column_types=frozenset({"text"}))
    schema = ParamDType.List(inner_dtype=text_column_dtype)

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, ["A", "B"], context)

    expected_errors = [
        PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_multicolumn_prompting_error_convert_to_text(self):
    """
    Expect a PromptingError when a text-only multicolumn gets non-text columns.

    The number column "A" and datetime column "B" are expected in a single
    WrongColumnType error whose found type is `None`.
    """
    # TODO make this _automatic_ instead of quick-fix?
    # ... but for now: prompt for a Quick Fix.
    context = RenderContext(
        None,
        None,
        TableShape(
            3,
            [
                Column("A", ColumnType.NUMBER()),
                Column("B", ColumnType.DATETIME()),
                Column("C", ColumnType.TEXT()),
            ],
        ),
        None,
        None,
    )
    # Build the schema outside assertRaises so that only clean_value() is
    # guarded: an error raised while constructing the schema must fail the
    # test rather than satisfy it.
    schema = ParamDType.Multicolumn(column_types=frozenset({"text"}))

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, "A,B", context)

    self.assertEqual(
        cm.exception.errors,
        [PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))],
    )
def test_dict_prompting_error_concatenate_different_types(self):
    """
    Expect separate errors when dict entries fail with different found types.

    "x" points at a text column and "y" at a datetime column; both want a
    number, so two WrongColumnType errors are expected.
    """
    shape = TableShape(
        3, [Column("A", ColumnType.TEXT()), Column("B", ColumnType.DATETIME())]
    )
    context = RenderContext(None, None, shape, None, None)
    schema = ParamDType.Dict(
        {
            "x": ParamDType.Column(column_types=frozenset({"number"})),
            "y": ParamDType.Column(column_types=frozenset({"number"})),
        }
    )
    value = {"x": "A", "y": "B"}

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, value, context)

    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
        PromptingError.WrongColumnType(["B"], "datetime", frozenset({"number"})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_result_and_metadata_come_from_memory_when_available(self):
    """
    Expect the cached result to be usable after its stored file is removed.

    The object returned by cache_render_result() must still expose the
    result, row count and columns once `parquet_key` has been deleted.
    """
    columns = [
        Column("A", ColumnType.NUMBER(format="{:,d}")),
        Column("B", ColumnType.DATETIME()),
        Column("C", ColumnType.TEXT()),
        Column("D", ColumnType.TEXT()),
    ]
    result = ProcessResult(
        dataframe=pandas.DataFrame(
            {
                "A": [1],  # int64
                "B": [datetime.datetime(2018, 8, 20)],  # datetime64[ns]
                "C": ["foo"],  # str
                "D": pandas.Series(["cat"], dtype="category"),
            }
        ),
        columns=columns,
    )
    cached_result = self.wf_module.cache_render_result(self.delta.id, result)

    # cache_render_result() keeps its `result` parameter in memory, so we
    # can avoid disk entirely. Prove it by deleting from disk.
    minio.remove(minio.CachedRenderResultsBucket, cached_result.parquet_key)

    # assertIsNotNone reports the offending value on failure, unlike
    # assertFalse(x is None).
    self.assertIsNotNone(cached_result._result)
    self.assertEqual(cached_result.result, result)
    self.assertEqual(cached_result.nrows, 1)
    self.assertEqual(cached_result.columns, columns)
def test_metadata_comes_from_db_columns(self):
    """
    Expect nrows and columns on a freshly loaded WfModule's cached result.

    The stored object is removed first, and the reloaded cached result must
    not have a `_result` attribute.
    """
    columns = [
        Column("A", ColumnType.NUMBER(format="{:,d}")),
        Column("B", ColumnType.DATETIME()),
        Column("C", ColumnType.TEXT()),
        Column("D", ColumnType.TEXT()),
    ]
    dataframe = pandas.DataFrame(
        {
            "A": [1],  # int64
            "B": [datetime.datetime(2018, 8, 20)],  # datetime64[ns]
            "C": ["foo"],  # str
            "D": pandas.Series(["cat"], dtype="category"),
        }
    )
    result = ProcessResult(dataframe=dataframe, columns=columns)
    in_memory = self.wf_module.cache_render_result(self.delta.id, result)

    # Delete the stored object at `parquet_key` before reading metadata.
    minio.remove(minio.CachedRenderResultsBucket, in_memory.parquet_key)

    # Load _new_ CachedRenderResult -- from DB columns, not memory
    reloaded_module = WfModule.objects.get(id=self.wf_module.id)
    from_db = reloaded_module.cached_render_result

    self.assertFalse(hasattr(from_db, "_result"))
    self.assertEqual(from_db.nrows, 1)
    self.assertEqual(from_db.columns, columns)
def test_coerce_infer_columns(self):
    """Expect coerce() on a bare DataFrame to yield number and text columns."""
    dataframe = pd.DataFrame({'A': [1, 2], 'B': ['x', 'y']})
    expected_columns = [
        Column('A', ColumnType.NUMBER()),
        Column('B', ColumnType.TEXT()),
    ]
    coerced = ProcessResult.coerce(dataframe)
    self.assertEqual(coerced.columns, expected_columns)
def test_clean_multicolumn_sort_in_table_order(self):
    """Expect ['A', 'B'] to be reordered to the table's order, ['B', 'A']."""
    column_b = Column('B', ColumnType.NUMBER())
    column_a = Column('A', ColumnType.NUMBER())
    # The table lists 'B' ahead of 'A'.
    input_shape = TableShape(3, [column_b, column_a])
    dtype = ParamDType.Multicolumn()
    self.assertEqual(clean_value(dtype, ['A', 'B'], input_shape), ['B', 'A'])
def test_clean_multicolumn_missing_is_removed(self):
    """Expect a column name absent from the table to be dropped."""
    table_columns = [
        Column('A', ColumnType.NUMBER()),
        Column('B', ColumnType.NUMBER()),
    ]
    input_shape = TableShape(3, table_columns)
    # 'X' is not one of the table's columns.
    requested = ['A', 'X', 'B']
    cleaned = clean_value(ParamDType.Multicolumn(), requested, input_shape)
    self.assertEqual(cleaned, ['A', 'B'])
def test_clean_multicolumn_missing_is_removed(self):
    """Only names that exist in the table shape should survive cleaning."""
    input_shape = TableShape(
        3,
        [Column(name, ColumnType.NUMBER()) for name in ("A", "B")],
    )
    dtype = ParamDType.Multicolumn()
    # "X" has no matching column in input_shape.
    self.assertEqual(
        clean_value(dtype, ["A", "X", "B"], input_shape),
        ["A", "B"],
    )
def test_coerce_infer_columns(self):
    """Integers should give a NUMBER column and strings a TEXT column."""
    source = pd.DataFrame({"A": [1, 2], "B": ["x", "y"]})
    number_column = Column("A", ColumnType.NUMBER())
    text_column = Column("B", ColumnType.TEXT())
    self.assertEqual(
        ProcessResult.coerce(source).columns,
        [number_column, text_column],
    )
def check_key_types(left_dtypes, right_dtypes):
    """
    Raise TypeError if any key column has a different type on each side.

    For every key in `left_dtypes.index`, the dtype found under that key in
    `left_dtypes` and in `right_dtypes` is converted with
    `ColumnType.from_dtype()` and the two results are compared.

    Raises:
        TypeError: at the first key whose two column types differ.
    """
    for key in left_dtypes.index:
        left_type = ColumnType.from_dtype(left_dtypes.loc[key])
        right_type = ColumnType.from_dtype(right_dtypes.loc[key])
        if left_type != right_type:
            message = (
                f'Types do not match for key column "{key}" '
                f'({left_type.value} and {right_type.value}). '
                'Please use a type conversion module to '
                'make these column types consistent.'
            )
            raise TypeError(message)
def test_clean_multicolumn_sort_in_table_order(self):
    """Expect cleaned multicolumn values in the input table's column order."""
    shape = TableShape(3, [
        Column('B', ColumnType.NUMBER()),
        Column('A', ColumnType.NUMBER()),
    ])
    context = RenderContext(None, None, shape, None, None)
    # The value asks for 'A' then 'B'; the table has 'B' then 'A'.
    cleaned = clean_value(ParamDType.Multicolumn(), ['A', 'B'], context)
    self.assertEqual(cleaned, ['B', 'A'])
def test_clean_multicolumn_missing_is_removed(self):
    """Expect a column name missing from the input table to be dropped."""
    shape = TableShape(3, [
        Column('A', ColumnType.NUMBER()),
        Column('B', ColumnType.NUMBER()),
    ])
    context = RenderContext(None, None, shape, None, None)
    # 'X' does not appear in the table shape.
    requested = ['A', 'X', 'B']
    cleaned = clean_value(ParamDType.Multicolumn(), requested, context)
    self.assertEqual(cleaned, ['A', 'B'])
def test_coerce_infer_columns_with_format(self):
    """Expect a `column_formats` entry to become the NUMBER column's format."""
    dataframe = pd.DataFrame({'A': [1, 2], 'B': ['x', 'y']})
    module_output = {
        'dataframe': dataframe,
        'column_formats': {'A': '{:,d}'},
    }
    expected_columns = [
        Column('A', ColumnType.NUMBER(format='{:,d}')),
        Column('B', ColumnType.TEXT()),
    ]
    coerced = ProcessResult.coerce(module_output)
    self.assertEqual(coerced.columns, expected_columns)
def test_coerce_infer_columns_try_fallback_columns_ignore_wrong_type(self):
    """Expect fallback columns whose types mismatch the data to be ignored."""
    dataframe = pd.DataFrame({'A': [1, 2], 'B': ['x', 'y']})
    # The fallbacks claim the opposite type for each column.
    mismatched_fallbacks = [
        Column('A', ColumnType.TEXT()),
        Column('B', ColumnType.NUMBER()),
    ]
    coerced = ProcessResult.coerce(
        dataframe, try_fallback_columns=mismatched_fallbacks
    )
    expected_columns = [
        Column('A', ColumnType.NUMBER()),
        Column('B', ColumnType.TEXT()),
    ]
    self.assertEqual(coerced.columns, expected_columns)
def test_clean_multichartseries_missing_is_removed(self):
    """Expect a series whose column is not in the table to be dropped."""
    shape = TableShape(3, [
        Column('A', ColumnType.NUMBER()),
        Column('B', ColumnType.NUMBER()),
    ])
    context = RenderContext(None, None, shape, None, None)
    kept_series = {'column': 'A', 'color': '#aaaaaa'}
    # Column 'C' is not part of the table shape.
    missing_series = {'column': 'C', 'color': '#cccccc'}
    cleaned = clean_value(
        ParamDType.Multichartseries(),
        [kept_series, missing_series],
        context,
    )
    self.assertEqual(cleaned, [{'column': 'A', 'color': '#aaaaaa'}])
def test_coerce_infer_columns_format_supercedes_try_fallback_columns(self):
    """
    Expect an explicit `column_formats` entry to win over a fallback column.

    The fallback suggests '{:,.2f}' for 'A' while `column_formats` says
    '{:,d}'; the resulting column is expected to use '{:,d}'.
    """
    table = pd.DataFrame({'A': [1, 2]})
    # Pass `table` itself; the original built an identical second DataFrame
    # and left this local unused.
    result = ProcessResult.coerce({
        'dataframe': table,
        'column_formats': {'A': '{:,d}'},
    }, try_fallback_columns=[
        Column('A', ColumnType.NUMBER('{:,.2f}')),
    ])
    self.assertEqual(result.columns, [
        Column('A', ColumnType.NUMBER('{:,d}')),
    ])
def test_ctor_infer_columns(self):
    """Expect the constructor to produce number, text and datetime columns."""
    dataframe = pd.DataFrame({
        'A': [1, 2],
        'B': ['x', 'y'],
        'C': [np.nan, dt(2019, 3, 3, 4, 5, 6, 7)],
    })
    expected_columns = [
        Column('A', ColumnType.NUMBER()),
        Column('B', ColumnType.TEXT()),
        Column('C', ColumnType.DATETIME()),
    ]
    self.assertEqual(ProcessResult(dataframe).columns, expected_columns)
def test_coerce_infer_columns_format_supercedes_try_fallback_columns(self):
    """
    Expect an explicit `column_formats` entry to win over a fallback column.

    The fallback suggests "{:,.2f}" for "A" while `column_formats` says
    "{:,d}"; the resulting column is expected to use "{:,d}".
    """
    table = pd.DataFrame({"A": [1, 2]})
    # Pass `table` itself; the original built an identical second DataFrame
    # and left this local unused.
    result = ProcessResult.coerce(
        {
            "dataframe": table,
            "column_formats": {"A": "{:,d}"},
        },
        try_fallback_columns=[Column("A", ColumnType.NUMBER("{:,.2f}"))],
    )
    self.assertEqual(result.columns, [Column("A", ColumnType.NUMBER("{:,d}"))])
def test_clean_multicolumn_sort_in_table_order(self):
    """The cleaned list should follow table order, not the order requested."""
    table_columns = [
        Column("B", ColumnType.NUMBER()),
        Column("A", ColumnType.NUMBER()),
    ]
    context = RenderContext(
        None, None, TableShape(3, table_columns), None, None
    )
    dtype = ParamDType.Multicolumn()
    # Requested as A, B; the table holds B, A.
    self.assertEqual(clean_value(dtype, ["A", "B"], context), ["B", "A"])
def test_coerce_infer_columns_try_fallback_columns_ignore_wrong_type(self):
    """Fallback columns of the wrong type should not change the result."""
    source = pd.DataFrame({"A": [1, 2], "B": ["x", "y"]})
    # Each fallback names the other column's type.
    wrong_fallbacks = [
        Column("A", ColumnType.TEXT()),
        Column("B", ColumnType.NUMBER()),
    ]
    coerced = ProcessResult.coerce(source, try_fallback_columns=wrong_fallbacks)
    self.assertEqual(
        coerced.columns,
        [
            Column("A", ColumnType.NUMBER()),
            Column("B", ColumnType.TEXT()),
        ],
    )
def test_clean_multicolumn_missing_is_removed(self):
    """Names with no matching column in the table should be filtered out."""
    table_columns = [
        Column("A", ColumnType.NUMBER()),
        Column("B", ColumnType.NUMBER()),
    ]
    context = RenderContext(
        None, None, TableShape(3, table_columns), None, None
    )
    dtype = ParamDType.Multicolumn()
    # "X" is not in table_columns.
    self.assertEqual(clean_value(dtype, ["A", "X", "B"], context), ["A", "B"])
def test_coerce_infer_columns_with_format(self):
    """A format given in `column_formats` should appear on the column type."""
    source = pd.DataFrame({"A": [1, 2], "B": ["x", "y"]})
    formats = {"A": "{:,d}"}
    coerced = ProcessResult.coerce(
        {"dataframe": source, "column_formats": formats}
    )
    formatted_number = Column("A", ColumnType.NUMBER(format="{:,d}"))
    plain_text = Column("B", ColumnType.TEXT())
    self.assertEqual(coerced.columns, [formatted_number, plain_text])
def test_columns(self):
    """Expect column names and types for number, str, datetime and category."""
    frame = pd.DataFrame({
        'A': [1],  # number
        'B': ['foo'],  # str
        'C': dt(2018, 8, 20),  # datetime64
    })
    # The category column is attached after the frame is built.
    frame['D'] = pd.Series(['cat'], dtype='category')
    expected_columns = [
        Column('A', ColumnType.NUMBER()),
        Column('B', ColumnType.TEXT()),
        Column('C', ColumnType.DATETIME()),
        Column('D', ColumnType.TEXT()),
    ]

    result = ProcessResult(frame)

    self.assertEqual(result.column_names, ['A', 'B', 'C', 'D'])
    self.assertEqual(result.columns, expected_columns)
def test_list_prompting_error_concatenate_same_type(self):
    """
    Expect one merged error when every list entry fails with the same type.

    Both text columns are expected in a single WrongColumnType error whose
    found type is 'text'.
    """
    shape = TableShape(3, [
        Column('A', ColumnType.TEXT()),
        Column('B', ColumnType.TEXT()),
    ])
    context = RenderContext(None, None, shape, None, None)
    number_column_dtype = ParamDType.Column(
        column_types=frozenset({'number'})
    )
    schema = ParamDType.List(inner_dtype=number_column_dtype)

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, ['A', 'B'], context)

    expected_errors = [
        PromptingError.WrongColumnType(['A', 'B'], 'text',
                                       frozenset({'number'})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_ctor_infer_columns(self):
    """Column types should be derived from the DataFrame when none are given."""
    data = {
        "A": [1, 2],
        "B": ["x", "y"],
        "C": [np.nan, dt(2019, 3, 3, 4, 5, 6, 7)],
    }
    result = ProcessResult(pd.DataFrame(data))
    number_column = Column("A", ColumnType.NUMBER())
    text_column = Column("B", ColumnType.TEXT())
    datetime_column = Column("C", ColumnType.DATETIME())
    self.assertEqual(
        result.columns,
        [number_column, text_column, datetime_column],
    )
def test_clean_multicolumn_from_other_tab(self):
    """
    Expect a multicolumn with `tab_parameter` to use the tab's columns.

    The input table has 'A-from-tab-1' and the tab output has
    'A-from-tab-2'; only 'A-from-tab-2' is expected to remain.
    """
    tab_output = ProcessResult(pd.DataFrame({'A-from-tab-2': [1, 2]}))

    # Create a workflow whose first tab has one step with a cached result.
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wfm = tab.wf_modules.create(
        order=0,
        last_relevant_delta_id=workflow.last_delta_id,
    )
    wfm.cache_render_result(workflow.last_delta_id, tab_output)

    schema = ParamDType.Dict({
        'tab': ParamDType.Tab(),
        'columns': ParamDType.Multicolumn(tab_parameter='tab'),
    })
    params = {
        'tab': tab.slug,
        'columns': ['A-from-tab-1', 'A-from-tab-2'],
    }
    input_shape = TableShape(3, [
        Column('A-from-tab-1', ColumnType.NUMBER()),
    ])
    tab_shapes = {
        tab.slug: StepResultShape('ok', tab_output.table_shape),
    }
    context = RenderContext(workflow.id, None, input_shape, tab_shapes, params)

    cleaned = clean_value(schema, params, context)

    # cleaned['tab'] is not what we're testing here
    self.assertEqual(cleaned['columns'], ['A-from-tab-2'])