def test_render(self):
    # Renders the module for several values of `cols_pval` and compares the
    # printed output with the matching column selection of mock_csv_table.

    # select a single column
    self.cols_pval.value = 'Month'
    self.cols_pval.save()
    out = execute_nocache(self.wf_module)
    table = mock_csv_table[['Month']]
    self.assertEqual(str(out), str(table))

    # select a single column, with stripped whitespace
    self.cols_pval.value = 'Month '
    self.cols_pval.save()
    out = execute_nocache(self.wf_module)
    self.assertEqual(str(out), str(table))

    # reverse column order, should not reverse
    self.cols_pval.value = 'Amount,Month'
    self.cols_pval.save()
    out = execute_nocache(self.wf_module)
    table = mock_csv_table[['Month', 'Amount']]
    # Assert against the explicit two-column selection built above; comparing
    # with the whole fixture left `table` unused and only holds when the
    # fixture happens to consist of exactly these two columns.
    self.assertEqual(str(out), str(table))

    # bad column name should just be ignored
    self.cols_pval.value = 'Amountxxx,Month'
    self.cols_pval.save()
    out = execute_nocache(self.wf_module)
    table = mock_csv_table[['Month']]
    self.assertEqual(str(out), str(table))

    # ignoring the bad name must not flag the module as errored
    self.wf_module.refresh_from_db()
    self.assertEqual(self.wf_module.status, WfModule.READY)
def test_render_select(self):
    # Appends the same 'select' edit twice: the first render is expected to
    # drop the matching rows, the second to give the full table back.

    def dolores_select_edit():
        # Fresh dict per call so each history entry is its own object.
        return {
            'type': 'select',
            'column': 'name',
            'content': {'value': 'Dolores'},
        }

    def store_edits():
        self.edits_pval.value = json.dumps(self.edits)
        self.edits_pval.save()

    # Perform a deselection
    self.column_pval.value = 'name'
    self.column_pval.save()
    self.edits.append(dolores_select_edit())
    store_edits()
    rendered = execute_nocache(self.wf_module)

    expected = self.table[[False, True, True, False]]
    # reset to contiguous indices
    expected.index = pd.RangeIndex(len(expected.index))
    self.assertTrue(rendered.equals(expected))

    # Perform a selection on the same value, table should be back to normal
    self.edits.append(dolores_select_edit())
    store_edits()
    rendered = execute_nocache(self.wf_module)

    expected = self.table[[True, True, True, True]]
    self.assertTrue(rendered.equals(expected))
def test_python_formula(self):
    # Exercises the Python-syntax formula: a named output column, an empty
    # output name, and a formula that references a missing column.

    def doubled_amount_table(result_column):
        # mock_csv_table plus an object-dtype column holding Amount * 2
        expected = mock_csv_table.copy()
        expected[result_column] = (expected['Amount'] * 2).astype(object)
        return expected

    def render_and_check(status, expected):
        rendered = execute_nocache(self.wfmodule)
        self.wfmodule.refresh_from_db()
        self.assertEqual(self.wfmodule.status, status)
        self.assertTrue(rendered.equals(expected))

    # set up a formula to double the Amount column
    self.python_pval.set_value('Amount*2')
    self.python_pval.save()
    self.syntax_pval.set_value(1)
    self.syntax_pval.save()
    self.rpval.value = 'output'
    self.rpval.save()
    render_and_check(WfModule.READY, doubled_amount_table('output'))

    # empty result parameter should produce 'result'
    self.rpval.set_value('')
    self.rpval.save()
    render_and_check(WfModule.READY, doubled_amount_table('result'))

    # formula with missing column name should error; NOP on error
    self.python_pval.set_value('xxx*2')
    self.python_pval.save()
    render_and_check(WfModule.ERROR, mock_csv_table)
def test_date_only(self):
    # With the date-only CSV loaded and the group parameter set to 2, the
    # module is expected to end in the error state with a message naming the
    # offending column.
    expected_message = (
        "The column 'Date' only contains date values. "
        "Please group by Day, Month, Quarter or Year."
    )

    set_string(self.csv_data, self.count_csv_dates)
    set_string(self.col_pval, 'Date')
    set_integer(self.group_pval, 2)

    execute_nocache(self.wf_module)
    self.wf_module.refresh_from_db()

    self.assertEqual(self.wf_module.status, 'error')
    self.assertEqual(self.wf_module.error_msg, expected_message)
def test_str_ordering(self):
    # Sorts with dtype 0 (string) on a string column, a numeric column and,
    # through SortFromTable.render() directly, a date column.

    def choose_column_as_string(column):
        self.column_pval.value = column
        self.column_pval.save()
        # dtype is string
        self.dtype_pval.value = 0
        self.dtype_pval.save()

    def render_with_direction(direction):
        self.direction_pval.value = direction
        self.direction_pval.save()
        return execute_nocache(self.wf_module)

    # Tests ordering of a string column as strings
    choose_column_as_string('name')

    # If direction is "Select", NOP
    rendered = render_with_direction(0)
    self.assertTrue(rendered.equals(self.table))

    # direction 1 is "Ascending", direction 2 is "Descending"
    for direction, row_order in ((1, [1, 0, 3, 2]), (2, [2, 0, 3, 1])):
        rendered = render_with_direction(direction)
        expected = reorder_table(self.table, row_order)
        self.assertTrue(rendered.equals(expected))

    # Tests ordering of a numeric column as strings.
    # We only test Ascending here; others have been covered above
    choose_column_as_string('float')
    rendered = render_with_direction(1)
    expected = reorder_table(self.table, [0, 1, 3, 2])
    self.assertTrue(rendered.equals(expected))

    # Test ordering of a date column as string,
    # using SortFromTable's render() directly
    date_sort_params = MockModule({
        'column': 'date',
        'dtype': 0,
        # We only test Ascending here; others have been covered above
        'direction': 1,
    })
    rendered = SortFromTable.render(date_sort_params, self.dates_table.copy())
    expected = reorder_table(self.dates_table, [2, 0, 1, 3])
    self.assertTrue(rendered.equals(expected))
def test_bad_dates(self):
    # Columns that do not hold dates must leave the module in ERROR state.
    non_date_columns = (
        'Amount',  # integers are not dates
        'Foo',     # Weird strings are not dates (different error code path)
    )
    for column in non_date_columns:
        set_string(self.col_pval, column)
        execute_nocache(self.wf_module)  # rendered table is not inspected
        self.wf_module.refresh_from_db()
        self.assertEqual(self.wf_module.status, WfModule.ERROR)
def test_bad_colname(self):
    # An empty column name is a no-op; an unknown column name is an error.

    def render_with_column(column):
        set_string(self.col_pval, column)
        rendered = execute_nocache(self.wf_module)
        self.wf_module.refresh_from_db()
        return rendered

    # NOP if no column given
    rendered = render_with_column('')
    self.assertEqual(self.wf_module.status, WfModule.READY)
    self.assertFalse(rendered.empty)

    # bad column name should produce error
    render_with_column('hilarious')
    self.assertEqual(self.wf_module.status, WfModule.ERROR)
def test_time_only(self):
    # A time-only column is rejected under the fixture's initial grouping,
    # then counted once the group parameter is switched to 2 (hours).
    expected_message = (
        "The column 'Date' only contains time values. "
        "Please group by Hour, Minute or Second."
    )
    expected_hourly_csv = (
        'Date,count\n'
        '00:00,3\n'
        '01:00,1\n'
        '11:00,2\n'
        '12:00,1\n'
    )

    set_string(self.csv_data, self.count_csv_time)
    set_string(self.col_pval, 'Date')
    execute_nocache(self.wf_module)
    self.wf_module.refresh_from_db()
    self.assertEqual(self.wf_module.status, 'error')
    self.assertEqual(self.wf_module.error_msg, expected_message)

    # Set to hours
    set_integer(self.group_pval, 2)
    rendered = execute_nocache(self.wf_module)
    self.wf_module.refresh_from_db()
    self.assertEqual(rendered.to_csv(index=False), expected_hourly_csv)
def test_scrape_list(self):
    # Scrapes URLs supplied through the 'urllist' parameter, with the real
    # scrape_urls replaced by a stub that fills in canned results.
    source_options = "List|Input column".split('|')
    source_pval = get_param_by_id_name('urlsource')
    source_pval.value = source_options.index('List')
    source_pval.save()

    url_lines = [
        'http://a.com/file',
        'https://b.com/file2',
        'c.com/file/dir',  # Removed 'http://' to test the URL-fixing part
    ]
    get_param_by_id_name('urllist').set_value('\n'.join(url_lines))

    # Code below mostly lifted from the column test
    async def fake_scrape_urls(urls, table):
        # fills the result columns in place from the canned scraped table
        for column in ('status', 'html'):
            table[column] = self.scraped_table[column]

    URLScraper._mynow = lambda: testnow

    with mock.patch('server.modules.urlscraper.scrape_urls') as scraper:
        # call the mock function instead, the real fn is tested above
        scraper.side_effect = fake_scrape_urls
        self.press_fetch_button()
        rendered = execute_nocache(self.wfmodule)
        self.assertTrue(rendered.equals(self.scraped_table))
def test_reorder(self):
    # Applies a three-step reorder history and checks both the final column
    # order and that each column's data is unchanged.

    def move(column, from_index, to_index):
        return {'column': column, 'from': from_index, 'to': to_index}

    # In chronological order
    history = [
        move('count', 2, 0),  # gives ['count', 'name', 'date', 'float']
        move('name', 1, 3),   # gives ['count', 'date', 'name', 'float']
        move('float', 3, 2),  # gives ['count', 'date', 'float', 'name']
    ]
    self.history_pval.value = json.dumps(history)
    self.history_pval.save()

    rendered = execute_nocache(self.wf_module)

    expected_columns = ['count', 'date', 'float', 'name']
    self.assertEqual(rendered.columns.tolist(), expected_columns)
    for name in expected_columns:
        self.assertTrue(rendered[name].equals(self.table[name]))
def test_corrupt_reorder(self):
    # If an input column is removed (e.g. via select columns)
    # then the entire reorder history becomes incoherent
    # and the module should report error
    history = [
        # gives ['count', 'name', 'date', 'float']
        {'column': 'count', 'from': 2, 'to': 0},
        # invalid: this column is not in the table
        {'column': 'nonexistent-name', 'from': 1, 'to': 3},
        {'column': 'float', 'from': 3, 'to': 2},
    ]
    self.history_pval.value = json.dumps(history)
    self.history_pval.save()

    execute_nocache(self.wf_module)  # only the resulting status matters

    self.wf_module.refresh_from_db()
    self.assertEqual(self.wf_module.status, WfModule.ERROR)
def test_numeric_ordering(self):
    # Sorts with dtype 1 (number) on the 'float' column and then on the
    # 'count' column.

    def choose_column_as_number(column):
        self.column_pval.value = column
        self.column_pval.save()
        # dtype is number
        self.dtype_pval.value = 1
        self.dtype_pval.save()

    def render_with_direction(direction):
        self.direction_pval.value = direction
        self.direction_pval.save()
        return execute_nocache(self.wf_module)

    # Test ordering of a numeric column as numeric
    choose_column_as_number('float')

    # If direction is "Select", NOP
    rendered = render_with_direction(0)
    self.assertTrue(rendered.equals(self.table))

    # direction 1 is "Ascending", direction 2 is "Descending"
    for direction, row_order in ((1, [1, 3, 0, 2]), (2, [0, 3, 1, 2])):
        rendered = render_with_direction(direction)
        expected = reorder_table(self.table, row_order)
        self.assertTrue(rendered.equals(expected))

    # Test ordering of a string column as numeric.
    # We only test Ascending here; others have been covered above
    choose_column_as_number('count')
    rendered = render_with_direction(1)
    expected = reorder_table(self.table, [0, 3, 2, 1])
    self.assertTrue(rendered.equals(expected))
def test_nop_with_initial_col_selection(self):
    # When a column is first selected and no scraping is performed,
    # the initial table should be returned
    source_options = "List of URLs|Load from column".split('|')
    load_from_column = source_options.index('Load from column')

    source_pval = get_param_by_id_name('urlsource')
    source_pval.value = load_from_column
    source_pval.save()

    column_pval = get_param_by_id_name('urlcol')
    column_pval.value = 'url'
    column_pval.save()

    rendered = execute_nocache(self.wfmodule)
    self.assertTrue(rendered.equals(self.url_table))
def test_render(self):
    # Stores user code that builds a 5x3 integer DataFrame and checks the
    # printed form of the rendered table.

    # Replace the output with our own data
    code = (
        "columns = ['A','B', 'C']\n"
        "data = np.array([np.arange(5)]*3).T\n"
        "return pd.DataFrame(columns=columns, data=data)"
    )
    self.code_pval.value = code
    self.code_pval.save()

    out = execute_nocache(self.wf_module)

    # str(DataFrame) separates columns with two spaces and indents the header
    # by the width of the index column; a literal with single spaces can never
    # equal pandas' output, so the expected text keeps that padding.
    expected = (
        "   A  B  C\n"
        "0  0  0  0\n"
        "1  1  1  1\n"
        "2  2  2  2\n"
        "3  3  3  3\n"
        "4  4  4  4"
    )
    self.assertEqual(str(out), expected)
def test_render_edit(self):
    # Applies one 'change' edit to a string column, then one to a numeric
    # column, and compares each render with a hand-edited copy of the table.

    def store_edits():
        self.edits_pval.value = json.dumps(self.edits)
        self.edits_pval.save()

    # Perform a single edit on a string
    self.column_pval.value = 'name'
    self.column_pval.save()
    self.edits.append({
        'type': 'change',
        'column': 'name',
        'content': {'fromVal': 'Dolores', 'toVal': 'Wyatt'},
    })
    store_edits()
    rendered = execute_nocache(self.wf_module)

    expected = self.table.copy()
    is_dolores = expected['name'] == 'Dolores'
    expected.loc[is_dolores, 'name'] = 'Wyatt'
    self.assertTrue(rendered.equals(expected))

    # Perform a single edit on a number
    self.column_pval.value = 'count'
    self.column_pval.save()
    # Content are all strings as this is what we get from UI
    self.edits = [{
        'type': 'change',
        'column': 'count',
        'content': {'fromVal': '5', 'toVal': '4'},
    }]
    store_edits()
    rendered = execute_nocache(self.wf_module)

    expected = self.table.copy()
    is_five = expected['count'] == 5
    expected.loc[is_five, 'count'] = 4
    self.assertTrue(rendered.equals(expected))
def test_first_row_is_header(self):
    # With the first-row flag set, the rendered table should use the first
    # fetched row as column names and have one row fewer.
    url = 'http://test.com/tablepage.html'
    self.url_pval.set_value(url)
    self.url_pval.save()
    self.first_row_pval.set_value(True)
    self.first_row_pval.save()

    with mock.patch('pandas.read_html') as readmock:
        readmock.return_value = [mock_csv_table]
        self.press_fetch_button()
        expected_call = mock.call(url, flavor='html5lib')
        self.assertEqual(readmock.call_args, expected_call)

        # Rendered while the patch is active so pandas.read_html stays mocked.
        rendered = execute_nocache(self.wfmodule)
        expected_header = [str(cell) for cell in mock_csv_table.iloc[0, :]]
        self.assertListEqual(list(rendered.columns), expected_header)
        self.assertEqual(len(rendered), len(mock_csv_table) - 1)
def test_spaces_to_underscores(self):
    # column names with spaces should be referenced with underscores
    # in the formula
    underscore_csv = 'Month,The Amount,Name\nJan,10,Alicia Aliciason\nFeb,666,Fred Frederson'
    underscore_table = pd.read_csv(io.StringIO(underscore_csv))

    # Separate workflow whose input is the CSV above, with a formula module.
    workflow = create_testdata_workflow(underscore_csv)
    wfm = load_and_add_module('formula', workflow=workflow)
    get_param_by_id_name('formula_python', wf_module=wfm).set_value('The_Amount*2')
    get_param_by_id_name('syntax', wf_module=wfm).set_value(1)

    rendered = execute_nocache(wfm)

    expected = underscore_table.copy()
    expected['formula output'] = (expected['The Amount'] * 2).astype(object)
    self.assertTrue(rendered.equals(expected))
def test_scrape_table(self):
    # Fetches a table through a mocked pandas.read_html and checks that the
    # fetch stores a data version and records a delta on the workflow.
    url = 'http://test.com/tablepage.html'
    self.url_pval.set_value(url)
    self.url_pval.save()

    # should be no data saved yet, no Deltas on the workflow
    module = self.wfmodule
    self.assertIsNone(module.get_fetched_data_version())
    self.assertIsNone(module.retrieve_fetched_table())
    self.assertIsNone(module.workflow.last_delta)

    with mock.patch('pandas.read_html') as readmock:
        readmock.return_value = [mock_csv_table]
        self.press_fetch_button()
        expected_call = mock.call(url, flavor='html5lib')
        self.assertEqual(readmock.call_args, expected_call)

        # Rendered while the patch is active so pandas.read_html stays mocked.
        rendered = execute_nocache(self.wfmodule)
        self.assertTrue(rendered.equals(mock_csv_table))

    # should create a new data version on the WfModule, and a new delta
    # representing the change
    self.wfmodule.refresh_from_db()
    self.wfmodule.workflow.refresh_from_db()
    self.assertIsNotNone(self.wfmodule.get_fetched_data_version())
    self.assertIsNotNone(self.wfmodule.workflow.last_delta)
def test_count(self):
    # Counts rows per 'Date' bucket for each value of the group parameter.
    # Use out.to_csv() instead of str(out) to ensure rows are output in index
    # order (otherwise variable)
    set_string(self.col_pval, 'Date')

    # First render uses whatever grouping the fixture starts with.
    rendered = execute_nocache(self.wf_module)
    self.assertEqual(
        rendered.to_csv(index=False),
        'Date,count\n2011-01-10,5\n2011-01-15,1\n2016-07-25,1\n',
    )

    # (group parameter value, expected CSV), in the order they are applied
    cases = (
        # 0 = group by seconds
        (0, 'Date,count\n'
            '2011-01-10 00:00:00,1\n'
            '2011-01-10 00:00:01,2\n'
            '2011-01-10 00:01:00,1\n'
            '2011-01-10 01:00:00,1\n'
            '2011-01-15 00:00:00,1\n'
            '2016-07-25 00:00:00,1\n'),
        # 1 = group by minutes
        (1, 'Date,count\n'
            '2011-01-10 00:00,3\n'
            '2011-01-10 00:01,1\n'
            '2011-01-10 01:00,1\n'
            '2011-01-15 00:00,1\n'
            '2016-07-25 00:00,1\n'),
        # 2 = group by hours
        (2, 'Date,count\n'
            '2011-01-10 00:00,4\n'
            '2011-01-10 01:00,1\n'
            '2011-01-15 00:00,1\n'
            '2016-07-25 00:00,1\n'),
        # 4 = group by months
        (4, 'Date,count\n2011-01,6\n2016-07,1\n'),
        # 5 = group by quarters
        (5, 'Date,count\n2011 Q1,6\n2016 Q3,1\n'),
        # 6 = group by years
        (6, 'Date,count\n2011,6\n2016,1\n'),
    )
    for group_value, expected_csv in cases:
        set_integer(self.group_pval, group_value)
        rendered = execute_nocache(self.wf_module)
        self.assertEqual(rendered.to_csv(index=False), expected_csv)
def test_scrape_column(self):
    # Scrapes URLs taken from the 'url' input column, with the real
    # scrape_urls replaced by a stub that fills in canned results.
    source_options = "List|Input column".split('|')
    source_pval = get_param_by_id_name('urlsource')
    source_pval.value = source_options.index('Input column')
    source_pval.save()
    get_param_by_id_name('urlcol').set_value('url')

    # modifies the table in place to add results, just like the real thing
    async def fake_scrape_urls(urls, table):
        for column in ('status', 'html'):
            table[column] = self.scraped_table[column]

    # can't mock datetime.datetime.now with a patch because it's builtin
    # or something, sigh
    URLScraper._mynow = lambda: testnow

    with mock.patch('server.modules.urlscraper.scrape_urls') as scraper:
        # call the mock function instead, the real fn is tested above
        scraper.side_effect = fake_scrape_urls
        self.press_fetch_button()
        rendered = execute_nocache(self.wfmodule)
        self.assertTrue(rendered.equals(self.scraped_table))
def test_initial_nop(self):
    # With no parameters touched, the render must equal self.url_table.
    rendered = execute_nocache(self.wfmodule)
    is_unchanged = rendered.equals(self.url_table)
    self.assertTrue(is_unchanged)
def test_timestamps(self):
    # Counts the 'Date' column of the count_csv_dates fixture and compares
    # the CSV form of the result.
    expected_csv = (
        'Date,count\n'
        '2011-01-10,5\n'
        '2011-01-15,1\n'
        '2016-07-25,1\n'
    )

    set_string(self.csv_data, self.count_csv_dates)
    set_string(self.col_pval, 'Date')
    rendered = execute_nocache(self.wf_module)
    self.wf_module.refresh_from_db()

    self.assertEqual(rendered.to_csv(index=False), expected_csv)
def test_reorder_empty(self):
    # A whitespace-only history value must leave the table unchanged.
    self.history_pval.value = ' '
    self.history_pval.save()

    rendered = execute_nocache(self.wf_module)
    is_unchanged = rendered.equals(self.table)
    self.assertTrue(is_unchanged)
def test_tsv(self):
    # Feeding the count_tsv fixture text must render a table equal to
    # self.table.
    set_string(self.csv_pval, self.count_tsv)
    rendered = execute_nocache(self.wf_module)
    matches_reference = rendered.equals(self.table)
    self.assertTrue(matches_reference)
def test_excel_formula(self):
    # We have custom range handling logic and syntax, so this test exercises
    # many types of ranges

    def set_formula(formula):
        self.excel_pval.set_value(formula)
        self.excel_pval.save()

    def set_result_column(name):
        self.rpval.value = name
        self.rpval.save()

    def render_and_check(status, expected):
        rendered = execute_nocache(self.wfmodule)
        self.wfmodule.refresh_from_db()
        self.assertEqual(self.wfmodule.status, status)
        self.assertTrue(rendered.equals(expected))

    self.syntax_pval.set_value(0)
    self.syntax_pval.save()

    # simple single-column reference;
    # empty result parameter should produce 'result'
    set_formula('=B*2')
    set_result_column('')
    expected = mock_csv_table.copy()
    expected['result'] = (expected['Amount'] * 2).astype(object)
    render_and_check(WfModule.READY, expected)

    # same single-column reference, written to a named output column
    set_formula('=B*2')
    set_result_column('output')
    expected = mock_csv_table.copy()
    expected['output'] = (expected['Amount'] * 2).astype(object)
    render_and_check(WfModule.READY, expected)

    # the same column referenced with a row number gives the same result
    set_formula('=B1*2')
    render_and_check(WfModule.READY, expected)

    # formula with range should grab the right values and compute them;
    # the B1:C1 and B:C1 spellings of the range should still work
    expected['output'] = (expected['Amount'] + expected['Amount2']).astype(object)
    for formula in ('=SUM(B:C)', '=SUM(B1:C1)', '=SUM(B:C1)'):
        set_formula(formula)
        render_and_check(WfModule.READY, expected)

    # text formula
    set_formula('=LEFT(D,5)')
    expected['output'] = expected['Name'].apply(lambda name: name[:5]).astype(object)
    render_and_check(WfModule.READY, expected)

    # Each of these should produce an error, and NOP on error
    failing_formulas = (
        '=SUM B:C',    # bad formula
        '=SUM(B:ZZ)',  # out of range selector
        '=SUM(B0)',    # selector with a 0
    )
    for formula in failing_formulas:
        set_formula(formula)
        render_and_check(WfModule.ERROR, mock_csv_table)
def test_empty(self):
    # No input, no output: empty text must render an empty DataFrame.
    set_string(self.csv_pval, '')
    rendered = execute_nocache(self.wf_module)
    empty_frame = pd.DataFrame()
    self.assertTrue(rendered.equals(empty_frame))