Пример #1
0
    def test_render(self):
        # Column selection: each case stores a comma-separated column list,
        # renders the module, and compares the printed output with the
        # matching slice of mock_csv_table.
        def render_selection(colnames):
            # Persist the parameter value, then render without the cache.
            self.cols_pval.value = colnames
            self.cols_pval.save()
            return execute_nocache(self.wf_module)

        # select a single column
        table = mock_csv_table[['Month']]
        self.assertEqual(str(render_selection('Month')), str(table))

        # select a single column, with stripped whitespace
        self.assertEqual(str(render_selection('Month ')), str(table))

        # reverse column order, should not reverse.
        # The expectation is the explicit two-column selection (previously
        # computed but never used), so the check no longer depends on
        # mock_csv_table consisting of exactly these two columns.
        table = mock_csv_table[['Month', 'Amount']]
        self.assertEqual(str(render_selection('Amount,Month')), str(table))

        # bad column name should just be ignored
        table = mock_csv_table[['Month']]
        self.assertEqual(str(render_selection('Amountxxx,Month')), str(table))
        self.wf_module.refresh_from_db()
        self.assertEqual(self.wf_module.status, WfModule.READY)
Пример #2
0
    def test_render_select(self):
        # Applying a 'select' edit for 'Dolores' drops rows; applying the
        # same edit a second time brings the full table back.
        def toggle_dolores():
            # Append one more 'select' edit, persist the history, re-render.
            self.edits.append({
                'type': 'select',
                'column': 'name',
                'content': {'value': 'Dolores'},
            })
            self.edits_pval.value = json.dumps(self.edits)
            self.edits_pval.save()
            return execute_nocache(self.wf_module)

        self.column_pval.value = 'name'
        self.column_pval.save()

        # First application: a deselection
        rendered = toggle_dolores()
        surviving_rows = [False, True, True, False]
        expected = self.table[surviving_rows]
        # reset to contiguous indices
        expected.index = pd.RangeIndex(len(expected.index))
        self.assertTrue(rendered.equals(expected))

        # Second application on the same value: table should be back to normal
        rendered = toggle_dolores()
        expected = self.table[[True] * 4]
        self.assertTrue(rendered.equals(expected))
Пример #3
0
    def test_python_formula(self):
        # Python-syntax formula: doubles the Amount column into the result
        # column, then checks the default result name and the error path.
        def doubled_amount_in(column):
            # Expected table: the mock data plus an object-dtype doubled column.
            expected = mock_csv_table.copy()
            expected[column] = (expected['Amount'] * 2).astype(object)
            return expected

        def render_and_refresh():
            rendered = execute_nocache(self.wfmodule)
            self.wfmodule.refresh_from_db()
            return rendered

        # set up a formula to double the Amount column
        self.python_pval.set_value('Amount*2')
        self.python_pval.save()
        self.syntax_pval.set_value(1)
        self.syntax_pval.save()
        self.rpval.value = 'output'
        self.rpval.save()
        expected = doubled_amount_in('output')

        rendered = render_and_refresh()
        self.assertEqual(self.wfmodule.status, WfModule.READY)
        self.assertTrue(rendered.equals(expected))

        # empty result parameter should produce 'result'
        self.rpval.set_value('')
        self.rpval.save()
        expected = doubled_amount_in('result')
        rendered = render_and_refresh()
        self.assertEqual(self.wfmodule.status, WfModule.READY)
        self.assertTrue(rendered.equals(expected))

        # formula with missing column name should error
        self.python_pval.set_value('xxx*2')
        self.python_pval.save()
        rendered = render_and_refresh()
        self.assertEqual(self.wfmodule.status, WfModule.ERROR)
        self.assertTrue(rendered.equals(mock_csv_table))  # NOP on error
Пример #4
0
    def test_date_only(self):
        # A column holding only dates, grouped with group value 2, must put
        # the module into the error state with the message below.
        # NOTE(review): 2 presumably means "hours" -- confirm against the
        # module's group-by options.
        set_string(self.csv_data, self.count_csv_dates)
        set_string(self.col_pval, 'Date')
        set_integer(self.group_pval, 2)

        execute_nocache(self.wf_module)
        self.wf_module.refresh_from_db()

        expected_msg = (
            "The column 'Date' only contains date values. "
            "Please group by Day, Month, Quarter or Year."
        )
        self.assertEqual(self.wf_module.status, 'error')
        self.assertEqual(self.wf_module.error_msg, expected_msg)
Пример #5
0
    def test_str_ordering(self):
        # Sorting with dtype 0 (string) on string, numeric and date columns.
        def store(pval, value):
            pval.value = value
            pval.save()

        def assert_rendered_order(row_order):
            # Render through the workflow and compare with self.table
            # rearranged into row_order.
            rendered = execute_nocache(self.wf_module)
            expected = reorder_table(self.table, row_order)
            self.assertTrue(rendered.equals(expected))

        # Ordering of a string column as strings
        store(self.column_pval, 'name')
        store(self.dtype_pval, 0)  # dtype is string

        # Direction 0 is "Select": NOP
        store(self.direction_pval, 0)
        rendered = execute_nocache(self.wf_module)
        self.assertTrue(rendered.equals(self.table))

        # Direction 1 is "Ascending", direction 2 is "Descending"
        for direction, row_order in ((1, [1, 0, 3, 2]), (2, [2, 0, 3, 1])):
            store(self.direction_pval, direction)
            assert_rendered_order(row_order)

        # Ordering of a numeric column as strings.
        # Only Ascending is tested here; the others are covered above.
        store(self.column_pval, 'float')
        store(self.dtype_pval, 0)  # dtype is string
        store(self.direction_pval, 1)
        assert_rendered_order([0, 1, 3, 2])

        # Ordering of a date column as string, calling SortFromTable's
        # render() directly. Again Ascending only.
        sort_params = MockModule({
            'column': 'date',
            'dtype': 0,
            'direction': 1
        })
        rendered = SortFromTable.render(sort_params, self.dates_table.copy())
        expected = reorder_table(self.dates_table, [2, 0, 1, 3])
        self.assertTrue(rendered.equals(expected))
Пример #6
0
    def test_bad_dates(self):
        # Both column values below must leave the module in the ERROR state:
        #   'Amount' -- integers are not dates
        #   'Foo'    -- weird strings are not dates (different error code path)
        for column in ('Amount', 'Foo'):
            set_string(self.col_pval, column)
            execute_nocache(self.wf_module)
            self.wf_module.refresh_from_db()
            self.assertEqual(self.wf_module.status, WfModule.ERROR)
Пример #7
0
    def test_bad_colname(self):
        def render_with_column(name):
            # Store the column name, render, and reload the module's status.
            set_string(self.col_pval, name)
            rendered = execute_nocache(self.wf_module)
            self.wf_module.refresh_from_db()
            return rendered

        # NOP if no column given
        rendered = render_with_column('')
        self.assertEqual(self.wf_module.status, WfModule.READY)
        self.assertFalse(rendered.empty)

        # bad column name should produce error
        render_with_column('hilarious')
        self.assertEqual(self.wf_module.status, WfModule.ERROR)
Пример #8
0
    def test_time_only(self):
        # A column holding only times errors out until a time-based grouping
        # is chosen.
        set_string(self.csv_data, self.count_csv_time)
        set_string(self.col_pval, 'Date')

        execute_nocache(self.wf_module)
        self.wf_module.refresh_from_db()
        expected_msg = (
            "The column 'Date' only contains time values. "
            "Please group by Hour, Minute or Second."
        )
        self.assertEqual(self.wf_module.status, 'error')
        self.assertEqual(self.wf_module.error_msg, expected_msg)

        # Set to hours
        set_integer(self.group_pval, 2)
        rendered = execute_nocache(self.wf_module)
        self.wf_module.refresh_from_db()
        expected_csv = (
            'Date,count\n'
            '00:00,3\n'
            '01:00,1\n'
            '11:00,2\n'
            '12:00,1\n'
        )
        self.assertEqual(rendered.to_csv(index=False), expected_csv)
Пример #9
0
    def test_scrape_list(self):
        # Scrape from a pasted list of URLs and check that the rendered table
        # equals self.scraped_table.
        source_options = "List|Input column".split('|')
        source_pval = get_param_by_id_name('urlsource')
        source_pval.value = source_options.index('List')
        source_pval.save()

        get_param_by_id_name('urllist').set_value('\n'.join([
            'http://a.com/file',
            'https://b.com/file2',
            'c.com/file/dir',  # Removed 'http://' to test the URL-fixing part
        ]))

        # Stand-in for scrape_urls: writes the canned status/html columns
        # into the table it is handed.
        async def mock_scrapeurls(urls, table):
            table['status'] = self.scraped_table['status']
            table['html'] = self.scraped_table['html']

        # Freeze the scraper's clock with patch.object rather than a bare
        # attribute assignment, so URLScraper._mynow is restored when the
        # block exits and the override cannot leak into other tests.
        with mock.patch.object(URLScraper, '_mynow', lambda: testnow), \
                mock.patch('server.modules.urlscraper.scrape_urls') as scraper:
            # call the mock function instead of the real scraper
            scraper.side_effect = mock_scrapeurls

            self.press_fetch_button()
            out = execute_nocache(self.wfmodule)
            self.assertTrue(out.equals(self.scraped_table))
Пример #10
0
 def test_reorder(self):
     # Replay three column moves, listed in chronological order, and check
     # the resulting column order and column contents.
     moves = [
         ('count', 2, 0),  # gives ['count', 'name', 'date', 'float']
         ('name', 1, 3),   # gives ['count', 'date', 'name', 'float']
         ('float', 3, 2),  # gives ['count', 'date', 'float', 'name']
     ]
     history = [
         {'column': column, 'from': src, 'to': dst}
         for column, src, dst in moves
     ]
     self.history_pval.value = json.dumps(history)
     self.history_pval.save()

     rendered = execute_nocache(self.wf_module)

     expected_columns = ['count', 'date', 'float', 'name']
     self.assertEqual(rendered.columns.tolist(), expected_columns)
     # Moving columns must not alter their contents.
     for name in expected_columns:
         self.assertTrue(rendered[name].equals(self.table[name]))
Пример #11
0
 def test_corrupt_reorder(self):
     # If an input column is removed (e.g. via select columns), the whole
     # reorder history becomes incoherent and the module should report
     # an error.
     moves = [
         ('count', 2, 0),             # gives ['count', 'name', 'date', 'float']
         ('nonexistent-name', 1, 3),  # invalid
         ('float', 3, 2),
     ]
     history = [
         {'column': column, 'from': src, 'to': dst}
         for column, src, dst in moves
     ]
     self.history_pval.value = json.dumps(history)
     self.history_pval.save()

     # Only the resulting module status matters; the output is discarded.
     execute_nocache(self.wf_module)
     self.wf_module.refresh_from_db()
     self.assertEqual(self.wf_module.status, WfModule.ERROR)
Пример #12
0
    def test_numeric_ordering(self):
        # Sorting with dtype 1 (number) on the 'float' and 'count' columns.
        def store(pval, value):
            pval.value = value
            pval.save()

        def assert_rendered_order(row_order):
            # Render and compare with self.table rearranged into row_order.
            rendered = execute_nocache(self.wf_module)
            expected = reorder_table(self.table, row_order)
            self.assertTrue(rendered.equals(expected))

        # Ordering of a numeric column as numeric
        store(self.column_pval, 'float')
        store(self.dtype_pval, 1)  # dtype is number

        # Direction 0 is "Select": NOP
        store(self.direction_pval, 0)
        rendered = execute_nocache(self.wf_module)
        self.assertTrue(rendered.equals(self.table))

        # Direction 1 is "Ascending", direction 2 is "Descending"
        for direction, row_order in ((1, [1, 3, 0, 2]), (2, [0, 3, 1, 2])):
            store(self.direction_pval, direction)
            assert_rendered_order(row_order)

        # Ordering of a string column as numeric.
        # Only Ascending is tested here; the others are covered above.
        store(self.column_pval, 'count')
        store(self.dtype_pval, 1)  # dtype is number
        store(self.direction_pval, 1)
        assert_rendered_order([0, 3, 2, 1])
Пример #13
0
 def test_nop_with_initial_col_selection(self):
     # Selecting a URL column without running a scrape must return the
     # initial table unchanged.
     source_options = "List of URLs|Load from column".split('|')
     from_column = source_options.index('Load from column')

     source_pval = get_param_by_id_name('urlsource')
     source_pval.value = from_column
     source_pval.save()

     column_pval = get_param_by_id_name('urlcol')
     column_pval.value = 'url'
     column_pval.save()

     rendered = execute_nocache(self.wfmodule)
     self.assertTrue(rendered.equals(self.url_table))
Пример #14
0
    def test_render(self):
        # Replace the output with our own data: the user code builds a
        # 5x3 frame whose printed form is checked below.
        user_code = '\n'.join([
            "columns = ['A','B', 'C']",
            "data = np.array([np.arange(5)]*3).T",
            "return pd.DataFrame(columns=columns, data=data)",
        ])
        self.code_pval.value = user_code
        self.code_pval.save()

        rendered = execute_nocache(self.wf_module)

        expected_text = '\n'.join([
            "   A  B  C",
            "0  0  0  0",
            "1  1  1  1",
            "2  2  2  2",
            "3  3  3  3",
            "4  4  4  4",
        ])
        self.assertEqual(str(rendered), expected_text)
Пример #15
0
    def test_render_edit(self):
        def choose_column(name):
            self.column_pval.value = name
            self.column_pval.save()

        def render_with_current_edits():
            # Persist self.edits as JSON and render the module.
            self.edits_pval.value = json.dumps(self.edits)
            self.edits_pval.save()
            return execute_nocache(self.wf_module)

        # Perform a single edit on a string
        choose_column('name')
        self.edits.append({
            'type': 'change',
            'column': 'name',
            'content': {'fromVal': 'Dolores', 'toVal': 'Wyatt'},
        })
        rendered = render_with_current_edits()
        expected = self.table.copy()
        expected.loc[expected['name'] == 'Dolores', 'name'] = 'Wyatt'
        self.assertTrue(rendered.equals(expected))

        # Perform a single edit on a number.
        # The edit contents are all strings, as this is what the UI sends.
        choose_column('count')
        self.edits = [{
            'type': 'change',
            'column': 'count',
            'content': {'fromVal': '5', 'toVal': '4'},
        }]
        rendered = render_with_current_edits()
        expected = self.table.copy()
        expected.loc[expected['count'] == 5, 'count'] = 4
        self.assertTrue(rendered.equals(expected))
Пример #16
0
    def test_first_row_is_header(self):
        # With the first-row option set, the first fetched row becomes the
        # column names and the output is one row shorter.
        url = 'http://test.com/tablepage.html'
        for pval, value in ((self.url_pval, url), (self.first_row_pval, True)):
            pval.set_value(value)
            pval.save()

        with mock.patch('pandas.read_html') as readmock:
            readmock.return_value = [mock_csv_table]
            self.press_fetch_button()
            expected_call = mock.call(url, flavor='html5lib')
            self.assertEqual(readmock.call_args, expected_call)

        rendered = execute_nocache(self.wfmodule)
        expected_header = [str(cell) for cell in mock_csv_table.iloc[0, :]]
        self.assertListEqual(list(rendered.columns), expected_header)
        self.assertEqual(len(rendered), len(mock_csv_table) - 1)
Пример #17
0
    def test_spaces_to_underscores(self):
        # column names with spaces should be referenced with underscores in
        # the formula
        underscore_csv = '\n'.join([
            'Month,The Amount,Name',
            'Jan,10,Alicia Aliciason',
            'Feb,666,Fred Frederson',
        ])
        underscore_table = pd.read_csv(io.StringIO(underscore_csv))

        workflow = create_testdata_workflow(underscore_csv)
        wfm = load_and_add_module('formula', workflow=workflow)
        formula_pval = get_param_by_id_name('formula_python', wf_module=wfm)
        formula_pval.set_value('The_Amount*2')
        syntax_pval = get_param_by_id_name('syntax', wf_module=wfm)
        syntax_pval.set_value(1)

        rendered = execute_nocache(wfm)

        # Expected: the source table plus an object-dtype 'formula output'
        # column holding the doubled amounts.
        expected = underscore_table.copy()
        doubled = expected['The Amount'] * 2
        expected['formula output'] = doubled.astype(object)
        self.assertTrue(rendered.equals(expected))
Пример #18
0
    def test_scrape_table(self):
        # Fetching a table page stores a data version on the module and
        # records a delta on its workflow.
        url = 'http://test.com/tablepage.html'
        self.url_pval.set_value(url)
        self.url_pval.save()

        wfm = self.wfmodule

        # should be no data saved yet, no Deltas on the workflow
        self.assertIsNone(wfm.get_fetched_data_version())
        self.assertIsNone(wfm.retrieve_fetched_table())
        self.assertIsNone(wfm.workflow.last_delta)

        with mock.patch('pandas.read_html') as readmock:
            readmock.return_value = [mock_csv_table]
            self.press_fetch_button()
            expected_call = mock.call(url, flavor='html5lib')
            self.assertEqual(readmock.call_args, expected_call)

        rendered = execute_nocache(wfm)
        self.assertTrue(rendered.equals(mock_csv_table))

        # should create a new data version on the WfModule, and a new delta
        # representing the change
        wfm.refresh_from_db()
        wfm.workflow.refresh_from_db()
        self.assertIsNotNone(wfm.get_fetched_data_version())
        self.assertIsNotNone(wfm.workflow.last_delta)
Пример #19
0
    def test_count(self):
        # Count rows per date bucket for each group-by setting.
        # out.to_csv() is compared instead of str(out) so that rows are
        # output in index order (otherwise variable).
        set_string(self.col_pval, 'Date')

        # Before the group-by parameter is touched
        rendered = execute_nocache(self.wf_module)
        self.assertEqual(
            rendered.to_csv(index=False),
            'Date,count\n2011-01-10,5\n2011-01-15,1\n2016-07-25,1\n')

        # (group-by value, expected CSV) pairs, run in this order
        cases = [
            # 0 = group by seconds
            (0,
             'Date,count\n'
             '2011-01-10 00:00:00,1\n'
             '2011-01-10 00:00:01,2\n'
             '2011-01-10 00:01:00,1\n'
             '2011-01-10 01:00:00,1\n'
             '2011-01-15 00:00:00,1\n'
             '2016-07-25 00:00:00,1\n'),
            # 1 = group by minutes
            (1,
             'Date,count\n'
             '2011-01-10 00:00,3\n'
             '2011-01-10 00:01,1\n'
             '2011-01-10 01:00,1\n'
             '2011-01-15 00:00,1\n'
             '2016-07-25 00:00,1\n'),
            # 2 = group by hours
            (2,
             'Date,count\n'
             '2011-01-10 00:00,4\n'
             '2011-01-10 01:00,1\n'
             '2011-01-15 00:00,1\n'
             '2016-07-25 00:00,1\n'),
            # 4 = group by months
            (4, 'Date,count\n2011-01,6\n2016-07,1\n'),
            # 5 = group by quarters
            (5, 'Date,count\n2011 Q1,6\n2016 Q3,1\n'),
            # 6 = group by years
            (6, 'Date,count\n2011,6\n2016,1\n'),
        ]
        for group_by, expected_csv in cases:
            set_integer(self.group_pval, group_by)
            rendered = execute_nocache(self.wf_module)
            self.assertEqual(rendered.to_csv(index=False), expected_csv)
Пример #20
0
    def test_scrape_column(self):
        # Scrape the URLs found in an input column and check that the
        # rendered table equals self.scraped_table.
        source_options = "List|Input column".split('|')
        source_pval = get_param_by_id_name('urlsource')
        source_pval.value = source_options.index('Input column')
        source_pval.save()

        get_param_by_id_name('urlcol').set_value('url')

        # modifies the table in place to add results, just like the real thing
        async def mock_scrapeurls(urls, table):
            table['status'] = self.scraped_table['status']
            table['html'] = self.scraped_table['html']

        # The module's _mynow hook is overridden because, per the original
        # author's note, datetime.datetime.now could not be patched directly.
        # patch.object (instead of a bare attribute assignment) restores
        # URLScraper._mynow on exit, so the frozen clock cannot leak into
        # other tests.
        with mock.patch.object(URLScraper, '_mynow', lambda: testnow), \
                mock.patch('server.modules.urlscraper.scrape_urls') as scraper:
            # call the mock function instead of the real scraper
            scraper.side_effect = mock_scrapeurls

            self.press_fetch_button()
            out = execute_nocache(self.wfmodule)
            self.assertTrue(out.equals(self.scraped_table))
Пример #21
0
 def test_initial_nop(self):
     # With no parameters changed, rendering must return self.url_table.
     rendered = execute_nocache(self.wfmodule)
     unchanged = rendered.equals(self.url_table)
     self.assertTrue(unchanged)
Пример #22
0
 def test_timestamps(self):
     # Counting the 'Date' column of the count_csv_dates fixture yields one
     # row per distinct date.
     set_string(self.csv_data, self.count_csv_dates)
     set_string(self.col_pval, 'Date')

     rendered = execute_nocache(self.wf_module)
     self.wf_module.refresh_from_db()

     expected_csv = (
         'Date,count\n'
         '2011-01-10,5\n'
         '2011-01-15,1\n'
         '2016-07-25,1\n'
     )
     self.assertEqual(rendered.to_csv(index=False), expected_csv)
Пример #23
0
 def test_reorder_empty(self):
     # A history consisting of a single space leaves the table untouched.
     blank_history = ' '
     self.history_pval.value = blank_history
     self.history_pval.save()

     rendered = execute_nocache(self.wf_module)
     self.assertTrue(rendered.equals(self.table))
Пример #24
0
 def test_tsv(self):
     # Feeding the count_tsv fixture must render the same table as
     # self.table.
     set_string(self.csv_pval, self.count_tsv)
     rendered = execute_nocache(self.wf_module)
     matches = rendered.equals(self.table)
     self.assertTrue(matches)
Пример #25
0
    def test_excel_formula(self):
        # We have custom range handling logic and syntax, so this test
        # exercises many types of ranges.
        def set_formula(formula):
            self.excel_pval.set_value(formula)
            self.excel_pval.save()

        def set_result_column(name):
            self.rpval.value = name
            self.rpval.save()

        def assert_render(status, expected):
            # Render, reload the module, then check both status and output.
            rendered = execute_nocache(self.wfmodule)
            self.wfmodule.refresh_from_db()
            self.assertEqual(self.wfmodule.status, status)
            self.assertTrue(rendered.equals(expected))

        self.syntax_pval.set_value(0)
        self.syntax_pval.save()
        expected = mock_csv_table.copy()

        # simple single-column reference;
        # empty result parameter should produce 'result'
        set_formula('=B*2')
        set_result_column('')
        expected['result'] = (expected['Amount'] * 2).astype(object)
        assert_render(WfModule.READY, expected)

        # same single-column reference, written to a named result column
        set_formula('=B*2')
        expected = mock_csv_table.copy()
        set_result_column('output')
        expected['output'] = (expected['Amount'] * 2).astype(object)
        assert_render(WfModule.READY, expected)

        # single-column reference with a row number
        set_formula('=B1*2')
        assert_render(WfModule.READY, expected)

        # formula with range should grab the right values and compute them
        set_formula('=SUM(B:C)')
        summed = expected['Amount'] + expected['Amount2']
        expected['output'] = summed.astype(object)
        assert_render(WfModule.READY, expected)

        # same formula with B1 and C1, then with B and C1, should still work
        for formula in ('=SUM(B1:C1)', '=SUM(B:C1)'):
            set_formula(formula)
            assert_render(WfModule.READY, expected)

        # text formula
        set_formula('=LEFT(D,5)')
        prefixes = expected['Name'].apply(lambda name: name[:5])
        expected['output'] = prefixes.astype(object)
        assert_render(WfModule.READY, expected)

        # Each of these should produce an error, and the render is a NOP:
        #   '=SUM B:C'   -- bad formula
        #   '=SUM(B:ZZ)' -- out of range selector
        #   '=SUM(B0)'   -- selector with a 0
        for formula in ('=SUM B:C', '=SUM(B:ZZ)', '=SUM(B0)'):
            set_formula(formula)
            assert_render(WfModule.ERROR, mock_csv_table)
Пример #26
0
 def test_empty(self):
     # No input, no output: an empty CSV string renders an empty DataFrame.
     set_string(self.csv_pval, '')
     rendered = execute_nocache(self.wf_module)
     nothing = pd.DataFrame()
     self.assertTrue(rendered.equals(nothing))