def test_scrape_list(self):
        """Fetch and render a pasted list of URLs with scraping stubbed out.

        `django.utils.timezone.now` is patched to return `testnow` and
        `scrape_urls` is replaced by a stub that copies the 'status' and
        'html' columns from a copy of `simple_result_table`. The rendered
        result must equal a ProcessResult wrapping that copy.
        """
        url_lines = [
            'http://a.com/file',
            'https://b.com/file2',
            'c.com/file/dir',  # no 'http://': exercises the URL-fixing part
        ]
        wf_module = MockWfModule('List', '', '\n'.join(url_lines))

        expected_table = simple_result_table.copy()

        # Stand-in for scrape_urls: writes the result columns into `table`
        # in place (same approach as the column test).
        async def fake_scrape_urls(urls, table):
            for colname in ('status', 'html'):
                table[colname] = expected_table[colname]

        with patch('django.utils.timezone.now') as fake_now, \
                patch('server.modules.urlscraper.scrape_urls') as fake_scrape:
            fake_now.return_value = testnow
            # call the stub instead; the real function is tested elsewhere
            fake_scrape.side_effect = fake_scrape_urls

            fetch(wf_module)
            rendered = ProcessResult.coerce(
                URLScraper.render(wf_module, pd.DataFrame()))
            self.assertEqual(rendered, ProcessResult(expected_table))
    def test_scrape_column(self):
        """Fetch and render URLs taken from the 'url' input column.

        The previous module's cached render result is a one-column table
        built from `self.urls`. `django.utils.timezone.now` is patched to
        return `testnow` and `scrape_urls` is replaced by a stub; the
        rendered result must equal a ProcessResult wrapping a copy of
        `simple_result_table`.
        """
        wf_module = MockWfModule('Input column', 'url')
        expected_table = simple_result_table.copy()

        # Stand-in for scrape_urls: modifies the table in place to add
        # results, just like the real thing.
        async def fake_scrape_urls(urls, table):
            for colname in ('status', 'html'):
                table[colname] = expected_table[colname]

        with patch('django.utils.timezone.now') as fake_now, \
                patch('server.modules.urlscraper.scrape_urls') as fake_scrape:
            fake_now.return_value = testnow
            # call the stub instead; the real function is tested elsewhere
            fake_scrape.side_effect = fake_scrape_urls

            input_table = pd.DataFrame({
                'url': self.urls,
            })
            wf_module.previous = MockWfModule('', '')
            wf_module.previous.cached_render_result = \
                MockCachedRenderResult(input_table)

            fetch(wf_module)
            rendered = ProcessResult.coerce(
                URLScraper.render(wf_module, pd.DataFrame()))
            self.assertEqual(rendered, ProcessResult(expected_table))
Пример #3
0
def render(has_header, table, fetch_result):
    """Render UploadFile and return the coerced, sanitized ProcessResult.

    `has_header` is wrapped in MockParams; `table` and `fetch_result` are
    passed straight through to UploadFile.render().
    """
    params = MockParams(has_header=has_header)
    raw = UploadFile.render(params, table, fetch_result=fetch_result)
    processed = ProcessResult.coerce(raw)
    processed.sanitize_in_place()
    return processed
Пример #4
0
 def test_coerce_dict_with_quickfix_dict(self):
     """Coercing a dict whose quick fix is given as a dict.

     The result must equal a ProcessResult built with the corresponding
     QuickFix object.
     """
     dataframe = DataFrame({'A': [1, 2]})
     quick_fix_as_dict = {
         'text': 'Hi',
         'action': 'prependModule',
         'args': ['texttodate', {'column': 'created_at'}],
     }
     quick_fix = QuickFix(
         'Hi',
         'prependModule',
         ['texttodate', {'column': 'created_at'}],
     )

     result = ProcessResult.coerce({
         'dataframe': dataframe,
         'error': 'an error',
         'json': {'foo': 'bar'},
         'quick_fixes': [quick_fix_as_dict],
     })

     expected = ProcessResult(
         dataframe,
         'an error',
         json={'foo': 'bar'},
         quick_fixes=[quick_fix],
     )
     self.assertEqual(result, expected)
Пример #5
0
    def _call_method(self, method_name: str, *args, **kwargs) -> ProcessResult:
        """
        Call module.method_name(*args, **kwargs) and coerce result.

        Exceptions raised by the called method become error messages of the
        form "<ExcName>: <message> at line <N> of <file>". If the exception
        was raised before any code below this call ran (for instance a
        TypeError from a bad call signature), there is no module frame to
        point at, so the message is just "<ExcName>: <message>".

        On success the result is coerced to a ProcessResult, truncated if
        too big, and sanitized.
        """
        # NOTE(review): a missing attribute raises AttributeError here,
        # outside the try block -- confirm callers only pass known names.
        method = getattr(self.module, method_name)
        try:
            out = method(*args, **kwargs)
        except Exception as e:
            from server.importmodulefromgithub import original_module_lineno
            # Catch exceptions in the module function, and return error
            # message + line number to user
            exc_name = type(e).__name__
            frames = traceback.extract_tb(e.__traceback__)
            if len(frames) > 1:
                # [1] = where the exception occurred in the called code, not
                # the `method(...)` call just above (which is [0])
                frame = frames[1]
                fname = os.path.split(frame[0])[1]
                lineno = original_module_lineno(frame[1])
                error = f'{exc_name}: {str(e)} at line {lineno} of {fname}'
            else:
                # Only our own frame is on the traceback: indexing [1] would
                # raise IndexError from inside the error handler.
                error = f'{exc_name}: {str(e)}'
            return ProcessResult(error=error)

        out = ProcessResult.coerce(out)
        out.truncate_in_place_if_too_big()
        out.sanitize_in_place()
        return out
Пример #6
0
 def test_coerce_dict_bad_quickfix_dict(self):
     """Coercing a dict with a malformed quick-fix dict raises ValueError.

     The quick fix here uses the key 'arguments' (presumably the invalid
     part -- confirm against ProcessResult.coerce).
     """
     bad_quick_fix = {
         'text': 'Hi',
         'action': 'prependModule',
         'arguments': ['texttodate', {'column': 'created_at'}],
     }
     value = {
         'error': 'an error',
         'json': {'foo': 'bar'},
         'quick_fixes': [bad_quick_fix],
     }
     with self.assertRaises(ValueError):
         ProcessResult.coerce(value)
Пример #7
0
 def test_first_row_is_header(self):
     """Render ScrapeTable with first_row_is_header=True.

     The fetch result wraps a copy of `a_table`; the rendered result must
     equal a ProcessResult wrapping a table with columns '1' and '2' and
     the single row (2, 3).
     """
     # TODO make fetch_result _not_ a pd.DataFrame, so we don't lose info
     # when converting types here
     fetched = ProcessResult(pd.DataFrame(a_table.copy()))
     params = P(first_row_is_header=True)
     rendered = ProcessResult.coerce(
         ScrapeTable.render(params, pd.DataFrame(), fetch_result=fetched))
     expected_table = pd.DataFrame({'1': [2], '2': [3]})
     self.assertEqual(rendered,
                      ProcessResult(pd.DataFrame(expected_table)))
Пример #8
0
    def test_render_empty_without_columns(self):
        """Render Twitter from a fetch result holding a zero-column table.

        The rendered dataframe must match the empty slice
        `mock_tweet_table[0:0]`.
        """
        # An empty table might be stored as zero-column. This is a bug, but
        # we must handle it because we have actual data like this. We want
        # to output all the same columns as a tweet table.
        params = P(querytype=1, query='cat')
        empty_fetch = ProcessResult(pd.DataFrame())
        rendered = ProcessResult.coerce(
            Twitter.render(params, pd.DataFrame(), fetch_result=empty_fetch))

        assert_frame_equal(rendered.dataframe, mock_tweet_table[0:0])
Пример #9
0
def _module_dispatch_render_static(dispatch, params, table, fetch_result):
    """Call dispatch.render() and return a post-processed ProcessResult.

    Any exception from render() is printed with its traceback and turned
    into an 'Internal error: ...' result. The result is then coerced,
    truncated if too big, and sanitized.
    """
    try:
        raw = dispatch.render(params, table, fetch_result=fetch_result)
    except Exception as err:
        traceback.print_exc()
        raw = ProcessResult(error=f'Internal error: {err}')

    processed = ProcessResult.coerce(raw)
    processed.truncate_in_place_if_too_big()
    processed.sanitize_in_place()
    return processed
Пример #10
0
def render(wf_module):
    """Render ScrapeTable for `wf_module` on an empty input table.

    Uses `wf_module.fetch_result` when that attribute exists, otherwise
    None, and returns the coerced ProcessResult.
    """
    fetch_result = getattr(wf_module, 'fetch_result', None)
    raw = ScrapeTable.render(wf_module.get_params(),
                             pd.DataFrame(),
                             fetch_result=fetch_result)
    return ProcessResult.coerce(raw)
Пример #11
0
    def _test(self, table: pd.DataFrame, params: Dict[str, Any]=None,
              expected_table: pd.DataFrame=None,
              expected_error: str=''):
        """
        Render Formula on `table` and assert error and dataframe match.

        `params` are passed to P() as keyword arguments; None means no
        parameters. `expected_table=None` means an empty DataFrame.

        Both defaults are created per call rather than in the signature: the
        expected table is wrapped in a ProcessResult that is sanitized in
        place, so a single shared default instance could carry changes from
        one call to the next.
        """
        if params is None:
            params = {}
        if expected_table is None:
            expected_table = pd.DataFrame()

        result = ProcessResult.coerce(Formula.render(P(**params), table))
        result.sanitize_in_place()

        expected = ProcessResult(expected_table, expected_error)
        expected.sanitize_in_place()

        self.assertEqual(result.error, expected.error)
        assert_frame_equal(result.dataframe, expected.dataframe)
Пример #12
0
def test_render(in_table, patch_json, out_table=None, out_error=''):
    """
    Render EditCells on `in_table` and assert error and dataframe match.

    `patch_json` is passed to MockParams as `celledits`. sanitize_dataframe()
    is called on `in_table` before rendering. `out_table=None` means an
    empty DataFrame.

    The default table is created per call rather than in the signature: it
    is wrapped in a ProcessResult that is sanitized in place, so a single
    shared default instance could carry changes from one call to the next.
    """
    if out_table is None:
        out_table = pd.DataFrame()

    sanitize_dataframe(in_table)

    result = EditCells.render(MockParams(celledits=patch_json), in_table)
    result = ProcessResult.coerce(result)
    result.sanitize_in_place()

    expected = ProcessResult(out_table, out_error)
    expected.sanitize_in_place()

    assert result.error == expected.error
    assert_frame_equal(result.dataframe, expected.dataframe)
Пример #13
0
def test_render(in_table, patch_json, out_table=None,
                out_error=''):
    """
    Render EditCells on `in_table` and assert error and dataframe match.

    `patch_json` is wrapped in a MockWfModule. sanitize_dataframe() is
    called on `in_table` before rendering. `out_table=None` means an empty
    DataFrame.

    The default table is created per call rather than in the signature: it
    is wrapped in a ProcessResult that is sanitized in place, so a single
    shared default instance could carry changes from one call to the next.
    """
    if out_table is None:
        out_table = pd.DataFrame()

    wfm = MockWfModule(patch_json)
    sanitize_dataframe(in_table)

    result = ProcessResult.coerce(EditCells.render(wfm, in_table))
    result.sanitize_in_place()

    expected = ProcessResult(out_table, out_error)
    expected.sanitize_in_place()

    assert result.error == expected.error
    assert_frame_equal(result.dataframe, expected.dataframe)
Пример #14
0
    def _test_refine_spec_apply(self, in_table: pd.DataFrame, column: str,
                                spec: RefineSpec,
                                expected_out: pd.DataFrame=None,
                                expected_error: str='') -> None:
        """
        Apply `spec` to `column` of `in_table` and assert the output.

        `expected_out=None` means an empty DataFrame. The default is created
        per call rather than in the signature: it is wrapped in a
        ProcessResult that is sanitized in place, so a single shared default
        instance could carry changes from one call to the next.
        """
        if expected_out is None:
            expected_out = pd.DataFrame()

        result = ProcessResult.coerce(spec.apply(in_table, column))
        # Sanitize result+expected, so if sanitize changes these tests may
        # break (which is what we want).
        result.sanitize_in_place()

        expected = ProcessResult(expected_out, expected_error)
        expected.sanitize_in_place()

        self.assertEqual(result.error, expected.error)
        assert_frame_equal(result.dataframe, expected.dataframe)
Пример #15
0
def render(reorder_history_json, table):
    """Render ReorderFromTable on a copy of `table`; return the coerced result.

    `reorder_history_json` is wrapped in a MockWfModule.
    """
    wf_module = MockWfModule(reorder_history_json)
    raw = ReorderFromTable.render(wf_module, table.copy())
    return ProcessResult.coerce(raw)
def render(wf_module, table):
    """Render CountByDate and return the coerced ProcessResult."""
    raw = CountByDate.render(wf_module, table)
    return ProcessResult.coerce(raw)
Пример #17
0
def render(wf_module, table):
    """Render DuplicateColumn and return the coerced, sanitized result."""
    processed = ProcessResult.coerce(DuplicateColumn.render(wf_module, table))
    # important: duplicate makes colname conflicts
    processed.sanitize_in_place()
    return processed
Пример #18
0
 def test_coerce_tuple_dataframe_none_dict(self):
     """(dataframe, None, dict) coerces to that table, '' error and JSON."""
     table = DataFrame({'foo': ['bar']})
     coerced = ProcessResult.coerce((table, None, {'a': 'b'}))
     self.assertEqual(coerced, ProcessResult(table, '', json={'a': 'b'}))
Пример #19
0
 def test_coerce_tuple_dataframe_str(self):
     """(dataframe, str) coerces to that table with the string as error."""
     table = DataFrame({'foo': ['bar']})
     coerced = ProcessResult.coerce((table, 'hi'))
     self.assertEqual(coerced, ProcessResult(dataframe=table, error='hi'))
Пример #20
0
 def test_coerce_tuple_none_str_dict(self):
     """(None, str, dict) coerces to a result with that error and JSON."""
     coerced = ProcessResult.coerce((None, 'hi', {'a': 'b'}))
     self.assertEqual(coerced, ProcessResult(error='hi', json={'a': 'b'}))
Пример #21
0
 def test_coerce_tuple_dataframe_none_none(self):
     """(dataframe, None, None) coerces to a result holding only the table."""
     table = DataFrame({'foo': ['bar']})
     coerced = ProcessResult.coerce((table, None, None))
     self.assertEqual(coerced, ProcessResult(table))
Пример #22
0
def render(colnames, table):
    """Render DuplicateColumn for `colnames`; return the sanitized result.

    `colnames` is wrapped in MockParams.
    """
    params = MockParams(colnames=colnames)
    processed = ProcessResult.coerce(DuplicateColumn.render(params, table))
    # important: duplicate makes colname conflicts
    processed.sanitize_in_place()
    return processed
Пример #23
0
def render(params, table):
    """Render CountByDate and return the coerced ProcessResult."""
    raw = CountByDate.render(params, table)
    return ProcessResult.coerce(raw)
Пример #24
0
 def test_coerce_tuple_none_none_dict(self):
     """(None, None, dict) coerces to a result holding only the JSON."""
     coerced = ProcessResult.coerce((None, None, {'a': 'b'}))
     self.assertEqual(coerced, ProcessResult(json={'a': 'b'}))
Пример #25
0
 def test_coerce_tuple_none_str_none(self):
     """(None, str, None) coerces to a result holding only the error."""
     coerced = ProcessResult.coerce((None, 'hi', None))
     self.assertEqual(coerced, ProcessResult(error='hi'))
Пример #26
0
 def test_coerce_tuple_none_none_none(self):
     """(None, None, None) coerces to a default-constructed ProcessResult."""
     coerced = ProcessResult.coerce((None, None, None))
     self.assertEqual(coerced, ProcessResult())
Пример #27
0
 def test_coerce_bad_tuple(self):
     """Coercing a 4-tuple must yield a result that reports an error."""
     result = ProcessResult.coerce(('foo', 'bar', 'baz', 'moo'))
     # assertTrue rather than assertIsNotNone: the other coerce tests treat
     # a None error as equal to '', so "is not None" would also accept a
     # result with no error at all.
     self.assertTrue(result.error)
Пример #28
0
def _module_dispatch_render_static(dispatch, wf_module, table):
    """Call dispatch.render() and return a post-processed ProcessResult.

    The render output is coerced, truncated if too big, and sanitized.
    Exceptions from render() are not caught here.
    """
    processed = ProcessResult.coerce(dispatch.render(wf_module, table))
    processed.truncate_in_place_if_too_big()
    processed.sanitize_in_place()
    return processed
Пример #29
0
 def test_coerce_2tuple_no_dataframe(self):
     """Coercing a 2-tuple of two strings must yield an error result."""
     result = ProcessResult.coerce(('foo', 'bar'))
     # assertTrue rather than assertIsNotNone: the other coerce tests treat
     # a None error as equal to '', so "is not None" would also accept a
     # result with no error at all.
     self.assertTrue(result.error)
Пример #30
0
def render(reorder_history, table):
    """Render ReorderFromTable on a copy of `table`; return the coerced result.

    `reorder_history` is wrapped in MockParams.
    """
    raw = ReorderFromTable.render(
        MockParams(reorder_history=reorder_history),
        table.copy(),
    )
    return ProcessResult.coerce(raw)