def get_rows(self, facets=None, sort_by=None, start=0, limit=10):
    """Fetch a window of rows through the 'get-rows' command.

    If ``facets`` is truthy it is installed on ``self.engine`` first.
    ``self.sorting`` is always replaced by this call: with
    ``facet.Sorting(sort_by)`` when ``sort_by`` is given, otherwise with
    an empty ``facet.Sorting([])``. A sorting stored on the project
    beforehand therefore does not survive a call without ``sort_by``.

    ``start`` and ``limit`` are sent as request params and the sorting
    JSON as request data. The response is handed to
    ``self.rows_response_factory`` and that result is returned.
    """
    if facets:
        self.engine.set_facets(facets)

    # No explicit sort order means "no sorting", not "keep the old one".
    sort_criteria = [] if sort_by is None else sort_by
    self.sorting = facet.Sorting(sort_criteria)

    window = {'start': start, 'limit': limit}
    payload = {'sorting': self.sorting.as_json()}
    raw_rows = self.do_json('get-rows', params=window, data=payload)
    return self.rows_response_factory(raw_rows)
def test_web_scraping(self):
    """Walk through the tutorial section "6. Web Scraping".

    Numbers in braces mark the tutorial step being exercised. The steps
    act on the same project in sequence, so their order matters.
    """
    project = self.project

    # {1}, {2}: split the key column, then name and arrange the pieces
    project.split_column('key', separator=':')
    self.assertInResponse('Split 5409 cell(s) in column key')
    renames = (
        ('key 1', 'page', 'Rename column key 1 to page'),
        ('key 2', 'top', 'Rename column key 2 to top'),
    )
    for old_name, new_name, expected in renames:
        project.rename_column(old_name, new_name)
        self.assertInResponse(expected)
    project.move_column('line', 'end')
    self.assertInResponse('Move column line to position 2')

    # {3}: reorder rows numerically by page, then by top
    numeric_order = [
        {'column': column, 'valueType': 'number'}
        for column in ('page', 'top')
    ]
    project.sorting = facet.Sorting(numeric_order)
    project.reorder_rows()
    self.assertInResponse('Reorder rows')
    first_window = project.get_rows(limit=1)
    top_row = first_window.rows[0]
    self.assertEqual(top_row['page'], 1)
    self.assertEqual(top_row['top'], 24)

    # {4}: text filter on the line column
    text_filter = facet.TextFilterFacet('line', 'ahman')
    matches = project.get_rows(text_filter).rows
    self.assertEqual(len(matches), 1)
    self.assertEqual(matches[0]['top'], 106)
    # Change the query on the same facet object and fetch again without
    # passing it a second time.
    text_filter.query = 'alvarez'
    matches = project.get_rows().rows
    self.assertEqual(len(matches), 2)
    self.assertEqual(matches[-1]['top'], 567)
    project.engine.remove_all()

    # {5}: the tutorial says 'line' here, but it means 'top'
    top_range = facet.NumericFacet('top')
    top_range.to = 100
    project.remove_rows(top_range)
    self.assertInResponse('Remove 775 rows')
    top_range.From = 570
    top_range.to = 600
    project.remove_rows(top_range)
    self.assertInResponse('Remove 71 rows')
    top_range.reset()
    remaining = project.get_rows()
    self.assertEqual(remaining.filtered, 4563)

    # {6}: the page value is the number 1, not the string '1'
    project.engine.add_facet(facet.TextFacet('page', 1))

    # {7}: find a row holding a name by skipping the rows with HTML
    page_rows = project.get_rows().rows
    non_html = [row for row in page_rows if '<b>' not in row['line']]
    name_row = non_html[0]
    self.assertTrue('WELLNESS' in name_row['line'])
    self.assertEqual(name_row['top'], 161)
    top_range.From = 20
    top_range.to = 160
    # No facet argument is passed here; presumably the facets already on
    # the engine select the rows to remove (verify against remove_rows).
    project.remove_rows()
    self.assertInResponse('Remove 9 rows')
    project.engine.remove_all()

    # {8}: transform the remaining line cells
    project.text_transform('line', expression=self.filter_expr_1)
    self.assertInResponse('Text transform on 4554 cells in column line')
def reorder_rows(self, sort_by=None):
    """Issue the 'reorder-rows' command with the project's sorting.

    When ``sort_by`` is given, ``self.sorting`` is first replaced with
    ``facet.Sorting(sort_by)``; otherwise the sorting already stored on
    the project is sent. Returns whatever ``self.do_json`` returns.
    """
    if sort_by is not None:
        self.sorting = facet.Sorting(sort_by)

    # NOTE: self.sorting is left untouched after the request. A reset to
    # an empty facet.Sorting() exists here only as disabled code.
    sorting_params = {'sorting': self.sorting.as_json()}
    return self.do_json('reorder-rows', params=sorting_params)
def __init__(self, server, project_id=None):
    """Bind this object to one project on a Refine server.

    ``server`` may be a ``RefineServer`` instance or a string. A string
    is interpreted as follows:

    * if it contains '/project?project=', the part before it is used as
      the server URL and the part after it as the project ID;
    * if it starts with digits that run to the end, it is taken as the
      project ID and a default ``RefineServer()`` is created;
    * anything else is passed to ``RefineServer`` as the server URL.

    Raises ``Exception`` when no project ID is available afterwards.
    Finishes by calling ``self.get_models()``.
    """
    if not isinstance(server, RefineServer):
        url_marker = '/project?project='
        if url_marker in server:
            base_url, project_id = server.split(url_marker)
            server = RefineServer(base_url)
        elif re.match(r'\d+$', server):
            # Just digits => the string is a project ID, not a URL.
            default_server = RefineServer()
            project_id = server
            server = default_server
        else:
            server = RefineServer(server)
    self.server = server

    if not project_id:
        raise Exception('Missing Refine project ID')
    self.project_id = project_id

    self.engine = facet.Engine()
    self.sorting = facet.Sorting()
    self.history_entry = None

    # Placeholders that get_models() fills in; they are set before the
    # call so it starts from a clean state.
    self.key_column = None
    self.has_records = False
    self.columns = None
    self.column_order = {}  # column name -> position in the UI
    self.rows_response_factory = None  # used to parse get_rows() output
    self.get_models()

    # Filled in later by get_reconciliation_services.
    self.recon_services = None