def _get_df_and_columns(
    workflow: Workflow,
    pk: int,
) -> Optional[Tuple[DataFrame, List, View]]:
    """Get the DF and the columns to process.

    :param workflow: Workflow object
    :param pk: Optional id for a view
    :return: Tuple (DataFrame, list of columns, view), or None on error
    """
    # If a view is given, filter the columns
    view = None
    if pk:
        view = workflow.views.filter(pk=pk).first()
        if not view:
            # View not found. Caller should redirect to workflow detail.
            return None
        columns_to_view = view.columns.filter(is_key=False)

        df = load_table(
            workflow.get_data_frame_table_name(),
            [col.name for col in columns_to_view],
            view.formula)
    else:
        # No view given, fetch the entire data frame
        columns_to_view = workflow.columns.filter(is_key=False)
        df = load_table(
            workflow.get_data_frame_table_name(),
            [col.name for col in columns_to_view])

    return df, columns_to_view, view
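# Hedged usage sketch (not in the original source): inside a Django view, a
# caller would unpack the tuple and treat None as "view not found". The names
# `workflow` and `view_pk` are assumed to be available in the caller.
def example_caller(workflow, view_pk):
    result = _get_df_and_columns(workflow, view_pk)
    if result is None:
        return None  # the real caller would redirect to workflow detail
    df, columns_to_view, view = result
    return df.shape  # e.g. operate on the filtered frame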
def test_table_pandas_merge_to_outer_NaN(self):
    # Get the only workflow in the fixture
    workflow = Workflow.objects.all()[0]
    age = workflow.columns.filter(name='age')[0]
    age.is_key = False
    age.save()
    email = workflow.columns.filter(name='email')[0]
    email.is_key = False
    email.save()

    # Drop the column with booleans because the data type is lost
    workflow_delete_column(
        workflow,
        workflow.columns.get(name='registered'))

    # Build the source data frame
    r_df = pd.DataFrame(self.src_df2)

    # Load the df from the db
    df = load_table(workflow.get_data_frame_table_name())
    new_df = pd.merge(df, r_df, how="outer", left_on="sid", right_on="sid")

    # Get the data through the API
    response = self.client.put(
        reverse('table:api_pmerge', kwargs={'wid': workflow.id}),
        {
            "src_df": df_to_string(r_df),
            "how": "outer",
            "left_on": "sid",
            "right_on": "sid",
        },
        format='json')

    # Get the new workflow
    workflow = Workflow.objects.all()[0]

    # The outer merge adds one row to the initial DF
    self.assertEqual(workflow.nrows, 4)
    self.assertEqual(workflow.ncols, 8)

    # Load the df from the db
    df = load_table(workflow.get_data_frame_table_name())

    # Compare both elements and check wf df consistency
    self.compare_tables(df, new_df)

    # Check for df/wf consistency
    self.assertTrue(check_wf_df(workflow))
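# Self-contained pandas illustration (toy data, not from the fixture) of why
# the outer merge above grows the table to four rows: keys present in only
# one side are kept, with NaN filling the missing values.
import pandas as pd

dst = pd.DataFrame({'sid': [1, 2, 3], 'age': [10.0, 20.0, 30.0]})
src = pd.DataFrame({'sid': [3, 4], 'newcol': ['a', 'b']})
merged = pd.merge(dst, src, how='outer', left_on='sid', right_on='sid')
assert merged.shape[0] == 4             # sid == 4 only exists in src but survives
assert merged['age'].isna().sum() == 1  # the new row has no age value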
def test_table_pandas_update(self):
    # Get the only workflow in the fixture
    workflow = Workflow.objects.all()[0]

    # Transform the new table into a data frame
    r_df = pd.DataFrame(self.new_table)
    r_df = detect_datetime_columns(r_df)

    # Upload a new table
    response = self.client.put(
        reverse('table:api_pops', kwargs={'wid': workflow.id}),
        {'data_frame': df_to_string(r_df)},
        format='json')

    # Refresh wflow (has been updated)
    workflow = Workflow.objects.get(id=workflow.id)

    # Load the df from the db
    df = load_table(workflow.get_data_frame_table_name())

    # Compare both elements
    self.compare_tables(r_df, df)

    # Check that the rest of the information is correct
    workflow = Workflow.objects.get(id=workflow.id)
    self.assertTrue(check_wf_df(workflow))
def test_table_json_create(self):
    # Create a second workflow
    response = self.client.post(
        reverse('workflow:api_workflows'),
        {'name': test.wflow_name + '2', 'attributes': {'one': 'two'}},
        format='json')

    # Get the newly created workflow
    workflow = Workflow.objects.get(id=response.data['id'])

    # Upload the table
    response = self.client.post(
        reverse('table:api_ops', kwargs={'wid': workflow.id}),
        {'data_frame': self.new_table},
        format='json')

    # Refresh wflow (has been updated)
    workflow = Workflow.objects.get(id=workflow.id)

    # Load the df from the db
    df = load_table(workflow.get_data_frame_table_name())

    # Transform new table into data frame
    r_df = pd.DataFrame(self.new_table)
    r_df = detect_datetime_columns(r_df)

    # Compare both elements
    self.compare_tables(r_df, df)

    # Check that the rest of the information is correct
    self.assertTrue(check_wf_df(workflow))
def test_table_JSON_merge_to_left(self):
    # Get the only workflow in the fixture
    workflow = models.Workflow.objects.all()[0]
    age = workflow.columns.filter(name='age')[0]
    age.is_key = False
    age.save()
    email = workflow.columns.filter(name='email')[0]
    email.is_key = False
    email.save()

    # Get the data through the API
    self.client.put(
        reverse('table:api_merge', kwargs={'wid': workflow.id}),
        {
            "src_df": self.src_df,
            "how": "left",
            "left_on": "sid",
            "right_on": "sid",
        },
        format='json')

    # Get the new workflow
    workflow = models.Workflow.objects.all()[0]

    # Result should have three rows, as in the initial DF
    self.assertEqual(workflow.nrows, 3)
    dframe = pandas.load_table(workflow.get_data_frame_table_name())
    self.assertEqual(
        dframe[dframe['sid'] == 1]['newcol'].values[0],
        self.src_df['newcol'][0])
def test_table_pandas_create(self):
    # Create a second workflow
    response = self.client.post(
        reverse('workflow:api_workflows'),
        {'name': tests.wflow_name + '2', 'attributes': {'one': 'two'}},
        format='json')

    # Get the newly created workflow
    workflow = models.Workflow.objects.get(id=response.data['id'])

    # Transform new table into a data frame
    r_df = pd.DataFrame(self.new_table)
    r_df = pandas.detect_datetime_columns(r_df)

    # Upload the table
    self.client.post(
        reverse('table:api_pops', kwargs={'wid': workflow.id}),
        {'data_frame': serializers.df_to_string(r_df)},
        format='json')

    # Refresh wflow (has been updated)
    workflow = models.Workflow.objects.get(id=workflow.id)

    # Load the df from the db
    dframe = pandas.load_table(workflow.get_data_frame_table_name())

    # Compare both elements
    self.compare_tables(r_df, dframe)
def workflow_restrict_column(column: Column) -> Optional[str]:
    """Set category of the column to the existing set of values.

    Given a workflow and a column, modifies the column so that only the
    values already present are allowed for future updates.

    :param column: Column object to restrict
    :return: String with error or None if correct
    """
    # Load the data frame
    data_frame = load_table(column.workflow.get_data_frame_table_name())

    cat_values = set(data_frame[column.name].dropna())
    if not cat_values:
        # Column has no meaningful values. Nothing to do.
        return _('Column has no meaningful values')

    # Set categories
    column.set_categories(list(cat_values))
    column.save()

    # Re-evaluate the operands in the workflow
    column.workflow.set_query_builder_ops()
    column.workflow.save()

    # Correct execution
    return None
def restrict_column(user, column: models.Column):
    """Set category of the column to the existing set of values.

    Given a workflow and a column, modifies the column so that only the
    values already present are allowed for future updates.

    :param user: User executing this action
    :param column: Column object to restrict
    :return: Nothing. Raises OnTaskColumnCategoryValueError if the column
        has no meaningful values.
    """
    # Load the data frame
    data_frame = pandas.load_table(
        column.workflow.get_data_frame_table_name())

    cat_values = set(data_frame[column.name].dropna())
    if not cat_values:
        raise errors.OnTaskColumnCategoryValueError(
            message=_('The column has no meaningful values'))

    # Set categories
    column.set_categories(list(cat_values))

    # Log the event and re-evaluate the operands in the workflow
    column.log(user, models.Log.COLUMN_RESTRICT)
    column.workflow.set_query_builder_ops()
    column.workflow.save()
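# Runnable illustration (assumed toy data) of how both restrict functions
# derive the category set: the distinct non-null values of the column.
import pandas as pd

frame = pd.DataFrame({'grade': ['A', 'B', None, 'A']})
cat_values = set(frame['grade'].dropna())
assert cat_values == {'A', 'B'}  # these become the only allowed values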
def _get_column_visualization_items(
    workflow: Workflow,
    column: Column,
):
    """Get the visualization items (scripts and HTML) for a column.

    :param workflow: Workflow being processed
    :param column: Column requested
    :return: Tuple stat_data with descriptive stats, visualization scripts
        and visualization HTML
    """
    # Get the dataframe
    df = load_table(workflow.get_data_frame_table_name(), [column.name])

    # Extract the data to show at the top of the page
    stat_data = get_column_statistics(df[column.name])

    vis_scripts = []
    visualizations = _get_column_visualisations(
        column,
        df[[column.name]],
        vis_scripts,
        context={
            'style': 'width:100%; height:100%;display:inline-block;'},
    )

    return stat_data, vis_scripts, visualizations
def test_table_pandas_merge_to_left(self):
    # Get the only workflow in the fixture
    workflow = models.Workflow.objects.all()[0]

    # Build the source data frame
    r_df = pd.DataFrame(self.src_df)

    # Get the data through the API
    self.client.put(
        reverse('table:api_pmerge', kwargs={'wid': workflow.id}),
        {
            "src_df": serializers.df_to_string(r_df),
            "how": "left",
            "left_on": "sid",
            "right_on": "sid",
        },
        format='json')

    # Get the new workflow
    workflow = models.Workflow.objects.all()[0]

    # Result should have three rows, as in the initial DF
    self.assertEqual(workflow.nrows, 3)
    dframe = pandas.load_table(workflow.get_data_frame_table_name())
    self.assertEqual(
        dframe[dframe['sid'] == 1]['newcol'].values[0],
        self.src_df['newcol'][0])
def test_action_1_preview(self):
    """Test that the first action renders correctly."""
    self.workflow = Workflow.objects.all().first()
    self.user = get_user_model().objects.filter(
        email='*****@*****.**').first()
    attribute_value = list(self.workflow.attributes.values())[0]
    df = load_table(self.workflow.get_data_frame_table_name())

    for action_name in [self.action_name1, self.action_name2]:
        action = self.workflow.actions.get(name=action_name)
        for index, row in df.iterrows():
            condition_value = row['!#$%&()*+,-./\\:;<=>?@[]^_`{|}~ 1'] < 12.5

            # JSON request to obtain preview
            resp = self.get_response(
                'action:preview',
                url_params={'pk': action.id, 'idx': index + 1},
                is_ajax=True)
            self.assertTrue(status.is_success(resp.status_code))
            self.assertTrue(attribute_value in str(resp.content))
            self.assertEqual(
                'Condition 1' in str(resp.content), condition_value)
            self.assertEqual(
                'Condition 2' in str(resp.content), condition_value)
            self.assertEqual(
                'Condition 3' in str(resp.content), condition_value)
            self.assertEqual(
                'Condition 4' in str(resp.content), condition_value)
def test_02_second_plugin(self):
    # Login
    self.login('*****@*****.**')

    # Go to the workflow page
    self.access_workflow_from_home_page('Plugin test')

    # Open the transform page
    self.go_to_transform()

    # Click on the second plugin
    element = self.search_table_row_by_string(
        'transform-table', 1, 'Test Plugin 2 Name')
    element.find_element_by_link_text('Test Plugin 2 Name').click()
    WebDriverWait(self.selenium, 10).until(
        EC.presence_of_element_located((By.NAME, 'csrfmiddlewaretoken')))

    # Spinner not visible
    WebDriverWait(self.selenium, 10).until_not(
        EC.visibility_of_element_located((By.ID, 'div-spinner')))

    # Provide the execution data (input columns and merge key)
    self.selenium.find_element_by_id(
        "id____ontask___upload_input_0").click()
    Select(
        self.selenium.find_element_by_id(
            "id____ontask___upload_input_0")).select_by_visible_text('A1')
    self.selenium.find_element_by_id(
        "id____ontask___upload_input_1").click()
    Select(
        self.selenium.find_element_by_id(
            "id____ontask___upload_input_1")).select_by_visible_text('A2')

    # Merge key
    self.select_plugin_output_tab()
    self.selenium.find_element_by_id("id_merge_key").click()
    Select(self.selenium.find_element_by_id(
        "id_merge_key")).select_by_visible_text("email")

    # Submit the execution
    self.selenium.find_element_by_name("Submit").click()
    WebDriverWait(self.selenium, 10).until(
        EC.text_to_be_present_in_element(
            (By.XPATH, "//body/div/h1"),
            'Plugin scheduled for execution'))

    # There should be a message on that page
    self.assertTrue(
        self.selenium.find_element_by_xpath(
            "//div[@id='plugin-run-done']/div/a").text.startswith(
            'You may check the status in log number'))

    # Assert the content of the dataframe
    wflow = models.Workflow.objects.get(name='Plugin test')
    df = pandas.load_table(wflow.get_data_frame_table_name())
    self.assertTrue('RESULT 3' in set(df.columns))
    self.assertTrue('RESULT 4' in set(df.columns))
    self.assertTrue(df['RESULT 3'].equals(df['A1'] + df['A2']))
    self.assertTrue(df['RESULT 4'].equals(df['A1'] - df['A2']))

    # End of session
    self.logout()
def put(
    self,
    request: HttpRequest,
    wid: int,
    format=None,
    workflow: Optional[Workflow] = None,
) -> HttpResponse:
    """Process the put request."""
    # Get the dst_df
    dst_df = load_table(workflow.get_data_frame_table_name())

    serializer = self.serializer_class(data=request.data)
    if not serializer.is_valid():
        return Response(
            serializer.errors,
            status=status.HTTP_400_BAD_REQUEST)

    # Check that the parameters are correct
    how = serializer.validated_data['how']
    if how not in ['left', 'right', 'outer', 'inner']:
        raise APIException(
            _('how must be one of left, right, outer or inner'))

    left_on = serializer.validated_data['left_on']
    if not is_unique_column(dst_df[left_on]):
        raise APIException(
            _('column {0} does not contain a unique key.').format(left_on))

    # Operation has been accepted by the serializer
    src_df = serializer.validated_data['src_df']

    right_on = serializer.validated_data['right_on']
    if right_on not in list(src_df.columns):
        raise APIException(
            _('column {0} not found in data frame').format(right_on))

    if not is_unique_column(src_df[right_on]):
        raise APIException(
            _('column {0} does not contain a unique key.').format(right_on))

    merge_info = {
        'how_merge': how,
        'dst_selected_key': left_on,
        'src_selected_key': right_on,
        'initial_column_names': list(src_df.columns),
        'rename_column_names': list(src_df.columns),
        'columns_to_upload': [True] * len(list(src_df.columns)),
    }

    # Ready to perform the MERGE
    try:
        perform_dataframe_upload_merge(workflow, dst_df, src_df, merge_info)
    except Exception as exc:
        raise APIException(
            _('Unable to perform merge operation: {0}').format(str(exc)))

    # Merge went through.
    return Response(serializer.data, status=status.HTTP_201_CREATED)
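# `is_unique_column` is project code that is not shown here. A hedged,
# self-contained sketch of the check it plausibly performs for a merge key:
# no missing values and no duplicates.
import pandas as pd

def is_unique_column_sketch(column: pd.Series) -> bool:
    """Return True when the series can serve as a merge key."""
    return bool(column.notna().all() and column.is_unique)

assert is_unique_column_sketch(pd.Series([1, 2, 3]))
assert not is_unique_column_sketch(pd.Series([1, 1, 2]))
assert not is_unique_column_sketch(pd.Series([1, None, 2]))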
def upload_step_four(
    request: http.HttpRequest,
    workflow: models.Workflow,
    upload_data: Dict,
) -> http.HttpResponse:
    """Perform the merge operation.

    :param request: Received request
    :param workflow: Workflow being processed
    :param upload_data: Dictionary with all the information about the merge.
    :return: HttpResponse
    """
    # Get the dataframes to merge
    try:
        dst_df = pandas.load_table(workflow.get_data_frame_table_name())
        src_df = pandas.load_table(workflow.get_upload_table_name())
    except Exception:
        return render(
            request,
            'error.html',
            {'message': _('Exception while loading data frame')})

    try:
        pandas.perform_dataframe_upload_merge(
            workflow, dst_df, src_df, upload_data)
    except Exception as exc:
        # Nuke the temporary table
        sql.delete_table(workflow.get_upload_table_name())
        col_info = workflow.get_column_info()
        workflow.log(
            request.user,
            models.Log.WORKFLOW_DATA_FAILEDMERGE,
            column_names=col_info[0],
            column_types=col_info[1],
            column_unique=col_info[2],
            error_message=str(exc))
        messages.error(request, _('Merge operation failed. ') + str(exc))
        return redirect(reverse('table:display'))

    col_info = workflow.get_column_info()
    workflow.log(
        request.user,
        upload_data['log_upload'],
        column_names=col_info[0],
        column_types=col_info[1],
        column_unique=col_info[2])
    store_workflow_in_session(request.session, workflow)
    request.session.pop('upload_data', None)

    return redirect(reverse('table:display'))
def test_merge_inner(self):
    # Get the workflow
    self.workflow = models.Workflow.objects.all()[0]

    # Parse the source data frame
    df_dst, df_src = self.parse_data_frames()

    self.merge_info['how_merge'] = 'inner'

    result = pandas.perform_dataframe_upload_merge(
        self.workflow, df_dst, df_src, self.merge_info)

    # Load the workflow data frame again
    pandas.load_table(self.workflow.get_data_frame_table_name())

    # Result must be correct (None)
    self.assertEqual(result, None)
def column_restrict_values(
    request: HttpRequest,
    pk: int,
    workflow: Optional[Workflow] = None,
    column: Optional[Column] = None,
) -> JsonResponse:
    """Restrict future values in this column to one of those already present.

    :param request: HTTP request
    :param pk: ID of the column to restrict. The workflow element is taken
        from the session.
    :return: Render the restrict column form
    """
    # Key columns cannot be restricted
    if column.is_key:
        messages.error(request, _('You cannot restrict a key column'))
        return JsonResponse({'html_redirect': reverse('workflow:detail')})

    # Get the name of the column to restrict
    context = {'pk': pk, 'cname': column.name}

    # Get the values from the data frame
    df = load_table(workflow.get_data_frame_table_name())
    context['values'] = ', '.join(set(df[column.name]))

    if request.method == 'POST':
        # Proceed with restricting the column
        error_txt = workflow_restrict_column(column)

        if isinstance(error_txt, str):
            # Something went wrong. Show it
            messages.error(request, error_txt)

        # Log the event
        Log.objects.register(
            request.user,
            Log.COLUMN_RESTRICT,
            workflow,
            {
                'id': workflow.id,
                'name': workflow.name,
                'column_name': column.name,
                'values': context['values']})

        return JsonResponse({'html_redirect': reverse('workflow:detail')})

    return JsonResponse({
        'html_form': render_to_string(
            'workflow/includes/partial_column_restrict.html',
            context,
            request=request),
    })
def parse_data_frames(self):
    # Parse the two CSV strings and return them as data frames
    if self.workflow:
        # Get the workflow data frame
        df_dst = load_table(self.workflow.get_data_frame_table_name())
    else:
        df_dst = load_df_from_csvfile(io.StringIO(self.csv1), 0, 0)

    df_src = load_df_from_csvfile(io.StringIO(self.csv2), 0, 0)
    store_table(df_src, 'TEMPORARY_TABLE')
    df_src = load_table('TEMPORARY_TABLE')

    # Fix the merge_info fields.
    self.merge_info['initial_column_names'] = list(df_src.columns)
    self.merge_info['rename_column_names'] = list(df_src.columns)
    self.merge_info['columns_to_upload'] = list(df_src.columns)

    return df_dst, df_src
def check_wf_df(workflow: models.Workflow) -> bool:
    """Check consistency between Workflow info and the data frame.

    Check the consistency between the information stored in the workflow
    and the structure of the underlying dataframe.

    :param workflow: Workflow object
    :return: Boolean stating the result of the check. True: Correct.
    """
    # Get the df
    df = pandas.load_table(workflow.get_data_frame_table_name())

    # Set values in case there is no df
    if df is not None:
        dfnrows = df.shape[0]
        dfncols = df.shape[1]
        df_col_names = list(df.columns)
    else:
        dfnrows = 0
        dfncols = 0
        df_col_names = []

    # Check 1: Number of rows and columns
    assert workflow.nrows == dfnrows, 'Inconsistent number of rows'
    assert workflow.ncols == dfncols, 'Inconsistent number of columns'

    # Identical sets of columns
    wf_cols = workflow.columns.all()
    assert set(df_col_names) == {col.name for col in wf_cols}, (
        'Inconsistent set of columns')

    # Identical data types
    for col in wf_cols:
        df_dt = pandas.datatype_names.get(df[col.name].dtype.name)
        if col.data_type == 'boolean' and df_dt == 'string':
            # This is the case of a column with Booleans and Nulls
            continue
        assert col.data_type == df_dt, (
            'Inconsistent data type {0}'.format(col.name))

    # Verify that the columns marked as unique are preserved
    for col in workflow.columns.filter(is_key=True):
        assert pandas.is_unique_column(df[col.name]), (
            'Column {0} should be unique.'.format(col.name))

    # Columns are properly numbered
    cpos = workflow.columns.values_list('position', flat=True)
    rng = range(1, len(cpos) + 1)
    assert sorted(cpos) == list(rng)

    return True
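# Tiny runnable example (not from the source) of the final numbering check
# in check_wf_df: column positions must be exactly 1..N in some order.
positions = [3, 1, 2]
assert sorted(positions) == list(range(1, len(positions) + 1))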
def get(
    self,
    request: HttpRequest,
    wid: int,
    format=None,
    workflow: Optional[Workflow] = None,
) -> HttpResponse:
    """Retrieve the existing data frame."""
    serializer = self.serializer_class(
        {'data_frame': load_table(workflow.get_data_frame_table_name())})
    return Response(serializer.data)
def test_create_random_column_string_form(self):
    """Create a random string column."""
    # Get the workflow first
    self.workflow = models.Workflow.objects.all().first()

    # Column name and current number of them
    cname = 'random_column'
    ncols = self.workflow.ncols

    # JSON POST request for column creation with a single string value
    resp = self.get_response(
        'workflow:random_column_add',
        method='POST',
        req_params={
            'name': cname,
            'data_type': 'string',
            'raw_categories': 'bogus',
            'position': 0,
        },
        is_ajax=True)
    resp_content = json.loads(resp.content)
    self.assertTrue('html_form' in resp_content)
    self.assertTrue(status.is_success(resp.status_code))
    self.workflow.refresh_from_db()
    self.assertEqual(self.workflow.ncols, ncols)
    new_column = self.workflow.columns.filter(name=cname).first()
    self.assertIsNone(new_column)

    # JSON POST request for column creation with a string list
    resp = self.get_response(
        'workflow:random_column_add',
        method='POST',
        req_params={
            'name': cname,
            'data_type': 'string',
            'raw_categories': 'one, two, three',
            'position': 0,
        },
        is_ajax=True)
    resp_content = json.loads(resp.content)
    self.assertTrue('html_form' not in resp_content)
    self.assertTrue(status.is_success(resp.status_code))
    self.workflow.refresh_from_db()
    self.assertEqual(self.workflow.ncols, ncols + 1)
    new_column = self.workflow.columns.filter(name=cname).first()
    self.assertIsNotNone(new_column)
    self.assertEqual(new_column.name, cname)
    self.assertEqual(new_column.data_type, 'string')
    data_frame = pandas.load_table(
        self.workflow.get_data_frame_table_name())
    self.assertTrue(all(
        element in ['one', 'two', 'three']
        for element in data_frame[cname]))
def do_sql_txt_operand(
    self,
    input_value,
    op_value,
    type_value,
    value,
    row_yes=1,
    row_no=1,
):
    self.set_skel(
        input_value, op_value.format(''), type_value, value,
        'v_' + type_value)
    data_frame = load_table(self.test_table, self.test_columns, self.skel)
    self.assertEqual(data_frame.shape[0], row_yes)
    evaluate_formula(self.skel, EVAL_TXT)

    if op_value.find('{0}') != -1:
        self.set_skel(
            input_value, op_value.format('not_'), type_value, value,
            'v_' + type_value)
        data_frame = load_table(
            self.test_table, self.test_columns, self.skel)
        self.assertEqual(data_frame.shape[0], row_no)
        evaluate_formula(self.skel, EVAL_TXT)
def add_formula_column(
    user,
    workflow: models.Workflow,
    column: models.Column,
    operation: str,
    selected_columns: List[models.Column],
):
    """Add the formula column to the workflow.

    :param user: User making the request
    :param workflow: Workflow to add the column to
    :param column: Column being added
    :param operation: String denoting the operation
    :param selected_columns: List of columns selected for the operation.
    :return: Nothing. The column is added to the workflow.
    """
    # Save the column object attached to the form and add additional fields
    column.workflow = workflow
    column.is_key = False

    # Save the instance
    column.save()

    # Update the data frame
    df = pandas.load_table(workflow.get_data_frame_table_name())

    # Add the column with the appropriate computation
    cnames = [col.name for col in selected_columns]
    df[column.name] = _op_distrib[operation](df[cnames])

    # Populate the column type
    column.data_type = pandas.datatype_names.get(df[column.name].dtype.name)

    # Update the positions of the appropriate columns
    workflow.reposition_columns(workflow.ncols + 1, column.position)
    column.save()
    workflow.refresh_from_db()

    # Store the df to DB
    try:
        pandas.store_dataframe(df, workflow)
    except Exception as exc:
        raise services.OnTaskWorkflowAddColumn(
            message=_('Unable to add column: {0}').format(str(exc)),
            to_delete=[column])

    workflow.ncols = workflow.columns.count()
    workflow.save()
    column.log(user, models.Log.COLUMN_ADD_FORMULA)
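# `_op_distrib` is referenced above but not shown. A hedged sketch of what
# such a dispatch table could look like: operation name mapped to a row-wise
# reduction over the selected columns. The names and set of operations are
# assumptions, not the project's actual table.
import pandas as pd

_op_distrib_sketch = {
    'sum': lambda df: df.sum(axis=1),
    'prod': lambda df: df.prod(axis=1),
    'max': lambda df: df.max(axis=1),
    'min': lambda df: df.min(axis=1),
    'mean': lambda df: df.mean(axis=1),
}

frame = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
assert list(_op_distrib_sketch['sum'](frame)) == [4, 6]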
def test_column_clone(self):
    """Test cloning a column."""
    column = self.workflow.columns.get(name='Q01')

    resp = self.get_response(
        'workflow:column_clone',
        {'pk': column.id},
        is_ajax=True)
    self.assertTrue(status.is_success(resp.status_code))

    # Clone the column
    resp = self.get_response(
        'workflow:column_clone',
        {'pk': column.id},
        method='POST',
        is_ajax=True)
    self.assertTrue(status.is_success(resp.status_code))

    df = pandas.load_table(self.workflow.get_data_frame_table_name())
    self.assertTrue(df['Copy of Q01'].equals(df['Q01']))
def post(
    self,
    request: http.HttpRequest,
    wid: int,
    format=None,
    workflow: Optional[models.Workflow] = None,
) -> http.HttpResponse:
    """Create a new data frame."""
    if pandas.load_table(workflow.get_data_frame_table_name()) is not None:
        raise APIException(
            _('Post request requires workflow without a table'))

    return self.override(request, wid=wid, format=format, workflow=workflow)
def test_04_merge_right(self):
    self.template_merge('right', rename=False)

    # Assert the content of the dataframe
    wflow = models.Workflow.objects.get(name=self.wf_name)
    df = pandas.load_table(wflow.get_data_frame_table_name())
    self.assertTrue('key' in set(df.columns))
    self.assertTrue('text3' in set(df.columns))
    self.assertTrue('double3' in set(df.columns))
    self.assertTrue('bool3' in set(df.columns))
    self.assertTrue('date3' in set(df.columns))

    # End of session
    self.logout()
def test_table_pandas_get(self):
    # Get the only workflow in the fixture
    workflow = models.Workflow.objects.all()[0]

    # Get the data through the API
    response = self.client.get(
        reverse('table:api_pops', kwargs={'wid': workflow.id}))

    # Transform the response into a data frame
    r_df = serializers.string_to_df(response.data['data_frame'])

    # Load the df from the db
    dframe = pandas.load_table(workflow.get_data_frame_table_name())

    # Compare both elements
    self.compare_tables(r_df, dframe)
def test_table_JSON_get(self):
    # Get the only workflow in the fixture
    workflow = models.Workflow.objects.all()[0]

    # Get the data through the API
    response = self.client.get(
        reverse('table:api_ops', kwargs={'wid': workflow.id}))

    # Transform the response into a data frame
    r_df = pd.DataFrame(response.data['data_frame'])
    r_df = pandas.detect_datetime_columns(r_df)

    # Load the df from the db
    dframe = pandas.load_table(workflow.get_data_frame_table_name())

    # Compare both elements
    self.compare_tables(r_df, dframe)
def test_03_merge_left(self):
    self.template_merge('left')

    # Assert the content of the dataframe
    wflow = Workflow.objects.get(name=self.wf_name)
    df = load_table(wflow.get_data_frame_table_name())
    self.assertTrue('key' in set(df.columns))
    self.assertTrue('key2' in set(df.columns))
    self.assertTrue('text3' in set(df.columns))
    self.assertTrue('double3' in set(df.columns))
    self.assertTrue('bool3' in set(df.columns))
    self.assertTrue('date3' in set(df.columns))

    assert check_wf_df(Workflow.objects.get(name='Testing Merge'))

    # End of session
    self.logout()
def test_table_pandas_merge_get(self):
    # Get the only workflow in the fixture
    workflow = models.Workflow.objects.all()[0]

    # Get the data through the API
    response = self.client.get(
        reverse('table:api_pmerge', kwargs={'wid': workflow.id}))
    workflow = models.Workflow.objects.all()[0]

    # Transform the serialized src_df back into a data frame
    r_df = serializers.string_to_df(response.data['src_df'])

    # Load the df from the db
    dframe = pandas.load_table(workflow.get_data_frame_table_name())

    # Compare both elements and check wf df consistency
    self.compare_tables(r_df, dframe)
def put(
    self,
    request: http.HttpRequest,
    wid: int,
    format=None,
    workflow: Optional[models.Workflow] = None,
) -> http.HttpResponse:
    """Process the put request."""
    del wid, format

    # Get the dst_df
    dst_df = pandas.load_table(workflow.get_data_frame_table_name())

    serializer = self.serializer_class(data=request.data)
    if not serializer.is_valid():
        return Response(
            serializer.errors,
            status=status.HTTP_400_BAD_REQUEST)

    src_df = serializer.validated_data['src_df']
    how = serializer.validated_data['how']
    left_on = serializer.validated_data['left_on']
    right_on = serializer.validated_data['right_on']
    error = pandas.validate_merge_parameters(
        dst_df, src_df, how, left_on, right_on)
    if error:
        raise APIException(error)

    # Ready to perform the MERGE
    try:
        pandas.perform_dataframe_upload_merge(
            workflow,
            dst_df,
            src_df,
            {
                'how_merge': how,
                'dst_selected_key': left_on,
                'src_selected_key': right_on,
                'initial_column_names': list(src_df.columns),
                'rename_column_names': list(src_df.columns),
                'columns_to_upload': [True] * len(list(src_df.columns)),
            })
    except Exception as exc:
        raise APIException(
            _('Unable to perform merge operation: {0}').format(str(exc)))

    # Merge went through.
    return Response(serializer.data, status=status.HTTP_201_CREATED)
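# `pandas.validate_merge_parameters` is project code that is not shown. A
# hedged, self-contained sketch of the checks it plausibly bundles, mirroring
# the explicit validation in the earlier put() implementation: a valid `how`,
# a unique left key, and a right key that exists and is unique. The function
# name and return convention (error string or None) are assumptions.
import pandas as pd

def validate_merge_parameters_sketch(dst_df, src_df, how, left_on, right_on):
    """Return an error string, or None when the parameters are acceptable."""
    if how not in ('left', 'right', 'outer', 'inner'):
        return 'how must be one of left, right, outer or inner'
    if not (dst_df[left_on].notna().all() and dst_df[left_on].is_unique):
        return 'column {0} does not contain a unique key.'.format(left_on)
    if right_on not in src_df.columns:
        return 'column {0} not found in data frame'.format(right_on)
    if not (src_df[right_on].notna().all() and src_df[right_on].is_unique):
        return 'column {0} does not contain a unique key.'.format(right_on)
    return None

dst = pd.DataFrame({'sid': [1, 2, 3]})
src = pd.DataFrame({'sid': [1, 2], 'newcol': ['a', 'b']})
assert validate_merge_parameters_sketch(dst, src, 'left', 'sid', 'sid') is None
assert validate_merge_parameters_sketch(dst, src, 'cross', 'sid', 'sid')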