# Example 1
    def put(self, request, pk, format=None):
        """Merge an uploaded data frame into the workflow's data frame.

        Validates the serializer payload, verifies that both join keys
        contain unique values, performs the merge, and returns 201 on
        success. Raises APIException on any validation or merge failure.
        """
        # Permission check: raises if the workflow is not accessible.
        self.get_object(pk)

        # Destination data frame currently stored for this workflow.
        dst_df = pandas_db.load_from_db(pk)

        serializer = self.serializer_class(data=request.data)
        if not serializer.is_valid():
            return Response(serializer.errors,
                            status=status.HTTP_400_BAD_REQUEST)

        validated = serializer.validated_data

        # The merge variant must be one of the four supported joins.
        how = validated['how']
        if how == '' or how not in ('left', 'right', 'outer', 'inner'):
            raise APIException(
                _('how must be one of left, right, outer '
                  'or inner'))

        # The destination key column must hold unique values.
        left_on = validated['left_on']
        if not ops.is_unique_column(dst_df[left_on]):
            raise APIException(
                _('column {0} does not contain a unique '
                  'key.').format(left_on))

        # Source data frame produced by the serializer.
        src_df = validated['src_df']

        # The source key column must exist and hold unique values.
        right_on = validated['right_on']
        if right_on not in list(src_df.columns):
            raise APIException(
                _('column {0} not found in data frame').format(right_on))
        if not ops.is_unique_column(src_df[right_on]):
            raise APIException(
                _('column {0} does not contain a unique key.').format(
                    right_on))

        # Parameters accepted: assemble the merge descriptor. All source
        # columns are uploaded, none renamed.
        merge_info = {
            'how_merge': how,
            'dst_selected_key': left_on,
            'src_selected_key': right_on,
            'initial_column_names': list(src_df.columns),
            'rename_column_names': list(src_df.columns),
            'columns_to_upload': [True] * len(list(src_df.columns)),
        }

        # Attempt the merge; any internal failure surfaces as an API error.
        try:
            merge_result = ops.perform_dataframe_upload_merge(
                pk, dst_df, src_df, merge_info)
        except Exception:
            raise APIException(_('Unable to perform merge operation'))

        # A non-empty result is an error message from the merge step.
        if merge_result:
            raise APIException(merge_result)

        # Merge succeeded.
        return Response(serializer.data, status=status.HTTP_201_CREATED)
# Example 2
    def clean(self):
        """Validate a change to the column's 'is_key' flag.

        A column cannot stop being a key if it is the only unique column
        in the workflow, and cannot become a key unless all of its values
        are distinct.
        """
        data = super(ColumnRenameForm, self).clean()

        # Nothing to verify unless the key status was toggled.
        if 'is_key' not in self.changed_data:
            return data

        # Case 1 (True -> False): refuse if this is the last key column.
        unique_flags = self.instance.workflow.get_column_unique()
        if self.instance.is_key and sum(1 for flag in unique_flags if flag) == 1:
            self.add_error(
                'is_key',
                'There must be at least one column with unique values')
            return data

        # Case 2 (False -> True): the column values must actually be unique.
        if not self.instance.is_key and \
                not ops.is_unique_column(self.data_frame[self.instance.name]):
            self.add_error(
                'is_key',
                'The column does not have unique values for each row.')
            return data

        return data
# Example 3
    def clean(self):
        """Validate the 'is_key' flag change and normalize 'position'.

        A column cannot stop being a key if it is the only unique column,
        and cannot become a key unless all of its values are distinct.
        The requested column position is clamped into the valid range
        [1, number of columns].
        """
        data = super(ColumnRenameForm, self).clean()

        # Check if there has been a change in the 'is_key' status
        if 'is_key' in self.changed_data:
            # Case 1: True -> False If it is the only one, cannot be
            # allowed
            column_unique = self.instance.workflow.get_column_unique()
            if self.instance.is_key and \
                    len([x for x in column_unique if x]) == 1:
                self.add_error(
                    'is_key',
                    _('There must be at least one column with unique values'))
                return data

            # Case 2: False -> True Unique values must be verified
            if not self.instance.is_key and \
                    not ops.is_unique_column(self.data_frame[
                                                 self.instance.name]):
                self.add_error(
                    'is_key',
                    _('The column does not have unique values for each row.'))
                return data

        # Check and force a correct column index. Use .get() because
        # cleaned_data has no 'position' entry when that field failed its
        # own validation — data['position'] would raise KeyError here.
        ncols = Column.objects.filter(workflow__id=self.workflow.id).count()
        position = data.get('position')
        if position is None or position < 1 or position > ncols:
            data['position'] = ncols

        return data
# Example 4
def row_create(request):
    """
    Receives a POST request to create a new row in the data table.
    :param request: Request object with all the data.
    :return: Rendered form page on GET or invalid POST; redirect to the
             table display (or workflow index / upload page) otherwise.
    """

    # If there is no workflow object, go back to the index
    workflow = get_workflow(request)
    if not workflow:
        return redirect('workflow:index')

    # If the workflow has no data, the operation should not be allowed
    if workflow.nrows == 0:
        return redirect('dataops:uploadmerge')

    # Create the form
    form = RowForm(request.POST or None, workflow=workflow)

    if request.method == 'GET' or not form.is_valid():
        return render(
            request, 'dataops/row_create.html', {
                'workflow': workflow,
                'form': form,
                'cancel_url': reverse('table:display')
            })

    # Collect the new row values in column order from the form fields
    columns = workflow.get_columns()
    column_names = [c.name for c in columns]
    field_name = field_prefix + '%s'
    row_vals = [
        form.cleaned_data[field_name % idx] for idx in range(len(columns))
    ]

    # Load the existing df from the db
    df = pandas_db.load_from_db(workflow.id)

    # Append the new row to the data frame
    new_row = pd.DataFrame([row_vals], columns=column_names)
    df = df.append(new_row, ignore_index=True)

    # Verify that the unique columns remain unique
    for ucol in [c for c in columns if c.is_key]:
        if not ops.is_unique_column(df[ucol.name]):
            form.add_error(
                None, 'Repeated value in column ' + ucol.name + '.' +
                ' It must be different to maintain Key property')
            return render(
                request, 'dataops/row_create.html', {
                    'workflow': workflow,
                    'form': form,
                    'cancel_url': reverse('table:display')
                })

    # Restore the dataframe to the DB
    ops.store_dataframe_in_db(df, workflow.id)

    # Recompute all the values of the conditions in each of the actions
    for act in workflow.actions.all():
        act.update_n_rows_selected()

    # Log the event. Materialize the zip into a list: under Python 3 zip
    # returns a lazy iterator, which is not JSON-serializable and would
    # produce an empty/broken 'new_values' entry in the log payload.
    log_payload = list(zip(column_names, [str(x) for x in row_vals]))
    logs.ops.put(request.user, 'tablerow_create', workflow, {
        'id': workflow.id,
        'name': workflow.name,
        'new_values': log_payload
    })

    # Done. Back to the table view
    return redirect('table:display')
# Example 5
    def put(self, request, pk, format=None):
        """Merge an uploaded data frame into the workflow's data frame.

        Validates the payload, verifies both join keys are unique,
        resolves column-name collisions per 'dup_column' (override the
        existing columns, or auto-rename the incoming ones with a numeric
        suffix), performs the merge, and returns 201 on success.
        """
        # Try to retrieve the wflow to check for permissions
        self.get_object(pk, user=self.request.user)
        # Get the dst_df
        dst_df = pandas_db.load_from_db(pk)

        serializer = self.serializer_class(data=request.data)
        if not serializer.is_valid():
            return Response(serializer.errors,
                            status=status.HTTP_400_BAD_REQUEST)

        # Check that the parameters are correct ('' is never a member of
        # the list, so no separate empty-string check is needed)
        how = serializer.validated_data['how']
        if how not in ['left', 'right', 'outer', 'inner']:
            raise APIException('how must be one of left, right, outer '
                               'or inner')

        left_on = serializer.validated_data['left_on']
        if not ops.is_unique_column(dst_df[left_on]):
            # Fixed message spacing: was 'column' + name + 'does not ...',
            # which rendered as "columnfoodoes not contain a unique key."
            raise APIException('column ' + left_on +
                               ' does not contain a unique key.')

        # Operation has been accepted by the serializer
        src_df = serializer.validated_data['src_df']

        right_on = serializer.validated_data['right_on']
        if right_on not in list(src_df.columns):
            raise APIException('column ' + right_on +
                               ' not found in data frame')

        if not ops.is_unique_column(src_df[right_on]):
            # Fixed message spacing (same malformed concatenation as above)
            raise APIException('column ' + right_on +
                               ' does not contain a unique key.')

        dup_column = serializer.validated_data['dup_column']
        if dup_column not in ['override', 'rename']:
            raise APIException('dup_column must be override or rename')

        override_columns_names = []
        autorename_column_names = None
        if dup_column == 'override':
            # List of columns to drop (the ones in both data sets,
            # excluding the join key itself)
            override_columns_names = list((set(dst_df.columns)
                                           & set(src_df.columns)) - {left_on})
        else:
            autorename_column_names = []
            for colname in list(src_df.columns):
                # If the column is the key, insert as is
                if colname == right_on:
                    autorename_column_names.append(colname)
                    continue

                # If the column does not collide, insert as is
                if colname not in dst_df.columns:
                    autorename_column_names.append(colname)
                    continue

                # Column name collides with an existing column: append the
                # first numeric suffix that makes the name unique
                i = 0  # Suffix to rename
                while True:
                    i += 1
                    new_name = colname + '_{0}'.format(i)
                    if new_name not in dst_df.columns:
                        break
                autorename_column_names.append(new_name)

        merge_info = {
            'how_merge': how,
            'dst_selected_key': left_on,
            'src_selected_key': right_on,
            'initial_column_names': list(src_df.columns),
            'autorename_column_names': autorename_column_names,
            'rename_column_names': list(src_df.columns),
            'columns_to_upload': [True] * len(list(src_df.columns)),
            'override_columns_names': override_columns_names
        }

        # Ready to perform the MERGE
        try:
            merge_result = ops.perform_dataframe_upload_merge(
                pk, dst_df, src_df, merge_info)
        except Exception:
            raise APIException('Unable to perform merge operation')

        if merge_result:
            # Something went wrong, raise the exception
            raise APIException(merge_result)

        # Merge went through.
        return Response(serializer.data, status=status.HTTP_201_CREATED)