Example #1
    def get(self, study_id, assay_file_name):
        # param validation
        if study_id is None or assay_file_name is None:
            logger.info('No study_id and/or assay file name given')
            abort(404)
        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        logger.info('Assay Table: Getting ISA-JSON Study %s', study_id)
        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not read_access:
            abort(403)

        assay_file_name = os.path.join(study_location, assay_file_name)
        logger.info('Trying to load Assay (%s) for Study %s', assay_file_name, study_id)
        # Load the assay table from the file
        assay_df = pd.read_csv(assay_file_name, sep="\t", header=0, encoding='utf-8')
        # Get rid of empty numerical values
        assay_df = assay_df.replace(np.nan, '', regex=True)

        df_data_dict = totuples(assay_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(assay_df)

        return {'header': df_header, 'data': df_data_dict}
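The totuples and get_table_header helpers used throughout these examples are imported from elsewhere and not shown. A minimal sketch of what totuples plausibly looks like, consistent with the inline variant defined in a later example, under the assumption that it maps each DataFrame row to a {column: value} dict and nests the list under the supplied key:

    # Hypothetical sketch of the totuples helper (assumption: it converts
    # each DataFrame row into a dict keyed by column name and wraps the
    # list of row-dicts under the given key).
    import pandas as pd

    def totuples(df, key):
        rows = [
            dict((colname, row[i]) for i, colname in enumerate(df.columns))
            for row in df.values
        ]
        return {key: rows}

    df = pd.DataFrame({'a': [1, 2], 'b': ['x', 'y']})
    print(totuples(df, 'rows'))  # {'rows': [{'a': 1, 'b': 'x'}, {'a': 2, 'b': 'y'}]}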
Example #2
    def get(self, study_id):
        log_request(request)
        # param validation
        if study_id is None:
            abort(404)

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]
        else:
            # user token is required
            abort(401)

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, study_status = \
            wsc.get_permissions(study_id, user_token)
        if not read_access:
            abort(403)

        isa_study, isa_inv, std_path = iac.get_isa_study(
            study_id,
            user_token,
            skip_load_tables=True,
            study_location=study_location)

        samples = read_characteristics_from_sample_sheet(
            study_location, isa_study)
        return totuples(samples, 'organisms')
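A client-side sketch of calling an endpoint like the one above; the base URL and route are illustrative assumptions, but the user_token header matches what the handler reads from request.headers:

    # Hypothetical client call (the URL and route are illustrative, not the real ones).
    import requests

    resp = requests.get(
        'https://www.example.org/ws/studies/MTBLS1/organisms',
        headers={'user_token': 'my-api-token'})  # omitting this header makes the handler abort with 401
    if resp.ok:
        print(resp.json())  # e.g. {'organisms': [...]}
    else:
        print('Request failed with status', resp.status_code)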
Example #3
    def get_dataframe(self):
        """
        Yield each dataframe as a dict, one at a time. This is a generator method: the files involved can be massive,
        so we want to limit how many dataframes are held in memory at once. Each dataframe is converted to a dict and
        then yielded, so only one dataframe is open in memory at a time.

        The method also sorts through each of the MAF files found in the study directory, attempting to cast off any
        that might correspond to other analytical methods.
        """
        for i, study_id in enumerate(self.studies_to_combine):
            # Build this study's location by swapping its id into the template path
            copy = repr(self.original_study_location).strip("'")
            study_location = copy.replace("MTBLS1", study_id)

            for maf in self.sort_mafs(study_location, study_id):
                maf_temp = None
                try:
                    maf_temp = pandas.read_csv(os.path.join(study_location, maf), sep="\t", header=0, encoding='unicode_escape')
                except pandas.errors.EmptyDataError as e:
                    logger.error(f'EmptyDataError Issue with opening maf file {maf}: {str(e)}')
                    self.unopenable_maf_register.append(maf)
                    continue
                except Exception as e:
                    logger.error(f'Issue with opening maf file {maf}, cause of error unclear: {str(e)}')
                    self.unopenable_maf_register.append(maf)
                    continue

                cleanup_function = getattr(DataFrameUtils, f'{self.method}_maf_cleanup')
                maf_temp = cleanup_function(maf_temp, study_id, maf)
                maf_as_dict = totuples(df=maf_temp, text='dict')['dict']

                yield maf_as_dict
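A self-contained sketch of the pattern the docstring describes: yielding one converted dataframe at a time keeps at most one table's rows alive per iteration. The file name in the commented usage is illustrative:

    # Minimal illustration of the generator pattern used above.
    import pandas

    def frames_as_dicts(paths):
        for path in paths:
            try:
                df = pandas.read_csv(path, sep="\t", header=0, encoding='unicode_escape')
            except pandas.errors.EmptyDataError:
                continue  # skip empty/unreadable files, as the method above does
            yield df.to_dict(orient='records')  # one table's rows at a time

    # for rows in frames_as_dicts(['m_example_maf.tsv']):
    #     process(rows)   # the previous table can be garbage-collected here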
Example #4
    def post(self, study_id, file_name):

        parser = reqparse.RequestParser()
        parser.add_argument('new_column_name', help="Name of new column")
        new_column_name = None
        parser.add_argument('new_column_position', help="The position (column #) of new column")
        new_column_position = None
        parser.add_argument('new_column_default_value', help="The (optional) default value of new column")
        new_column_default_value = None

        if request.args:
            args = parser.parse_args(req=request)
            new_column_name = args['new_column_name']
            new_column_position = args['new_column_position']
            new_column_default_value = args['new_column_default_value']

        if new_column_name is None or new_column_position is None:
            abort(404, "Please provide a valid name and position for the new column")

        # param validation
        if study_id is None or file_name is None:
            abort(404, 'Please provide valid parameters for study identifier and file name')
        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        file_name = os.path.join(study_location, file_name)
        try:
            table_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        #  Need to add values for each existing row (not header)
        new_col = []
        for row_val in range(table_df.shape[0]):
            new_col.append(new_column_default_value)

        # Add new column to the spreadsheet
        table_df.insert(loc=int(new_column_position), column=new_column_name, value=new_col, allow_duplicates=True)

        df_data_dict = totuples(table_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(table_df)

        message = write_tsv(table_df, file_name)

        return {'header': df_header, 'data': df_data_dict, 'message': message}
Example #5
    def put(self, study_id, file_name):

        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            columns_rows = data_dict['data']
        except KeyError:
            columns_rows = None

        if columns_rows is None:
            abort(404, "Please provide valid key-value pairs for the cell value."
                       "The JSON string has to have a 'data' element")

        # param validation
        if study_id is None or file_name is None:
            abort(404, 'Please provide valid parameters for study identifier and/or file name')
        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        file_name = os.path.join(study_location, file_name)
        try:
            table_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        for column in columns_rows:
            cell_value = column['value']
            row_index = column['row']
            column_index = column['column']
            # Update the cell value (header row excluded)
            try:
                table_df.iloc[int(row_index), int(column_index)] = cell_value
            except ValueError:
                abort(417, "Unable to find the required 'value', 'row' and 'column' values")

        # Write the new row back in the file
        message = write_tsv(table_df, file_name)

        df_data_dict = totuples(table_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(table_df)

        return {'header': df_header, 'rows': df_data_dict, 'message': message}
Example #6
    def delete(self, study_id, assay_file_name):

        # query validation
        parser = reqparse.RequestParser()
        parser.add_argument('row_num', help="The row number of the cell(s) to remove (exclude header)", location="args")
        args = parser.parse_args()
        row_num = args['row_num']

        # param validation
        if study_id is None or assay_file_name is None or row_num is None:
            abort(404)
        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        assay_file_name = os.path.join(study_location, assay_file_name)

        assay_df = pd.read_csv(assay_file_name, sep="\t", header=0, encoding='utf-8')
        assay_df = assay_df.replace(np.nan, '', regex=True)  # Remove NaN
        row_nums = row_num.split(",")

        # Remove the highest row numbers first: positional indexes shift as rows are removed
        sorted_num_rows = [int(x) for x in row_nums]
        sorted_num_rows.sort(reverse=True)
        for num in sorted_num_rows:
            assay_df = assay_df.drop(assay_df.index[num])  # Drop row(s) in the spreadsheet

        # Remove all ".n" numbers at the end of duplicated column names
        assay_df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x), inplace=True)

        # Write the updated file
        assay_df.to_csv(assay_file_name, sep="\t", encoding='utf-8', index=False)

        # To be sure we read the file again
        assay_df = pd.read_csv(assay_file_name, sep="\t", header=0, encoding='utf-8')
        assay_df = assay_df.replace(np.nan, '', regex=True)  # Remove NaN

        df_data_dict = totuples(assay_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(assay_df)

        return {'header': df_header, 'data': df_data_dict}
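Why the deletes run highest-first: assay_df.index[num] is a positional lookup, so removing a low row first would shift the position of every row after it. A small standalone sketch, assuming a default RangeIndex:

    # Demonstration of dropping rows by position in descending order.
    import pandas as pd

    df = pd.DataFrame({'v': ['a', 'b', 'c', 'd', 'e']})
    for num in sorted([1, 3], reverse=True):  # drop original rows 3, then 1
        df = df.drop(df.index[num])
    print(df['v'].tolist())  # ['a', 'c', 'e'] - the intended rows remain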
Example #7
    def post(self, study_id, assay_file_name):

        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            new_row = data_dict['data']
        except KeyError:
            new_row = None

        if new_row is None:
            abort(404, "Please provide valid data for the new row(s). "
                       "The JSON string has to have a 'data' element")

        try:
            for element in new_row:
                element.pop('index', None)  # Remove "index:n" element, this is the original row number
        except Exception:
            logger.info('No index (row num) supplied, ignoring')

        # param validation
        if study_id is None or assay_file_name is None:
            abort(404, 'Please provide valid parameters for study identifier and assay file name')
        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        assay_file_name = os.path.join(study_location, assay_file_name)

        assay_df = pd.read_csv(assay_file_name, sep="\t", header=0, encoding='utf-8')
        assay_df = assay_df.replace(np.nan, '', regex=True)  # Remove NaN
        assay_df = assay_df.append(new_row, ignore_index=True)  # Add new row to the spreadsheet

        # Remove all ".n" numbers at the end of duplicated column names
        assay_df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x), inplace=True)

        # Write the new row back in the file
        assay_df.to_csv(assay_file_name, sep="\t", encoding='utf-8', index=False)

        df_data_dict = totuples(assay_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(assay_df)

        return {'header': df_header, 'data': df_data_dict}
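The rename above undoes pandas' duplicate-column mangling: ISA-Tab files legitimately repeat headers such as Term Source REF, which read_csv loads as 'Term Source REF.1', 'Term Source REF.2', and so on. A short sketch of the same re.sub rename:

    # Strip the ".n" suffixes pandas adds to duplicated column names.
    import re
    import pandas as pd

    df = pd.DataFrame([[1, 2, 3]],
                      columns=['Term Source REF', 'Term Source REF.1', 'Unit.2'])
    df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x), inplace=True)
    print(list(df.columns))  # ['Term Source REF', 'Term Source REF', 'Unit']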
Example #8
    def delete(self, study_id, file_name):
        # query validation
        parser = reqparse.RequestParser()
        parser.add_argument('row_num', help="The row number of the cell(s) to remove (exclude header)", location="args")
        args = parser.parse_args()
        row_num = args['row_num']

        # param validation
        if study_id is None or file_name is None or row_num is None:
            abort(404)
        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        file_name = os.path.join(study_location, file_name)
        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        row_nums = row_num.split(",")

        # Remove the highest row numbers first: positional indexes shift as rows are removed
        sorted_num_rows = [int(x) for x in row_nums]
        sorted_num_rows.sort(reverse=True)
        for num in sorted_num_rows:
            file_df = file_df.drop(file_df.index[num])  # Drop row(s) in the spreadsheet

        message = write_tsv(file_df, file_name)

        # To be sure we read the file again
        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        df_data_dict = totuples(file_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(file_df)

        return {'header': df_header, 'data': df_data_dict, 'message': message}
Example #9
    def get(self, study_id, file_name):
        # param validation
        if study_id is None or file_name is None:
            logger.info('No study_id and/or TSV file name given')
            abort(404)

        fname, ext = os.path.splitext(file_name)
        ext = ext.lower()
        if ext not in ('.tsv', '.csv', '.txt'):
            abort(400,
                  "The file " + file_name + " is not a valid TSV or CSV file")

        study_id = study_id.upper()
        file_name_param = file_name  # keep the original file name for the header lookup

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        logger.info('Assay Table: Getting ISA-JSON Study %s', study_id)
        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not read_access:
            abort(403)

        if file_name == 'metabolights_zooma.tsv':  # Read the central MetaboLights Zooma mapping file instead
            if not is_curator:
                abort(403)
            file_name = app.config.get('MTBLS_ZOOMA_FILE')
        else:
            file_name = os.path.join(study_location, file_name)

        logger.info('Trying to load TSV file (%s) for Study %s', file_name,
                    study_id)
        # Load the TSV table from the file
        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        df_data_dict = totuples(file_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(file_df, study_id, file_name_param)

        return {'header': df_header, 'data': df_data_dict}
Example #10
    def get(self, study_id, sample_file_name):
        # param validation
        if study_id is None:
            logger.info('No study_id given')
            abort(404)

        if sample_file_name is None:
            logger.info('No sample file name given')
            abort(404)

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # Note: the user token is deliberately not logged
        logger.info('Sample Table: Getting ISA-JSON Study %s', study_id)

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, study_status = \
            wsc.get_permissions(study_id, user_token)
        if not read_access:
            abort(403)

        sample_file_name = os.path.join(study_location, sample_file_name)
        logger.info('Trying to load sample (%s) for Study %s',
                    sample_file_name, study_id)
        # Load the sample table from the file
        sample_df = pd.read_csv(sample_file_name,
                                sep="\t",
                                header=0,
                                encoding='utf-8')
        # Get rid of empty numerical values
        sample_df = sample_df.replace(np.nan, '', regex=True)

        df_data_dict = totuples(sample_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(sample_df)

        return {'header': df_header, 'data': df_data_dict}
Example #11
    def get(self, study_id, file_name):
        # param validation
        if study_id is None or file_name is None:
            logger.info('No study_id and/or TSV file name given')
            abort(404)
        study_id = study_id.upper()
        file_name_param = file_name  # keep the original file name for the header lookup

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        logger.info('Assay Table: Getting ISA-JSON Study %s', study_id)
        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not read_access:
            abort(403)

        if file_name == 'metabolights_zooma.tsv':  # Read the central MetaboLights Zooma mapping file instead
            if not is_curator:
                abort(403)
            file_name = app.config.get('MTBLS_ZOOMA_FILE')
        else:
            file_name = os.path.join(study_location, file_name)

        logger.info('Trying to load TSV file (%s) for Study %s', file_name, study_id)
        # Load the TSV table from the file
        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        df_data_dict = totuples(file_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(file_df, study_id, file_name_param)

        return {'header': df_header, 'data': df_data_dict}
Example #12
    def post(self, study_id, file_name):

        parser = reqparse.RequestParser()
        parser.add_argument('new_column_name', help="Name of new column")
        new_column_name = None
        parser.add_argument('new_column_position',
                            help="The position (column #) of new column")
        new_column_position = None
        parser.add_argument('new_column_default_value',
                            help="The (optional) default value of new column")
        new_column_default_value = None

        if request.args:
            args = parser.parse_args(req=request)
            new_column_name = args['new_column_name']
            new_column_position = args['new_column_position']
            new_column_default_value = args['new_column_default_value']

        if new_column_name is None or new_column_position is None:
            abort(404, "Please provide a valid name and position for the new column")

        # param validation
        if study_id is None or file_name is None:
            abort(
                404,
                'Please provide valid parameters for study identifier and file name'
            )
        study_id = study_id.upper()

        fname, ext = os.path.splitext(file_name)
        ext = ext.lower()
        if ext not in ('.tsv', '.csv', '.txt'):
            abort(400,
                  "The file " + file_name + " is not a valid TSV or CSV file")

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        file_name = os.path.join(study_location, file_name)
        try:
            table_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        audit_status, dest_path = write_audit_files(study_location)

        #  Need to add values for each existing row (not header)
        new_col = []
        for row_val in range(table_df.shape[0]):
            new_col.append(new_column_default_value)

        # Add new column to the spreadsheet
        table_df.insert(loc=int(new_column_position),
                        column=new_column_name,
                        value=new_col,
                        allow_duplicates=True)

        df_data_dict = totuples(table_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(table_df)

        message = write_tsv(table_df, file_name)

        return {'header': df_header, 'data': df_data_dict, 'message': message}
Example #13
    def delete(self, study_id, file_name):
        # query validation
        parser = reqparse.RequestParser()
        parser.add_argument(
            'row_num',
            help="The row number of the cell(s) to remove (exclude header)",
            location="args")
        args = parser.parse_args()
        row_num = args['row_num']

        # param validation
        if study_id is None or file_name is None or row_num is None:
            abort(404)

        fname, ext = os.path.splitext(file_name)
        ext = ext.lower()
        if ext not in ('.tsv', '.csv', '.txt'):
            abort(400,
                  "The file " + file_name + " is not a valid TSV or CSV file")

        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        file_name = os.path.join(study_location, file_name)
        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        row_nums = row_num.split(",")

        # Remove the highest row numbers first: positional indexes shift as rows are removed
        sorted_num_rows = [int(x) for x in row_nums]
        sorted_num_rows.sort(reverse=True)
        for num in sorted_num_rows:
            file_df = file_df.drop(
                file_df.index[num])  # Drop row(s) in the spreadsheet

        message = write_tsv(file_df, file_name)

        # To be sure we read the file again
        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        df_data_dict = totuples(file_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(file_df)

        return {'header': df_header, 'data': df_data_dict, 'message': message}
Example #14
    def post(self, study_id, file_name):
        log_request(request)
        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            new_row = data_dict['data']
        except KeyError:
            new_row = None

        if new_row is None:
            abort(417, "Please provide valid data for updated new row(s). The JSON string has to have a 'data' element")

        try:
            for element in new_row:
                element.pop('index', None)  # Remove "index:n" element, this is the original row number
        except Exception:
            logger.info('No index (row num) supplied, ignoring')

        # param validation
        if study_id is None or file_name is None:
            abort(404, 'Please provide valid parameters for study identifier and TSV file name')
        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        if file_name == 'metabolights_zooma.tsv':  # This will edit the MetaboLights Zooma mapping file
            if not is_curator:
                abort(403)
            file_name = app.config.get('MTBLS_ZOOMA_FILE')
        else:
            file_name = os.path.join(study_location, file_name)

        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file name was not found")

        # Validate column names in new rows
        valid_column_name, message = validate_row(file_df, new_row, "post")
        if not valid_column_name:
            abort(417, message)

        if new_row[0]:
            file_df = file_df.append(new_row, ignore_index=True)  # Add new row to the spreadsheet (TSV file)
        else:
            file_df = file_df.append(pd.Series(), ignore_index=True)

        message = write_tsv(file_df, file_name)

        # Get an indexed header row
        df_header = get_table_header(file_df)

        # Get the updated data table
        try:
            df_data_dict = totuples(read_tsv(file_name), 'rows')
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        return {'header': df_header, 'data': df_data_dict, 'message': message}
Example #15
    def delete(self, study_id, assay_file_name):

        # query validation
        parser = reqparse.RequestParser()
        parser.add_argument(
            'row_num',
            help="The row number of the cell(s) to remove (exclude header)",
            location="args")
        args = parser.parse_args()
        row_num = args['row_num']

        # param validation
        if study_id is None or assay_file_name is None or row_num is None:
            abort(404)
        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        assay_file_name = os.path.join(study_location, assay_file_name)

        assay_df = pd.read_csv(assay_file_name,
                               sep="\t",
                               header=0,
                               encoding='utf-8')
        assay_df = assay_df.replace(np.nan, '', regex=True)  # Remove NaN
        row_nums = row_num.split(",")

        # Remove the highest row numbers first: positional indexes shift as rows are removed
        sorted_num_rows = [int(x) for x in row_nums]
        sorted_num_rows.sort(reverse=True)
        for num in sorted_num_rows:
            assay_df = assay_df.drop(
                assay_df.index[num])  # Drop row(s) in the spreadsheet

        # Remove all ".n" numbers at the end of duplicated column names
        assay_df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x),
                        inplace=True)

        # Write the updated file
        assay_df.to_csv(assay_file_name,
                        sep="\t",
                        encoding='utf-8',
                        index=False)

        # To be sure we read the file again
        assay_df = pd.read_csv(assay_file_name,
                               sep="\t",
                               header=0,
                               encoding='utf-8')
        assay_df = assay_df.replace(np.nan, '', regex=True)  # Remove NaN

        df_data_dict = totuples(assay_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(assay_df)

        return {'header': df_header, 'data': df_data_dict}
Example #16
    def post(self, study_id, file_name):
        log_request(request)
        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            data = data_dict['data']
            new_row = data['rows']
        except KeyError:
            new_row = None
            data = None

        if new_row is None:
            abort(
                417,
                "Please provide valid data for the new row(s). The JSON string has to have a 'rows' element"
            )

        try:
            for element in new_row:
                element.pop(
                    'index', None
                )  # Remove "index:n" element, this is the original row number
        except Exception:
            logger.info('No index (row num) supplied, ignoring')

        # param validation
        if study_id is None or file_name is None:
            abort(
                404,
                'Please provide valid parameters for study identifier and TSV file name'
            )

        fname, ext = os.path.splitext(file_name)
        ext = ext.lower()
        if ext not in ('.tsv', '.csv', '.txt'):
            abort(400,
                  "The file " + file_name + " is not a valid TSV or CSV file")

        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        if file_name == 'metabolights_zooma.tsv':  # This will edit the MetaboLights Zooma mapping file
            if not is_curator:
                abort(403)
            file_name = app.config.get('MTBLS_ZOOMA_FILE')
        else:
            file_name = os.path.join(study_location, file_name)

        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file name was not found")

        # Validate column names in new rows
        valid_column_name, message = validate_row(file_df, new_row, "post")
        if not valid_column_name:
            abort(417, message)

        if data:
            try:
                start_index = data['index']
                if start_index == -1:
                    start_index = 0
                # Fractional index: the new row will sort in just before this position
                start_index = start_index - 0.5

            except KeyError:
                start_index = len(file_df.index)

            # Map the complete row first, update with new_row
            complete_row = {}
            for col in file_df.columns:
                complete_row[col] = ""

            if not new_row:
                logger.warning(
                    "No new row information provided. Adding empty row " +
                    file_name + ", row " + str(complete_row))
            else:
                for row in new_row:
                    complete_row.update(row)
                    row = complete_row
                    line = pd.DataFrame(row, index=[start_index])
                    file_df = file_df.append(line, ignore_index=False)
                    file_df = file_df.sort_index().reset_index(drop=True)
                    start_index += 1

            file_df = file_df.replace(np.nan, '', regex=True)
            message = write_tsv(file_df, file_name)

        # Get an indexed header row
        df_header = get_table_header(file_df)

        # Get the updated data table
        try:
            df_data_dict = totuples(read_tsv(file_name), 'rows')
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        return {'header': df_header, 'data': df_data_dict, 'message': message}
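The start_index - 0.5 step above is a fractional-index trick: giving the new row an index that sorts between two existing integer indexes, then calling sort_index() and reset_index(), splices it in just before the requested position. A self-contained sketch, using pd.concat as the modern equivalent of the append call above:

    # Insert a row between positions 0 and 1 via a fractional index.
    import pandas as pd

    df = pd.DataFrame({'v': ['a', 'b', 'c']})
    line = pd.DataFrame({'v': ['NEW']}, index=[0.5])  # 1 - 0.5, i.e. just before row 1
    df = pd.concat([df, line]).sort_index().reset_index(drop=True)
    print(df['v'].tolist())  # ['a', 'NEW', 'b', 'c']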
Example #17
    def put(self, study_id, assay_file_name):
        parser = reqparse.RequestParser()
        parser.add_argument(
            'row_num',
            help="The row number of the cell to update (exclude header)")
        parser.add_argument('column_name',
                            help="The column name of the cell to update")
        parser.add_argument('cell_value',
                            help="The new value for the cell")
        row_num = None
        column_name = None
        cell_value = None
        if request.args:
            args = parser.parse_args(req=request)
            row_num = args['row_num']
            column_name = args['column_name']
            cell_value = args['cell_value']

        # param validation
        if study_id is None or assay_file_name is None or row_num is None or column_name is None:
            abort(404)
        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        assay_file_name = os.path.join(study_location, assay_file_name)

        assay_df = pd.read_csv(assay_file_name,
                               sep="\t",
                               header=0,
                               encoding='utf-8')
        assay_df = assay_df.replace(np.nan, '', regex=True)

        try:
            row = int(row_num)
            column = assay_df.columns.get_loc(column_name)
            assay_df.iloc[row, column] = cell_value
        except Exception:
            logger.warning('Could not find row (' + row_num +
                           ') and/or column (' + column_name +
                           ') in the table')

        # Remove all ".n" numbers at the end of duplicated column names
        assay_df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x),
                        inplace=True)

        # Write the new empty columns back in the file
        assay_df.to_csv(assay_file_name,
                        sep="\t",
                        encoding='utf-8',
                        index=False)

        # Convert pandas DataFrame to a JSON 'tuples' object (note: the rown argument is unused)
        def totuples(df, rown):
            d = [
                dict([(colname, row[i])
                      for i, colname in enumerate(df.columns)])
                for row in df.values
            ]
            return {'assaydata': d}

        df_dict = totuples(assay_df.reset_index(), row)

        return df_dict
Example #18
    def post(self, study_id, file_name):

        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            new_columns = data_dict['data']
        except KeyError:
            new_columns = None

        if new_columns is None:
            abort(417, "Please provide valid key-value pairs for the new columns."
                       "The JSON string has to have a 'data' element")

        # param validation
        if study_id is None or file_name is None:
            abort(404, 'Please provide valid parameters for study identifier and/or file name')
        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        file_name = os.path.join(study_location, file_name)
        try:
            table_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        for column in new_columns:
            new_column_default_value = column['value']
            new_column_name = column['name']
            new_column_position = column['index']

            #  Need to add values for each existing row (not header)
            new_col = []
            for row_val in range(table_df.shape[0]):
                new_col.append(new_column_default_value)

            # Check if we already have the column in the current position
            try:
                header_name = table_df.iloc[:, new_column_position].name
            except Exception:
                header_name = ""

            if header_name == new_column_name:  # We should update the existing column
                table_df.iloc[:, new_column_position] = new_col
            else:
                # Add new column to the spreadsheet
                table_df.insert(loc=int(new_column_position), column=new_column_name,
                                value=new_col, allow_duplicates=True)

        # Get an (updated) indexed header row
        df_header = get_table_header(table_df)

        # Get all indexed rows
        df_data_dict = totuples(table_df.reset_index(), 'rows')

        message = write_tsv(table_df, file_name)

        return {'header': df_header, 'rows': df_data_dict, 'message': message}
Example #19
    def put(self, study_id, assay_file_name):

        # param validation
        if study_id is None or assay_file_name is None:
            abort(404, 'Please provide valid parameters for study identifier and assay file name')
        study_id = study_id.upper()

        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            new_rows = data_dict['data']  # Use "index:n" element, this is the original row number
        except KeyError:
            new_rows = None

        if new_rows is None:
            abort(404, "Please provide valid data for the new row(s). "
                       "The JSON string has to have a 'data' element")

        for row in new_rows:
            try:
                row_index = row['index']  # Check if we have a value in the row number(s)
            except KeyError:
                row_index = None

            if row_index is None:
                abort(404, "Please provide valid data for the updated row(s). "
                           "The JSON string has to have an 'index:n' element in each (JSON) row, "
                           "this is the original row number. The header row can not be updated")

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        assay_file_name = os.path.join(study_location, assay_file_name)

        assay_df = pd.read_csv(assay_file_name, sep="\t", header=0, encoding='utf-8')
        assay_df = assay_df.replace(np.nan, '', regex=True)  # Remove NaN

        row_index_int = None

        for row in new_rows:
            try:
                row_index_int = int(row['index'])
            except (KeyError, ValueError, TypeError):
                row_index_int = None

            if row_index_int is not None:
                assay_df = assay_df.drop(assay_df.index[row_index_int])  # Remove the old row from the spreadsheet
                # pop the "index:n" from the new_row before updating
                row.pop('index', None)  # Remove "index:n" element from the (JSON) row, this is the original row number
                assay_df = insert_row(row_index_int, assay_df, row)  # Update the row in the spreadsheet

        # Remove all ".n" numbers at the end of duplicated column names
        assay_df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x), inplace=True)

        # Write the new row back in the file
        assay_df.to_csv(assay_file_name, sep="\t", encoding='utf-8', index=False)

        df_data_dict = totuples(assay_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(assay_df)

        return {'header': df_header, 'data': df_data_dict}
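The insert_row helper is imported from elsewhere in the codebase and not shown. A plausible sketch, under the assumption that it splices a row-dict in at positional index idx, would be:

    # Hypothetical insert_row sketch (an assumption, not the project's actual helper).
    import pandas as pd

    def insert_row(idx, df, row):
        top = df.iloc[:idx]                # rows before the insertion point
        bottom = df.iloc[idx:]             # rows from the insertion point on
        middle = pd.DataFrame([row])       # the new row, aligned on column names
        return pd.concat([top, middle, bottom]).reset_index(drop=True)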
Example #20
    def put(self, study_id, file_name):
        # param validation
        if study_id is None or file_name is None:
            abort(406, 'Please provide valid parameters for study identifier and TSV file name')
        study_id = study_id.upper()

        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            new_rows = data_dict['data']  # Use "index:n" element, this is the original row number
        except KeyError:
            new_rows = None

        if new_rows is None:
            abort(404, "Please provide valid data for updated new row(s). "
                       "The JSON string has to have a 'data' element")

        for row in new_rows:
            try:
                row_index = row['index']  # Check if we have a value in the row number(s)
            except (KeyError, TypeError):
                row_index = None

            if row_index is None:
                abort(404, "Please provide valid data for the updated row(s). "
                           "The JSON string has to have an 'index:n' element in each (JSON) row. "
                           "The header row can not be updated")

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        file_name = os.path.join(study_location, file_name)

        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        for row in new_rows:
            try:
                row_index_int = int(row['index'])
            except (KeyError, ValueError, TypeError):
                row_index_int = None

            # Validate column names in new rows
            valid_column_name, message = validate_row(file_df, row, 'put')
            if not valid_column_name:
                abort(417, message)

            if row_index_int is not None:
                file_df = file_df.drop(file_df.index[row_index_int])  # Remove the old row from the spreadsheet
                # pop the "index:n" from the new_row before updating
                row.pop('index', None)  # Remove "index:n" element, this is the original row number
                file_df = insert_row(row_index_int, file_df, row)  # Update the row in the spreadsheet

        message = write_tsv(file_df, file_name)

        df_data_dict = totuples(file_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(file_df)

        return {'header': df_header, 'data': df_data_dict, 'message': message}
Example #21
    def put(self, study_id, assay_file_name):
        parser = reqparse.RequestParser()
        parser.add_argument('row_num', help="The row number of the cell to update (exclude header)")
        parser.add_argument('column_name', help="The column name of the cell to update")
        parser.add_argument('cell_value', help="The new value for the cell")
        row_num = None
        column_name = None
        cell_value = None
        if request.args:
            args = parser.parse_args(req=request)
            row_num = args['row_num']
            column_name = args['column_name']
            cell_value = args['cell_value']

        # param validation
        if study_id is None or assay_file_name is None or row_num is None or column_name is None:
            abort(404)
        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        assay_file_name = os.path.join(study_location, assay_file_name)

        assay_df = pd.read_csv(assay_file_name, sep="\t", header=0, encoding='utf-8')
        assay_df = assay_df.replace(np.nan, '', regex=True)

        try:
            row = int(row_num)
            column = assay_df.columns.get_loc(column_name)
            assay_df.iloc[row, column] = cell_value
        except Exception:
            logger.warning('Could not find row (' + row_num + ') and/or column (' + column_name + ') in the table')

        # Remove all ".n" numbers at the end of duplicated column names
        assay_df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x), inplace=True)

        # Write the new empty columns back in the file
        assay_df.to_csv(assay_file_name, sep="\t", encoding='utf-8', index=False)

        # Convert pandas DataFrame to a JSON 'tuples' object (note: the rown argument is unused)
        def totuples(df, rown):
            d = [
                dict([
                    (colname, row[i])
                    for i, colname in enumerate(df.columns)
                ])
                for row in df.values
            ]
            return {'assaydata': d}

        df_dict = totuples(assay_df.reset_index(), row)

        return df_dict
Example #22
    def post(self, study_id, file_name):

        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            new_columns = data_dict['data']
        except KeyError:
            new_columns = None

        if new_columns is None:
            abort(
                417,
                "Please provide valid key-value pairs for the new columns. "
                "The JSON string has to have a 'data' element")

        # param validation
        if study_id is None or file_name is None:
            abort(
                404,
                'Please provide valid parameters for study identifier and/or file name'
            )
        study_id = study_id.upper()

        fname, ext = os.path.splitext(file_name)
        ext = ext.lower()
        if ext not in ('.tsv', '.csv', '.txt'):
            abort(400,
                  "The file " + file_name + " is not a valid TSV or CSV file")

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        file_name = os.path.join(study_location, file_name)
        try:
            table_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        audit_status, dest_path = write_audit_files(study_location)

        for column in new_columns:
            new_column_default_value = column['value']
            new_column_name = column['name']
            new_column_position = column['index']

            #  Need to add values for each existing row (not header)
            new_col = []
            for row_val in range(table_df.shape[0]):
                new_col.append(new_column_default_value)

            # Check if we already have the column in the current position
            try:
                header_name = table_df.iloc[:, new_column_position].name
            except Exception:
                header_name = ""

            if header_name == new_column_name:  # We should update the existing column
                table_df.iloc[:, new_column_position] = new_col
            else:
                # Add new column to the spreadsheet
                table_df.insert(loc=int(new_column_position),
                                column=new_column_name,
                                value=new_col,
                                allow_duplicates=True)

        # Get an (updated) indexed header row
        df_header = get_table_header(table_df)

        # Get all indexed rows
        df_data_dict = totuples(table_df.reset_index(), 'rows')

        message = write_tsv(table_df, file_name)

        return {'header': df_header, 'rows': df_data_dict, 'message': message}
Example #23
    def put(self, study_id, file_name):

        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            columns_rows = data_dict['data']
        except KeyError:
            columns_rows = None

        if columns_rows is None:
            abort(
                404, "Please provide valid key-value pairs for the cell value. "
                "The JSON string has to have a 'data' element")

        # param validation
        if study_id is None or file_name is None:
            abort(
                404,
                'Please provide valid parameters for study identifier and/or file name'
            )

        fname, ext = os.path.splitext(file_name)
        ext = ext.lower()
        if ext not in ('.tsv', '.csv', '.txt'):
            abort(400,
                  "The file " + file_name + " is not a valid TSV or CSV file")

        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        file_name = os.path.join(study_location, file_name)
        try:
            table_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        for column in columns_rows:
            cell_value = column['value']
            row_index = column['row']
            column_index = column['column']
            # Update the cell value (header row excluded)
            try:
                table_df.iloc[int(row_index), int(column_index)] = cell_value
            except ValueError as e:
                logger.error(
                    "(ValueError) Unable to find the required 'value', 'row' and 'column' values. Value: "
                    + str(cell_value) + ", row: " + str(row_index) + ", column: "
                    + str(column_index) + ". " + str(e))
                abort(
                    417,
                    "(ValueError) Unable to find the required 'value', 'row' and 'column' values. Value: "
                    + str(cell_value) + ", row: " + str(row_index) + ", column: "
                    + str(column_index))
            except IndexError as e:
                logger.error(
                    "(IndexError) Unable to find the required 'value', 'row' and 'column' values. Value: "
                    + str(cell_value) + ", row: " + str(row_index) + ", column: "
                    + str(column_index) + ". " + str(e))
                abort(
                    417,
                    "(IndexError) Unable to find the required 'value', 'row' and 'column' values. Value: "
                    + str(cell_value) + ", row: " + str(row_index) + ", column: "
                    + str(column_index))

        # Write the new row back in the file
        message = write_tsv(table_df, file_name)

        df_data_dict = totuples(table_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(table_df)

        return {'header': df_header, 'rows': df_data_dict, 'message': message}
Example #24
    def post(self, study_id, assay_file_name):

        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            new_row = data_dict['data']
        except KeyError:
            new_row = None

        if new_row is None:
            abort(
                404, "Please provide valid data for the new row(s). "
                "The JSON string has to have a 'data' element")

        try:
            for element in new_row:
                element.pop(
                    'index', None
                )  # Remove "index:n" element, this is the original row number
        except Exception:
            logger.info('No index (row num) supplied, ignoring')

        # param validation
        if study_id is None or assay_file_name is None:
            abort(
                404,
                'Please provide valid parameters for study identifier and assay file name'
            )
        study_id = study_id.upper()

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        assay_file_name = os.path.join(study_location, assay_file_name)

        assay_df = pd.read_csv(assay_file_name,
                               sep="\t",
                               header=0,
                               encoding='utf-8')
        assay_df = assay_df.replace(np.nan, '', regex=True)  # Remove NaN
        assay_df = assay_df.append(
            new_row, ignore_index=True)  # Add new row to the spreadsheet

        # Remove all ".n" numbers at the end of duplicated column names
        assay_df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x),
                        inplace=True)

        # Write the new row back in the file
        assay_df.to_csv(assay_file_name,
                        sep="\t",
                        encoding='utf-8',
                        index=False)

        df_data_dict = totuples(assay_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(assay_df)

        return {'header': df_header, 'data': df_data_dict}
Example #25
    def put(self, study_id, file_name):
        # param validation
        if study_id is None or file_name is None:
            abort(
                406,
                'Please provide valid parameters for study identifier and TSV file name'
            )

        fname, ext = os.path.splitext(file_name)
        ext = ext.lower()
        if ext not in ('.tsv', '.csv', '.txt'):
            abort(400,
                  "The file " + file_name + " is not a valid TSV or CSV file")

        study_id = study_id.upper()

        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            new_rows = data_dict[
                'data']  # Use "index:n" element, this is the original row number
        except KeyError:
            new_rows = None

        if new_rows is None:
            abort(
                404, "Please provide valid data for the new row(s). "
                "The JSON string has to have a 'data' element")

        for row in new_rows:
            try:
                row_index = row[
                    'index']  # Check if we have a value in the row number(s)
            except (KeyError, TypeError):
                row_index = None

            if row_index is None:
                abort(
                    404, "Please provide valid data for the updated row(s). "
                    "The JSON string has to have an 'index:n' element in each (JSON) row. "
                    "The header row can not be updated")

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        file_name = os.path.join(study_location, file_name)

        try:
            file_df = read_tsv(file_name)
        except FileNotFoundError:
            abort(400, "The file " + file_name + " was not found")

        for row in new_rows:
            try:
                row_index_int = int(row['index'])
            except (KeyError, ValueError, TypeError):
                row_index_int = None

            # Validate column names in new rows
            valid_column_name, message = validate_row(file_df, row, 'put')
            if not valid_column_name:
                abort(417, message)

            if row_index_int is not None:
                file_df = file_df.drop(
                    file_df.index[row_index_int]
                )  # Remove the old row from the spreadsheet
                # pop the "index:n" from the new_row before updating
                row.pop(
                    'index', None
                )  # Remove "index:n" element, this is the original row number
                file_df = insert_row(row_index_int, file_df,
                                     row)  # Update the row in the spreadsheet

        message = write_tsv(file_df, file_name)

        df_data_dict = totuples(file_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(file_df)

        return {'header': df_header, 'data': df_data_dict, 'message': message}
Example #26
    def put(self, study_id, assay_file_name):

        # param validation
        if study_id is None or assay_file_name is None:
            abort(
                404,
                'Please provide valid parameters for study identifier and assay file name'
            )
        study_id = study_id.upper()

        try:
            data_dict = json.loads(request.data.decode('utf-8'))
            new_rows = data_dict[
                'data']  # Use "index:n" element, this is the original row number
        except KeyError:
            new_rows = None

        if new_rows is None:
            abort(
                404, "Please provide valid data for the new row(s). "
                "The JSON string has to have a 'data' element")

        for row in new_rows:
            try:
                row_index = row[
                    'index']  # Check if we have a value in the row number(s)
            except KeyError:
                row_index = None

            if row_index is None:
                abort(
                    404, "Please provide valid data for the updated row(s). "
                    "The JSON string has to have an 'index:n' element in each (JSON) row, "
                    "this is the original row number. The header row can not be updated"
                )

        # User authentication
        user_token = None
        if "user_token" in request.headers:
            user_token = request.headers["user_token"]

        # check for access rights
        is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
            study_status = wsc.get_permissions(study_id, user_token)
        if not write_access:
            abort(403)

        assay_file_name = os.path.join(study_location, assay_file_name)

        assay_df = pd.read_csv(assay_file_name,
                               sep="\t",
                               header=0,
                               encoding='utf-8')
        assay_df = assay_df.replace(np.nan, '', regex=True)  # Remove NaN

        row_index_int = None

        for row in new_rows:
            try:
                row_index_int = int(row['index'])
            except (KeyError, ValueError, TypeError):
                row_index_int = None

            if row_index_int is not None:
                assay_df = assay_df.drop(
                    assay_df.index[row_index_int]
                )  # Remove the old row from the spreadsheet
                # pop the "index:n" from the new_row before updating
                row.pop(
                    'index', None
                )  # Remove "index:n" element from the (JSON) row, this is the original row number
                assay_df = insert_row(row_index_int, assay_df,
                                      row)  # Update the row in the spreadsheet

        # Remove all ".n" numbers at the end of duplicated column names
        assay_df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x),
                        inplace=True)

        # Write the new row back in the file
        assay_df.to_csv(assay_file_name,
                        sep="\t",
                        encoding='utf-8',
                        index=False)

        df_data_dict = totuples(assay_df.reset_index(), 'rows')

        # Get an indexed header row
        df_header = get_table_header(assay_df)

        return {'header': df_header, 'data': df_data_dict}