def get(self, study_id, assay_file_name):
    """Return an assay sheet as an indexed header plus row tuples."""
    # param validation
    if study_id is None or assay_file_name is None:
        logger.info('No study_id and/or assay file name given')
        abort(404)
    study_id = study_id.upper()

    # User authentication (token optional; the permission check decides access)
    user_token = request.headers.get("user_token")

    logger.info('Assay Table: Getting ISA-JSON Study %s', study_id)
    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, \
        release_date, submission_date, study_status = wsc.get_permissions(study_id, user_token)
    if not read_access:
        abort(403)

    sheet_path = os.path.join(study_location, assay_file_name)
    logger.info('Trying to load Assay (%s) for Study %s', sheet_path, study_id)
    # Load the assay table; blank out NaN so the JSON payload stays clean
    assay_df = pd.read_csv(sheet_path, sep="\t", header=0, encoding='utf-8')
    assay_df = assay_df.replace(np.nan, '', regex=True)

    return {'header': get_table_header(assay_df),
            'data': totuples(assay_df.reset_index(), 'rows')}
def get(self, study_id):
    """Return the organisms (sample characteristics) declared in a study's sample sheet."""
    log_request(request)
    # param validation
    if study_id is None:
        abort(404)

    # User authentication - a token is mandatory for this endpoint
    if "user_token" not in request.headers:
        abort(401)
    user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, \
        release_date, submission_date, study_status = wsc.get_permissions(study_id, user_token)
    if not read_access:
        abort(403)

    # Load the ISA study metadata without the (large) table files
    isa_study, isa_inv, std_path = iac.get_isa_study(
        study_id, user_token, skip_load_tables=True, study_location=study_location)

    samples = read_characteristics_from_sample_sheet(study_location, isa_study)
    return totuples(samples, 'organisms')
def get_dataframe(self):
    """ Yield an individual dataframe-as-a-dict. This is a generator method, with the idea being
    that with such massive files we want to limit how many dataframes we are holding in memory at
    once. We convert the dataframe to a dict in this method, and then yield it. This means we only
    have one dataframe open in memory at a time. The method also sorts through each of the maf
    files found in the study directory, attempting to cast off any that might correspond to other
    analytical methods. """
    for i, study_id in enumerate(self.studies_to_combine):
        # NOTE(review): repr() + strip("'") round-trips the path through its string
        # representation; on paths containing backslashes or quotes this would change
        # the value -- presumably only plain POSIX paths are expected. TODO confirm.
        copy = repr(self.original_study_location).strip("'")
        # Derive each study's location by substituting its accession into the template path
        study_location = copy.replace("MTBLS1", study_id)
        for maf in self.sort_mafs(study_location, study_id):
            maf_temp = None
            try:
                maf_temp = pandas.read_csv(os.path.join(study_location, maf), sep="\t", header=0, encoding='unicode_escape')
            except pandas.errors.EmptyDataError as e:
                # Empty file: record it and move on; never fatal for the batch
                logger.error(f'EmptyDataError Issue with opening maf file {maf}: {str(e)}')
                self.unopenable_maf_register.append(maf)
                continue
            except Exception as e:
                # Any other read failure is also recorded rather than raised
                logger.error(f'Issue with opening maf file {maf}, cause of error unclear: {str(e)}')
                self.unopenable_maf_register.append(maf)
                continue
            # Dispatch to the method-specific cleanup helper, named '<method>_maf_cleanup'
            cleanup_function = getattr(DataFrameUtils, f'{self.method}_maf_cleanup')
            maf_temp = cleanup_function(maf_temp, study_id, maf)
            # Convert and yield one dataframe at a time to cap memory usage
            maf_as_dict = totuples(df=maf_temp, text='dict')['dict']
            yield maf_as_dict
def post(self, study_id, file_name):
    """Insert a single new column into a study table file.

    Query params: new_column_name, new_column_position (numeric),
    new_column_default_value (optional). Returns the updated header,
    all rows and the write status message.
    """
    parser = reqparse.RequestParser()
    parser.add_argument('new_column_name', help="Name of new column")
    parser.add_argument('new_column_position', help="The position (column #) of new column")
    parser.add_argument('new_column_default_value', help="The (optional) default value of new column")
    new_column_name = None
    new_column_position = None
    new_column_default_value = None
    if request.args:
        args = parser.parse_args(req=request)
        new_column_name = args['new_column_name']
        new_column_position = args['new_column_position']
        new_column_default_value = args['new_column_default_value']

    if new_column_name is None:
        abort(404, "Please provide valid name for the new column")
    # BUG FIX: int(None) / int('abc') previously raised and surfaced as HTTP 500
    try:
        new_column_position = int(new_column_position)
    except (TypeError, ValueError):
        abort(400, "Please provide a valid (numeric) position for the new column")

    # param validation
    if study_id is None or file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and file name')
    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_name = os.path.join(study_location, file_name)
    try:
        table_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    # One default value per existing data row (header excluded)
    new_col = [new_column_default_value] * table_df.shape[0]

    # Add new column to the spreadsheet
    table_df.insert(loc=new_column_position, column=new_column_name,
                    value=new_col, allow_duplicates=True)

    df_data_dict = totuples(table_df.reset_index(), 'rows')
    # Get an indexed header row
    df_header = get_table_header(table_df)
    message = write_tsv(table_df, file_name)

    return {'header': df_header, 'data': df_data_dict, 'message': message}
def put(self, study_id, file_name):
    """Update individual cells in a study table file.

    JSON body: {'data': [{'row': r, 'column': c, 'value': v}, ...]} with
    0-based row/column indices (header excluded). Returns the updated
    header, all rows and the write status message.
    """
    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        columns_rows = data_dict['data']
    except KeyError:
        columns_rows = None

    if columns_rows is None:
        abort(404, "Please provide valid key-value pairs for the cell value."
                   "The JSON string has to have a 'data' element")

    # param validation
    if study_id is None or file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and/or file name')
    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_name = os.path.join(study_location, file_name)
    try:
        table_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    for column in columns_rows:
        # BUG FIX: only ValueError was caught before, but a missing key raises
        # KeyError and an out-of-range index raises IndexError from iloc --
        # both previously surfaced as HTTP 500 despite the 417 message below.
        try:
            cell_value = column['value']
            row_index = column['row']
            column_index = column['column']
            table_df.iloc[int(row_index), int(column_index)] = cell_value
        except (KeyError, ValueError, IndexError):
            abort(417, "Unable to find the required 'value', 'row' and 'column' values")

    # Write the new row back in the file
    message = write_tsv(table_df, file_name)

    df_data_dict = totuples(table_df.reset_index(), 'rows')
    # Get an indexed header row
    df_header = get_table_header(table_df)

    return {'header': df_header, 'rows': df_data_dict, 'message': message}
def delete(self, study_id, assay_file_name):
    """Delete one or more data rows from an assay sheet and persist the result.

    Query param row_num: comma-separated list of 0-based row numbers
    (header excluded). Returns the updated header and rows.
    """
    # query validation
    parser = reqparse.RequestParser()
    parser.add_argument('row_num', help="The row number of the cell(s) to remove (exclude header)",
                        location="args")
    args = parser.parse_args()
    row_num = args['row_num']

    # param validation
    if study_id is None or assay_file_name is None or row_num is None:
        abort(404)
    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    assay_file_name = os.path.join(study_location, assay_file_name)
    assay_df = pd.read_csv(assay_file_name, sep="\t", header=0, encoding='utf-8')
    assay_df = assay_df.replace(np.nan, '', regex=True)  # Remove NaN

    # BUG FIX: non-numeric row numbers previously raised an unhandled
    # ValueError (HTTP 500); now rejected with an explicit 400.
    try:
        sorted_num_rows = [int(x) for x in row_num.split(",")]
    except ValueError:
        abort(400, "row_num must be a comma-separated list of integers")
    # Need to remove the highest row number first as the DataFrame dynamically
    # re-orders when one row is removed
    sorted_num_rows.sort(reverse=True)
    for num in sorted_num_rows:
        assay_df = assay_df.drop(assay_df.index[num])  # Drop row(s) in the spreadsheet

    # Remove all ".n" numbers at the end of duplicated column names
    assay_df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x), inplace=True)

    # Write the updated file
    assay_df.to_csv(assay_file_name, sep="\t", encoding='utf-8', index=False)

    # To be sure we read the file again
    assay_df = pd.read_csv(assay_file_name, sep="\t", header=0, encoding='utf-8')
    assay_df = assay_df.replace(np.nan, '', regex=True)  # Remove NaN

    df_data_dict = totuples(assay_df.reset_index(), 'rows')
    # Get an indexed header row
    df_header = get_table_header(assay_df)

    return {'header': df_header, 'data': df_data_dict}
def post(self, study_id, assay_file_name):
    """Append new row(s) to an assay sheet.

    JSON body: {'data': [row-dict, ...]}. Any 'index' key in a row is
    discarded (it is the original row number). Returns the updated
    header and rows.
    """
    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        new_row = data_dict['data']
    except KeyError:
        new_row = None

    if new_row is None:
        # BUG FIX: the message used to say 'assayData', but the code reads the 'data' element
        abort(404, "Please provide valid data for updated new row(s). "
                   "The JSON string has to have a 'data' element")

    try:
        for element in new_row:
            element.pop('index', None)  # Remove "index:n" element, this is the original row number
    except (AttributeError, TypeError):  # narrowed from bare except: rows not dicts / not iterable
        logger.info('No index (row num) supplied, ignoring')

    # param validation
    if study_id is None or assay_file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and assay file name')
    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    assay_file_name = os.path.join(study_location, assay_file_name)
    assay_df = pd.read_csv(assay_file_name, sep="\t", header=0, encoding='utf-8')
    assay_df = assay_df.replace(np.nan, '', regex=True)  # Remove NaN
    assay_df = assay_df.append(new_row, ignore_index=True)  # Add new row(s) to the spreadsheet

    # Remove all ".n" numbers at the end of duplicated column names
    assay_df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x), inplace=True)

    # Write the new row back in the file
    assay_df.to_csv(assay_file_name, sep="\t", encoding='utf-8', index=False)

    df_data_dict = totuples(assay_df.reset_index(), 'rows')
    # Get an indexed header row
    df_header = get_table_header(assay_df)

    return {'header': df_header, 'data': df_data_dict}
def delete(self, study_id, file_name):
    """Remove the requested row numbers from a study TSV file and persist it."""
    # query validation
    parser = reqparse.RequestParser()
    parser.add_argument('row_num', help="The row number of the cell(s) to remove (exclude header)",
                        location="args")
    row_num = parser.parse_args()['row_num']

    # param validation
    if study_id is None or file_name is None or row_num is None:
        abort(404)
    study_id = study_id.upper()

    # User authentication
    user_token = request.headers.get("user_token")

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, \
        release_date, submission_date, study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_name = os.path.join(study_location, file_name)
    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    # Drop the highest-numbered rows first: each drop re-indexes the frame,
    # so ascending order would remove the wrong rows.
    for num in sorted((int(x) for x in row_num.split(",")), reverse=True):
        file_df = file_df.drop(file_df.index[num])

    message = write_tsv(file_df, file_name)

    # Re-read so the response reflects exactly what was persisted
    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    return {'header': get_table_header(file_df),
            'data': totuples(file_df.reset_index(), 'rows'),
            'message': message}
def get(self, study_id, file_name):
    """Serve a study TSV/CSV file (or the curator-only Zooma mapping file) as JSON."""
    # param validation
    if study_id is None or file_name is None:
        logger.info('No study_id and/or TSV file name given')
        abort(404)

    # Only tab/comma-separated text files may be served through this endpoint
    fname, ext = os.path.splitext(file_name)
    ext = ext.lower()
    if ext not in ('.tsv', '.csv', '.txt'):
        abort(400, "The file " + file_name + " is not a valid TSV or CSV file")

    study_id = study_id.upper()
    requested_name = file_name  # remember the name exactly as the caller passed it

    # User authentication
    user_token = request.headers.get("user_token")

    logger.info('Assay Table: Getting ISA-JSON Study %s', study_id)
    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, \
        release_date, submission_date, study_status = wsc.get_permissions(study_id, user_token)
    if not read_access:
        abort(403)

    if file_name == 'metabolights_zooma.tsv':
        # The MetaboLights Zooma mapping file is curator-only and lives outside the study folder
        if not is_curator:
            abort(403)
        file_name = app.config.get('MTBLS_ZOOMA_FILE')
    else:
        file_name = os.path.join(study_location, file_name)

    logger.info('Trying to load TSV file (%s) for Study %s', file_name, study_id)
    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    return {'header': get_table_header(file_df, study_id, requested_name),
            'data': totuples(file_df.reset_index(), 'rows')}
def get(self, study_id, sample_file_name):
    """Return the sample sheet of a study as an indexed header plus row tuples."""
    # param validation
    if study_id is None:
        logger.info('No study_id given')
        abort(404)
    if sample_file_name is None:
        logger.info('No sample file name given')
        abort(404)

    # User authentication
    user_token = request.headers.get("user_token")

    logger.info('sample Table: Getting ISA-JSON Study %s, using API-Key %s', study_id, user_token)
    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, \
        release_date, submission_date, study_status = wsc.get_permissions(study_id, user_token)
    if not read_access:
        abort(403)

    sheet_path = os.path.join(study_location, sample_file_name)
    logger.info('Trying to load sample (%s) for Study %s', sheet_path, study_id)
    # Load the sample sheet; blank out NaN so the JSON payload stays clean
    sample_df = pd.read_csv(sheet_path, sep="\t", header=0, encoding='utf-8')
    sample_df = sample_df.replace(np.nan, '', regex=True)

    return {'header': get_table_header(sample_df),
            'data': totuples(sample_df.reset_index(), 'rows')}
def get(self, study_id, file_name):
    """Return a study TSV file (or, for curators, the Zooma mapping file) as header + rows."""
    # param validation
    if study_id is None or file_name is None:
        logger.info('No study_id and/or TSV file name given')
        abort(404)
    study_id = study_id.upper()
    original_file_name = file_name  # keep the caller-supplied name for the header lookup

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    logger.info('Assay Table: Getting ISA-JSON Study %s', study_id)
    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, \
        release_date, submission_date, study_status = wsc.get_permissions(study_id, user_token)
    if not read_access:
        abort(403)

    # Curator-only special case: the global Zooma mapping file
    if file_name == 'metabolights_zooma.tsv':
        if not is_curator:
            abort(403)
        file_name = app.config.get('MTBLS_ZOOMA_FILE')
    else:
        file_name = os.path.join(study_location, file_name)

    logger.info('Trying to load TSV file (%s) for Study %s', file_name, study_id)
    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    df_data_dict = totuples(file_df.reset_index(), 'rows')
    df_header = get_table_header(file_df, study_id, original_file_name)
    return {'header': df_header, 'data': df_data_dict}
def post(self, study_id, file_name):
    """Insert a single new column into a TSV/CSV file, writing an audit copy first.

    Query params: new_column_name, new_column_position (numeric),
    new_column_default_value (optional). Returns the updated header,
    all rows and the write status message.
    """
    parser = reqparse.RequestParser()
    parser.add_argument('new_column_name', help="Name of new column")
    parser.add_argument('new_column_position', help="The position (column #) of new column")
    parser.add_argument('new_column_default_value', help="The (optional) default value of new column")
    new_column_name = None
    new_column_position = None
    new_column_default_value = None
    if request.args:
        args = parser.parse_args(req=request)
        new_column_name = args['new_column_name']
        new_column_position = args['new_column_position']
        new_column_default_value = args['new_column_default_value']

    if new_column_name is None:
        abort(404, "Please provide valid name for the new column")
    # BUG FIX: int(None) / int('abc') previously raised and surfaced as HTTP 500
    try:
        new_column_position = int(new_column_position)
    except (TypeError, ValueError):
        abort(400, "Please provide a valid (numeric) position for the new column")

    # param validation
    if study_id is None or file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and file name')
    study_id = study_id.upper()

    fname, ext = os.path.splitext(file_name)
    ext = ext.lower()
    if ext not in ('.tsv', '.csv', '.txt'):
        abort(400, "The file " + file_name + " is not a valid TSV or CSV file")

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_name = os.path.join(study_location, file_name)
    try:
        table_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    # Snapshot the study before modifying it
    audit_status, dest_path = write_audit_files(study_location)

    # One default value per existing data row (header excluded)
    new_col = [new_column_default_value] * table_df.shape[0]

    # Add new column to the spreadsheet
    table_df.insert(loc=new_column_position, column=new_column_name,
                    value=new_col, allow_duplicates=True)

    df_data_dict = totuples(table_df.reset_index(), 'rows')
    # Get an indexed header row
    df_header = get_table_header(table_df)
    message = write_tsv(table_df, file_name)

    return {'header': df_header, 'data': df_data_dict, 'message': message}
def delete(self, study_id, file_name):
    """Remove one or more data rows from a TSV/CSV file and return the saved table."""
    # query validation
    parser = reqparse.RequestParser()
    parser.add_argument('row_num',
                        help="The row number of the cell(s) to remove (exclude header)",
                        location="args")
    row_num = parser.parse_args()['row_num']

    # param validation
    if study_id is None or file_name is None or row_num is None:
        abort(404)

    # Only tab/comma-separated text files are editable through this endpoint
    fname, ext = os.path.splitext(file_name)
    ext = ext.lower()
    if ext not in ('.tsv', '.csv', '.txt'):
        abort(400, "The file " + file_name + " is not a valid TSV or CSV file")
    study_id = study_id.upper()

    # User authentication
    user_token = request.headers.get("user_token")

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, \
        release_date, submission_date, study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_name = os.path.join(study_location, file_name)
    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    # Delete from the bottom up: dropping a row re-indexes the frame,
    # so ascending order would remove the wrong rows.
    for num in sorted((int(x) for x in row_num.split(",")), reverse=True):
        file_df = file_df.drop(file_df.index[num])

    message = write_tsv(file_df, file_name)

    # Re-read so the response mirrors the file on disk
    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    return {'header': get_table_header(file_df),
            'data': totuples(file_df.reset_index(), 'rows'),
            'message': message}
def post(self, study_id, file_name):
    """Append new row(s) to a study TSV file (or the curator-only Zooma mapping file).

    JSON body: {'data': [row-dict, ...]}. Any 'index' key in a row is
    discarded (it is the original row number). An empty or falsy first
    row results in one blank row being appended.
    """
    log_request(request)
    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        new_row = data_dict['data']
    except KeyError:
        new_row = None

    if new_row is None:
        abort(417, "Please provide valid data for updated new row(s). The JSON string has to have a 'data' element")

    try:
        for element in new_row:
            element.pop('index', None)  # Remove "index:n" element, this is the original row number
    except (AttributeError, TypeError):  # narrowed from bare except: rows not dicts / not iterable
        logger.info('No index (row num) supplied, ignoring')

    # param validation
    if study_id is None or file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and TSV file name')
    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    if file_name == 'metabolights_zooma.tsv':
        # This will edit the MetaboLights Zooma mapping file (curator-only)
        if not is_curator:
            abort(403)
        file_name = app.config.get('MTBLS_ZOOMA_FILE')
    else:
        file_name = os.path.join(study_location, file_name)

    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file name was not found")

    # Validate column names in new rows
    valid_column_name, message = validate_row(file_df, new_row, "post")
    if not valid_column_name:
        abort(417, message)

    # BUG FIX: an empty list ('data': []) previously raised IndexError on
    # new_row[0] (HTTP 500); treat it like "no data" and append a blank row.
    if new_row and new_row[0]:
        file_df = file_df.append(new_row, ignore_index=True)  # Add new row(s) to the TSV file
    else:
        file_df = file_df.append(pd.Series(), ignore_index=True)

    message = write_tsv(file_df, file_name)

    # Get an indexed header row
    df_header = get_table_header(file_df)

    # Get the updated data table
    try:
        df_data_dict = totuples(read_tsv(file_name), 'rows')
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    return {'header': df_header, 'data': df_data_dict, 'message': message}
def delete(self, study_id, assay_file_name):
    """Drop one or more data rows from an assay sheet and return the saved table."""
    # query validation
    parser = reqparse.RequestParser()
    parser.add_argument('row_num',
                        help="The row number of the cell(s) to remove (exclude header)",
                        location="args")
    row_num = parser.parse_args()['row_num']

    # param validation
    if study_id is None or assay_file_name is None or row_num is None:
        abort(404)
    study_id = study_id.upper()

    # User authentication
    user_token = request.headers.get("user_token")

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, \
        release_date, submission_date, study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    assay_file_name = os.path.join(study_location, assay_file_name)
    assay_df = pd.read_csv(assay_file_name, sep="\t", header=0, encoding='utf-8')
    assay_df = assay_df.replace(np.nan, '', regex=True)  # Remove NaN

    # Delete from the bottom up: each drop re-indexes the frame, so
    # ascending order would remove the wrong rows.
    for num in sorted((int(x) for x in row_num.split(",")), reverse=True):
        assay_df = assay_df.drop(assay_df.index[num])

    # Strip the ".n" suffixes pandas appends to duplicated column names
    assay_df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x), inplace=True)

    # Persist, then re-read so the response mirrors the file on disk
    assay_df.to_csv(assay_file_name, sep="\t", encoding='utf-8', index=False)
    assay_df = pd.read_csv(assay_file_name, sep="\t", header=0, encoding='utf-8')
    assay_df = assay_df.replace(np.nan, '', regex=True)

    return {'header': get_table_header(assay_df),
            'data': totuples(assay_df.reset_index(), 'rows')}
def post(self, study_id, file_name):
    """Insert new row(s) into a TSV file at a caller-supplied position.

    JSON body: {'data': {'rows': [row-dict, ...], 'index': n}}. The rows are
    inserted before existing row n; 'index' of -1 means the top of the table.
    Returns the updated header, all rows and the write status message.
    """
    log_request(request)
    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        data = data_dict['data']
        new_row = data['rows']
    except KeyError:
        new_row = None
        data = None

    if new_row is None:
        abort(
            417,
            "Please provide valid data for updated new row(s). The JSON string has to have a 'rows' element"
        )

    try:
        for element in new_row:
            element.pop(
                'index', None
            )  # Remove "index:n" element, this is the original row number
    except:
        logger.info('No index (row num) supplied, ignoring')

    # param validation
    if study_id is None or file_name is None:
        abort(
            404,
            'Please provide valid parameters for study identifier and TSV file name'
        )

    # Only tab/comma-separated text files may be edited through this endpoint
    fname, ext = os.path.splitext(file_name)
    ext = ext.lower()
    if ext not in ('.tsv', '.csv', '.txt'):
        abort(400, "The file " + file_name + " is not a valid TSV or CSV file")

    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    if file_name == 'metabolights_zooma.tsv':
        # This will edit the MetaboLights Zooma mapping file (curator-only)
        if not is_curator:
            abort(403)
        file_name = app.config.get('MTBLS_ZOOMA_FILE')
    else:
        file_name = os.path.join(study_location, file_name)

    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file name was not found")

    # Validate column names in new rows
    valid_column_name, message = validate_row(file_df, new_row, "post")
    if not valid_column_name:
        abort(417, message)

    if data:
        try:
            start_index = data['index']
            if start_index == -1:
                start_index = 0
            # Fractional index trick: appending at n - 0.5 and then sorting the
            # index places the new row just BEFORE existing row n.
            start_index = start_index - 0.5
        except KeyError:
            # No 'index' supplied: append after the last row
            start_index = len(file_df.index)

        # Map the complete row first (one empty value per column), then
        # overlay the caller-supplied values so missing columns stay blank.
        complete_row = {}
        for col in file_df.columns:
            complete_row[col] = ""

        if not new_row:
            logger.warning(
                "No new row information provided. Adding empty row " +
                file_name + ", row " + str(complete_row))
        else:
            for row in new_row:
                complete_row.update(row)
                row = complete_row
                line = pd.DataFrame(row, index=[start_index])
                file_df = file_df.append(line, ignore_index=False)
                # Sorting by (fractional) index slots the row into place,
                # then the index is rebuilt as consecutive integers.
                file_df = file_df.sort_index().reset_index(drop=True)
                start_index += 1

        file_df = file_df.replace(np.nan, '', regex=True)
        message = write_tsv(file_df, file_name)

    # Get an indexed header row
    df_header = get_table_header(file_df)

    # Get the updated data table
    try:
        df_data_dict = totuples(read_tsv(file_name), 'rows')
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    return {'header': df_header, 'data': df_data_dict, 'message': message}
def put(self, study_id, assay_file_name):
    """Update a single cell in an assay sheet.

    Query params: row_num (0-based, header excluded), column_name, cell_value.
    Returns the whole updated table under the 'assaydata' key (see NOTE below).
    """
    parser = reqparse.RequestParser()
    parser.add_argument('row_num', help="The row number of the cell to update (exclude header)")
    parser.add_argument('column_name', help="The column name of the cell to update")
    parser.add_argument('cell_value', help="The column name of the cell to update")
    row_num = None
    column_name = None
    cell_value = None
    if request.args:
        args = parser.parse_args(req=request)
        row_num = args['row_num']
        column_name = args['column_name']
        cell_value = args['cell_value']

    # param validation
    if study_id is None or assay_file_name is None or row_num is None or column_name is None:
        abort(404)
    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    assay_file_name = os.path.join(study_location, assay_file_name)
    assay_df = pd.read_csv(assay_file_name, sep="\t", header=0, encoding='utf-8')
    assay_df = assay_df.replace(np.nan, '', regex=True)

    try:
        row = int(row_num)
        column = assay_df.columns.get_loc(column_name)
        assay_df.iloc[row, column] = cell_value
    except Exception:
        # Best-effort update: a bad row/column is logged, not fatal.
        # BUG FIX: the closing parenthesis after row_num was a '(' in the original message.
        logger.warning('Could not find row (' + row_num + ') and/or column (' +
                       column_name + ') in the table')

    # Remove all ".n" numbers at the end of duplicated column names
    assay_df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x), inplace=True)

    # Write the updated table back in the file
    assay_df.to_csv(assay_file_name, sep="\t", encoding='utf-8', index=False)

    # Convert the DataFrame to a JSON-style tuples object.
    # NOTE: the original inner helper took a row-number argument, but that
    # parameter was shadowed by its enumerate() loop variable, so ALL rows
    # were returned -- that response shape is preserved here for
    # backward compatibility.
    def _totuples(df):
        d = [{colname: values[idx] for idx, colname in enumerate(df.columns)}
             for values in df.values]
        return {'assaydata': d}

    return _totuples(assay_df.reset_index())
def post(self, study_id, file_name):
    """Add or update one or more columns in a study table file.

    JSON body: {'data': [{'name': ..., 'value': default, 'index': position}, ...]}.
    If the column already sitting at 'index' has the same name it is
    overwritten with the default value, otherwise a new column is inserted.
    """
    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        new_columns = data_dict['data']
    except KeyError:
        new_columns = None

    if new_columns is None:
        abort(417, "Please provide valid key-value pairs for the new columns."
                   "The JSON string has to have a 'data' element")

    # param validation
    if study_id is None or file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and/or file name')
    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_name = os.path.join(study_location, file_name)
    try:
        table_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    for column in new_columns:
        # BUG FIX: a missing key or a non-numeric 'index' previously raised
        # and surfaced as HTTP 500; reject with an explicit 417 instead.
        try:
            new_column_default_value = column['value']
            new_column_name = column['name']
            new_column_position = int(column['index'])
        except (KeyError, ValueError, TypeError):
            abort(417, "Each column needs 'name', 'value' and a numeric 'index' element")

        # One default value per existing data row (header excluded)
        new_col = [new_column_default_value] * table_df.shape[0]

        # Check if we already have the column in the current position
        try:
            header_name = table_df.iloc[:, new_column_position].name
        except IndexError:  # position beyond the current number of columns
            header_name = ""

        if header_name == new_column_name:
            # We should update the existing column
            table_df.iloc[:, new_column_position] = new_col
        else:
            # Add new column to the spreadsheet
            table_df.insert(loc=new_column_position, column=new_column_name,
                            value=new_col, allow_duplicates=True)

    # Get an (updated) indexed header row
    df_header = get_table_header(table_df)
    # Get all indexed rows
    df_data_dict = totuples(table_df.reset_index(), 'rows')
    message = write_tsv(table_df, file_name)

    return {'header': df_header,
            'rows': df_data_dict,
            'message': message}
def put(self, study_id, assay_file_name):
    """Update existing rows in an assay sheet.

    The JSON body must be {"data": [...]} where every row object carries an
    'index:n' element giving the original (0-based) row number to replace.
    Each matching row is dropped and re-inserted with the supplied values.

    :return: dict with the updated indexed header and all rows.
    """
    # param validation
    if study_id is None or assay_file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and assay file name')
    study_id = study_id.upper()

    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        new_rows = data_dict['data']  # Use "index:n" element, this is the original row number
    except (KeyError, ValueError):
        new_rows = None
    if new_rows is None:
        # BUG FIX: the message previously named an 'assayData' element,
        # but the key actually read from the body is 'data'
        abort(404, "Please provide valid data for updated new row(s). "
                   "The JSON string has to have a 'data' element")

    for row in new_rows:
        try:
            row_index = row['index']  # Check if we have a value in the row number(s)
        except KeyError:
            row_index = None

        if new_rows is None or row_index is None:
            abort(404, "Please provide valid data for the updated row(s). "
                       "The JSON string has to have an 'index:n' element in each (JSON) row, "
                       "this is the original row number. The header row can not be updated")

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    assay_file_name = os.path.join(study_location, assay_file_name)
    assay_df = pd.read_csv(assay_file_name, sep="\t", header=0, encoding='utf-8')
    assay_df = assay_df.replace(np.nan, '', regex=True)  # Remove NaN

    row_index_int = None
    for row in new_rows:
        try:
            row_index_int = int(row['index'])
        except (KeyError, TypeError, ValueError):
            # BUG FIX: the original body read "row_index_int is None", a no-op
            # identity comparison; it must *assign* None here
            row_index_int = None

        if row_index_int is not None:
            assay_df = assay_df.drop(assay_df.index[row_index_int])  # Remove the old row from the spreadsheet
            # pop the "index:n" from the new_row before updating
            row.pop('index', None)  # Remove "index:n" element from the (JSON) row, this is the original row number
            assay_df = insert_row(row_index_int, assay_df, row)  # Update the row in the spreadsheet

    # Remove all ".n" numbers at the end of duplicated column names
    assay_df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x), inplace=True)

    # Write the new row back in the file
    assay_df.to_csv(assay_file_name, sep="\t", encoding='utf-8', index=False)

    df_data_dict = totuples(assay_df.reset_index(), 'rows')

    # Get an indexed header row
    df_header = get_table_header(assay_df)

    return {'header': df_header, 'data': df_data_dict}
def put(self, study_id, file_name):
    """Update existing rows in a study TSV file.

    The JSON body must be {"data": [...]} where every row object carries an
    'index:n' element giving the original (0-based) row number to replace.
    Column names in each row are validated against the file before updating.

    :return: dict with the updated indexed header, all rows and the
        write-status message.
    """
    # param validation
    if study_id is None or file_name is None:
        abort(406, 'Please provide valid parameters for study identifier and TSV file name')
    study_id = study_id.upper()

    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        new_rows = data_dict['data']  # Use "index:n" element, this is the original row number
    except (KeyError, ValueError):
        new_rows = None
    if new_rows is None:
        abort(404, "Please provide valid data for updated new row(s). "
                   "The JSON string has to have a 'data' element")

    for row in new_rows:
        try:
            row_index = row['index']  # Check if we have a value in the row number(s)
        except KeyError:
            row_index = None

        if new_rows is None or row_index is None:
            abort(404, "Please provide valid data for the updated row(s). "
                       "The JSON string has to have an 'index:n' element in each (JSON) row. "
                       "The header row can not be updated")

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_name = os.path.join(study_location, file_name)
    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    row_index_int = None
    for row in new_rows:
        try:
            row_index_int = int(row['index'])
        except (KeyError, TypeError, ValueError):
            # BUG FIX: the original body read "row_index_int is None", a no-op
            # identity comparison (and the variable was never initialised);
            # it must *assign* None here
            row_index_int = None

        # Validate column names in new rows
        valid_column_name, message = validate_row(file_df, row, 'put')
        if not valid_column_name:
            abort(417, message)

        if row_index_int is not None:
            file_df = file_df.drop(file_df.index[row_index_int])  # Remove the old row from the spreadsheet
            # pop the "index:n" from the new_row before updating
            row.pop('index', None)  # Remove "index:n" element, this is the original row number
            file_df = insert_row(row_index_int, file_df, row)  # Update the row in the spreadsheet

    message = write_tsv(file_df, file_name)

    df_data_dict = totuples(file_df.reset_index(), 'rows')

    # Get an indexed header row
    df_header = get_table_header(file_df)

    return {'header': df_header, 'data': df_data_dict, 'message': message}
def put(self, study_id, assay_file_name):
    """Update a single cell in an assay sheet.

    Query parameters: 'row_num' (0-based, header excluded), 'column_name'
    and 'cell_value'. The updated table is written back to disk and returned
    as {'assaydata': [...]}, one dict per row keyed by column name.
    """
    parser = reqparse.RequestParser()
    parser.add_argument('row_num', help="The row number of the cell to update (exclude header)")
    parser.add_argument('column_name', help="The column name of the cell to update")
    # BUG FIX: help text was a copy-paste of column_name's description
    parser.add_argument('cell_value', help="The new value of the cell to update")
    row_num = None
    column_name = None
    cell_value = None
    if request.args:
        args = parser.parse_args(req=request)
        row_num = args['row_num']
        column_name = args['column_name']
        cell_value = args['cell_value']

    # param validation
    if study_id is None or assay_file_name is None or row_num is None or column_name is None:
        abort(404)
    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    assay_file_name = os.path.join(study_location, assay_file_name)
    assay_df = pd.read_csv(assay_file_name, sep="\t", header=0, encoding='utf-8')
    assay_df = assay_df.replace(np.nan, '', regex=True)

    try:
        row = int(row_num)
        column = assay_df.columns.get_loc(column_name)
        assay_df.iloc[row, column] = cell_value
    except Exception:
        # BUG FIX: the original message had a mismatched parenthesis
        # ("row (" ... "( and/or column (")
        logger.warning('Could not find row (' + row_num + ') and/or column (' + column_name + ') in the table')

    # Remove all ".n" numbers at the end of duplicated column names
    assay_df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x), inplace=True)

    # Write the updated table back in the file
    assay_df.to_csv(assay_file_name, sep="\t", encoding='utf-8', index=False)

    # Convert the DataFrame to the {'assaydata': [...]} shape. The original
    # defined a nested helper named 'totuples' whose parameter was shadowed by
    # its own enumerate variable (dead parameter) and which shadowed the
    # module-level totuples helper; inlined here without the unused parameter.
    indexed_df = assay_df.reset_index()
    assay_rows = [
        {colname: row_values[col_pos] for col_pos, colname in enumerate(indexed_df.columns)}
        for row_values in indexed_df.values
    ]
    return {'assaydata': assay_rows}
def post(self, study_id, file_name):
    """Add new columns (or overwrite existing ones) in a study TSV/CSV file.

    Expects a JSON body of the form
    {"data": [{"name": <column>, "value": <default>, "index": <position>}, ...]}.
    Only .tsv/.csv/.txt files are accepted; audit copies of the study files
    are written before the table is modified.

    :return: dict with the updated indexed header, all rows and the
        write-status message.
    """
    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        new_columns = data_dict['data']
    except (KeyError, ValueError):
        # ValueError (JSONDecodeError) covers a malformed body; the original
        # only caught KeyError, so bad JSON surfaced as a 500 instead of 417
        new_columns = None
    if new_columns is None:
        abort(417, "Please provide valid key-value pairs for the new columns."
                   "The JSON string has to have a 'data' element")

    # param validation
    if study_id is None or file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and/or file name')
    study_id = study_id.upper()

    fname, ext = os.path.splitext(file_name)
    ext = ext.lower()
    if ext not in ('.tsv', '.csv', '.txt'):
        abort(400, "The file " + file_name + " is not a valid TSV or CSV file")

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_name = os.path.join(study_location, file_name)
    try:
        table_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    # Snapshot the study files before mutating the table
    audit_status, dest_path = write_audit_files(study_location)

    for column in new_columns:
        new_column_default_value = column['value']
        new_column_name = column['name']
        new_column_position = column['index']

        # Need to add values for each existing row (not header)
        new_col = [new_column_default_value] * table_df.shape[0]

        # Check if we already have a column in the requested position;
        # a position beyond the current width raises IndexError
        try:
            header_name = table_df.iloc[:, new_column_position].name
        except (IndexError, TypeError, ValueError):
            header_name = ""

        if header_name == new_column_name:
            # We should update the existing column
            table_df.iloc[:, new_column_position] = new_col
        else:
            # Add new column to the spreadsheet
            table_df.insert(loc=int(new_column_position), column=new_column_name,
                            value=new_col, allow_duplicates=True)

    # Get an (updated) indexed header row — computed once, after all inserts
    df_header = get_table_header(table_df)

    # Get all indexed rows
    df_data_dict = totuples(table_df.reset_index(), 'rows')

    message = write_tsv(table_df, file_name)

    return {'header': df_header, 'rows': df_data_dict, 'message': message}
def put(self, study_id, file_name):
    """Update individual cells in a study TSV/CSV file.

    Expects a JSON body of the form
    {"data": [{"value": <new value>, "row": <row index>, "column": <column index>}, ...]}
    where 'row' and 'column' are 0-based positions in the table (header excluded).

    :return: dict with the updated indexed header, all rows and the
        write-status message.
    """
    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        columns_rows = data_dict['data']
    except (KeyError, ValueError):
        columns_rows = None
    if columns_rows is None:
        abort(404, "Please provide valid key-value pairs for the cell value."
                   "The JSON string has to have a 'data' element")

    # param validation
    if study_id is None or file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and/or file name')

    fname, ext = os.path.splitext(file_name)
    ext = ext.lower()
    if ext not in ('.tsv', '.csv', '.txt'):
        abort(400, "The file " + file_name + " is not a valid TSV or CSV file")

    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_name = os.path.join(study_location, file_name)
    try:
        table_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    for column in columns_rows:
        cell_value = column['value']
        row_index = column['row']
        column_index = column['column']

        # Built once so both handlers share it. BUG FIX: the original
        # interpolated 'column' (the whole request dict) instead of
        # 'column_index', and concatenated possibly non-str values — the
        # error path itself raised a TypeError.
        error_detail = ("Unable to find the required 'value', 'row' and 'column' values. "
                        "Value: " + str(cell_value) + ", row: " + str(row_index) +
                        ", column: " + str(column_index))

        # Need to add values for column and row (not header)
        try:
            table_df.iloc[int(row_index), int(column_index)] = cell_value
        except ValueError as e:
            logger.error("(ValueError) " + error_detail + ". " + str(e))
            abort(417, "(ValueError) " + error_detail)
        except IndexError as e:
            # BUG FIX: the original 'except IndexError:' did not bind 'e'
            # but referenced it — a guaranteed NameError
            logger.error("(IndexError) " + error_detail + ". " + str(e))
            abort(417, "(IndexError) " + error_detail)

    # Write the new row back in the file
    message = write_tsv(table_df, file_name)

    df_data_dict = totuples(table_df.reset_index(), 'rows')

    # Get an indexed header row
    df_header = get_table_header(table_df)

    return {'header': df_header, 'rows': df_data_dict, 'message': message}
def post(self, study_id, assay_file_name):
    """Append new row(s) to an assay sheet.

    Expects a JSON body of the form {"data": [...]} with one dict per new row;
    any 'index:n' element in a row is discarded (rows are always appended).

    :return: dict with the updated indexed header and all rows.
    """
    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        new_row = data_dict['data']
    except (KeyError, ValueError):
        new_row = None
    if new_row is None:
        # BUG FIX: the message previously named an 'assayData' element,
        # but the key actually read from the body is 'data'
        abort(404, "Please provide valid data for updated new row(s). "
                   "The JSON string has to have a 'data' element")

    try:
        for element in new_row:
            # Remove "index:n" element, this is the original row number
            element.pop('index', None)
    except (TypeError, AttributeError):
        logger.info('No index (row num) supplied, ignoring')

    # param validation
    if study_id is None or assay_file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and assay file name')
    study_id = study_id.upper()

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    assay_file_name = os.path.join(study_location, assay_file_name)
    assay_df = pd.read_csv(assay_file_name, sep="\t", header=0, encoding='utf-8')
    assay_df = assay_df.replace(np.nan, '', regex=True)  # Remove NaN

    # Add the new row(s) to the spreadsheet. BUG FIX: DataFrame.append is
    # deprecated since pandas 1.4 and removed in 2.0 — use pd.concat instead
    new_rows_df = pd.DataFrame(new_row if isinstance(new_row, list) else [new_row])
    assay_df = pd.concat([assay_df, new_rows_df], ignore_index=True)

    # Remove all ".n" numbers at the end of duplicated column names
    assay_df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x), inplace=True)

    # Write the new row back in the file
    assay_df.to_csv(assay_file_name, sep="\t", encoding='utf-8', index=False)

    df_data_dict = totuples(assay_df.reset_index(), 'rows')

    # Get an indexed header row
    df_header = get_table_header(assay_df)

    return {'header': df_header, 'data': df_data_dict}
def put(self, study_id, file_name):
    """Update existing rows in a study TSV/CSV file.

    The JSON body must be {"data": [...]} where every row object carries an
    'index:n' element giving the original (0-based) row number to replace.
    Only .tsv/.csv/.txt files are accepted; column names are validated
    against the file before updating.

    :return: dict with the updated indexed header, all rows and the
        write-status message.
    """
    # param validation
    if study_id is None or file_name is None:
        abort(406, 'Please provide valid parameters for study identifier and TSV file name')

    fname, ext = os.path.splitext(file_name)
    ext = ext.lower()
    if ext not in ('.tsv', '.csv', '.txt'):
        abort(400, "The file " + file_name + " is not a valid TSV or CSV file")

    study_id = study_id.upper()

    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        new_rows = data_dict['data']  # Use "index:n" element, this is the original row number
    except (KeyError, ValueError):
        new_rows = None
    if new_rows is None:
        abort(404, "Please provide valid data for updated new row(s). "
                   "The JSON string has to have a 'data' element")

    for row in new_rows:
        try:
            row_index = row['index']  # Check if we have a value in the row number(s)
        except KeyError:
            row_index = None

        if new_rows is None or row_index is None:
            abort(404, "Please provide valid data for the updated row(s). "
                       "The JSON string has to have an 'index:n' element in each (JSON) row. "
                       "The header row can not be updated")

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    file_name = os.path.join(study_location, file_name)
    try:
        file_df = read_tsv(file_name)
    except FileNotFoundError:
        abort(400, "The file " + file_name + " was not found")

    row_index_int = None
    for row in new_rows:
        try:
            row_index_int = int(row['index'])
        except (KeyError, TypeError, ValueError):
            # BUG FIX: the original body read "row_index_int is None", a no-op
            # identity comparison (and the variable was never initialised);
            # it must *assign* None here
            row_index_int = None

        # Validate column names in new rows
        valid_column_name, message = validate_row(file_df, row, 'put')
        if not valid_column_name:
            abort(417, message)

        if row_index_int is not None:
            file_df = file_df.drop(file_df.index[row_index_int])  # Remove the old row from the spreadsheet
            # pop the "index:n" from the new_row before updating
            row.pop('index', None)  # Remove "index:n" element, this is the original row number
            file_df = insert_row(row_index_int, file_df, row)  # Update the row in the spreadsheet

    message = write_tsv(file_df, file_name)

    df_data_dict = totuples(file_df.reset_index(), 'rows')

    # Get an indexed header row
    df_header = get_table_header(file_df)

    return {'header': df_header, 'data': df_data_dict, 'message': message}
def put(self, study_id, assay_file_name):
    """Update existing rows in an assay sheet.

    The JSON body must be {"data": [...]} where every row object carries an
    'index:n' element giving the original (0-based) row number to replace.
    Each matching row is dropped and re-inserted with the supplied values.

    :return: dict with the updated indexed header and all rows.
    """
    # param validation
    if study_id is None or assay_file_name is None:
        abort(404, 'Please provide valid parameters for study identifier and assay file name')
    study_id = study_id.upper()

    try:
        data_dict = json.loads(request.data.decode('utf-8'))
        new_rows = data_dict['data']  # Use "index:n" element, this is the original row number
    except (KeyError, ValueError):
        new_rows = None
    if new_rows is None:
        # BUG FIX: the message previously named an 'assayData' element,
        # but the key actually read from the body is 'data'
        abort(404, "Please provide valid data for updated new row(s). "
                   "The JSON string has to have a 'data' element")

    for row in new_rows:
        try:
            row_index = row['index']  # Check if we have a value in the row number(s)
        except KeyError:
            row_index = None

        if new_rows is None or row_index is None:
            abort(404, "Please provide valid data for the updated row(s). "
                       "The JSON string has to have an 'index:n' element in each (JSON) row, "
                       "this is the original row number. The header row can not be updated")

    # User authentication
    user_token = None
    if "user_token" in request.headers:
        user_token = request.headers["user_token"]

    # check for access rights
    is_curator, read_access, write_access, obfuscation_code, study_location, release_date, submission_date, \
        study_status = wsc.get_permissions(study_id, user_token)
    if not write_access:
        abort(403)

    assay_file_name = os.path.join(study_location, assay_file_name)
    assay_df = pd.read_csv(assay_file_name, sep="\t", header=0, encoding='utf-8')
    assay_df = assay_df.replace(np.nan, '', regex=True)  # Remove NaN

    row_index_int = None
    for row in new_rows:
        try:
            row_index_int = int(row['index'])
        except (KeyError, TypeError, ValueError):
            # BUG FIX: the original body read "row_index_int is None", a no-op
            # identity comparison; it must *assign* None here
            row_index_int = None

        if row_index_int is not None:
            assay_df = assay_df.drop(assay_df.index[row_index_int])  # Remove the old row from the spreadsheet
            # pop the "index:n" from the new_row before updating
            row.pop('index', None)  # Remove "index:n" element from the (JSON) row, this is the original row number
            assay_df = insert_row(row_index_int, assay_df, row)  # Update the row in the spreadsheet

    # Remove all ".n" numbers at the end of duplicated column names
    assay_df.rename(columns=lambda x: re.sub(r'\.[0-9]+$', '', x), inplace=True)

    # Write the new row back in the file
    assay_df.to_csv(assay_file_name, sep="\t", encoding='utf-8', index=False)

    df_data_dict = totuples(assay_df.reset_index(), 'rows')

    # Get an indexed header row
    df_header = get_table_header(assay_df)

    return {'header': df_header, 'data': df_data_dict}