def set_profile_id(self, profile_id):
    p_id = profile_id

    if not p_id and self.description_token:
        description = Description().GET(self.description_token)
        p_id = description.get("profile_id", str())

    return p_id
def align_columns(self):
    """
    function compares ingested columns to generated columns - they should align
    :return:
    """
    result = dict(status='success', message='')

    if not os.path.exists(self.get_object_file_path()):
        result["status"] = "error"
        result["message"] = "Couldn't locate uploaded CSV. Try re-uploading."
        return result

    with open(self.get_object_file_path(), 'r') as fobject:
        ingested_columns = next(csv.reader(fobject))

    description = Description().GET(self.description_token)
    stored_columns = description.get("meta", dict()).get("generated_columns", list())

    ingested_columns = [x.strip().lower() for x in ingested_columns if x.strip()]
    stored_columns = [x['title'].strip().lower() for x in stored_columns if x['title'].strip()]

    if not ingested_columns == stored_columns:
        result["status"] = "error"
        result["message"] = "Headers from uploaded CSV do not match displayed columns."
        return result

    return result
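# Illustrative sketch (hypothetical helper, not part of the class API): the
# header check above reduces to normalising both lists and testing
# order-sensitive equality -- column order matters for alignment.
def _headers_match(ingested, stored_titles):
    """e.g. _headers_match([' Name ', 'Taxon'], ['name', 'taxon']) -> True"""
    ingested = [x.strip().lower() for x in ingested if x.strip()]
    stored_titles = [x.strip().lower() for x in stored_titles if x.strip()]
    return ingested == stored_titles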
def get_cg_subtype(self, next_stage_index):
    """
    stage callback function to resolve the subtype list for a CG Core description
    :param next_stage_index:
    :return:
    """
    stage = dict()

    description = Description().GET(self.__wzh.description_token)
    stages = description["stages"]
    attributes = description["attributes"]

    if next_stage_index < len(stages):
        stage = stages[next_stage_index]

        count_bundle_items = len(self.__wzh.get_description_bundle())

        item = [x for x in stage.get('items', list()) if x['id'] == 'subtype']
        item = item[0] if item else dict()

        # 'cg_type' avoids shadowing the builtin 'type'
        cg_type = attributes.get("cg_type", dict()).get("type", str())
        item['option_values'] = CgCoreSchemas().get_cg_subtypes(cg_type)

        save_dict = dict(attributes=attributes, stages=stages)
        Description().edit_description(self.__wzh.description_token, save_dict)

    return stage
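# Illustrative sketch (hypothetical helper): picking a named item out of a
# stage's item list, as done above for 'subtype' -- first match wins, with an
# empty dict as the fallback.
def _find_stage_item(stage, item_id):
    matches = [x for x in stage.get("items", list()) if x.get("id") == item_id]
    return matches[0] if matches else dict()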
def align_rows(self):
    """
    function compares ingested sample names to generated names - they should align
    :return:
    """
    result = dict(status='success', message='')

    ingested_df = pd.read_csv(self.get_object_file_path())
    ingested_df.columns = [x.lower() for x in list(ingested_df.columns)]
    ingested_names = list(ingested_df.name)

    description = Description().GET(self.description_token)
    stored_names = description.get("meta", dict()).get("generated_names", str()).split(",")

    ingested_names.sort()
    stored_names.sort()

    if not ingested_names == stored_names:
        result["status"] = "error"
        result["message"] = "Sample names from uploaded CSV do not match displayed names."
        return result

    return result
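# Illustrative sketch: unlike the column check, row alignment is
# order-insensitive -- both name lists are sorted before comparison, so only
# membership matters. Hypothetical helper:
def _names_match(ingested_names, stored_names):
    return sorted(ingested_names) == sorted(stored_names)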
def create_rename_description_bundle(self):
    """
    function creates a new description bundle or renames an existing one
    :return:
    """
    target_id = self.param_dict.get("target_id", str())
    bundle_name = self.param_dict.get("bundle_name", str())

    result = dict(status="success", message="")

    # bundle names are unique (case-insensitive); escape the user-supplied name
    # so regex metacharacters can't alter the match (assumes 'import re' at module level)
    if Description().get_description_handle().find(
            {"name": {'$regex': "^" + re.escape(bundle_name) + "$", "$options": 'i'}}).count() >= 1:
        result["status"] = "error"
        result["message"] = "Bundle name must be unique"
    elif target_id:  # updating existing bundle
        try:
            Description().edit_description(target_id, {"name": bundle_name})
        except Exception as e:
            message = "Couldn't update bundle: " + bundle_name + " " + str(e)
            result["status"] = "error"
            result["message"] = message
    else:  # new bundle being created
        try:
            bundle = Description().create_description(
                profile_id=self.profile_id, component=self.component, name=bundle_name)
            result["data"] = dict(id=str(bundle["_id"]), name=bundle["name"])
        except Exception as e:
            message = "Couldn't create bundle: " + bundle_name + " " + str(e)
            result["status"] = "error"
            result["message"] = message

    self.context["result"] = result
    return self.context
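# Illustrative sketch of the uniqueness filter used above (hypothetical helper
# and values): an exact, case-insensitive match on the bundle name, with the
# user-supplied text escaped so it is treated literally, e.g.
#   _unique_name_filter("reads(v2)")
#   -> {"name": {"$regex": "^reads\\(v2\\)$", "$options": "i"}}
def _unique_name_filter(bundle_name):
    return {"name": {"$regex": "^" + re.escape(bundle_name) + "$", "$options": "i"}}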
def get_unique_bundle_names(self, next_stage_index):
    """
    stage callback function: sets unique_items list for bundle names
    :param next_stage_index:
    :return:
    """
    stage = dict()

    description = Description().GET(self.__wzh.description_token)
    stages = description["stages"]

    # get existing names to form unique list
    unique_list = list()

    projection = dict(name=1)
    filter_by = dict(profile_id=self.__wzh.profile_id, component=self.__wzh.component)
    records = Description().get_all_records_columns(projection=projection, filter_by=filter_by)

    records_df = pd.DataFrame(records).dropna()

    if len(records_df):
        # exclude the bundle being described and records with empty names
        records_df["_id2"] = records_df._id.astype(str)
        records_df = records_df[records_df._id2 != self.__wzh.description_token]
        records_df = records_df[records_df.name != str()]
        unique_list = list(records_df.name)

    if next_stage_index < len(stages):
        stage = stages[next_stage_index]
        for item in stage["items"]:
            if "unique_items" in item:
                item["unique_items"] = unique_list

    return stage
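# Illustrative sketch (plain-Python equivalent of the pandas filtering above):
# the uniqueness list is every other bundle's non-empty name, excluding the
# bundle currently being described.
def _other_bundle_names(records, current_token):
    return [r["name"] for r in records
            if str(r.get("_id")) != current_token and r.get("name")]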
def perform_datafile_generation(self, next_stage_index):
    """
    stage callback function: initiates display of attributes for files in the bundle
    :param next_stage_index:
    :return:
    """
    stage = dict()

    description = Description().GET(self.__wzh.description_token)
    stages = description["stages"]

    if next_stage_index < len(stages):
        stage = stages[next_stage_index]

    return stage
def do_description_summary(self):
    record = DataFile().get_record(self.param_dict.get("target_id"))
    self.context['description'] = htags.resolve_description_data(
        record.get("description", dict()), dict())

    description_token = record.get('description_token', str())
    self.context['description']['description_record'] = dict()

    if description_token:
        description_record = Description().GET(description_token)
        if description_record:
            if not description_record["name"]:
                description_record["name"] = "N/A"
            self.context['description']['description_record'] = dict(
                name=description_record["name"], id=str(description_record["_id"]))

    return self.context
def do_clone_description_bundle(self):
    """
    function creates a new description bundle by cloning an existing (specified) bundle
    :return:
    """
    target_id = self.param_dict.get("target_id", str())
    bundle_name = self.param_dict.get("bundle_name", str())

    result = dict(status="success", message="")

    # bundle names are unique (case-insensitive); escape the user-supplied name
    # so regex metacharacters can't alter the match
    if Description().get_description_handle().find(
            {"name": {'$regex': "^" + re.escape(bundle_name) + "$", "$options": 'i'}}).count() >= 1:
        result["status"] = "error"
        result["message"] = "Bundle name must be unique"

        self.context["result"] = result
        return self.context

    # retrieve clone target
    description = Description().GET(target_id)

    # new bundle being created
    try:
        bundle = Description().create_description(
            profile_id=self.profile_id, component=self.component, name=bundle_name,
            stages=description.get('stages', list()),
            attributes=description.get('attributes', dict()),
            meta=description.get('meta', dict()))
        result["data"] = dict(id=str(bundle["_id"]), name=bundle["name"])
    except Exception as e:
        message = "Couldn't create bundle: " + bundle_name + " " + str(e)
        result["status"] = "error"
        result["message"] = message

    self.context["result"] = result
    return self.context
def generate_server_side_table_records(profile_id=str(), component=str(), request=dict()):
    # function generates component records for building a UI table using server-side processing
    # - please note that for effective data display,
    # all array and object-type fields (e.g., characteristics) are deferred to sub-table display.
    # please define such fields in the schema as "show_in_table": false and "show_as_attribute": true

    data_set = list()

    n_size = int(request.get("length", 10))  # assumes 10 records per page if length not set
    draw = int(request.get("draw", 1))
    start = int(request.get("start", 0))

    # instantiate data access object
    da_object = DAComponent(profile_id, component)

    return_dict = dict()

    records_total = da_object.get_collection_handle().count(
        {'profile_id': profile_id, 'deleted': data_utils.get_not_deleted_flag()})

    # retrieve and process records
    filter_by = dict()

    if component == "datafile":
        # get all active bundles in the profile
        existing_bundles = Description().get_all_records_columns(
            projection=dict(_id=1),
            filter_by=dict(profile_id=profile_id, component=component))
        existing_bundles = [str(x["_id"]) for x in existing_bundles]

        records_total = da_object.get_collection_handle().count({"$and": [
            {"profile_id": profile_id, 'deleted': data_utils.get_not_deleted_flag()},
            {"$or": [
                {"description_token": {"$in": [None, False, ""]}},
                {"description_token": {"$nin": existing_bundles}}]}
        ]})

        filter_by = {"$or": [
            {"description_token": {"$in": [None, False, ""]}},
            {"description_token": {"$nin": existing_bundles}}]}

    # get and filter schema elements based on displayable columns
    schema = [x for x in da_object.get_schema().get("schema_dict") if x.get("show_in_table", True)]

    # build db column projection
    projection = [(x["id"].split(".")[-1], 1) for x in schema]

    # order by
    sort_by = request.get('order[0][column]', '0')
    sort_by = request.get('columns[' + sort_by + '][data]', '')
    sort_direction = request.get('order[0][dir]', 'asc')

    sort_by = '_id' if not sort_by else sort_by
    sort_direction = 1 if sort_direction == 'asc' else -1

    # search
    search_term = request.get('search[value]', '').strip()

    records = da_object.get_all_records_columns_server(
        sort_by=sort_by, sort_direction=sort_direction, search_term=search_term,
        projection=dict(projection), limit=n_size, skip=start, filter_by=filter_by)

    records_filtered = records_total

    if search_term:
        records_filtered = da_object.get_collection_handle().count(
            {'profile_id': profile_id, 'deleted': data_utils.get_not_deleted_flag(),
             'name': {'$regex': search_term, "$options": 'i'}})

    if records:
        df = pd.DataFrame(records)

        df['record_id'] = df._id.astype(str)
        df["DT_RowId"] = df.record_id
        df.DT_RowId = 'row_' + df.DT_RowId
        df = df.drop('_id', axis='columns')

        for x in schema:
            x["id"] = x["id"].split(".")[-1]
            df[x["id"]] = df[x["id"]].apply(resolve_control_output_apply, args=(x,)).astype(str)

        data_set = df.to_dict('records')

    return_dict["records_total"] = records_total
    return_dict["records_filtered"] = records_filtered
    return_dict["data_set"] = data_set
    return_dict["draw"] = draw

    return return_dict
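# Illustrative sketch of the exchange the function above implements: the
# DataTables server-side protocol. The client posts paging, ordering and
# search parameters; the reply echoes 'draw' and carries the total/filtered
# counts plus one page of records. Hypothetical values:
_EXAMPLE_DT_REQUEST = {
    "draw": "2",                    # request sequence number, echoed back
    "start": "10", "length": "10",  # page offset and page size
    "order[0][column]": "1", "order[0][dir]": "desc",
    "search[value]": "reads",
}
_EXAMPLE_DT_RESPONSE = dict(draw=2, records_total=120, records_filtered=4,
                            data_set=list())  # one page of matching records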
def remove_pairing_info(self, stage_ref, attributes, meta):
    """ clears any previously stored pairing map for the given stage """
    attributes[stage_ref] = list()
    meta[stage_ref + "_paired_candidates"] = list()

    save_dict = dict(attributes=attributes, meta=meta)
    Description().edit_description(self.__wzh.description_token, save_dict)
def perform_datafile_pairing(self, next_stage_index):
    """
    stage callback function: determines if the pairing of datafiles
    should be performed given the 'library_layout'
    :param next_stage_index:
    :return:
    """
    description = Description().GET(self.__wzh.description_token)
    stages = description["stages"]
    attributes = description["attributes"]
    meta = description.get("meta", dict())

    # validate stage
    stage = dict()

    if next_stage_index < len(stages):
        stage = stages[next_stage_index]

    # first, target repository
    relevant_repos = ["ena"]  # add a repo to this list if it requires datafile pairing

    target_repository = attributes.get("target_repository", dict()).get("deposition_context", str())

    if target_repository not in relevant_repos:
        # no items to pair, clear any previous pairing information
        self.remove_pairing_info(stage["ref"], attributes, meta)
        return False

    # get records in bundle
    records = cursor_to_list(DataFile().get_collection_handle().find({"$and": [
        {"description_token": self.__wzh.description_token,
         'deleted': d_utils.get_not_deleted_flag()},
        {'description.attributes': {"$exists": True}}]},
        {'description.attributes': 1, 'name': 1}))

    if not records:
        # no items to pair, clear any previous pairing information
        self.remove_pairing_info(stage["ref"], attributes, meta)
        return False

    for rec in records:
        datafile_attributes = [v for k, v in rec['description'].get('attributes', dict()).items()]

        new_dict = dict()
        for d in datafile_attributes:
            new_dict.update(d)

        rec['attributes'] = new_dict
        rec['pairing'] = rec['attributes'].get('library_layout', '').upper()

    df = pd.DataFrame(records)
    df._id = df['_id'].astype(str)
    df.index = df._id

    df = df[df['pairing'] == 'PAIRED']

    if not len(df):
        # no items to pair, clear any previous pairing information
        self.remove_pairing_info(stage["ref"], attributes, meta)
        return False

    # remove extraneous columns
    df = df.drop(columns=['description'])

    if not len(df) % 2 == 0:
        stage["error"] = "Pairing requires an even number of datafiles!"
        stage["refresh_wizard"] = True
    else:
        # get previously paired candidates
        paired_candidates_old = meta.get(stage["ref"] + "_paired_candidates", list())
        paired_candidates = list(df.index)

        paired_candidates_old.sort()
        paired_candidates.sort()

        if not paired_candidates_old == paired_candidates:
            stage["refresh_wizard"] = True

        # if there's a valid stored map, use it
        stage_data = list()
        saved_copy = attributes.get(stage["ref"], list())

        if saved_copy:
            stored_pairs_df = pd.DataFrame(saved_copy)
            stored_pairs_list = list(stored_pairs_df._id) + list(stored_pairs_df._id2)
            stored_pairs_list.sort()

            if stored_pairs_list == paired_candidates:
                df_dict = df.to_dict()
                df_dict = df_dict["name"]

                stored_pairs_df["name"] = stored_pairs_df['_id'].apply(lambda x: str(df_dict[x]))
                stored_pairs_df["name2"] = stored_pairs_df['_id2'].apply(lambda x: str(df_dict[x]))

                df_result = stored_pairs_df[['name', 'name2']]
                df_result.columns = ['file1', 'file2']

                stage_data = df_result.to_dict('records')

        if not stage_data:
            # define a fresh pairing map: sort by file name to reflect pairing
            df = df.sort_values(by=['name'])

            s_even = df._id.iloc[1::2]
            s_odd = df._id.iloc[::2]

            df_odd = df[df.index.isin(s_odd)].copy()
            df_even = df[df.index.isin(s_even)].copy()

            df_even['_id2'] = df_even['_id']
            df_even['name2'] = df_even['name']

            df_even = df_even[['_id2', 'name2']]
            df_odd = df_odd[['_id', 'name']]

            df_odd.index = range(0, len(df_odd))
            df_even.index = range(0, len(df_even))

            df_result = pd.concat([df_odd, df_even], axis=1).reindex(df_odd.index)
            saved_copy = df_result[['_id', '_id2']].to_dict('records')

            df_result = df_result[['name', 'name2']]
            df_result.columns = ['file1', 'file2']

            stage_data = df_result.to_dict('records')

        stage["data"] = stage_data

        # save state
        attributes[stage["ref"]] = saved_copy
        meta[stage["ref"] + "_paired_candidates"] = paired_candidates

        save_dict = dict(attributes=attributes, meta=meta)
        Description().edit_description(self.__wzh.description_token, save_dict)

    stage["message"] = self.__wzh.wiz_message["datafiles_pairing_message"]["text"]

    return stage
def get_ena_sequence_stages(self, next_stage_index):
    """
    stage callback function: resolves stages based on study type value
    :param next_stage_index:
    :return:
    """
    stage = dict()

    description = Description().GET(self.__wzh.description_token)
    stages = description["stages"]
    attributes = description["attributes"]

    if next_stage_index < len(stages):
        stage = stages[next_stage_index]

    study_type = attributes.get("study_type", dict()).get("study_type", str())

    if not study_type:
        # no study type specified, we can't really do anything but signal abort
        return dict()

    # re-validate dependency if necessary
    meta = description.get("meta", dict())
    study_type_old = meta.get(stage["ref"] + "_study_type", None)

    # remove stages dependent on 'study_type' - remove resolved stages preceding study_type
    if not study_type_old == study_type:
        cleared_stages = self.__wzh.remove_stage_dependency(next_stage_index)

        # get new dynamic stages based on user's current choice
        new_stages = list()

        # get protocols
        protocols = ISAHelpers().get_protocols_parameter_values(study_type)

        # get study assay schema
        schema_fields = DataSchemas("COPO").get_ui_template_node(study_type)

        # get message dictionary
        message_dict = self.__wzh.wiz_message

        for pr in protocols:
            if len(pr.get("parameterValues", list())) > 0:
                title = pr.get("name", str()).title()
                ref = pr.get("name", str()).replace(" ", "_")
                message = message_dict.get(ref + "_message", dict()).get("text", str())

                stage_dict = dict(title=title, ref=ref, message=message, items=list())

                for f in schema_fields:
                    if f['ref'] in pr.get("parameterValues", list()):
                        if f.get('show_in_form', False):
                            f["id"] = f['id'].strip(".").rsplit(".", 1)[1]
                            f["label"] = htags.trim_parameter_value_label(f["label"])

                            # convert select type controls to copo custom select
                            if f.get("control", str()) == "select":
                                f["control"] = "copo-single-select"

                            stage_dict.get("items").append(f)

                new_stages.append(stage_dict)

        # retain user choice for future reference
        meta[stage["ref"] + "_study_type"] = study_type

        # save meta
        Description().edit_description(self.__wzh.description_token, dict(meta=meta))

        if not new_stages:
            # no resolved stages; signal abort
            return dict()

        # resolve type and data source for generated stages
        self.__wzh.sanitise_stages(new_stages)

        # register dependency
        self.__wzh.set_stage_dependency(new_stages)

        # insert new stages into the stage list
        stage_gap = next_stage_index + 1
        stages = cleared_stages[:stage_gap] + new_stages + cleared_stages[stage_gap:]

        # update description record
        Description().edit_description(self.__wzh.description_token, dict(stages=stages))

    return False
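# Illustrative sketch (hypothetical helper): dynamic stages are spliced into
# the pipeline directly after the triggering stage -- the same insertion used
# by the other stage-resolving callbacks here, e.g.
#   _splice_stages(["s0", "s1", "s2"], ["n1", "n2"], next_stage_index=1)
#   -> ["s0", "s1", "n1", "n2", "s2"]
def _splice_stages(cleared_stages, new_stages, next_stage_index):
    gap = next_stage_index + 1
    return cleared_stages[:gap] + new_stages + cleared_stages[gap:]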
def get_cg_dynamic_stages(self, next_stage_index):
    """
    stage callback function: resolves stages for cg core based on type/subtype
    :param next_stage_index:
    :return:
    """
    stage = dict()

    description = Description().GET(self.__wzh.description_token)
    stages = description["stages"]
    attributes = description["attributes"]

    if next_stage_index < len(stages):
        stage = stages[next_stage_index]

    # get type and subtype
    cg_type = attributes.get("cg_type", dict()).get("type", str())
    cg_subtype = attributes.get("cg_subtype", dict()).get("subtype", str())

    if cg_subtype:
        # if there's a subtype defined, use that to resolve the stages
        cg_type = cg_subtype

    if not cg_type:
        # no type specified, we can't really do anything - signal abort
        return dict()

    # re-validate dependency if necessary
    meta = description.get("meta", dict())
    cg_type_old = meta.get(stage["ref"] + "_cg_type", None)

    # remove stages dependent on 'cg_type' - remove resolved stages preceding cg_type
    if not cg_type_old == cg_type:
        cleared_stages = self.__wzh.remove_stage_dependency(next_stage_index)

        # get new dynamic stages based on the cgcore type/subtype selected
        new_stages = list()

        cgcore_object = CgCoreSchemas()

        # get fields schema
        schema_df = cgcore_object.get_type_constraints(cg_type)

        # get constraint ranking
        constraint_to_rank = cgcore_object.get_constraint_ranking()

        # set type ranking - used to sort and select parent's type ranking for dependencies
        type_ranking = dict(array=1, string=2)

        # build dependency map
        dependency_series = schema_df.dependency.copy()
        dependencies = list(dependency_series.fillna('').unique())
        dependencies = [x for x in dependencies if x]

        schema_df["create_new_item"] = np.nan
        schema_df['option_component'] = np.nan

        for dp in dependencies:
            children_df = schema_df[schema_df.dependency == dp]

            # get reference child index - used to build parent properties
            parent_indx = children_df.index[0]

            # set parent field constraint - derived from the children's constraint with the highest ranking
            schema_df.loc[parent_indx, 'field_constraint'] = sorted(
                set(children_df.field_constraint),
                key=lambda x: constraint_to_rank.get(x, 100))[0]

            # set parent type using the children's - if at least one child is an array,
            # that should be passed onto the parent via the data_maxItems property
            value_type = sorted(set(children_df.type), key=lambda x: type_ranking.get(x, 100))[0]
            schema_df.loc[parent_indx, 'data_maxItems'] = 1 if value_type == "string" else -1
            schema_df.loc[parent_indx, 'type'] = "string"
            schema_df.loc[parent_indx, 'create_new_item'] = True
            schema_df.loc[parent_indx, 'option_component'] = "cgcore"
            schema_df.loc[parent_indx, 'control'] = "copo-lookup2"
            schema_df.loc[parent_indx, 'data_source'] = "cg_dependency_lookup"
            schema_df.loc[parent_indx, 'ref'] = dp
            schema_df.loc[parent_indx, 'dependency'] = np.nan

        # filter out dependent items - build dependency map
        schema_df['dependency'] = schema_df['dependency'].fillna('')
        schema_df = schema_df[schema_df['dependency'].isin([''])]

        # get stage ids
        stage_ids = list(schema_df.stage_id.unique())
        stage_ids = pd.Series(stage_ids).astype(int).sort_values()
        stage_ids = stage_ids[stage_ids >= 0].astype(str)

        columns = list(schema_df.columns)
        for col in columns:
            schema_df[col].fillna('n/a', inplace=True)

        wizard_stage_maps = cgcore_object.get_wizard_stages_df()
        wizard_stage_maps_list = list(wizard_stage_maps['stage_id'])

        for s_id in stage_ids:
            # filter out items without a valid stage reference
            if str(s_id) not in wizard_stage_maps_list:
                continue

            stage_df = wizard_stage_maps[wizard_stage_maps.stage_id == str(s_id)]

            # couldn't find corresponding stage information
            if not len(stage_df):
                continue

            stage_df = stage_df.to_dict('records')[0]

            title = stage_df["stage_label"]
            ref = "cg_stage_" + s_id
            message = stage_df["stage_message"]

            # items = schema_df[schema_df.stage_id == s_id].sort_values(
            #     by=['field_constraint_rank']).to_dict('records')

            # todo: temporary measure for demo - display only required fields
            items = schema_df[(schema_df.stage_id == s_id) &
                              (schema_df.field_constraint == 'required')].sort_values(
                by=['field_constraint_rank']).to_dict('records')

            # if no items, don't display the stage
            if not len(items):
                continue

            # delete non-relevant attributes
            for item in items:
                for k in columns:
                    if item[k] == 'n/a':
                        del item[k]

            stage_dict = dict(title=title, ref=ref, message=message, items=items)
            new_stages.append(stage_dict)

        # retain user choice for future reference
        meta[stage["ref"] + "_cg_type"] = cg_type

        # save meta
        Description().edit_description(self.__wzh.description_token, dict(meta=meta))

        if not new_stages:
            # no resolved stages; signal abort
            return dict()

        # resolve type and data source for generated stages
        self.__wzh.sanitise_stages(new_stages)

        # register dependency
        self.__wzh.set_stage_dependency(new_stages)

        # insert new stages into the stage list
        stage_gap = next_stage_index + 1
        stages = cleared_stages[:stage_gap] + new_stages + cleared_stages[stage_gap:]

        # update description record
        Description().edit_description(self.__wzh.description_token, dict(stages=stages))

    return False
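# Illustrative sketch (hypothetical helper): a dependency group's parent
# inherits the highest-ranked constraint among its children, so a group mixing
# 'required' and 'optional' children yields a required parent, e.g.
#   _parent_constraint({"optional", "required"}, {"required": 1, "optional": 2})
#   -> "required"
def _parent_constraint(child_constraints, constraint_to_rank):
    return sorted(set(child_constraints),
                  key=lambda c: constraint_to_rank.get(c, 100))[0]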
def get_description_stages(self, next_stage_index):
    """
    stage callback function: resolves stages based on repository value
    :param next_stage_index:
    :return:
    """
    stage = dict()

    description = Description().GET(self.__wzh.description_token)
    stages = description["stages"]
    attributes = description["attributes"]

    if next_stage_index < len(stages):
        stage = stages[next_stage_index]

    target_repository = attributes.get("target_repository", dict()).get("deposition_context", str())

    if not target_repository:
        # no target repository specified, we can't really do anything but signal abort
        return dict()

    # re-validate dependency if necessary
    meta = description.get("meta", dict())
    target_repository_old = meta.get(stage["ref"] + "_target_repository", None)

    # remove dependency - remove resolved stages preceding target_repository
    if not target_repository_old == target_repository:
        cleared_stages = self.__wzh.remove_stage_dependency(next_stage_index)

        # get new dynamic stages based on user's current choice
        new_stages = WizardSchemas().get_wizard_template(target_repository)

        # retain user choice for future reference
        meta[stage["ref"] + "_target_repository"] = target_repository

        # save meta
        Description().edit_description(self.__wzh.description_token, dict(meta=meta))

        if not new_stages:
            # no resolved stages; signal abort
            return dict()

        # resolve type and data source for generated stages
        self.__wzh.sanitise_stages(new_stages)

        # register dependency
        self.__wzh.set_stage_dependency(new_stages)

        # insert new stages into the stage list
        stage_gap = next_stage_index + 1
        stages = cleared_stages[:stage_gap] + new_stages + cleared_stages[stage_gap:]

        # update description record
        Description().edit_description(self.__wzh.description_token, dict(stages=stages))

    return False