def get_component_help_messages(self):
    """Populate the view context with help messages for the current component.

    Loads component-specific help messages, filters the generic context-help
    entries down to those tagged for this component, resolves the user email,
    and returns the updated context.
    """
    self.context['context_help'] = {}
    self.context['help_messages'] = {}

    paths_dict = lkup.MESSAGES_LKUPS['HELP_MESSAGES']

    # component-specific help messages (keyed by component name)
    if self.component in paths_dict:
        self.context['help_messages'] = d_utils.json_to_pytype(
            lkup.MESSAGES_LKUPS['HELP_MESSAGES'][self.component])

    # context help, relevant to the current component (e.g., datafile)
    if "context_help" in paths_dict:
        help_dict = d_utils.json_to_pytype(
            lkup.MESSAGES_LKUPS['HELP_MESSAGES']["context_help"])

        # keep only entries whose first context tag matches this component
        relevant = [
            entry for entry in help_dict['properties']
            if entry['context'] and entry['context'][0] == self.component
        ]

        if relevant:
            help_dict['properties'] = relevant
            self.context['context_help'] = help_dict

    # get user email
    self.context = self.do_user_has_email()

    return self.context
def generate_stage_items(self):
    """Build the wizard stage definitions for the sample wizard.

    Returns a dict with a 'start' entry (the initial stages) plus one entry
    per sample type, whose 'sample_attributes' stage items are derived from
    self.schema, filtered by the sample type's specifications.

    NOTE(review): schema entries in self.schema are mutated in place
    (option_values, control, id) — assumes self.schema is not shared/reused
    elsewhere after this call; confirm.
    """
    # collect the known sample-type identifiers
    sample_types = list()
    for s_t in d_utils.get_sample_type_options():
        sample_types.append(s_t["value"])

    wizard_stages = dict()

    # get start stages
    start = d_utils.json_to_pytype(lkup.WIZARD_FILES["sample_start"])['properties']
    wizard_stages['start'] = start

    # if required, resolve data source for select-type controls,
    # i.e., if a callback is defined on the 'option_values' field
    for stage in wizard_stages['start']:
        if "items" in stage:
            for st in stage['items']:
                if "option_values" in st:
                    st["option_values"] = htags.get_control_options(st)

    # get sample types
    for s_t in sample_types:
        # fresh copy of the attribute stages for each sample type
        s_stages = d_utils.json_to_pytype(lkup.WIZARD_FILES["sample_attributes"])['properties']
        form_schema = list()

        for f in self.schema:
            # get relevant attributes based on sample type; entries without
            # 'specifications' apply to every sample type
            if f.get("show_in_form", True) and s_t in f.get("specifications", sample_types):
                # if required, resolve data source for select-type controls,
                # i.e., if a callback is defined on the 'option_values' field
                if "option_values" in f:
                    f["option_values"] = htags.get_control_options(f)

                # change sample-source control to wizard-compliant version
                if f.get("control", str()) == "copo-sample-source":
                    f["control"] = "copo-sample-source-2"

                # get short-form id (last dotted segment)
                f["id"] = f["id"].split(".")[-1]

                # might not need to include name
                if f["id"] == "name":
                    continue

                form_schema.append(f)

        # attach the composed schema to the 'sample_attributes' stage
        for p in s_stages:
            if p["ref"] == "sample_attributes":
                p["items"] = form_schema

        wizard_stages[s_t] = s_stages

    return wizard_stages
def do_sample_wizard_components(self):
    """Assemble the context entries required to render the sample wizard."""
    self.context['wiz_message'] = d_utils.json_to_pytype(
        lkup.MESSAGES_LKUPS["sample_wizard_messages"])["properties"]
    self.context['wiz_howtos'] = d_utils.json_to_pytype(
        lkup.MESSAGES_LKUPS["sample_wizard_howto"])
    self.context['wizard_stages'] = self.wizard_helper.generate_stage_items()

    # all sample records for the current profile: used in the UI for
    # 'cloning' and other purposes
    current_profile_id = ThreadLocal.get_current_request().session['profile_id']
    self.context["component_records"] = htags.generate_component_records(
        "sample", current_profile_id)

    return self.context
def get_resolve(self, component=str()):
    """Resolve publication (DOI) metadata against the component's schema.

    Cross-checks the metadata returned by self.publication_metadata() with the
    component schema, coercing values to the schema-declared types, and builds
    a user-facing message from the display templates.

    :param component: name of the target component (e.g., 'publication')
    :return: dict with 'component_dict' (schema-keyed values) and 'message_dict'
    """
    da_object = DAComponent(component=component)
    message_display_templates = d_utils.json_to_pytype(
        lkup.MESSAGES_LKUPS["message_templates"])["templates"]
    lookup_messages = d_utils.json_to_pytype(
        lkup.MESSAGES_LKUPS["lookup_messages"])["properties"]

    component_dict = dict()
    message_dict = dict()

    resolved_dict = self.publication_metadata()

    if resolved_dict.get("status") == "success":
        message_dict = message_display_templates.get("success", dict())
        # NOTE(review): if 'doi_metadata_crosscheck' is absent the str() default
        # has no .get() — this would raise AttributeError; confirm key always exists
        message_dict["text"] = lookup_messages.get(
            "doi_metadata_crosscheck", str()).get("text", str())

        for f in da_object.get_schema().get("schema"):
            data_dict = resolved_dict.get("data", dict())
            # short-form key: last dotted segment of the schema field id
            key = f.id.split(".")[-1]

            if key in data_dict:
                val = data_dict[key]

                # reconcile schema type mismatch
                if isinstance(val, list) and f.type == "string":
                    val = ','.join(str(e) for e in val)  # account for numbers

                if isinstance(val, str) and f.type == "object":
                    object_type_control = d_utils.object_type_control_map(
                    ).get(f.control.lower(), str())

                    if object_type_control == "ontology_annotation":
                        # wrap the plain string as an ontology annotation object,
                        # defaulting the remaining schema keys by their types
                        object_schema = d_utils.get_db_json_schema(
                            object_type_control)
                        value_dict = dict(annotationValue=val)

                        for k in object_schema:
                            object_schema[k] = value_dict.get(
                                k, d_utils.default_jsontype(
                                    object_schema.get(k, dict()).get(
                                        "type", "object")))
                        val = object_schema

                component_dict[key] = val

            if key not in component_dict:
                # set default values based on type
                component_dict[key] = d_utils.default_jsontype(f.type)
    else:
        # resolution failed: compose an error message from the reported errors
        error_list = resolved_dict.get("error", list())
        message_dict = message_display_templates.get("danger", dict())
        message_dict["text"] = '; '.join(
            str(e) for e in error_list) + lookup_messages.get(
            "doi_metadata_error", str()).get("text", str())

    return dict(component_dict=component_dict, message_dict=message_dict)
def create_transfer(self, submission_id, file_path=None):
    """Create a remote-transfer record for a submission and return it.

    Any pre-existing transfer record for the same submission is deleted first,
    so at most one transfer record exists per submission.

    :param submission_id: id of the submission being transferred
    :param file_path: optional path of the file under transfer; when given,
        the file size is read from the associated chunked upload
    :return: the freshly inserted transfer record
    """
    # before creating a new transfer record for this submission, remove all others
    remote_record = self.get_by_sub_id(submission_id)
    if remote_record:
        self.delete_transfer(str(remote_record["_id"]))

    fields = data_utils.json_to_pytype(
        DB_TEMPLATES['REMOTE_FILE_COLLECTION'])
    fields['submission_id'] = submission_id
    fields['profile_id'] = self.profile_id
    fields['file_path'] = file_path

    transfer_time = datetime.now().strftime("%d-%m-%Y %H:%M:%S")

    fields["commenced_on"] = transfer_time
    fields["current_time"] = transfer_time
    fields["transfer_rate"] = ""

    if file_path:
        # NOTE(review): DataFile().GET is called with submission_id —
        # presumably the submission record carries 'file_id'; confirm
        d = DataFile().GET(submission_id)
        chunked_upload = ChunkedUpload.objects.get(id=int(d['file_id']))
        fields["file_size_bytes"] = u.filesize_toString(
            chunked_upload.offset)

    doc = self.RemoteFileCollection.insert(fields)

    # return inserted record
    df = self.GET(str(doc))
    return df
def generate_ui_template(self):
    """Generate the UI template from the configured mapping files.

    Each mapping file is dispatched to a provider/type-specific handler whose
    generated controls are accumulated; on success the controls go through a
    series of default-setting passes before being objectified.

    :return: dict with 'status' ('success'/'failed') and 'data'; failures also
        carry 'messages' with accumulated error messages
    """
    new_list = []
    for file_name in self.get_mapping_files():
        file_dict = d_utils.json_to_pytype(os.path.join(self.path_to_mappings, file_name))
        # retain the source file handle alongside its parsed content
        a = dict(file_handle=os.path.join(self.path_to_mappings, file_name), file_dict=file_dict)
        self.resource_objects.append(a)
        # dispatch key is '<provider>_<type>' from the file's configuration
        new_list = new_list + self.dispatch[
            file_dict['configuration']['provider'] + "_" + file_dict['configuration']['type']](file_dict)

    if new_list:
        self.generated_controls = new_list

        # set some default fields
        self.set_type()
        self.set_deprecation()
        self.set_versioning()
        self.set_form_display()
        self.set_table_display()
        self.set_ontologies()
        # self.update_original_resource()
        self.refactor_deprecated_controls()

        out_dict = self.objectify()
        out_dict = {"status": "success", "data": out_dict}
    else:
        # no controls generated: report failure with an empty data payload
        out_dict = {}
        out_dict = {"status": "failed", "messages": self.error_messages, "data": out_dict}

    return out_dict
def get_description_stages(self):
    """Return the wizard stages for the batch's target repository.

    An empty list is returned when no target repository has been recorded.
    """
    target_repository = self.get_batch_attributes()["target_repository"][0]

    if not target_repository:
        return list()

    # look up the wizard definition for the repository's deposition context
    wizard_file = lkup.WIZARD_FILES[target_repository['deposition_context']]
    return d_utils.json_to_pytype(wizard_file)['properties']
def save_stage_data(auto_fields):
    """Persist the form data captured at the current wizard stage.

    Looks up the current stage in the datafile's recorded description stages
    and stores each stage item's value; 'ontology term' controls are expanded
    into their sub-fields using the ontology annotation template.

    :param auto_fields: posted form fields; must include 'datafile' and
        'current_stage' plus one entry per stage item id
    """
    d = DataFile()
    datafile_id = auto_fields["datafile"]
    current_stage = auto_fields["current_stage"]

    description_stages = d.GET(datafile_id)["description"]["stages"]
    stage = [elem for elem in description_stages if elem["ref"] == current_stage]

    # get schema for resolving ontology terms
    onto_schema = d_utils.json_to_pytype(lkup.DB_TEMPLATES["ONTOLOGY_ANNOTATION"])

    if stage:
        data = {}
        stage_items = stage[0]["items"]
        if stage_items:
            for sti in stage_items:
                # handle ontology term: collect its dotted sub-fields
                # (e.g., '<id>.annotationValue') into one dict
                if sti["control"].lower() == "ontology term":
                    a = {}
                    for k in onto_schema["properties"]:
                        if sti["id"] + "." + k in auto_fields.keys():
                            a[k] = auto_fields[sti["id"] + "." + k]
                    data[sti["id"]] = a
                else:
                    data[sti["id"]] = auto_fields[sti["id"]]

        d.save_description_stage(datafile_id, {"ref": current_stage, "data": data})
def rate_metadata(self, item_meta, repo):
    """
    function matches input metadata (item_meta) against a rating template, to determine an item's rating level.
    basically, the rating template is a set of sequential/mutually exclusive rules used in matching
    user description to some rating level. ideally, rules should be listed in a descending order
    of ranking (e.g., good, fair, poor)
    :param item_meta: metadata schema of the item to be rated
    :param repo: repository handle used to select the matching rules
    :return item_rating: the resolved rating (empty dict if no level matched)
    """
    # get repo label
    repo_name = [elem for elem in d_utils.get_repository_options() if elem["value"] == repo]
    if repo_name:
        repo_name = repo_name[0]["label"]
    else:
        repo_name = str()

    rating_template = d_utils.json_to_pytype(lkup.METADATA_RATING_TEMPLATE_LKUPS["rating_template"])["properties"]

    item_rating = dict()

    for level in rating_template:
        set_level = []
        # fix: use .get() so a template level that defines no rules for this
        # repo cannot raise KeyError (consistent with the sibling implementation)
        for k, v in level["matching_rules"].get(repo, dict()).items():
            if v:
                # dynamically resolve the validator for this rule, e.g. validate_<rule>
                set_level.append(getattr(MetadataRater, "validate_" + k)(self, v, item_meta))

        set_level = set(set_level)
        # a level matches only when every applied rule agreed and passed
        if len(set_level) == 1 and set_level.pop():
            item_rating["rating_level"] = level["rating_level"]
            # description may interpolate locals such as repo_name
            item_rating["rating_level_description"] = level["rating_level_description"].format(**locals())
            break

    return item_rating
def get_datafiles_rating(self):
    """
    function handles the evaluation of metadata rating for datafiles
    :return: dictionary of datafiles with associated metadata rating
    """
    datafiles_rating = list()

    for df_id in self.item_ids:
        # default to the last (lowest-ranked) entry of the rating template
        default_rating = \
            d_utils.json_to_pytype(lkup.METADATA_RATING_TEMPLATE_LKUPS["rating_template"])["properties"][-1]

        item_rating = dict()
        item_rating["rating_level"] = default_rating["rating_level"]
        item_rating["rating_level_description"] = default_rating["rating_level_description"]

        d_r = dict(item_id=df_id, item_rating=item_rating)

        attributes = DataFile().get_record_property(df_id, "description_attributes")
        deposition_context = DataFile().get_record_property(df_id, "target_repository")

        # only rate when a target repository has been set for the datafile
        if deposition_context:
            d_r["item_rating"] = self.rate_metadata(attributes, deposition_context)

        datafiles_rating.append(d_r)

    return datafiles_rating
def get_datafiles_rating(self):
    """
    function handles the evaluation of metadata rating for datafiles
    :return: dictionary of datafiles with associated metadata rating
    """
    ratings = []

    for datafile_id in self.item_ids:
        # fall back to the last (lowest-ranked) entry of the rating template
        fallback = d_utils.json_to_pytype(
            lkup.METADATA_RATING_TEMPLATE_LKUPS["rating_template"])["properties"][-1]

        entry = dict(
            item_id=datafile_id,
            item_rating=dict(
                rating_level=fallback["rating_level"],
                rating_level_description=fallback["rating_level_description"]))

        attributes = DataFile().get_record_property(
            datafile_id, "description_attributes")
        deposition_context = DataFile().get_record_property(
            datafile_id, "target_repository")

        # rate only when a target repository has been recorded
        if deposition_context:
            entry["item_rating"] = self.rate_metadata(
                attributes, deposition_context)

        ratings.append(entry)

    return ratings
def get_resolve(self, component=str()):
    """Resolve publication (DOI) metadata against the component's schema.

    Cross-checks the metadata from self.publication_metadata() with the
    component schema, coercing values to schema-declared types, and builds a
    user-facing message from the display templates.

    :param component: name of the target component (e.g., 'publication')
    :return: dict with 'component_dict' (schema-keyed values) and 'message_dict'
    """
    da_object = DAComponent(component=component)
    message_display_templates = d_utils.json_to_pytype(lkup.MESSAGES_LKUPS["message_templates"])["templates"]
    lookup_messages = d_utils.json_to_pytype(lkup.MESSAGES_LKUPS["lookup_messages"])["properties"]

    component_dict = dict()
    message_dict = dict()

    resolved_dict = self.publication_metadata()

    if resolved_dict.get("status") == "success":
        message_dict = message_display_templates.get("success", dict())
        # NOTE(review): if 'doi_metadata_crosscheck' is absent the str() default
        # has no .get() — this would raise AttributeError; confirm key always exists
        message_dict["text"] = lookup_messages.get("doi_metadata_crosscheck", str()).get("text", str())

        for f in da_object.get_schema().get("schema"):
            data_dict = resolved_dict.get("data", dict())
            # short-form key: last dotted segment of the schema field id
            key = f.id.split(".")[-1]

            if key in data_dict:
                val = data_dict[key]

                # reconcile schema type mismatch
                if isinstance(val, list) and f.type == "string":
                    val = ','.join(str(e) for e in val)  # account for numbers

                if isinstance(val, str) and f.type == "object":
                    object_type_control = d_utils.control_to_schema_name(f.control.lower())

                    if object_type_control == "ontology_annotation":
                        # wrap the plain string as an ontology annotation object,
                        # defaulting the remaining schema keys by their types
                        object_schema = d_utils.get_isa_schema(object_type_control)
                        value_dict = dict(annotationValue=val
                                          )

                        for k in object_schema:
                            object_schema[k] = value_dict.get(k, d_utils.default_jsontype(
                                object_schema.get(k, dict()).get("type", "object")))
                        val = object_schema

                component_dict[key] = val

            if key not in component_dict:
                # set default values based on type
                component_dict[key] = d_utils.default_jsontype(f.type)
    else:
        # resolution failed: compose an error message from the reported errors
        error_list = resolved_dict.get("error", list())
        message_dict = message_display_templates.get("danger", dict())
        message_dict["text"] = '; '.join(
            str(e) for e in error_list) + lookup_messages.get("doi_metadata_error", str()).get("text", str())

    return dict(component_dict=component_dict, message_dict=message_dict
                )
def get_dynamic_elements_ena(self, args):
    """
    function generates dynamic stages for ENA based on the study type
    :param args: comma-separated argument string; the first token is the stub reference
    :return: list of generated stage dicts (empty list if no study type is recorded)
    """
    args = args.split(",")  # args: stub_ref
    stub_ref = args[0]

    study_type = self.get_batch_attributes()["study_type"][0]
    if not study_type:
        return list()

    study_type = study_type['study_type']

    # get protocols
    protocols = ISAHelpers().get_protocols_parameter_values(study_type)

    # get study assay schema
    schema_fields = getattr(DataSchemas("COPO").get_ui_template_as_obj().copo, study_type).fields

    # generate dynamic stages from protocols
    dynamic_stages = list()

    # get message dictionary
    message_dict = d_utils.json_to_pytype(lkup.MESSAGES_LKUPS["datafile_wizard"])["properties"]

    for pr in protocols:
        # only protocols with parameter values yield a stage
        if len(pr.get("parameterValues", list())) > 0:
            title = pr.get("name", str()).title()
            ref = pr.get("name", str()).replace(" ", "_")
            # per-stage helper message keyed as '<ref>_message'
            message = message_dict.get(ref + "_message", dict()).get("text", str())
            stage_dict = dict(title=title,
                              ref=ref,
                              message=message,
                              content=str("get_stage_html"),
                              dependent_on=str("study_type"),
                              stub_ref=stub_ref,
                              items=list()
                              )

            for f in schema_fields:
                # include only schema fields referenced by this protocol's parameter values
                if f.ref in pr.get("parameterValues", list()):
                    if f.show_in_form:
                        elem = htags.get_element_by_id(f.id)
                        # short-form id: last dotted segment
                        elem["id"] = elem['id'].strip(".").rsplit(".", 1)[1]
                        del elem['ref']
                        stage_dict.get("items").append(elem)

            dynamic_stages.append(stage_dict)

    return dynamic_stages
def upload_to_dropbox(request): return_structure = {} # set up an Aspera collection handle AsperaCollection = get_collection_ref("AsperaCollections") transfer_token = "" task = request.POST['task'] if task == "initiate_transfer": # initiate the transfer process # get the target datafile and obtain the file reference study_id = request.POST["study_id"] ena_collection_id = request.POST["ena_collection_id"] data_file_id = request.POST["data_file_id"] data_file = EnaCollection().get_study_datafile(study_id, ena_collection_id, data_file_id) chunked_upload = ChunkedUpload.objects.get(id=int(data_file["fileId"])) # set a new document in the aspera collection, # thus obtaining a transfer token to orchestrate the transfer process db_template = d_utils.json_to_pytype(lkup.DB_TEMPLATES['ASPERA_COLLECTION']) transfer_token = AsperaCollection.insert(db_template) path_to_file = os.path.join(MEDIA_ROOT, chunked_upload.file.name) # update some initial fields # assume transfer_status is 'transferring' initially to allow the progress monitor to kick-start AsperaCollection.update({"_id": transfer_token}, {"$set": {"transfer_commenced": str(datetime.now()), "file_path": path_to_file, "transfer_status": "transferring", "pct_completed": 0} }) # instantiate an aspera transfer process process = Thread(target=AsperaTransfer, args=(transfer_token,)) process.start() return_structure['initiate_data'] = {"transfer_token": str(transfer_token)} elif task == "transfer_progress": tokens = ast.literal_eval(request.POST["tokens"]) progress_list = [] for key, value in tokens.items(): doc = AsperaCollection.find_one({"_id": ObjectId(key)}) if doc: progress = {"transfer_token": key, "pct_completed": doc["pct_completed"], "transfer_status": doc["transfer_status"]} progress_list.append(progress) return_structure['progress_data'] = progress_list return_structure['exit_status'] = 'success' out = jsonpickle.encode(return_structure) return HttpResponse(out, content_type='json')
def ena_description(auto_fields):
    """Resolve the next ENA wizard stage for a datafile.

    Static stages come from the ENA wizard file; dynamic stages (contingent on
    study type) are generated once and merged into the datafile's stored
    description, which acts as the single point of reference thereafter.

    :param auto_fields: posted fields; must include 'datafile' and 'current_stage'
    :return: display dict for the next stage, or {} when there is none
    """
    # get current stage, output next stage
    stage_dict = {}
    datafile_id = auto_fields["datafile"]
    current_stage = auto_fields["current_stage"]

    static_list = d_utils.json_to_pytype(lkup.WIZARD_FILES["ena"])["properties"]
    unified_list = static_list

    # get stages from db if exist. stages (both static and dynamic) are held in the db,
    # to provide a single or unified point of reference
    d = DataFile()
    description = d.GET(datafile_id)["description"]
    description_stages = description["stages"]

    if len(description_stages) > 0:
        unified_list = description_stages
    else:
        description["stages"] = unified_list
        fields = {"description": description}
        d.edit_datafile(datafile_id, fields)

    # generate and merge dynamic stages with static if not already generated
    if unified_list == static_list:  # only static stages exist, generate dynamic
        dynamic_elements = get_dynamic_elements_ena(datafile_id)  # ENA dynamic stages, contingent upon study_type

        if dynamic_elements:
            unified_list = unified_list + dynamic_elements

            # merge and save stages
            description["stages"] = unified_list
            fields = {"description": description}
            d.edit_datafile(datafile_id, fields)

    # now, resolve stages for the wizard
    next_stage_indx = 0
    listed_stage = [indx for indx, elem in enumerate(unified_list) if elem["ref"] == current_stage]
    if listed_stage:
        next_stage_indx = listed_stage[0] + 1

    # fix: explicit bounds check replaces a bare 'except: pass' that silently
    # swallowed every error (not just the expected IndexError past the last stage)
    if next_stage_indx < len(unified_list):
        elem = unified_list[next_stage_indx]
        if not is_stage_present(datafile_id, elem["ref"]):
            stage_dict = get_stage_display(elem, datafile_id)

    if not stage_dict and current_stage == unified_list[-1]["ref"]:
        # reached last stage of wizard, 'review' now
        # stage_dict = wtags.get_review_html(get_stages_display(datafile_id))
        pass

    return stage_dict
def get_metadata_template(self):
    """Load and return the Dataverse dataset template from the CG-Core utilities path.

    Reports and re-raises any error encountered while reading the template.
    """
    template_path = os.path.join(RESOLVER["cg_core_utils"],
                                 'dataverse_dataset_template.json')

    try:
        return data_utils.json_to_pytype(template_path)
    except Exception as e:
        # surface the problem to the error reporter, then propagate
        self.report_error("Couldn't retrieve Dataverse template. " + str(e))
        raise
def create_shared_group(self, name, description, owner_id=None):
    """Create a COPO group record and return its inserted id, or False on failure.

    :param name: group name
    :param description: group description
    :param owner_id: owning user id; defaults to the current user when omitted
    """
    record = data_utils.json_to_pytype(DB_TEMPLATES['COPO_GROUP'])

    # default the owner to the current user when none is supplied
    record['owner_id'] = owner_id if owner_id else data_utils.get_user_id()
    record['name'] = name
    record['description'] = description
    # NOTE(review): the key 'data_created' looks like a typo for 'date_created';
    # left unchanged since stored records may depend on it — confirm before renaming
    record['data_created'] = datetime.now().strftime(
        "%d-%m-%Y %H:%M:%S")

    uid = self.Group.insert(record)
    return uid if uid else False
def process_wizard_templates(self):
    """
    function reads schema files and presents in an easy to use manner
    :return: dict mapping each wizard path key to its 'properties' list;
        unreadable or malformed files are skipped
    """
    template = dict()

    for k, v in self.wizard_paths.items():
        try:
            template[k] = d_utils.json_to_pytype(v)['properties']
        except Exception:
            # best-effort: skip files that cannot be read or lack 'properties'
            # (fix: removed the unused exception binding)
            continue

    return template
def generate_ui_template(self):
    """Generate the UI template from the configured mapping files.

    Each mapping file is dispatched to a provider/type-specific handler whose
    generated controls are accumulated and objectified.

    :return: dict with 'status' ('success'/'failed') and 'data'; failures also
        carry 'messages' with accumulated error messages
    """
    new_list = []
    for file_name in self.get_mapping_files():
        file_dict = d_utils.json_to_pytype(os.path.join(self.path_to_mappings, file_name))
        # dispatch key is '<provider>_<type>' from the file's configuration
        new_list = new_list + self.dispatch[
            file_dict['configuration']['provider'] + "_" + file_dict['configuration']['type']](file_dict)

    if new_list:
        out_dict = self.objectify(new_list)
        out_dict = {"status": "success", "data": out_dict}
    else:
        # no controls generated: report failure with an empty data payload
        out_dict = {}
        out_dict = {"status": "failed", "messages": self.error_messages, "data": out_dict}

    return out_dict
def agrovoc_datasource(self):
    """
    function generates data source for Agrovoc terms lookup
    :return: pandas DataFrame with accession, label, description and tags columns
    """
    data = d_utils.json_to_pytype(os.path.join(drop_downs_pth, 'agrovocLabels.json'))["bindings"]
    data_df = pd.DataFrame(data)

    # each binding entry is a dict; extract its 'value' member
    data_df['accession'] = data_df['uri'].apply(lambda x: x.get('value', str()))
    data_df['label'] = data_df['label'].apply(lambda x: x.get('value', str()))

    # fix: close the final table row (</tr>) before </table> — the original
    # emitted malformed HTML, inconsistent with the other *_datasource builders
    data_df['description'] = '<table style="width:100%"><tr><td>Label</td><td>' + data_df[
        'label'] + '</td></tr><tr><td>Accession</td><td>' + data_df['accession'] + '</td></tr></table>'
    data_df['tags'] = [''] * len(data_df)

    return data_df
def broker_data_source(self):
    """
    function resolves dropdown list given a data source handle
    :return: the options for self.data_source — either loaded from a JSON file
        or returned directly by a helper callback; empty string if the handle
        is unknown
    """
    # map data-source handles either to JSON files on disk (str paths) or to
    # already-resolved option lists produced by helper callbacks
    pths_map = dict(
        select_yes_no=os.path.join(self.drop_downs_pth, 'select_yes_no.json'),
        select_start_end=os.path.join(self.drop_downs_pth, 'select_start_end.json'),
        cgiar_centres=os.path.join(self.drop_downs_pth, 'cgiar_centres.json'),
        crp_list=os.path.join(self.drop_downs_pth, 'crp_list.json'),
        languagelist=os.path.join(self.drop_downs_pth, 'language_list.json'),
        library_strategy=os.path.join(self.drop_downs_pth, 'library_strategy.json'),
        library_source=os.path.join(self.drop_downs_pth, 'library_source.json'),
        library_selection=os.path.join(self.drop_downs_pth, 'library_selection.json'),
        sequencing_instrument=os.path.join(self.drop_downs_pth, 'sequencing_instrument.json'),
        figshare_category_options=d_utils.get_figshare_category_options(),
        figshare_article_options=d_utils.get_figshare_article_options(),
        figshare_publish_options=d_utils.get_figshare_publish_options(),
        figshare_license_options=d_utils.get_figshare_license_options(),
        study_type_options=d_utils.get_study_type_options(),
        rooting_medium_options=d_utils.get_rooting_medium_options(),
        growth_area_options=d_utils.get_growth_area_options(),
        nutrient_control_options=d_utils.get_nutrient_control_options(),
        watering_control_options=d_utils.get_watering_control_options(),
        dataverse_subject_dropdown=d_utils.get_dataverse_subject_dropdown(
        ),
        repository_options=d_utils.get_repository_options())

    data = pths_map.get(self.data_source, str())

    if isinstance(
            data, str) and data:  # it's only a path, resolve to get actual data
        data = d_utils.json_to_pytype(data)

    return data
def generate_ui_template(self):
    """Generate the UI template from the configured mapping files.

    Refreshes the CG-Core schema first, then dispatches each mapping file to a
    provider/type-specific handler; generated controls go through a series of
    default-setting passes before being objectified.

    :return: dict with 'status' ('success'/'failed') and 'data'; failures also
        carry 'messages' with accumulated error messages
    """
    # update cg core
    CgCoreSchemas().process_schema()

    new_list = []
    json_files_handle = self.get_mapping_files()

    for file_name in json_files_handle:
        file_dict = d_utils.json_to_pytype(file_name)
        # retain the source file handle alongside its parsed content
        self.resource_objects.append(
            dict(file_handle=file_name, file_dict=file_dict))
        # dispatch key is '<provider>_<type>' from the file's configuration
        mapped_list = self.dispatch[
            file_dict['configuration']['provider'] + "_" +
            file_dict['configuration']['type']](file_dict)
        if isinstance(mapped_list, list):
            new_list.extend(mapped_list)

    if new_list:
        self.generated_controls = new_list

        # set some default fields
        self.set_type()
        self.set_control_meta()
        self.set_deprecation()
        self.set_versioning()
        self.set_form_display()
        self.set_table_display()
        self.set_ontologies()
        self.set_option_values()
        # self.update_original_resource()
        self.refactor_deprecated_controls()

        out_dict = self.objectify()
        out_dict = {"status": "success", "data": out_dict}
    else:
        # no controls generated: report failure with an empty data payload
        out_dict = {}
        out_dict = {
            "status": "failed",
            "messages": self.error_messages,
            "data": out_dict
        }

    return out_dict
def _convert_to_sra(self):
    """Validate the ISA-JSON and convert it to SRA XMLs in self.xml_path.

    Sets self.context['ena_status'] to 'converted_to_sra' on completion.
    """
    self.validate_isajson()

    lg.log('Converting to SRA', level=Loglvl.INFO, type=Logtype.FILE)

    sra_settings = d_utils.json_to_pytype(SRA_SETTINGS).get(
        "properties", dict())
    datafilehashes = self.collated_records["datafilehashes"]

    # fix: use a context manager so the ISA-JSON handle is closed
    # deterministically (the original leaked the open file object)
    with open(os.path.join(self.json_path, 'isa_json.json')) as json_fp:
        json2sra.convert(json_fp=json_fp,
                         path=self.xml_path,
                         sra_settings=sra_settings,
                         datafilehashes=datafilehashes,
                         validate_first=False)

    self.context["ena_status"] = "converted_to_sra"
    return
def target_repo_change(self, args): args = args.split(",") # args: item_id, old_value, new_value if args[0] == args[1] or not args[0] or not args[1]: # no change in target repository return False # reset batch stages if self.get_batch_attributes(): stage_list = d_utils.json_to_pytype(lkup.WIZARD_FILES["start"])['properties'] self.set_batch_stages(stage_list) self.set_batch_attributes(dict()) # discard description for datafile description = self.get_datafile_description() description['stages'] = list() description['attributes'] = dict() self.update_description(description)
def rate_metadata(self, item_meta, repo): """ function matches input metadata (item_meta) against a rating template, to determine an item's rating level. basically, the rating template is a set of sequential/mutually exclusive rules used in matching user description to some rating level. ideally, rules should be listed in a descending order of ranking (e.g., good, fair, poor) :param item_meta: metadata schema of the item to be rated :return item_rating: the resolved rating """ # get repo label repo_name = [ elem for elem in d_utils.get_repository_options() if elem["value"] == repo ] if repo_name: repo_name = repo_name[0]["label"] else: repo_name = str() rating_template = d_utils.json_to_pytype( lkup.METADATA_RATING_TEMPLATE_LKUPS["rating_template"] )["properties"] item_rating = dict() for level in rating_template: set_level = [] for k, v in level["matching_rules"].get(repo, dict()).items(): if v: set_level.append( getattr(MetadataRater, "validate_" + k)(self, v, item_meta)) set_level = set(set_level) if len(set_level) == 1 and set_level.pop(): item_rating["rating_level"] = level["rating_level"] item_rating["rating_level_description"] = level[ "rating_level_description"].format(**locals()) break return item_rating
def create_transfer(self, submission_id, file_path=None):
    """Create a remote-transfer record for a submission and return it.

    :param submission_id: id of the submission being transferred
    :param file_path: optional path of the file under transfer; when given,
        the file size is read from the associated chunked upload
    :return: the freshly inserted transfer record
    """
    fields = data_utils.json_to_pytype(DB_TEMPLATES['REMOTE_FILE_COLLECTION'])
    fields['submission_id'] = submission_id
    fields['profile_id'] = self.profile_id
    fields['file_path'] = file_path

    transfer_time = datetime.now().strftime("%d-%m-%Y %H:%M:%S")

    fields["commenced_on"] = transfer_time
    fields["current_time"] = transfer_time
    fields["transfer_rate"] = []

    if file_path:
        # NOTE(review): DataFile().GET is called with submission_id —
        # presumably the submission record carries 'file_id'; confirm
        d = DataFile().GET(submission_id)
        chunked_upload = ChunkedUpload.objects.get(id=int(d['file_id']))
        fields["file_size_bytes"] = u.filesize_toString(chunked_upload.offset)

    doc = self.RemoteFileCollection.insert(fields)

    # return inserted record
    df = self.GET(str(doc))
    return df
def countrieslist_datasource(self):
    """
    function generates data source for lookup of countries
    :return: pandas DataFrame with accession, label, description and tags columns
    """
    bindings = d_utils.json_to_pytype(
        os.path.join(drop_downs_pth, 'countries.json'))["bindings"]
    frame = pd.DataFrame(bindings)

    # country name doubles as both the accession and the display label
    frame['accession'] = frame['name']
    frame['label'] = frame['name']

    # tabular HTML snippet shown as the entry's description
    frame['description'] = (
        '<table style="width:100%"><tr><td>Code</td><td>'
        + frame['country-code']
        + '</td></tr><tr><td>Region</td><td>'
        + frame['region']
        + '</td></tr><tr><td>Sub-region</td><td>'
        + frame['sub-region']
        + '</td></tr></table>'
    )
    frame['tags'] = [''] * len(frame)

    return frame
def stage_description(self, current_stage):
    """Resolve and return the display dict for the next wizard stage.

    Given the current stage ref, finds the next stage in the batch stage list,
    expanding stage stubs (metadata placeholders that generate real stages)
    on first encounter, and returns its display dict — or an empty dict when
    there is nothing left to render.

    :param current_stage: ref of the stage just completed (falsy on first call)
    :return: display dict for the next stage, or {} when none remains
    """
    # get current stage, output next-in-line
    stage_dict = dict()

    if current_stage:
        stage_list = self.get_batch_stages()
    else:
        # likely no recorded stage
        stage_list = d_utils.json_to_pytype(lkup.WIZARD_FILES["start"])['properties']
        self.set_batch_stages(stage_list)

    # next, determine the stage in line to be rendered
    next_stage_indx = 0
    listed_stage = [indx for indx, elem in enumerate(stage_list) if elem['ref'] == current_stage]
    if listed_stage:
        next_stage_indx = listed_stage[0] + 1

    if next_stage_indx < len(stage_list):  # given a valid index, there is a stage to render!
        elem = stage_list[next_stage_indx]

        if not self.is_activated(elem):  # stage not previously activated
            # now, the retrieved stage may very well be a stage_stub (metadata for bootstrapping actual stage(s))
            # check for stage stubs and resolve accordingly
            new_stages = self.resolve_stage_stub(elem)

            if new_stages:
                self.activate_stage(elem)

                # insert generated stages into the stage list, just after the stub
                stage_gap = next_stage_indx + 1
                stage_list = stage_list[:stage_gap] + new_stages + stage_list[stage_gap:]
                self.set_batch_stages(stage_list)

                elem = stage_list[stage_gap]  # refresh elem

        # determine whether stage should be displayed based on the satisfaction of certain condition(s)
        if self.display_stage(elem):
            self.activate_stage(elem)
            stage_dict = self.get_stage_display(elem)

    return stage_dict
def process_schema(self):
    """
    function builds schema fragments to file, which is later called to generate the
    complete schema in db
    :return: True on completion (the updated schema is written back to
        self.path_to_json)
    """
    specs_df = self.get_schema_spec()

    # compose copo schema from cg-core spec; spec columns become schema fields
    df = specs_df.T.copy()
    df["ref"] = list(df.index)
    df["id"] = df['COPO_ID'].apply(lambda x: ".".join(
        ("copo", "cgCore", x)))
    df["label"] = df['LABEL']
    df["help_tip"] = df['HELP_TIP']
    df["dependency"] = df['DEPENDENCY']
    df["control"] = df['COPO_CONTROL']
    df["stage_id"] = df['Wizard_Stage_ID']
    df["target_repo"] = df['REPO']
    df["prefix"] = df['REPO_PREFIX']
    # -1 means 'unbounded' until narrowed below
    df["data_maxItems"] = -1

    # set max item for lookup control: single-valued lookups take one item
    temp_df_1 = df[(df['control'] == 'copo-lookup2') & (df['TYPE'] == '1')]
    if len(temp_df_1):
        df.loc[temp_df_1.index, 'data_maxItems'] = 1

    # set cardinality: '1' -> scalar string, 'm' -> array
    df["type"] = df['TYPE'].replace({'1': 'string', 'm': 'array'})

    # set data source for relevant controls
    df['data_source'] = np.where(
        df['control'].isin([
            'copo-lookup2', 'copo-multi-select2', 'copo-button-list',
            'copo-single-select'
        ]), df['COPO_DATA_SOURCE'], '')

    # reset 'type' to string for select2 controls
    temp_df_1 = df[df['control'].isin([
        'copo-lookup2', 'copo-multi-select2', 'copo-single-select',
        'copo-select2'
    ])]
    df.loc[temp_df_1.index, 'type'] = 'string'

    # keep only the schema-relevant columns, in a fixed order
    filtered_columns = [
        "ref", "id", "label", "help_tip", "control", "type", "stage_id",
        "data_source", "data_maxItems", "dependency", "target_repo", "prefix"
    ]

    df = df.loc[:, filtered_columns]

    df["required"] = False  # this will be set later
    df["field_constraint"] = "optional"  # this will be set later

    schema_list = df.to_dict('records')

    # update schema in file
    cg_schema = d_utils.json_to_pytype(self.path_to_json)
    cg_schema['properties'] = schema_list

    with open(self.path_to_json, 'w') as fout:
        json.dump(cg_schema, fout)

    return True
def _do_aspera_transfer(self, transfer_token=None, user_name=None, password=None,
                        remote_path=None, file_path=None, path2library=None, sub_id=None):
    """Transfer datafiles to ENA via Aspera, then convert to SRA and submit.

    Skips everything when the submission is already complete. Progress lines
    emitted by ascp are parsed to update the transfer record; afterwards the
    COPO metadata is converted to ISA-JSON, validated, converted to SRA XMLs,
    and submitted to ENA via curl, with the receipt's accessions saved back to
    the submission record.

    :param transfer_token: id of the transfer record to update with progress
    :param user_name:/:param password: Aspera credentials
    :param remote_path: remote drop-box path
    :param file_path: iterable of local file paths to transfer
    :param path2library: directory containing the ascp binary
    :param sub_id: submission id
    """
    # check submission status
    submission_status = Submission().isComplete(sub_id)

    if not submission_status or submission_status == 'false':
        lg.log('Starting aspera transfer', level=Loglvl.INFO, type=Logtype.FILE)

        kwargs = dict(target_id=sub_id, commenced_on=str(datetime.now()))
        Submission().save_record(dict(), **kwargs)

        # k is a loop counter which keeps track of the number of files transfered
        k = -1

        # build a single space-separated file list for one ascp invocation
        f_str = str()
        for f in file_path:
            f_str = f_str + ' ' + f

        cmd = "./ascp -d -QT -l300M -L- {f_str!s} {user_name!s}:{remote_path!s}".format(**locals())
        lg.log(cmd, level=Loglvl.INFO, type=Logtype.FILE)

        os.chdir(path2library)

        try:
            thread = pexpect.spawn(cmd, timeout=None)
            thread.expect(["assword:", pexpect.EOF])
            thread.sendline(password)

            cpl = thread.compile_pattern_list([pexpect.EOF, '(.+)'])

            while True:
                i = thread.expect_list(cpl, timeout=None)
                if i == 0:  # EOF! Possible error point if encountered before transfer completion
                    print("Process termination - check exit status!")
                    break
                elif i == 1:
                    pexp_match = thread.match.group(1)
                    prev_file = ''
                    tokens_to_match = ["Mb/s"]
                    units_to_match = ["KB", "MB"]
                    time_units = ['d', 'h', 'm', 's']
                    end_of_transfer = False

                    # only progress lines (carrying a rate) are parsed
                    if all(tm in pexp_match.decode("utf-8") for tm in tokens_to_match):
                        fields = {
                            "transfer_status": "transferring",
                            "current_time": datetime.now().strftime("%d-%m-%Y %H:%M:%S")
                        }

                        tokens = pexp_match.decode("utf-8").split(" ")
                        for token in tokens:
                            if not token == '':
                                if "file" in token:
                                    fields['file_path'] = token.split('=')[-1]
                                    if prev_file != fields['file_path']:
                                        k = k + 1
                                    # fix: assignment, not comparison — the original used
                                    # '==', a no-op that never updated prev_file
                                    prev_file = fields['file_path']
                                elif '%' in token:
                                    pct = float((token.rstrip("%")))
                                    # pct = (1/len(file_path) * pct) + (k * 1/len(file_path) * 100)
                                    fields['pct_completed'] = pct

                                    # flag end of transfer
                                    print(str(transfer_token) + ": " + str(pct) + '% transfered')
                                    # fix: compare numerically — the original compared the
                                    # token string against the int 100, which never held
                                    if pct == 100:
                                        end_of_transfer = True
                                elif any(um in token for um in units_to_match):
                                    fields['amt_transferred'] = token
                                elif "Mb/s" in token or "Mbps" in token:
                                    t = token[:-4]
                                    if '=' in t:
                                        fields['transfer_rate'] = t[t.find('=') + 1:]
                                    else:
                                        fields['transfer_rate'] = t
                                elif "status" in token:
                                    fields['transfer_status'] = token.split('=')[-1]
                                elif "rate" in token:
                                    fields['transfer_rate'] = token.split('=')[-1]
                                elif "elapsed" in token:
                                    fields['elapsed_time'] = token.split('=')[-1]
                                elif "loss" in token:
                                    fields['bytes_lost'] = token.split('=')[-1]
                                elif "size" in token:
                                    fields['file_size_bytes'] = token.split('=')[-1]
                                elif "ETA" in token:
                                    # reconstruct a human-readable ETA, skipping zero units
                                    eta = tokens[-2]
                                    estimated_completion = ""
                                    eta_split = eta.split(":")
                                    t_u = time_units[-len(eta_split):]
                                    for indx, eta_token in enumerate(eta.split(":")):
                                        if eta_token == "00":
                                            continue
                                        estimated_completion += eta_token + t_u[indx] + " "
                                    fields['estimated_completion'] = estimated_completion

                        RemoteDataFile().update_transfer(transfer_token, fields)

            kwargs = dict(target_id=sub_id, completed_on=datetime.now())
            Submission().save_record(dict(), **kwargs)

            # close thread
            thread.close()
            lg.log('Aspera Transfer completed', level=Loglvl.INFO, type=Logtype.FILE)
        except OSError:
            return redirect('web.apps.web_copo.views.goto_error', request=HttpRequest(),
                            message='There appears to be an issue with EBI.')

        # setup paths for conversion directories
        conv_dir = os.path.join(self._dir, sub_id)
        if not os.path.exists(os.path.join(conv_dir, 'json')):
            os.makedirs(os.path.join(conv_dir, 'json'))
        json_file_path = os.path.join(conv_dir, 'json', 'isa_json.json')
        xml_dir = conv_dir
        xml_path = os.path.join(xml_dir, 'run_set.xml')

        # Convert COPO JSON to ISA JSON
        lg.log('Obtaining ISA-JSON', level=Loglvl.INFO, type=Logtype.FILE)
        conv = cnv.Investigation(submission_token=sub_id)
        meta = conv.get_schema()

        # dump metadata to output file (fix: context managers close the handles
        # deterministically; the original left files open)
        with open(json_file_path, '+w') as json_file:
            json_file.write(dumps(meta))

        # Validate ISA_JSON
        lg.log('Validating ISA-JSON', level=Loglvl.INFO, type=Logtype.FILE)
        with open(json_file_path) as json_file:
            v = isajson.validate(json_file)
        lg.log(v, level=Loglvl.INFO, type=Logtype.FILE)

        # convert to SRA with isatools converter
        lg.log('Converting to SRA', level=Loglvl.INFO, type=Logtype.FILE)
        sra_settings = d_utils.json_to_pytype(SRA_SETTINGS).get("properties", dict())
        datafilehashes = conv.get_datafilehashes()

        with open(json_file_path) as json_fp:
            json2sra.convert2(json_fp=json_fp, path=conv_dir, sra_settings=sra_settings,
                              datafilehashes=datafilehashes, validate_first=False)

        # finally submit to SRA
        lg.log('Submitting XMLS to ENA via CURL', level=Loglvl.INFO, type=Logtype.FILE)
        submission_file = os.path.join(xml_dir, 'submission.xml')
        project_file = os.path.join(xml_dir, 'project_set.xml')
        sample_file = os.path.join(xml_dir, 'sample_set.xml')
        experiment_file = os.path.join(xml_dir, 'experiment_set.xml')
        run_file = os.path.join(xml_dir, 'run_set.xml')

        # NOTE(review): credentials are hard-coded in this URL — move them to
        # configuration/secrets; also consider subprocess.run with a list
        curl_cmd = ('curl -k -F "SUBMISSION=@' + submission_file + '"'
                    + ' -F "PROJECT=@' + os.path.join(remote_path, project_file) + '"'
                    + ' -F "SAMPLE=@' + os.path.join(remote_path, sample_file) + '"'
                    + ' -F "EXPERIMENT=@' + os.path.join(remote_path, experiment_file) + '"'
                    + ' -F "RUN=@' + os.path.join(remote_path, run_file) + '"'
                    + ' "https://www-test.ebi.ac.uk/ena/submit/drop-box/submit/?auth=ENA%20Webin-39233%20Apple123"')

        output = subprocess.check_output(curl_cmd, shell=True)
        lg.log(output, level=Loglvl.INFO, type=Logtype.FILE)
        lg.log("Extracting fields from receipt", level=Loglvl.INFO, type=Logtype.FILE)

        xml = ET.fromstring(output)

        accessions = dict()

        # get project accessions
        project = xml.find('./PROJECT')
        project_accession = project.get('accession', default='undefined')
        project_alias = project.get('alias', default='undefined')
        accessions['project'] = {'accession': project_accession, 'alias': project_alias}

        # get experiment accessions
        experiment = xml.find('./EXPERIMENT')
        experiment_accession = experiment.get('accession', default='undefined')
        experiment_alias = experiment.get('alias', default='undefined')
        accessions['experiment'] = {'accession': experiment_accession, 'alias': experiment_alias}

        # get submission accessions
        submission = xml.find('./SUBMISSION')
        submission_accession = submission.get('accession', default='undefined')
        submission_alias = submission.get('alias', default='undefined')
        accessions['submission'] = {'accession': submission_accession, 'alias': submission_alias}

        # get run accessions
        run = xml.find('./RUN')
        run_accession = run.get('accession', default='undefined')
        run_alias = run.get('alias', default='undefined')
        accessions['run'] = {'accession': run_accession, 'alias': run_alias}

        # get sample accessions
        samples = xml.findall('./SAMPLE')
        sample_accessions = list()
        for sample in samples:
            sample_accession = sample.get('accession', default='undefined')
            sample_alias = sample.get('alias', default='undefined')
            s = {'sample_accession': sample_accession, 'sample_alias': sample_alias}
            for bio_s in sample:
                s['biosample_accession'] = bio_s.get('accession', default='undefined')
            sample_accessions.append(s)

        accessions['sample'] = sample_accessions

        # save accessions to mongo profile record
        s = Submission().get_record(sub_id)
        s['accessions'] = accessions
        s['complete'] = True
        s['target_id'] = str(s.pop('_id'))
        Submission().save_record(dict(), **s)
def do_seq_reads_submission(self, sub_id, remote_path, transfer_token):
    """Convert COPO JSON for *sub_id* to ISA-JSON, validate it, generate SRA
    XMLs with the isatools converter, and submit them to ENA via curl.

    :param sub_id: submission record id; also names the conversion directory
        under ``self._dir``
    :param remote_path: prefix joined onto the generated XML paths for the
        curl form fields (NOTE(review): the xml paths built from ``xml_dir``
        look absolute, in which case ``os.path.join`` ignores this prefix —
        confirm intent)
    :param transfer_token: forwarded to ``self.get_accessions`` for receipt
        processing
    :return: ``True`` once the receipt has been processed
    """
    # set up paths for conversion directories
    conv_dir = os.path.join(self._dir, sub_id)
    if not os.path.exists(os.path.join(conv_dir, 'json')):
        os.makedirs(os.path.join(conv_dir, 'json'))
    json_file_path = os.path.join(conv_dir, 'json', 'isa_json.json')
    xml_dir = conv_dir

    # Convert COPO JSON to ISA JSON and dump the metadata to the output file
    lg.log('Obtaining ISA-JSON', level=Loglvl.INFO, type=Logtype.FILE)
    conv = cnv.Investigation(submission_token=sub_id)
    meta = conv.get_schema()
    # 'w' (was '+w'): we only write here, and the context manager guarantees
    # the handle is closed before the validator re-opens the file
    with open(json_file_path, 'w') as json_file:
        json_file.write(dumps(meta))

    # Validate ISA-JSON
    lg.log('Validating ISA-JSON', level=Loglvl.INFO, type=Logtype.FILE)
    with open(json_file_path) as json_file:
        v = isajson.validate(json_file)
    lg.log(v, level=Loglvl.INFO, type=Logtype.FILE)

    # Convert to SRA with the isatools converter
    lg.log('Converting to SRA', level=Loglvl.INFO, type=Logtype.FILE)
    sra_settings = d_utils.json_to_pytype(SRA_SETTINGS).get(
        "properties", dict())
    datafilehashes = conv.get_datafilehashes()
    # open via `with` so the handle is closed (the original leaked it)
    with open(json_file_path) as json_fp:
        json2sra.convert(json_fp=json_fp, path=conv_dir,
                         sra_settings=sra_settings,
                         datafilehashes=datafilehashes,
                         validate_first=False)

    # Finally submit the generated XMLs to ENA
    lg.log('Submitting XMLS to ENA via CURL', level=Loglvl.INFO,
           type=Logtype.FILE)
    submission_file = os.path.join(xml_dir, 'submission.xml')
    project_file = os.path.join(xml_dir, 'project_set.xml')
    sample_file = os.path.join(xml_dir, 'sample_set.xml')
    experiment_file = os.path.join(xml_dir, 'experiment_set.xml')
    run_file = os.path.join(xml_dir, 'run_set.xml')

    pass_word = resolve_env.get_env('WEBIN_USER_PASSWORD')
    user_token = resolve_env.get_env('WEBIN_USER')
    ena_service = resolve_env.get_env('ENA_SERVICE')
    user_token = user_token.split("@")[0]
    # SECURITY NOTE(review): credentials are embedded in the URL and hence in
    # the shell command line (visible via `ps` and in logs). Consider curl's
    # --netrc / a credentials file, and subprocess.run([...], shell=False).
    ena_uri = ("{ena_service!s}/ena/submit/drop-box/submit/"
               "?auth=ENA%20{user_token!s}%20{pass_word!s}").format(**locals())

    curl_cmd = ('curl -k'
                + ' -F "SUBMISSION=@' + submission_file + '"'
                + ' -F "PROJECT=@' + os.path.join(remote_path, project_file) + '"'
                + ' -F "SAMPLE=@' + os.path.join(remote_path, sample_file) + '"'
                + ' -F "EXPERIMENT=@' + os.path.join(remote_path, experiment_file) + '"'
                + ' -F "RUN=@' + os.path.join(remote_path, run_file) + '"'
                + ' "' + ena_uri + '"')
    output = subprocess.check_output(curl_cmd, shell=True)
    lg.log(output, level=Loglvl.INFO, type=Logtype.FILE)

    # Extract accessions from the ENA receipt and persist them
    lg.log("Extracting fields from receipt", level=Loglvl.INFO,
           type=Logtype.FILE)
    self.get_accessions(output, sub_id, transfer_token)

    return True
def do_wizard_messages(self):
    """Load the generic wizard messages into the context and return it.

    NOTE(review): a later method in this class re-uses this exact name
    (datafile-specific variant); Python keeps the later definition, so
    confirm which one callers actually expect.
    """
    messages_source = lkup.MESSAGES_LKUPS["wizards_messages"]
    wizard_messages = d_utils.json_to_pytype(messages_source)
    self.context['wiz_message'] = wizard_messages["properties"]
    return self.context
def do_wizard_messages(self):
    """Populate the context with datafile-wizard messages and how-tos.

    NOTE(review): this redefines an earlier method of the same name in the
    class body; Python silently keeps this later definition.

    :return: the updated ``self.context`` dict
    """
    datafile_msgs = d_utils.json_to_pytype(
        lkup.MESSAGES_LKUPS["datafile_wizard"])
    howtos = d_utils.json_to_pytype(
        lkup.MESSAGES_LKUPS["datafile_wizard_howto"])
    self.context['wiz_message'] = datafile_msgs["properties"]
    self.context['wiz_howtos'] = howtos
    return self.context