def convert_study_mdata(study):
    ''' Build the list of (key, value) iRODS metadata tuples for a study.

        The fields in STUDY_PREFIXED_FIELDS_LIST are emitted under a 'study_'
        prefix; a warning is logged for each of them that is missing or None.
        The remaining fields keep their own name, except that a list-valued
        field found in STUDY_FIELDS_MAPPING is renamed, and every list-valued
        field is emitted once per element. An 'ena_project_id' whose converted
        value equals '0' is dropped.

        Params:
            study -- object exposing the study fields as attributes
        Returns:
            list of (key, value) tuples, every value having been passed
            through utils.unicode2string
    '''
    STUDY_PREFIXED_FIELDS_LIST = ['internal_id', 'name', 'accession_number', 'description']
    STUDY_NONPREFIXED_FIELDS_LIST = ['study_type', 'study_title', 'faculty_sponsor',
                                     'ena_project_id', 'pi_list', 'study_visibility']
    STUDY_FIELDS_MAPPING = {'pi_list': 'pi_user_id'}

    irods_study_mdata = []
    for field_name in STUDY_PREFIXED_FIELDS_LIST:
        # A missing attribute and an attribute set to None are treated alike.
        field_val = getattr(study, field_name, None)
        if field_val is None:
            logging.warning("This field is NONE! field-name: %s", field_name)
            continue
        irods_study_mdata.append(('study_' + field_name, utils.unicode2string(field_val)))

    for field_name in STUDY_NONPREFIXED_FIELDS_LIST:
        field_val = getattr(study, field_name, None)
        if field_val is None:
            continue
        if isinstance(field_val, list):
            # The renaming only applies to list-valued fields.
            irods_name = STUDY_FIELDS_MAPPING.get(field_name, field_name)
            for elem in field_val:
                irods_study_mdata.append((irods_name, utils.unicode2string(elem)))
        else:
            field_val = utils.unicode2string(field_val)
            if field_name == 'ena_project_id' and field_val == '0':
                continue
            irods_study_mdata.append((field_name, field_val))
    return irods_study_mdata
def convert_BAMFile(bamfile):
    ''' Build the list of (key, value) iRODS metadata tuples for a BAM file.

        Each attribute named by a key of BAMFILE_FIELDS_MAPPING is emitted
        under the mapped iRODS name: once per element when the value is a
        list, once otherwise. Attributes that are missing or None are skipped.

        Params:
            bamfile -- object exposing the BAM file fields as attributes
        Returns:
            list of (key, value) tuples, every value having been passed
            through utils.unicode2string
    '''
    BAMFILE_FIELDS_MAPPING = {'seq_centers': "seq_center",
                              'lane_list': 'lane_nr',
                              'tag_list': 'tag',
                              'run_list': 'run_id',
                              'platform_list': 'platform',
                              'seq_date_list': 'seq_date',
                              'library_well_list': 'library_well_id',
                              'multiplex_lib_list': 'multiplex_lib_id'
                              }
    irods_file_mdata = []
    # The iRODS name is resolved once per field, before branching on the
    # value type, so the non-list branch never sees an unbound or stale name.
    for field_name, irods_field_name in BAMFILE_FIELDS_MAPPING.items():
        field_val = getattr(bamfile, field_name, None)
        if field_val is None:
            continue
        if isinstance(field_val, list):
            for elem in field_val:
                irods_file_mdata.append((irods_field_name, utils.unicode2string(elem)))
        else:
            irods_file_mdata.append((irods_field_name, utils.unicode2string(field_val)))
    return irods_file_mdata
def convert_sample_mdata(sample):
    ''' Build the list of (key, value) iRODS metadata tuples for a sample.

        The fields in SAMPLE_PREFIXED_FIELDS_LIST are emitted under a
        'sample_' prefix; the others keep their own name, except 'gender',
        which is emitted as 'sex'. Attributes that are missing or None are
        skipped.

        Params:
            sample -- object exposing the sample fields as attributes
        Returns:
            list of (key, value) tuples, every value having been passed
            through utils.unicode2string
    '''
    SAMPLE_PREFIXED_FIELDS_LIST = ['internal_id', 'name', 'accession_number', 'public_name']  # , 'common_name'
    SAMPLE_NONPREFIXED_FIELDS_LIST = ['sanger_sample_id', 'sample_tissue_type', 'taxon_id', 'gender',
                                      'cohort', 'ethnicity', 'country_of_origin',
                                      'geographical_region', 'organism']
    # TODO: 'reference_genome' - add the logic for this one; still unclear if
    # it's the same for all files or should be per sample.

    irods_sampl_mdata = []      # [(K, V), (K, V), ...]
    for field_name in SAMPLE_PREFIXED_FIELDS_LIST:
        # A missing attribute and an attribute set to None are treated alike.
        field_val = getattr(sample, field_name, None)
        if field_val is not None:
            irods_sampl_mdata.append(('sample_' + field_name, utils.unicode2string(field_val)))

    for field_name in SAMPLE_NONPREFIXED_FIELDS_LIST:
        field_val = getattr(sample, field_name, None)
        if field_val is not None:
            irods_name = 'sex' if field_name == 'gender' else field_name
            irods_sampl_mdata.append((irods_name, utils.unicode2string(field_val)))
    return irods_sampl_mdata
def convert_index_file_mdata(indexed_file_id, file_md5, indexed_file_md5):
    ''' Return the three (key, value) iRODS metadata tuples of an index file.

        The keys are 'indexed_file_id', 'file_md5' and 'indexed_file_md5', in
        that order; each value is the matching argument passed through
        utils.unicode2string.
    '''
    raw_pairs = (
        ('indexed_file_id', indexed_file_id),
        ('file_md5', file_md5),
        ('indexed_file_md5', indexed_file_md5),
    )
    # The appends for submitter_user_id, submission_date and submission_id
    # are commented out in the original and remain disabled.
    return [(key, utils.unicode2string(raw_val)) for key, raw_val in raw_pairs]


# Disabled debugging helper, kept as found:
#def test_file_meta_pairs(tuple_list):
#    key_occ_dict = defaultdict(int)
#    for item in tuple_list:
#        print "ITEM: ", item[0], item[1]
#        key_occ_dict[item[0]] += 1
#    #key_count = Counter(tuple_list)
#    for k, v in key_occ_dict.iteritems():
#        print k+" : "+str(v)+"\n"
#    #print key_occ_dict
#test_file_meta_pairs([('cohort', 'ef'), ('sample_id', '123')])
def convert_sample_mdata(sample):
    ''' Build the list of (key, value) iRODS metadata tuples for a sample.

        The fields in SAMPLE_PREFIXED_FIELDS_LIST are emitted under a
        'sample_' prefix; the others keep their own name, except 'gender',
        which is emitted as 'sex'. Attributes that are missing or None are
        skipped.

        Params:
            sample -- object exposing the sample fields as attributes
        Returns:
            list of (key, value) tuples, every value having been passed
            through utils.unicode2string
    '''
    SAMPLE_PREFIXED_FIELDS_LIST = ['internal_id', 'name', 'accession_number', 'public_name']  # , 'common_name'
    SAMPLE_NONPREFIXED_FIELDS_LIST = ['sanger_sample_id', 'sample_tissue_type', 'taxon_id', 'gender',
                                      'cohort', 'ethnicity', 'country_of_origin',
                                      'geographical_region', 'organism']
    # TODO: 'reference_genome' - add the logic for this one; still unclear if
    # it's the same for all files or should be per sample.

    irods_sampl_mdata = []      # [(K, V), (K, V), ...]
    for field_name in SAMPLE_PREFIXED_FIELDS_LIST:
        # A missing attribute and an attribute set to None are treated alike.
        field_val = getattr(sample, field_name, None)
        if field_val is not None:
            irods_sampl_mdata.append(('sample_' + field_name, utils.unicode2string(field_val)))

    for field_name in SAMPLE_NONPREFIXED_FIELDS_LIST:
        field_val = getattr(sample, field_name, None)
        if field_val is not None:
            irods_name = 'sex' if field_name == 'gender' else field_name
            irods_sampl_mdata.append((irods_name, utils.unicode2string(field_val)))
    return irods_sampl_mdata
def convert_BAMFile(bamfile):
    ''' Build the list of (key, value) iRODS metadata tuples for a BAM file.

        Each attribute named by a key of BAMFILE_FIELDS_MAPPING is emitted
        under the mapped iRODS name: once per element when the value is a
        list, once otherwise. Attributes that are missing or None are skipped.

        Params:
            bamfile -- object exposing the BAM file fields as attributes
        Returns:
            list of (key, value) tuples, every value having been passed
            through utils.unicode2string
    '''
    BAMFILE_FIELDS_MAPPING = {
        'seq_centers': "seq_center",
        'lane_list': 'lane_nr',
        'tag_list': 'tag',
        'run_list': 'run_id',
        'platform_list': 'platform',
        'seq_date_list': 'seq_date',
        'library_well_list': 'library_well_id',
        'multiplex_lib_list': 'multiplex_lib_id'
    }
    irods_file_mdata = []
    # The iRODS name is resolved once per field, before branching on the
    # value type, so the non-list branch never sees an unbound or stale name.
    for field_name, irods_field_name in BAMFILE_FIELDS_MAPPING.items():
        field_val = getattr(bamfile, field_name, None)
        if field_val is None:
            continue
        if isinstance(field_val, list):
            for elem in field_val:
                irods_file_mdata.append((irods_field_name, utils.unicode2string(elem)))
        else:
            irods_file_mdata.append((irods_field_name, utils.unicode2string(field_val)))
    return irods_file_mdata
def convert_study_mdata(study):
    ''' Build the list of (key, value) iRODS metadata tuples for a study.

        The fields in STUDY_PREFIXED_FIELDS_LIST are emitted under a 'study_'
        prefix; a warning is logged for each of them that is missing or None.
        The remaining fields keep their own name, except that a list-valued
        field found in STUDY_FIELDS_MAPPING is renamed, and every list-valued
        field is emitted once per element. An 'ena_project_id' whose converted
        value equals '0' is dropped.

        Params:
            study -- object exposing the study fields as attributes
        Returns:
            list of (key, value) tuples, every value having been passed
            through utils.unicode2string
    '''
    STUDY_PREFIXED_FIELDS_LIST = [
        'internal_id', 'name', 'accession_number', 'description'
    ]
    STUDY_NONPREFIXED_FIELDS_LIST = [
        'study_type', 'study_title', 'faculty_sponsor', 'ena_project_id',
        'pi_list', 'study_visibility'
    ]
    STUDY_FIELDS_MAPPING = {'pi_list': 'pi_user_id'}

    irods_study_mdata = []
    for field_name in STUDY_PREFIXED_FIELDS_LIST:
        # A missing attribute and an attribute set to None are treated alike.
        field_val = getattr(study, field_name, None)
        if field_val is None:
            logging.warning("This field is NONE! field-name: %s", field_name)
            continue
        irods_study_mdata.append(
            ('study_' + field_name, utils.unicode2string(field_val)))

    for field_name in STUDY_NONPREFIXED_FIELDS_LIST:
        field_val = getattr(study, field_name, None)
        if field_val is None:
            continue
        if isinstance(field_val, list):
            # The renaming only applies to list-valued fields.
            irods_name = STUDY_FIELDS_MAPPING.get(field_name, field_name)
            for elem in field_val:
                irods_study_mdata.append(
                    (irods_name, utils.unicode2string(elem)))
        else:
            field_val = utils.unicode2string(field_val)
            if field_name == 'ena_project_id' and field_val == '0':
                continue
            irods_study_mdata.append((field_name, field_val))
    return irods_study_mdata
def convert_index_file_mdata(indexed_file_id, file_md5, indexed_file_md5):
    ''' Return the three (key, value) iRODS metadata tuples of an index file.

        The keys are 'indexed_file_id', 'file_md5' and 'indexed_file_md5', in
        that order; each value is the matching argument passed through
        utils.unicode2string.
    '''
    raw_pairs = (
        ('indexed_file_id', indexed_file_id),
        ('file_md5', file_md5),
        ('indexed_file_md5', indexed_file_md5),
    )
    # The appends for submitter_user_id, submission_date and submission_id
    # are commented out in the original and remain disabled.
    return [(key, utils.unicode2string(raw_val)) for key, raw_val in raw_pairs]
def convert_library_mdata(lib):
    ''' Build the list of (key, value) iRODS metadata tuples for a library.

        The fields in LIBRARY_PREFIXED_FIELDS_LIST are emitted under a
        'library_' prefix; the others keep their own name. Attributes that
        are missing or None are skipped.

        Params:
            lib -- object exposing the library fields as attributes
        Returns:
            list of (key, value) tuples, every value having been passed
            through utils.unicode2string
    '''
    LIBRARY_PREFIXED_FIELDS_LIST = ['internal_id', 'name', 'public_name']
    LIBRARY_NONPREFIXED_FIELDS_LIST = ['library_type', 'library_source', 'library_selection',
                                       'library_strategy', "coverage"]

    irods_lib_mdata = []
    for field_name in LIBRARY_PREFIXED_FIELDS_LIST:
        # A missing attribute and an attribute set to None are treated alike.
        field_val = getattr(lib, field_name, None)
        if field_val is not None:
            irods_lib_mdata.append(('library_' + field_name, utils.unicode2string(field_val)))

    for field_name in LIBRARY_NONPREFIXED_FIELDS_LIST:
        field_val = getattr(lib, field_name, None)
        if field_val is not None:
            irods_lib_mdata.append((field_name, utils.unicode2string(field_val)))
    return irods_lib_mdata
def test_unicode2string(self):
    ''' Check that utils.unicode2string applied to a nested dict of unicode
        strings returns a dict that compares equal to the input. '''
    task_dict = {
        u'400f65eb-16d4-4e6b-80d5-4d1113fcfdf4': {
            u'status': u'SUCCESS',
            u'type': u'serapis.worker.tasks.UpdateFileMdataTask'},
        u'397df5da-7dd1-4068-9a67-9ebac1a64472': {
            u'status': u'SUCCESS',
            u'type': u'serapis.worker.tasks.ParseBAMHeaderTask'},
        u'033f350c-6961-4eb5-9b0d-5cda99dbe7e9': {
            u'status': u'SUCCESS',
            u'type': u'serapis.worker.tasks.UploadFileTask'},
        u'257da594-bc55-4735-8200-67ce9447ba0b': {
            u'status': u'SUCCESS',
            u'type': u'serapis.worker.tasks.CalculateMD5Task'},
    }
    task_dict_str = utils.unicode2string(task_dict)
    # A single pre-formatted argument in parentheses prints the same text
    # under Python 2 and Python 3, unlike the bare print statement.
    print("TASK DICT AFTER UNICODE CONVERT: %s" % repr(task_dict_str))
    self.assertDictEqual(task_dict, task_dict_str)
def test_unicode2string(self):
    ''' Check that utils.unicode2string applied to a nested dict of strings
        returns a dict that compares equal to the input. '''
    # (task id, task type) pairs; every task carries the same status.
    task_types = (
        ('400f65eb-16d4-4e6b-80d5-4d1113fcfdf4', 'serapis.worker.tasks.UpdateFileMdataTask'),
        ('397df5da-7dd1-4068-9a67-9ebac1a64472', 'serapis.worker.tasks.ParseBAMHeaderTask'),
        ('033f350c-6961-4eb5-9b0d-5cda99dbe7e9', 'serapis.worker.tasks.UploadFileTask'),
        ('257da594-bc55-4735-8200-67ce9447ba0b', 'serapis.worker.tasks.CalculateMD5Task'),
    )
    original = {}
    for task_id, task_type in task_types:
        original[task_id] = {'status': 'SUCCESS', 'type': task_type}

    converted = utils.unicode2string(original)
    print("TASK DICT AFTER UNICODE CONVERT: %s" % repr(converted))
    self.assertDictEqual(original, converted)
def convert_library_mdata(lib):
    ''' Build the list of (key, value) iRODS metadata tuples for a library.

        The fields in LIBRARY_PREFIXED_FIELDS_LIST are emitted under a
        'library_' prefix; the others keep their own name. Attributes that
        are missing or None are skipped.

        Params:
            lib -- object exposing the library fields as attributes
        Returns:
            list of (key, value) tuples, every value having been passed
            through utils.unicode2string
    '''
    LIBRARY_PREFIXED_FIELDS_LIST = ['internal_id', 'name', 'public_name']
    LIBRARY_NONPREFIXED_FIELDS_LIST = [
        'library_type', 'library_source', 'library_selection',
        'library_strategy', "coverage"
    ]

    irods_lib_mdata = []
    for field_name in LIBRARY_PREFIXED_FIELDS_LIST:
        # A missing attribute and an attribute set to None are treated alike.
        field_val = getattr(lib, field_name, None)
        if field_val is not None:
            irods_lib_mdata.append(
                ('library_' + field_name, utils.unicode2string(field_val)))

    for field_name in LIBRARY_NONPREFIXED_FIELDS_LIST:
        field_val = getattr(lib, field_name, None)
        if field_val is not None:
            irods_lib_mdata.append(
                (field_name, utils.unicode2string(field_val)))
    return irods_lib_mdata
def convert_reference_genome_mdata(ref_genome):
    ''' Return the (key, value) iRODS metadata tuples of a reference genome.

        'md5' is emitted under the 'ref_file_' prefix and 'name' under the
        'ref_' prefix, in that order. A field is emitted whenever the
        attribute exists; its value is not checked against None.
    '''
    # NOTE: the original comment reads "Josh required to take the name out",
    # yet 'name' is still listed and therefore still emitted.
    prefix_by_field = (('md5', 'ref_file_'), ('name', 'ref_'))

    irods_ref_mdata = []
    for attr_name, key_prefix in prefix_by_field:
        if not hasattr(ref_genome, attr_name):
            continue
        converted = utils.unicode2string(getattr(ref_genome, attr_name))
        irods_ref_mdata.append((key_prefix + attr_name, converted))
    return irods_ref_mdata
def convert_reference_genome_mdata(ref_genome):
    ''' Return the (key, value) iRODS metadata tuples of a reference genome.

        'md5' is emitted under the 'ref_file_' prefix and 'name' under the
        'ref_' prefix, in that order. A field is emitted whenever the
        attribute exists; its value is not checked against None.
    '''
    # NOTE: the original comment reads "Josh required to take the name out",
    # yet 'name' is still listed and therefore still emitted.
    prefix_by_field = (('md5', 'ref_file_'), ('name', 'ref_'))

    irods_ref_mdata = []
    for attr_name, key_prefix in prefix_by_field:
        if not hasattr(ref_genome, attr_name):
            continue
        converted = utils.unicode2string(getattr(ref_genome, attr_name))
        irods_ref_mdata.append((key_prefix + attr_name, converted))
    return irods_ref_mdata
def convert_file_mdata(subm_file, submission_date, ref_genome=None, sanger_user_id='external'):
    ''' Assemble the de-duplicated iRODS metadata of a submitted file.

        Gathers the file's own fields, the metadata of its studies, libraries
        and samples, the file-type specific metadata, the optional reference
        genome and abstract library, the index file md5 (when the index file
        has a 'file_path_client' attribute), the submitter and the submission
        date.

        Params:
            subm_file -- object exposing the file fields as attributes; it
                         must have 'abstract_library' and 'index_file'
            submission_date -- value emitted under 'submission_date'
            ref_genome -- optional reference genome object
            sanger_user_id -- value emitted under 'submitter_user_id'
        Returns:
            list of unique (key, value) tuples sorted by key
    '''
    FILE_FIELDS_LIST = ['submission_id', 'file_type', 'study_list', 'library_list', 'sample_list',
                        'data_type', 'data_subtype_tags', 'hgi_project', 'pmid_list', 'security_level']
    FILE_PREFIXED_FIELDS_LIST = ['md5', 'id']
    # Fields holding a collection of entities, each converted by its own function.
    entity_converters = {
        'study_list': convert_study_mdata,
        'library_list': convert_library_mdata,
        'sample_list': convert_sample_mdata,
    }

    irods_file_mdata = []
    for field_name in FILE_PREFIXED_FIELDS_LIST:
        # A missing attribute is treated like one set to None.
        raw_val = getattr(subm_file, field_name, None)
        if raw_val in (None, ' '):
            continue
        field_val = utils.unicode2string(str(raw_val))
        if field_name == 'id':
            # Kept from the original: the id is coerced to str once more.
            field_val = str(field_val)
        irods_file_mdata.append(('file_' + field_name, field_val))

    for field_name in FILE_FIELDS_LIST:
        field_val = getattr(subm_file, field_name, None)
        if field_val is None:
            continue
        if field_name in entity_converters:
            convert_entity = entity_converters[field_name]
            for entity in field_val:
                irods_file_mdata.extend(convert_entity(entity))
        elif field_name == 'file_type':
            field_val = utils.unicode2string(field_val)
            file_specific_mdata = convert_specific_file_mdata(field_val, subm_file)
            if file_specific_mdata:
                irods_file_mdata.extend(file_specific_mdata)
            irods_file_mdata.append((field_name, field_val))
        elif field_name == 'data_subtype_tags':
            # Only the values of the tags mapping are emitted.
            for tag_val in utils.unicode2string(field_val).values():
                irods_file_mdata.append(('data_subtype_tag', utils.unicode2string(tag_val)))
        elif field_name == 'pmid_list':
            for pmid in utils.unicode2string(field_val):
                irods_file_mdata.append(('pmid', utils.unicode2string(pmid)))
        elif field_name == 'hgi_project':
            # NOTE(review): the original converts this value twice; the second
            # call looks redundant but is kept - confirm and simplify.
            field_val = utils.unicode2string(field_val)
            irods_file_mdata.append(('hgi_project', utils.unicode2string(field_val)))
        else:
            # Plain fields, 'security_level' included, keep their own name.
            irods_file_mdata.append((field_name, utils.unicode2string(field_val)))

    if ref_genome is not None:
        irods_file_mdata.extend(convert_reference_genome_mdata(ref_genome))
    if subm_file.abstract_library is not None:
        irods_file_mdata.extend(convert_library_mdata(subm_file.abstract_library))
    if hasattr(subm_file.index_file, 'file_path_client'):
        irods_file_mdata.append(('index_file_md5', utils.unicode2string(subm_file.index_file.md5)))
    irods_file_mdata.append(('submitter_user_id', utils.unicode2string(sanger_user_id)))
    irods_file_mdata.append(('submission_date', utils.unicode2string(submission_date)))

    # set() drops duplicates but iterates in arbitrary order, so tuples that
    # share a key are tie-broken on the repr of their value to keep the
    # result reproducible between runs.
    return sorted(set(irods_file_mdata), key=lambda pair: (pair[0], repr(pair[1])))
def convert(self, request_data):
    ''' Return request_data after passing it through utils.unicode2string. '''
    converted_data = utils.unicode2string(request_data)
    return converted_data
def convert(self, request_data):
    ''' Return request_data after passing it through utils.unicode2string. '''
    converted_data = utils.unicode2string(request_data)
    return converted_data
def convert_file_mdata(subm_file,
                       submission_date,
                       ref_genome=None,
                       sanger_user_id='external'):
    ''' Assemble the de-duplicated iRODS metadata of a submitted file.

        Gathers the file's own fields, the metadata of its studies, libraries
        and samples, the file-type specific metadata, the optional reference
        genome and abstract library, the index file md5 (when the index file
        has a 'file_path_client' attribute), the submitter and the submission
        date.

        Params:
            subm_file -- object exposing the file fields as attributes; it
                         must have 'abstract_library' and 'index_file'
            submission_date -- value emitted under 'submission_date'
            ref_genome -- optional reference genome object
            sanger_user_id -- value emitted under 'submitter_user_id'
        Returns:
            list of unique (key, value) tuples sorted by key
    '''
    FILE_FIELDS_LIST = [
        'submission_id', 'file_type', 'study_list', 'library_list',
        'sample_list', 'data_type', 'data_subtype_tags', 'hgi_project',
        'pmid_list', 'security_level'
    ]
    FILE_PREFIXED_FIELDS_LIST = ['md5', 'id']
    # Fields holding a collection of entities, each converted by its own function.
    entity_converters = {
        'study_list': convert_study_mdata,
        'library_list': convert_library_mdata,
        'sample_list': convert_sample_mdata,
    }

    irods_file_mdata = []
    for field_name in FILE_PREFIXED_FIELDS_LIST:
        # A missing attribute is treated like one set to None.
        raw_val = getattr(subm_file, field_name, None)
        if raw_val in (None, ' '):
            continue
        field_val = utils.unicode2string(str(raw_val))
        if field_name == 'id':
            # Kept from the original: the id is coerced to str once more.
            field_val = str(field_val)
        irods_file_mdata.append(('file_' + field_name, field_val))

    for field_name in FILE_FIELDS_LIST:
        field_val = getattr(subm_file, field_name, None)
        if field_val is None:
            continue
        if field_name in entity_converters:
            convert_entity = entity_converters[field_name]
            for entity in field_val:
                irods_file_mdata.extend(convert_entity(entity))
        elif field_name == 'file_type':
            field_val = utils.unicode2string(field_val)
            file_specific_mdata = convert_specific_file_mdata(
                field_val, subm_file)
            if file_specific_mdata:
                irods_file_mdata.extend(file_specific_mdata)
            irods_file_mdata.append((field_name, field_val))
        elif field_name == 'data_subtype_tags':
            # Only the values of the tags mapping are emitted.
            for tag_val in utils.unicode2string(field_val).values():
                irods_file_mdata.append(
                    ('data_subtype_tag', utils.unicode2string(tag_val)))
        elif field_name == 'pmid_list':
            for pmid in utils.unicode2string(field_val):
                irods_file_mdata.append(
                    ('pmid', utils.unicode2string(pmid)))
        elif field_name == 'hgi_project':
            # NOTE(review): the original converts this value twice; the second
            # call looks redundant but is kept - confirm and simplify.
            field_val = utils.unicode2string(field_val)
            irods_file_mdata.append(
                ('hgi_project', utils.unicode2string(field_val)))
        else:
            # Plain fields, 'security_level' included, keep their own name.
            irods_file_mdata.append(
                (field_name, utils.unicode2string(field_val)))

    if ref_genome is not None:
        irods_file_mdata.extend(convert_reference_genome_mdata(ref_genome))
    if subm_file.abstract_library is not None:
        irods_file_mdata.extend(
            convert_library_mdata(subm_file.abstract_library))
    if hasattr(subm_file.index_file, 'file_path_client'):
        irods_file_mdata.append(
            ('index_file_md5', utils.unicode2string(subm_file.index_file.md5)))
    irods_file_mdata.append(
        ('submitter_user_id', utils.unicode2string(sanger_user_id)))
    irods_file_mdata.append(
        ('submission_date', utils.unicode2string(submission_date)))

    # set() drops duplicates but iterates in arbitrary order, so tuples that
    # share a key are tie-broken on the repr of their value to keep the
    # result reproducible between runs.
    return sorted(
        set(irods_file_mdata), key=lambda pair: (pair[0], repr(pair[1])))