def get_study_ids(self, cohort_id_array):
    """
    Return the user study identifiers associated with the samples in all
    given cohorts.

    The result is memoized on the instance (self._study_ids), so the
    database is only queried on the first call.

    Args:
        cohort_id_array: iterable of cohort ids to inspect.

    Returns:
        Tuple of non-null study_id values found across the cohorts.
    """
    if self._study_ids is not None:
        return self._study_ids

    study_ids = ()
    db = None
    cursor = None
    try:
        # One connection/cursor reused for every cohort; the original
        # reconnected inside the loop and never closed on success.
        db = sql_connection()
        cursor = db.cursor(MySQLdb.cursors.DictCursor)
        for cohort_id in cohort_id_array:
            cursor.execute(
                "SELECT study_id FROM cohorts_samples WHERE cohort_id = %s GROUP BY study_id",
                (cohort_id,)
            )
            for row in cursor.fetchall():
                if row["study_id"] is not None:
                    study_ids += (row["study_id"],)
    finally:
        # Close in reverse acquisition order; runs on success and on error.
        if cursor:
            cursor.close()
        if db:
            db.close()

    self._study_ids = study_ids
    return self._study_ids
def get_cohort_barcodes(cls, cohort_id_array):
    """
    Return the unique sample barcodes belonging to the given cohorts.

    Args:
        cohort_id_array: iterable of cohort ids.

    Returns:
        List of unique barcodes (order unspecified).

    Raises:
        CohortException: on any database error.
    """
    # Generate the 'IN' statement string: (%s, %s, ..., %s)
    cohort_id_stmt = ', '.join(['%s' for x in xrange(len(cohort_id_array))])
    query = 'SELECT sample_id AS barcode FROM {cohort_table} WHERE cohort_id IN ({cohort_id_stmt})'.format(
        cohort_table=DJANGO_COHORT_TABLE,
        cohort_id_stmt=cohort_id_stmt)

    db = None
    cursor = None
    try:
        db = sql_connection()
        cursor = db.cursor(DictCursor)
        cursor.execute(query, tuple(cohort_id_array))
        barcodes = [row['barcode'] for row in cursor.fetchall()]
        # Return only unique barcodes
        return list(set(barcodes))
    except Exception as e:
        # str(e) instead of e.message: .message is deprecated and absent on
        # many exception types. The unreachable trailing
        # "raise CohortException('bad cohort...')" was removed.
        raise CohortException('get_cohort_barcodes CloudSQL error, cohort IDs {cohort_ids}: {message}'.format(
            cohort_ids=cohort_id_array, message=str(e)))
    finally:
        # Original leaked cursor/db whenever the query raised.
        if cursor:
            cursor.close()
        if db:
            db.close()
def get_cohorts_for_datapoints(cls, cohort_id_array):
    """
    Map each sample in the given cohorts to the list of cohort ids that
    contain it.

    Args:
        cohort_id_array: iterable of cohort ids.

    Returns:
        Dict of sample_id -> [cohort_id, ...].

    Raises:
        CohortException: on any database error.
    """
    # Generate the 'IN' statement string: (%s, %s, ..., %s)
    cohort_id_stmt = ', '.join(['%s' for x in xrange(len(cohort_id_array))])
    query = 'SELECT sample_id, cohort_id FROM {cohort_samples_table} WHERE cohort_id IN ({cohort_id_stmt})'.format(
        cohort_samples_table=DJANGO_COHORT_SAMPLES_TABLE,
        cohort_id_stmt=cohort_id_stmt)

    db = None
    cursor = None
    try:
        db = sql_connection()
        cursor = db.cursor(DictCursor)
        cursor.execute(query, tuple(cohort_id_array))
        cohort_per_samples = {}
        for row in cursor.fetchall():
            # setdefault replaces the manual "if key not in dict" dance.
            cohort_per_samples.setdefault(row['sample_id'], []).append(row['cohort_id'])
        return cohort_per_samples
    except Exception as e:
        # str(e) rather than deprecated e.message.
        raise CohortException('get_cohorts_for_datapoints CloudSQL error, cohort IDs {cohort_ids}: {message}'.format(
            cohort_ids=cohort_id_array, message=str(e)))
    finally:
        # Original leaked cursor/db whenever the query raised.
        if cursor:
            cursor.close()
        if db:
            db.close()
def user_feature_handler(feature_id, cohort_id_array):
    # Determine, for the given cohorts, whether TCGA data should be included
    # and which user studies (project_ids) contribute samples; also resolves
    # a user-defined feature id when feature_id is a USER: feature.
    include_tcga = False
    user_studies = ()
    for cohort_id in cohort_id_array:
        try:
            db = sql_connection()
            cursor = db.cursor(MySQLdb.cursors.DictCursor)
            cursor.execute("SELECT project_id FROM cohorts_samples WHERE cohort_id = %s GROUP BY project_id", (cohort_id,))
            for row in cursor.fetchall():
                # A NULL project_id marks a sample from the TCGA (non-user) source.
                if row['project_id'] is None:
                    include_tcga = True
                else:
                    user_studies += (row['project_id'],)
        except Exception as e:
            # NOTE(review): cursor/db are leaked on the success path, and on
            # error db is closed before cursor — consider try/finally.
            if db: db.close()
            if cursor: cursor.close()
            raise e

    user_feature_id = None
    # NOTE(review): the line below was corrupted by automated secret
    # scrubbing ('******'). The original presumably detected a 'USER:'
    # feature id, captured user_feature_id, converted feature_id, and
    # disabled TCGA inclusion ("Don't include TCGA") — restore from VCS.
    if feature_id.startswith('USER:'******'t include TCGA include_tcga = False
    return {
        'converted_feature_id': feature_id,
        'include_tcga': include_tcga,
        'user_studies': user_studies,
        'user_feature_id': user_feature_id
    }
def search(self, keyword, field):
    """
    Search miRNA features whose `field` column matches the keyword
    (substring match), up to FOUND_FEATURE_LIMIT rows.

    Args:
        keyword: substring to match.
        field: column name to search; validated before being interpolated
            into the SQL text (the keyword itself is parameterized).

    Returns:
        List of row dicts, each augmented with 'feature_type' and 'label'.

    Raises:
        BackendException: on any MySQL error.
    """
    self.validate_search_field(keyword, field)

    query = (
        "SELECT mirna_name, platform, value_field, internal_feature_id "
        "FROM {table_name} WHERE {search_field} LIKE %s LIMIT %s".format(
            table_name=self.get_table_name(),
            search_field=field
        )
    )

    # Format the keyword for MySQL string matching
    sql_keyword = "%" + keyword + "%"
    query_args = [sql_keyword, FOUND_FEATURE_LIMIT]

    db = None
    cursor = None
    try:
        db = sql_connection()
        cursor = db.cursor(DictCursor)
        cursor.execute(query, tuple(query_args))
        items = [row for row in cursor.fetchall()]

        # Generate human readable labels
        for item in items:
            item["feature_type"] = MIRN_FEATURE_TYPE
            item["label"] = build_feature_label(item)

        return items
    except MySQLError:
        raise BackendException("database error", keyword, field)
    finally:
        # Original never closed the cursor or connection.
        if cursor:
            cursor.close()
        if db:
            db.close()
def convert_user_feature_id(cls, feature_id):
    """
    Resolve a user feature id (e.g. 'USER:1:6') to its shared_map_id.

    The numeric suffix after the last ':' is looked up in
    projects_user_feature_definitions.

    Args:
        feature_id: user feature identifier string.

    Returns:
        The shared_map_id of the last matching row, or None if no row matched.
    """
    bq_id = None
    db = None
    cursor = None
    try:
        db = sql_connection()
        cursor = db.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute(
            """
            SELECT feature_name, bq_map_id, shared_map_id
            FROM projects_user_feature_definitions
            WHERE id = %s
            """,
            (int(feature_id.split(":")[-1]),),
        )
        for row in cursor.fetchall():
            bq_id = row["shared_map_id"]

        logging.debug("UserFeatureProvider.convert_user_feature_id {0} -> {1}".format(feature_id, bq_id))
        return bq_id
    finally:
        # Single cleanup path; the original duplicated the closes and, on
        # error, closed db before cursor (and could hit unbound names).
        if cursor:
            cursor.close()
        if db:
            db.close()
def cloud_storage_file_paths(self, request):
    """
    Takes a sample barcode as a required parameter and returns cloud storage
    paths to files associated with that sample.

    Raises:
        endpoints.BadRequestException: if the request carries unknown keys.
        endpoints.NotFoundException: if no file paths exist for the sample.
    """
    cursor = None
    db = None
    sample_barcode = request.get_assigned_value('sample_barcode')
    platform = request.get_assigned_value('platform')
    pipeline = request.get_assigned_value('pipeline')

    if are_there_bad_keys(request):
        err_msg = construct_parameter_error_message(request, False)
        raise endpoints.BadRequestException(err_msg)

    query_str, query_tuple = CohortsSamplesFilesQueryBuilder().build_query(
        platform=platform, pipeline=pipeline, sample_barcode=sample_barcode)

    try:
        db = sql_connection()
        cursor = db.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute(query_str, query_tuple)
        cursor_rows = cursor.fetchall()
        # add 'cloud_storage_path' to cursor_rows; also reports repositories
        # whose rows must be withheld.
        bad_repo_count, bad_repo_set = CohortsSamplesFilesMessageBuilder().get_GCS_file_paths_and_bad_repos(cursor_rows)
        cloud_storage_path_list = [row['cloud_storage_path'] for row in cursor_rows]
        if bad_repo_count > 0:
            logger.warn("not returning {count} row(s) in sample_details due to repositories: {bad_repo_list}"
                        .format(count=bad_repo_count, bad_repo_list=list(bad_repo_set)))
        return GCSFilePathList(cloud_storage_file_paths=cloud_storage_path_list,
                               count=len(cloud_storage_path_list))
    except (IndexError, TypeError) as e:
        logger.warn(e)
        raise endpoints.NotFoundException("File paths for sample {} not found.".format(sample_barcode))
    finally:
        # Original never closed the cursor or connection.
        if cursor:
            cursor.close()
        if db:
            db.close()
def field_value_search(self, keyword, field):
    """
    Return the distinct values of `field` that match the keyword
    (substring match), up to FOUND_FEATURE_LIMIT rows.

    Args:
        keyword: substring to match.
        field: column name; validated before interpolation into the SQL.

    Returns:
        List of matching values.

    Raises:
        BackendException: on any MySQL error.
    """
    self.validate_field_search_input(keyword, field)

    query = 'SELECT DISTINCT {search_field} FROM {table_name} WHERE {search_field} LIKE %s LIMIT %s'.format(
        table_name=self.get_table_name(),
        search_field=field
    )

    # Format the keyword for MySQL string matching
    sql_keyword = '%' + keyword + '%'
    query_args = [sql_keyword, FOUND_FEATURE_LIMIT]

    logging.debug("CLOUDSQL_QUERY_GEXP_FIELDS: {}".format(query))

    db = None
    cursor = None
    try:
        db = sql_connection()
        cursor = db.cursor(DictCursor)
        cursor.execute(query, tuple(query_args))
        return [row[field] for row in cursor.fetchall()]
    except MySQLError as mse:
        raise BackendException("MySQLError: {}".format(str(mse)))
    finally:
        # Original never closed the cursor or connection.
        if cursor:
            cursor.close()
        if db:
            db.close()
def get_binary_log_filenames():
    """
    Return the list of MySQL binary log file names (via SHOW BINARY LOGS).

    Returns None (after logging) if the result rows cannot be indexed.
    """
    db = sql_connection()
    # Pre-bind so the finally block cannot hit an unbound name when
    # db.cursor() itself raises (the original would NameError there).
    cursor = None
    try:
        cursor = db.cursor()
        cursor.execute('SHOW BINARY LOGS;')
        # First column of each row is the log file name.
        return [row[0] for row in cursor.fetchall()]
    except (TypeError, IndexError) as e:
        # Best-effort: log and fall through (implicitly returns None).
        logger.warn('Error in retrieving binary log filenames: {}'.format(e))
    finally:
        if cursor:
            cursor.close()
        if db:
            db.close()
def from_feature_id(cls, feature_id, project_id=None):
    """
    Build UserFeatureDef instances from a 'USER:<project>:<feature>' id.

    Looks the numeric feature id up in projects_user_feature_definitions and
    returns one instance per matching row.

    Raises:
        FeatureNotFoundException: if feature_id is None or malformed.
    """
    logging.debug("UserFeatureDef.from_feature_id: {0}".format(str([feature_id, project_id])))
    if feature_id is None:
        raise FeatureNotFoundException(feature_id)

    # ID breakdown: project ID:Feature ID
    # Example ID: USER:1:6
    # NOTE(review): '******' below is residue of automated secret scrubbing;
    # the original presumably closed the first string and labeled the first
    # capture group (project ID) — restore from version control.
    regex = re_compile("^USER:"******"([0-9]+):"
                       # Feature ID
                       "([0-9]+)$"
                       )

    feature_fields = regex.findall(feature_id)
    if len(feature_fields) == 0:
        raise FeatureNotFoundException(feature_id)

    project_id, user_feature_id = feature_fields[0]

    try:
        db = sql_connection()
        cursor = db.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute("""
            SELECT bq_map_id, project_id, is_numeric
            FROM projects_user_feature_definitions
            WHERE id = %s
            """, (user_feature_id,))
        results = []
        for row in cursor.fetchall():
            # bq_map_id encodes table/column/symbol; symbol restricts rows.
            bq_table, column_name, symbol = cls.get_table_and_field(row['bq_map_id'])
            filters = None
            if symbol is not None:
                filters = {
                    'Symbol': symbol
                }
            results.append(cls(bq_table, column_name, row['project_id'], row['is_numeric'] == 1, filters))

        cursor.close()
        db.close()

        return results

    except Exception as e:
        # NOTE(review): db is closed before cursor here; consider try/finally.
        if db: db.close()
        if cursor: cursor.close()
        raise e
def get(self, request):
    """
    Returns information about a specific patient, including a list of
    samples and aliquots derived from this patient.
    Takes a patient barcode (of length 12, *eg* TCGA-B9-7268) as a required
    parameter.  User does not need to be authenticated.

    Raises:
        endpoints.NotFoundException: if the barcode is unknown or the
            result rows cannot be read.
    """
    cursor = None
    db = None

    patient_barcode = request.get_assigned_value('patient_barcode')
    query_tuple = (str(patient_barcode),)
    clinical_query_str, sample_query_str, aliquot_query_str = PatientsGetQueryBuilder().build_queries()

    try:
        db = sql_connection()
        cursor = db.cursor(MySQLdb.cursors.DictCursor)

        # build clinical data message
        cursor.execute(clinical_query_str, query_tuple)
        row = cursor.fetchone()
        if row is None:
            logger.warn("Patient barcode {} not found in metadata_clinical table.".format(patient_barcode))
            raise endpoints.NotFoundException("Patient barcode {} not found".format(patient_barcode))
        constructor_dict = build_constructor_dict_for_message(MetadataItem(), row)
        clinical_data_item = MetadataItem(**constructor_dict)

        # get list of samples
        cursor.execute(sample_query_str, query_tuple)
        sample_list = [row['sample_barcode'] for row in cursor.fetchall()]

        # get list of aliquots
        cursor.execute(aliquot_query_str, query_tuple)
        aliquot_list = [row['AliquotBarcode'] for row in cursor.fetchall()]

        return PatientDetails(clinical_data=clinical_data_item, samples=sample_list, aliquots=aliquot_list)
    except (IndexError, TypeError) as e:
        logger.info("Patient {} not found. Error: {}".format(patient_barcode, e))
        raise endpoints.NotFoundException("Patient {} not found.".format(patient_barcode))
    finally:
        # Original only closed cursor/db in the not-found branch,
        # leaking them on every other path.
        if cursor:
            cursor.close()
        if db:
            db.close()
def search(self, parameters):
    """
    Search methylation features matching the given parameter substrings.

    Args:
        parameters: dict that may contain 'gene_name', 'probe_name',
            'platform', 'relation_to_gene' and 'relation_to_island';
            missing keys match anything.

    Returns:
        List of row dicts augmented with 'feature_type' and 'label',
        at most FOUND_FEATURE_LIMIT rows.

    Raises:
        BackendException: on any MySQL error.
    """
    self.validate_feature_search_input(parameters)

    query = 'SELECT gene_name, probe_name, platform, relation_to_gene, relation_to_island, ' \
            'value_field, internal_feature_id ' \
            'FROM {table_name} ' \
            'WHERE gene_name LIKE %s ' \
            'AND probe_name LIKE %s ' \
            'AND platform LIKE %s ' \
            'AND relation_to_gene LIKE %s ' \
            'AND relation_to_island LIKE %s ' \
            'LIMIT %s'.format(table_name=self.get_table_name()
                              )

    # Fills in '' for fields that were not specified in the parameters.
    # Renamed from 'input' to avoid shadowing the builtin.
    params = defaultdict(lambda: '', parameters)

    # Format the keyword for MySQL string matching
    query_args = ['%' + params['gene_name'] + '%',
                  '%' + params['probe_name'] + '%',
                  '%' + params['platform'] + '%',
                  '%' + params['relation_to_gene'] + '%',
                  '%' + params['relation_to_island'] + '%',
                  FOUND_FEATURE_LIMIT]

    db = None
    cursor = None
    try:
        db = sql_connection()
        cursor = db.cursor(DictCursor)
        cursor.execute(query, tuple(query_args))
        items = [row for row in cursor.fetchall()]

        # Generate human readable labels
        for item in items:
            item['feature_type'] = METH_FEATURE_TYPE
            item['label'] = self.build_feature_label(item)

        return items
    except MySQLError:
        raise BackendException('database error')
    finally:
        # Original never closed the cursor or connection.
        if cursor:
            cursor.close()
        if db:
            db.close()
def get_cohort_info(cls, cohort_id_array):
    """
    Return summary info ({'id', 'name', 'size'}) for each requested cohort,
    where size is the number of samples in the cohort (0 if empty, via the
    LEFT JOIN).

    Raises:
        CohortException: on any database error.
    """
    # Generate the 'IN' statement string: (%s, %s, ..., %s)
    cohort_id_stmt = ', '.join(['%s' for x in xrange(len(cohort_id_array))])
    query_template = ("SELECT ti.id AS cohort_id, ti.name, COUNT(ts.sample_barcode) AS size "
                      "FROM {cohort_info_table} ti "
                      "  LEFT JOIN {cohort_samples_table} ts ON ts.cohort_id = ti.id "
                      "WHERE ti.id IN ({cohort_id_stmt}) "
                      "GROUP BY ti.id, ti.name")
    query = query_template.format(
        cohort_info_table=DJANGO_COHORT_INFO_TABLE,
        cohort_samples_table=DJANGO_COHORT_SAMPLES_TABLE,
        cohort_id_stmt=cohort_id_stmt)

    db = None
    cursor = None
    try:
        db = sql_connection()
        cursor = db.cursor(DictCursor)
        cursor.execute(query, tuple(cohort_id_array))
        return [{
            'id': row['cohort_id'],
            'name': row['name'],
            'size': row['size']
        } for row in cursor.fetchall()]
    except Exception as e:
        # Py2-only "print >> sys.stdout" replaced with portable writes;
        # deprecated e.message replaced with str(e).
        sys.stdout.write("[ERROR] In get_cohort_info: \n")
        sys.stdout.write("{}\n".format(e))
        sys.stdout.write("{}\n".format(traceback.format_exc()))
        raise CohortException('get_cohort_info CloudSQL error, cohort IDs {cohort_ids}: {message}'.format(
            cohort_ids=cohort_id_array, message=str(e)))
    finally:
        # Original leaked cursor/db whenever the query raised.
        if cursor:
            cursor.close()
        if db:
            db.close()
def search(self, parameters):
    """
    Search gene expression features matching the given parameter substrings.

    Args:
        parameters: dict that may contain 'gene_name', 'platform' and
            'center'; missing keys match anything.

    Returns:
        List of row dicts augmented with 'feature_type' and 'label',
        at most FOUND_FEATURE_LIMIT rows.

    Raises:
        BackendException: on any MySQL error.
    """
    self.validate_feature_search_input(parameters)

    query = 'SELECT gene_name, platform, center, value_label, internal_feature_id' \
            ' FROM {table_name}' \
            ' WHERE gene_name LIKE %s' \
            ' AND platform LIKE %s' \
            ' AND center LIKE %s' \
            ' LIMIT %s'.format(table_name=self.get_table_name()
                               )
    logging.debug("CLOUDSQL_QUERY_GEXP_SEARCH: {}".format(query))

    # Fills in '' for fields that were not specified in the parameters.
    # Renamed from 'input' to avoid shadowing the builtin.
    params = defaultdict(lambda: '', parameters)

    # Format each field for MySQL substring matching
    query_args = ['%' + params['gene_name'] + '%',
                  '%' + params['platform'] + '%',
                  '%' + params['center'] + '%',
                  FOUND_FEATURE_LIMIT]

    db = None
    cursor = None
    try:
        db = sql_connection()
        cursor = db.cursor(DictCursor)
        cursor.execute(query, tuple(query_args))
        items = [row for row in cursor.fetchall()]

        # Generate human readable labels
        for item in items:
            item['feature_type'] = GEXP_FEATURE_TYPE
            item['label'] = build_feature_label(item['gene_name'], item)

        return items
    except MySQLError as mse:
        raise BackendException("MySQLError: {}".format(str(mse)))
    finally:
        # Original never closed the cursor or connection.
        if cursor:
            cursor.close()
        if db:
            db.close()
def preview(self, request):
    """
    Takes a JSON object of filters in the request body and returns a
    "preview" of the cohort that would result from passing a similar request
    to the cohort **save** endpoint.  This preview consists of two lists:
    the lists of participant (aka patient) barcodes, and the list of sample
    barcodes.  Authentication is not required.

    Raises:
        endpoints.BadRequestException: if the request carries bad/no keys.
        endpoints.NotFoundException: if the result rows cannot be read.
    """
    patient_cursor = None
    sample_cursor = None
    db = None

    if are_there_bad_keys(request) or are_there_no_acceptable_keys(request):
        err_msg = construct_parameter_error_message(request, True)
        raise endpoints.BadRequestException(err_msg)

    query_dict, gte_query_dict, lte_query_dict = CohortsCreatePreviewQueryBuilder().build_query_dictionaries(
        request
    )
    patient_query_str, sample_query_str, value_tuple = CohortsCreatePreviewQueryBuilder().build_query(
        query_dict, gte_query_dict, lte_query_dict
    )

    patient_barcodes = []
    sample_barcodes = []

    try:
        db = sql_connection()
        patient_cursor = db.cursor(MySQLdb.cursors.DictCursor)
        patient_cursor.execute(patient_query_str, value_tuple)
        for row in patient_cursor.fetchall():
            patient_barcodes.append(row["case_barcode"])

        sample_cursor = db.cursor(MySQLdb.cursors.DictCursor)
        sample_cursor.execute(sample_query_str, value_tuple)
        for row in sample_cursor.fetchall():
            sample_barcodes.append(row["sample_barcode"])
    except (IndexError, TypeError) as e:
        logger.warn(e)
        raise endpoints.NotFoundException("Error retrieving samples or patients: {}".format(e))
    finally:
        # Original never closed either cursor or the connection.
        if patient_cursor:
            patient_cursor.close()
        if sample_cursor:
            sample_cursor.close()
        if db:
            db.close()
def googlegenomics(self, request):
    """
    Takes a sample barcode as a required parameter and returns the Google
    Genomics dataset id and readgroupset id associated with the sample, if any.

    Raises:
        endpoints.NotFoundException: if the result rows cannot be read.
    """
    cursor = None
    db = None
    sample_barcode = request.get_assigned_value('sample_barcode')
    query_str = 'SELECT SampleBarcode, GG_dataset_id, GG_readgroupset_id ' \
                'FROM metadata_data ' \
                'WHERE SampleBarcode=%s ' \
                'AND GG_dataset_id !="" AND GG_readgroupset_id !="" ' \
                'GROUP BY SampleBarcode, GG_dataset_id, GG_readgroupset_id;'
    query_tuple = (sample_barcode,)

    try:
        db = sql_connection()
        cursor = db.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute(query_str, query_tuple)

        google_genomics_items = [
            GoogleGenomics(
                SampleBarcode=row['SampleBarcode'],
                GG_dataset_id=row['GG_dataset_id'],
                GG_readgroupset_id=row['GG_readgroupset_id']
            )
            for row in cursor.fetchall()
        ]
        return GoogleGenomicsList(items=google_genomics_items, count=len(google_genomics_items))
    except (IndexError, TypeError) as e:
        logger.warn(e)
        raise endpoints.NotFoundException(
            "Google Genomics dataset and readgroupset id's for sample {} not found."
            .format(sample_barcode))
    finally:
        # Original never closed the cursor or connection.
        if cursor:
            cursor.close()
        if db:
            db.close()
def from_feature_id(cls, feature_id, study_id=None):
    """
    Build UserFeatureDef instances for a shared feature id.

    Looks feature_id up as shared_map_id in
    projects_user_feature_definitions and returns one instance per
    matching row.

    Raises:
        FeatureNotFoundException: if feature_id is None.
    """
    logging.debug("UserFeatureDef.from_feature_id: {0}".format(str([feature_id, study_id])))
    if feature_id is None:
        raise FeatureNotFoundException(feature_id)

    db = None
    cursor = None
    try:
        db = sql_connection()
        cursor = db.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute(
            """
            SELECT bq_map_id, study_id, is_numeric
            FROM projects_user_feature_definitions
            WHERE shared_map_id = %s
            """,
            (feature_id,),
        )

        results = []
        for row in cursor.fetchall():
            # bq_map_id encodes table/column/symbol; symbol restricts rows.
            bq_table, column_name, symbol = cls.get_table_and_field(row["bq_map_id"])
            filters = {"Symbol": symbol} if symbol is not None else None
            results.append(cls(bq_table, column_name, row["study_id"], row["is_numeric"] == 1, filters))

        return results
    finally:
        # Single cleanup path; the original duplicated the closes and, on
        # error, closed db before cursor.
        if cursor:
            cursor.close()
        if db:
            db.close()
def fetch_isbcgc_project_set():
    """
    Return the cached list of ISB-CGC project ids, populating the module
    cache ISB_CGC_PROJECTS['list'] from the get_isbcgc_project_set stored
    procedure on first use (if the sproc exists).

    Returns None (after logging) on database errors.
    """
    cursor = None
    # Pre-bind so the finally block cannot hit an unbound name when
    # sql_connection() itself raises (the original would NameError there).
    db = None
    try:
        db = sql_connection()
        if not ISB_CGC_PROJECTS['list'] or len(ISB_CGC_PROJECTS['list']) <= 0:
            cursor = db.cursor()
            cursor.execute("SELECT COUNT(SPECIFIC_NAME) FROM INFORMATION_SCHEMA.ROUTINES WHERE SPECIFIC_NAME = 'get_isbcgc_project_set';")
            # Only try to fetch the study set if the sproc exists
            if cursor.fetchall()[0][0] > 0:
                cursor.execute("CALL get_isbcgc_project_set();")
                ISB_CGC_PROJECTS['list'] = []
                for row in cursor.fetchall():
                    ISB_CGC_PROJECTS['list'].append(row[0])
            else:
                # Otherwise just warn
                logger.warn("[WARNING] Stored procedure get_isbcgc_project_set was not found!")
        return ISB_CGC_PROJECTS['list']
    except Exception as e:
        logger.error(e)
        logger.error(traceback.format_exc())
    finally:
        if cursor:
            cursor.close()
        if db and db.open:
            db.close()
Cohort_Perms.objects.get(cohort_id=cohort_id, user_id=user_id) except (ObjectDoesNotExist, MultipleObjectsReturned), e: logger.warn(e) err_msg = "Error retrieving cohort {} for user {}: {}".format(cohort_id, user_email, e) if 'Cohort_Perms' in e.message: err_msg = "User {} does not have permissions on cohort {}. " \ "Error: {}".format(user_email, cohort_id, e) raise endpoints.UnauthorizedException(err_msg) finally: request_finished.send(self) query_str, query_tuple = CohortsSamplesFilesQueryBuilder().build_query( platform=platform, pipeline=pipeline, limit=limit, cohort_id=cohort_id) try: db = sql_connection() cursor = db.cursor(MySQLdb.cursors.DictCursor) cursor.execute(query_str, query_tuple) cursor_rows = cursor.fetchall() bad_repo_count, bad_repo_set = CohortsSamplesFilesMessageBuilder().get_GCS_file_paths_and_bad_repos(cursor_rows) cloud_storage_path_list = [row['cloud_storage_path'] for row in cursor_rows] if bad_repo_count > 0: logger.warn("not returning {count} row(s) in sample_details due to repositories: {bad_repo_list}" .format(count=bad_repo_count, bad_repo_list=list(bad_repo_set))) return GCSFilePathList(cloud_storage_file_paths=cloud_storage_path_list, count=len(cloud_storage_path_list)) except (IndexError, TypeError), e: logger.warn(e) raise endpoints.NotFoundException("File paths for cohort {} not found.".format(cohort_id)) except MySQLdb.ProgrammingError as e: logger.warn("Error retrieving file paths. {}".format(e))
def seqpeek_view_data(self, request):
    """
    Build the SeqPeek visualization payload for a gene (hugo_symbol) across
    the request's cohorts.

    Restricts the MAF query to studies that are TCGA studies or extend one,
    then formats the resulting mutation vector for the view.  Returns an
    empty SeqPeekViewRecord when no MAF rows are found.

    Raises:
        InternalServerErrorException: on any unexpected error (logged).
    """
    try:
        hugo_symbol = request.hugo_symbol
        cohort_id_array = request.cohort_id

        gnab_feature_id = self.build_gnab_feature_id(hugo_symbol)
        # Typo fix: message previously read "SeqPeke".
        logging.debug("GNAB feature ID for SeqPeek: {0}".format(gnab_feature_id))

        # Lifted from api/data_access.py line 509+
        # Get the study IDs these cohorts' samples come from
        cohort_vals = ()
        cohort_params = ""
        for cohort in cohort_id_array:
            cohort_params += "%s,"
            cohort_vals += (cohort,)
        cohort_params = cohort_params[:-1]

        db = sql_connection()
        cursor = db.cursor()
        try:
            tcga_studies = fetch_isbcgc_project_set()
            cursor.execute("SELECT DISTINCT study_id FROM cohorts_samples WHERE cohort_id IN (" + cohort_params + ");", cohort_vals)

            # Only samples whose source studies are TCGA studies, or extended from them, should be used
            confirmed_study_ids = []
            unconfirmed_study_ids = []
            for row in cursor.fetchall():
                if row[0] in tcga_studies:
                    if row[0] not in confirmed_study_ids:
                        confirmed_study_ids.append(row[0])
                elif row[0] not in unconfirmed_study_ids:
                    unconfirmed_study_ids.append(row[0])
        finally:
            # Original leaked the cursor and connection.
            cursor.close()
            db.close()

        if len(unconfirmed_study_ids) > 0:
            studies = Study.objects.filter(id__in=unconfirmed_study_ids)
            for study in studies:
                if study.get_my_root_and_depth()['root'] in tcga_studies:
                    confirmed_study_ids.append(study.id)

        async_params = [ProviderClassQueryDescription(SeqPeekDataProvider, gnab_feature_id, cohort_id_array, confirmed_study_ids)]
        maf_data_result = get_feature_vectors_tcga_only(async_params, skip_formatting_for_plot=True)
        maf_data_vector = maf_data_result[gnab_feature_id]['data']

        if len(maf_data_vector) > 0:
            # Since the gene (hugo_symbol) parameter is part of the GNAB feature ID,
            # it will be sanity-checked in the SeqPeekMAFDataAccess instance.
            seqpeek_data = SeqPeekMAFDataFormatter().format_maf_vector_for_view(maf_data_vector, cohort_id_array)
            seqpeek_maf_vector = seqpeek_data.maf_vector
            seqpeek_cohort_info = seqpeek_data.cohort_info
            removed_row_statistics_dict = seqpeek_data.removed_row_statistics

            seqpeek_view_data = SeqPeekViewDataBuilder().build_view_data(hugo_symbol,
                                                                        seqpeek_maf_vector,
                                                                        seqpeek_cohort_info,
                                                                        cohort_id_array,
                                                                        removed_row_statistics_dict)
            response = self.create_response(seqpeek_view_data)
            return response
        else:
            # No data found
            return SeqPeekViewRecord(plot_data=SeqPeekViewPlotDataRecord(tracks=[], protein=None, regions=[]),
                                     hugo_symbol=hugo_symbol,
                                     cohort_id_list=[str(i) for i in cohort_id_array],
                                     removed_row_statistics=[])
    except Exception as e:
        logging.exception(e)
        raise InternalServerErrorException()
def get_feature_vector(feature_id, cohort_id_array):
    # Fetch the value vector for feature_id over the samples of the given
    # cohorts, merging TCGA-sourced data and user-study data as applicable.
    include_tcga = False
    user_studies = ()
    for cohort_id in cohort_id_array:
        try:
            db = sql_connection()
            cursor = db.cursor(MySQLdb.cursors.DictCursor)
            cursor.execute("SELECT project_id FROM cohorts_samples WHERE cohort_id = %s GROUP BY project_id", (cohort_id,))
            for row in cursor.fetchall():
                # A NULL project_id marks a sample from the TCGA (non-user) source.
                if row['project_id'] is None:
                    include_tcga = True
                else:
                    user_studies += (row['project_id'],)
        except Exception as e:
            # NOTE(review): cursor/db leak on the success path; on error db
            # is closed before cursor — consider try/finally.
            if db: db.close()
            if cursor: cursor.close()
            raise e

    # ex: feature_id 'CLIN:Disease_Code'
    user_feature_id = None
    # NOTE(review): the line below was corrupted by automated secret
    # scrubbing ('******'). The original presumably detected a 'USER:'
    # feature id, captured user_feature_id, and disabled TCGA inclusion
    # ("Don't include TCGA") — restore from version control.
    if feature_id.startswith('USER:'******'t include TCGA include_tcga = False
    items = []
    type = None
    result = []
    cohort_settings = settings.GET_BQ_COHORT_SETTINGS()
    if include_tcga:
        provider = FeatureProviderFactory.from_feature_id(feature_id)
        result = provider.get_data(cohort_id_array, cohort_settings.dataset_id, cohort_settings.table_id)

        # ex: result[0]
        # {'aliquot_id': None, 'case_id': u'TCGA-BH-A0B1', 'sample_id': u'TCGA-BH-A0B1-10A', 'value': u'BRCA'}
        for data_point in result:
            data_item = {key: data_point[key] for key in ['case_id', 'sample_id', 'aliquot_id']}
            value = provider.process_data_point(data_point)
            # TODO refactor missing value logic
            if value is None:
                value = 'NA'
            data_item['value'] = value
            items.append(data_item)

        type = provider.get_value_type()

    if len(user_studies) > 0:
        # Query User Data
        user_provider = UserFeatureProvider(feature_id, user_feature_id=user_feature_id)
        user_result = user_provider.get_data(cohort_id_array, cohort_settings.dataset_id, cohort_settings.table_id)
        result.extend(user_result)

        for data_point in user_result:
            data_item = {key: data_point[key] for key in ['case_id', 'sample_id', 'aliquot_id']}
            # NOTE(review): 'provider' is only bound in the include_tcga
            # branch above; this raises NameError for user-only cohorts —
            # presumably user_provider was intended. TODO confirm.
            value = provider.process_data_point(data_point)
            # TODO refactor missing value logic
            if value is None:
                value = 'NA'
            data_item['value'] = value
            items.append(data_item)

        if not type:
            type = user_provider.get_value_type()

    return type, items
def from_user_feature_id(cls, feature_id): logging.debug("UserFeatureDef.from_user_feature_id {0}".format(str([feature_id]))) # ID breakdown: Study ID:Feature ID # Example ID: USER:1:6 regex = re_compile( "^USER:"******"([0-9]+):" # Feature ID "([0-9]+)$" ) feature_fields = regex.findall(feature_id) if len(feature_fields) == 0: raise FeatureNotFoundException(feature_id) study_id, user_feature_id = feature_fields[0] bq_id = None shared_id = None is_numeric = False try: db = sql_connection() cursor = db.cursor(MySQLdb.cursors.DictCursor) cursor.execute( """ SELECT feature_name, bq_map_id, shared_map_id, is_numeric FROM projects_user_feature_definitions WHERE id = %s """, (user_feature_id,), ) for row in cursor.fetchall(): if row["shared_map_id"]: shared_id = row["shared_map_id"] bq_id = row["bq_map_id"] is_numeric = row["is_numeric"] == 1 cursor.close() db.close() except Exception as e: if db: db.close() if cursor: cursor.close() raise e if shared_id is not None: return cls.from_feature_id(bq_id, study_id) if bq_id is None: raise FeatureNotFoundException(feature_id) # Else we're querying a very specific feature from a specific study bq_table, column_name, symbol = cls.get_table_and_field(bq_id) if bq_table is None or column_name is None: raise FeatureNotFoundException(feature_id) logging.debug("{0} {1} {2}".format(bq_table, column_name, symbol)) filters = None if symbol is not None: filters = {"Symbol": symbol} return [cls(bq_table, column_name, study_id, is_numeric, filters)]
def data_access_for_plot(self, request):
    """
    Used by the web application.

    Validates the requested feature ids, restricts the cohorts' samples to
    projects that are TCGA projects (or extend one), and returns the merged
    feature vectors for plotting.

    Raises:
        NotFoundException: if any requested feature id is invalid.
        InternalServerErrorException: on any unexpected error (logged).
    """
    try:
        x_id = request.x_id
        y_id = request.y_id
        c_id = request.c_id
        logTransform = json.loads(request.log_transform)
        cohort_id_array = request.cohort_id

        # Check that all requested feature identifiers are valid. Do not check for y_id if it is not
        # supplied in the request.
        feature_ids_to_check = [x_id]
        if c_id is not None:
            feature_ids_to_check.append(c_id)
        if y_id is not None:
            feature_ids_to_check.append(y_id)

        valid_features = self.get_feature_id_validity_for_array(feature_ids_to_check)

        for feature_id, is_valid in valid_features:
            logging.info((feature_id, is_valid))
            if not is_valid:
                logging.error("Invalid internal feature ID '{}'".format(feature_id))
                raise NotFoundException()

        # Get the project IDs these cohorts' samples come from
        cohort_vals = ()
        cohort_params = ""
        for cohort in cohort_id_array:
            cohort_params += "%s,"
            cohort_vals += (cohort,)
        cohort_params = cohort_params[:-1]

        db = sql_connection()
        cursor = db.cursor()
        try:
            tcga_studies = fetch_isbcgc_project_set()
            cursor.execute("SELECT DISTINCT project_id FROM cohorts_samples WHERE cohort_id IN ("+cohort_params+");", cohort_vals)

            # Only samples whose source studies are TCGA studies, or extended from them, should be used
            confirmed_study_ids = []
            unconfirmed_study_ids = []
            for row in cursor.fetchall():
                if row[0] in tcga_studies:
                    if row[0] not in confirmed_study_ids:
                        confirmed_study_ids.append(row[0])
                elif row[0] not in unconfirmed_study_ids:
                    unconfirmed_study_ids.append(row[0])
        finally:
            # Original leaked the cursor and connection.
            cursor.close()
            db.close()

        if len(unconfirmed_study_ids) > 0:
            projects = Project.objects.filter(id__in=unconfirmed_study_ids)
            for project in projects:
                if project.get_my_root_and_depth()['root'] in tcga_studies:
                    confirmed_study_ids.append(project.id)

        return self.get_merged_feature_vectors(x_id, y_id, c_id, cohort_id_array, logTransform, confirmed_study_ids)
    except NotFoundException as nfe:
        # Pass through NotFoundException so that it is not handled as Exception below.
        raise nfe
    except Exception as e:
        logger.exception(e)
        raise InternalServerErrorException()
def annotations(self, request):
    """
    Returns TCGA annotations about a specific aliquot.  Takes an aliquot
    barcode (of length 28, *eg* TCGA-01-0628-11A-01D-0358-06) as a required
    parameter.  User does not need to be authenticated.
    """
    cursor = None
    db = None
    aliquot_barcode = request.get_assigned_value('aliquot_barcode')
    query_tuple = (str(aliquot_barcode),)

    # check to make sure aliquot_barcode is in correct form
    # (seven dash-separated segments of fixed widths; segment 5 may be 2 or 3)
    try:
        parts = aliquot_barcode.split('-')
        assert len(parts) == 7
        assert len(parts[0]) == 4
        assert len(parts[1]) == 2
        assert len(parts[2]) == 4
        assert len(parts[3]) == 3
        assert len(parts[4]) in [2, 3]
        assert len(parts[5]) == 4
        assert len(parts[6]) == 2
    except AssertionError:
        raise endpoints.BadRequestException('{} is not the correct format for a aliquot barcode. '
                                            'Aliquot barcodes must be of the form XXXX-XX-XXXX-XXX-XXX-XXXX-XX '
                                            'or XXXX-XX-XXXX-XXX-XX-XXXX-XX.'.format(aliquot_barcode))

    item_type_name = request.get_assigned_value('item_type_name')
    # check to make sure each item_type_name is valid; each valid value is
    # appended to the query parameter tuple.
    if len(item_type_name) > 0:
        for itm in item_type_name:
            itm = itm.strip()
            if itm.lower() not in ['patient', 'aliquot', 'analyte', 'shipped portion', 'portion', 'slide', 'sample']:
                raise endpoints.BadRequestException("'{}' is not a valid entry for item_type_name. "
                                                    "Valid entries include 'Patient', 'Aliquot', 'Analyte', 'Shipped Portion', "
                                                    "'Portion', 'Slide', and 'Sample'".format(itm))
            query_tuple += (itm,)

    query_str = AliquotsAnnotationsQueryBuilder().build_query(item_type_name=item_type_name)
    metadata_samples_query_str = AliquotsAnnotationsQueryBuilder().build_metadata_samples_query()

    try:
        db = sql_connection()
        cursor = db.cursor(MySQLdb.cursors.DictCursor)

        # build annotation message
        cursor.execute(query_str, query_tuple)
        rows = cursor.fetchall()
        # second query distinguishes "unknown barcode" from "no annotations"
        cursor.execute(metadata_samples_query_str, (str(aliquot_barcode),))
        metadata_sample_rows = cursor.fetchall()

        if len(rows) == 0:
            cursor.close()
            db.close()
            if len(metadata_sample_rows) == 0:
                msg = "Aliquot barcode {} not found in the database.".format(aliquot_barcode)
                logger.info(msg)
            else:
                msg = "No annotations found for aliquot barcode {}".format(aliquot_barcode)
                if item_type_name is not None:
                    msg += " and item type name {}. Item type name must be one of the following: " \
                           "'Patient', 'Aliquot', 'Analyte', 'Shipped Portion', 'Portion', 'Slide', 'Sample'.".format(item_type_name)
                logger.info(msg)
            raise endpoints.NotFoundException(msg)

        items = []
        for row in rows:
            constructor_dict = build_constructor_dict_for_message(MetadataAnnotationItem(), row)
            items.append(MetadataAnnotationItem(**constructor_dict))

        return MetadataAnnotationList(items=items, count=len(items))

    except (IndexError, TypeError), e:
        # NOTE(review): cursor/db are leaked on the success path and on this
        # error path; only the "no rows" branch closes them. Consider finally.
        logger.info("Aliquot {} not found. Error: {}".format(aliquot_barcode, e))
        raise endpoints.NotFoundException("Aliquot {} not found.".format(aliquot_barcode))
def get(self, request):
    """
    Given a sample barcode (of length 16, *eg* TCGA-B9-7268-01A), this
    endpoint returns all available "biospecimen" information about this
    sample, the associated patient barcode, a list of associated aliquots,
    and a list of "data_details" blocks describing each of the data files
    associated with this sample.
    """
    cursor = None
    db = None
    sample_barcode = request.get_assigned_value('sample_barcode')
    pipeline = request.get_assigned_value('pipeline')
    platform = request.get_assigned_value('platform')

    aliquot_query_str = SamplesGetQueryBuilder().build_aliquot_query(platform=platform, pipeline=pipeline)
    biospecimen_query_str = SamplesGetQueryBuilder().build_biospecimen_query()
    data_query_str = SamplesGetQueryBuilder().build_data_query(platform=platform, pipeline=pipeline)
    patient_query_str = SamplesGetQueryBuilder().build_patient_query()

    query_tuple = (str(sample_barcode),)
    # aliquot/data queries take optional pipeline/platform filters appended
    # in the same order the builders emit their placeholders.
    extra_query_tuple = query_tuple
    if pipeline is not None:
        extra_query_tuple += (pipeline,)
    if platform is not None:
        extra_query_tuple += (platform,)

    try:
        db = sql_connection()
        cursor = db.cursor(MySQLdb.cursors.DictCursor)

        # build biospecimen data message
        cursor.execute(biospecimen_query_str, query_tuple)
        row = cursor.fetchone()
        if row is None:
            cursor.close()
            db.close()
            error_message = "Sample barcode {} not found in metadata_biospecimen table.".format(sample_barcode)
            raise endpoints.NotFoundException(error_message)
        constructor_dict = build_constructor_dict_for_message(MetadataItem(), row)
        biospecimen_data_item = MetadataItem(**constructor_dict)

        # get list of aliquots
        cursor.execute(aliquot_query_str, extra_query_tuple)
        aliquot_list = [row['AliquotBarcode'] for row in cursor.fetchall()]

        # get patient barcode (superfluous?)
        cursor.execute(patient_query_str, query_tuple)
        row = cursor.fetchone()
        # NOTE(review): raises TypeError (caught below as not-found) if the
        # patient query returns no row — presumably intentional. TODO confirm.
        patient_barcode = str(row["case_barcode"])

        # prepare to build list of data details messages
        cursor.execute(data_query_str, extra_query_tuple)
        cursor_rows = cursor.fetchall()
        # update every dictionary in cursor_rows to contain the full cloud_storage_path for each sample
        bad_repo_count, bad_repo_set = \
            CohortsSamplesFilesMessageBuilder().get_GCS_file_paths_and_bad_repos(cursor_rows)
        if bad_repo_count > 0:
            logger.warn("not returning {count} row(s) in sample_details due to repositories: {bad_repo_list}"
                        .format(count=bad_repo_count, bad_repo_list=list(bad_repo_set)))

        # build a data details message for each row returned from metadata_data table
        data_details_list = []
        for row in cursor_rows:
            constructor_dict = build_constructor_dict_for_message(DataDetails(), row)
            data_details_item = DataDetails(**constructor_dict)
            data_details_list.append(data_details_item)

        # NOTE(review): this duplicates the bad-repo warning logged above.
        if bad_repo_count > 0:
            logger.warn("not returning {count} row(s) in sample_details due to repositories: {bad_repo_list}"
                        .format(count=bad_repo_count, bad_repo_list=list(bad_repo_set)))

        return SampleDetails(aliquots=aliquot_list,
                             biospecimen_data=biospecimen_data_item,
                             data_details=data_details_list,
                             data_details_count=len(data_details_list),
                             patient=patient_barcode)

    except (IndexError, TypeError) as e:
        logger.info("Sample details for barcode {} not found. Error: {}".format(sample_barcode, e))
        raise endpoints.NotFoundException(
            "Sample details for barcode {} not found.".format(sample_barcode))
    except MySQLdb.ProgrammingError as e:
        logger.warn(e)
        raise endpoints.BadRequestException("Error retrieving biospecimen, patient, or other data. {}".format(e))
    finally:
        if cursor:
            cursor.close()
        if db and db.open:
            db.close()