def do_query(project_id, project_name, dataset_name, table_name, gene_symbol, value_field,
             cohort_dataset, cohort_table, cohort_id_array):
    """Run the query produced by build_query and return one record per result row.

    Args:
        project_id: Passed as ``projectId`` to the ``jobs().query`` call.
        project_name: Forwarded to build_query.
        dataset_name: Forwarded to build_query.
        table_name: Forwarded to build_query.
        gene_symbol: Forwarded to build_query.
        value_field: Forwarded to build_query.
        cohort_dataset: Forwarded to build_query.
        cohort_table: Forwarded to build_query.
        cohort_id_array: Forwarded to build_query.

    Returns:
        A list of dicts with the keys 'patient_id', 'sample_id' and
        'aliquot_id' (result columns 0-2, as returned) and 'value' (result
        column 3 converted to float). An empty list is returned when the
        response reports zero total rows.
    """
    service = authorize_credentials_with_Google()
    sql = build_query(project_name, dataset_name, table_name, gene_symbol, value_field,
                      cohort_dataset, cohort_table, cohort_id_array)
    response = service.jobs().query(projectId=project_id, body={'query': sql}).execute()

    # Check the reported row count first so 'rows' is never touched for an
    # empty result.
    if int(response['totalRows']) == 0:
        return []

    return [
        {
            'patient_id': cells[0]['v'],
            'sample_id': cells[1]['v'],
            'aliquot_id': cells[2]['v'],
            'value': float(cells[3]['v']),
        }
        for cells in (row['f'] for row in response['rows'])
    ]
def _streaming_insert(self, rows):
    """Send ``rows`` to this object's table through ``tabledata().insertAll``.

    Args:
        rows: Handed to ``self._build_request_body_from_rows`` to produce the
            request body.

    Returns:
        Whatever executing the insertAll request returns.
    """
    tabledata_api = authorize_credentials_with_Google().tabledata()
    request_body = self._build_request_body_from_rows(rows)
    insert_request = tabledata_api.insertAll(
        projectId=self.project_id,
        datasetId=self.dataset_id,
        tableId=self.table_id,
        body=request_body,
    )
    return insert_request.execute()
def get_feature_vectors_tcga_only(params_array, poll_retry_limit=20, skip_formatting_for_plot=False):
    """Submit one TCGA job per parameter object and return the collected results.

    Args:
        params_array: Iterable of parameter objects; each one is passed to
            submit_tcga_job.
        poll_retry_limit: Forwarded to get_submitted_job_results.
        skip_formatting_for_plot: Forwarded to get_submitted_job_results.

    Returns:
        The value returned by get_submitted_job_results for the submitted jobs.
    """
    service = authorize_credentials_with_Google()
    cohort_settings = settings.GET_BQ_COHORT_SETTINGS()

    # Submit jobs
    submitted_jobs = [
        submit_tcga_job(params, service, cohort_settings)
        for params in params_array
    ]

    return get_submitted_job_results(
        submitted_jobs,
        settings.BQ_PROJECT_ID,
        poll_retry_limit,
        skip_formatting_for_plot,
    )
def do_query(
    project_id,
    project_name,
    dataset_name,
    table_name,
    gene_label,
    value_field,
    cohort_dataset,
    cohort_table,
    cohort_id_array,
):
    """Run the query produced by build_query and return two records per result row.

    Each result row is read as six columns. Column 0 is the patient id and
    column 5 is the value; columns (1, 2) and (3, 4) are two
    (sample id, aliquot id) pairs. One record is emitted per pair, both
    sharing the row's patient id and value.

    Args:
        project_id: Passed as ``projectId`` to the ``jobs().query`` call.
        project_name: Forwarded to build_query.
        dataset_name: Forwarded to build_query.
        table_name: Forwarded to build_query.
        gene_label: Forwarded to build_query.
        value_field: Forwarded to build_query.
        cohort_dataset: Forwarded to build_query.
        cohort_table: Forwarded to build_query.
        cohort_id_array: Forwarded to build_query.

    Returns:
        A list of dicts with the keys "patient_id", "sample_id", "aliquot_id"
        and "value"; the value is passed through exactly as returned, without
        conversion. An empty list is returned when the response reports zero
        total rows.
    """
    service = authorize_credentials_with_Google()
    sql = build_query(
        project_name, dataset_name, table_name, gene_label, value_field, cohort_dataset, cohort_table, cohort_id_array
    )
    response = service.jobs().query(projectId=project_id, body={"query": sql}).execute()

    records = []
    if int(response["totalRows"]) == 0:
        return records

    # (sample column, aliquot column) for the two records derived from a row.
    id_column_pairs = ((1, 2), (3, 4))
    for row in response["rows"]:
        cells = row["f"]
        for sample_col, aliquot_col in id_column_pairs:
            records.append(
                {
                    "patient_id": cells[0]["v"],
                    "sample_id": cells[sample_col]["v"],
                    "aliquot_id": cells[aliquot_col]["v"],
                    "value": cells[5]["v"],
                }
            )
    return records
def precomputed_results(self, request):
    """Used by the web application.

    Runs a fixed query against [isb-cgc:test.brca_pwpv] (at most 50 rows) and
    wraps every returned row as an Association between two Feature nodes.

    Args:
        request: Not read by this method.

    Returns:
        A CircvizOutput whose ``items`` is the list of Associations, in the
        order the rows were returned. The list is empty when the response
        carries no "rows" entry.
    """

    def _encoded(cell, default=None):
        # UTF-8 encode a non-empty cell value; otherwise return the default.
        value = cell["v"]
        return value.encode("utf-8") if value else default

    def _as_int(cell):
        # Convert a non-empty cell value to int; otherwise return None.
        value = cell["v"]
        return int(value) if value else None

    def _feature(cells, offset):
        # The SELECT list is two runs of seven columns (A_* then B_*), so a
        # single builder serves both nodes with a different starting offset.
        chromosome = _encoded(cells[offset + 1])
        if chromosome is not None:
            # NOTE(review): calling replace() with str arguments on the
            # encoded value assumes Python 2 byte strings - confirm before
            # porting to Python 3.
            chromosome = chromosome.replace("chr", "")
        return Feature(
            annotated_type=_encoded(cells[offset]),
            chr=chromosome,
            start=_as_int(cells[offset + 2]),
            end=_as_int(cells[offset + 3]),
            label=_encoded(cells[offset + 4], default=""),
            mutation_count=_as_int(cells[offset + 5]),
            source=_encoded(cells[offset + 6]),
        )

    # Every fragment ends with a space so the implicitly concatenated pieces
    # never fuse two SQL tokens together.
    query = (
        "SELECT A_valueType, A_chr, A_startPos, A_endPos, A_featureName, A_N, A_dataType, "
        "B_valueType, B_chr, B_startPos, B_endPos, B_featureName, B_N, B_dataType, "
        "logP FROM [isb-cgc:test.brca_pwpv] "
        'where B_chr != "null" '
        'and A_chr != "null" '
        'and A_startPos != "null" and A_endPos != "null" '
        'and B_startPos != "null" and B_endPos != "null" '
        "LIMIT 50;"
    )
    query_body = {"query": query}

    bigquery_service = authorize_credentials_with_Google()
    table_data = bigquery_service.jobs()
    query_response = table_data.query(projectId=settings.BQ_PROJECT_ID, body=query_body).execute()

    association_list = []
    # Presumably the response omits "rows" when nothing matched (verify
    # against the jobs.query response reference); treat that as an empty
    # result instead of raising KeyError.
    for row in query_response.get("rows", []):
        cells = row["f"]
        node1 = _feature(cells, 0)
        node2 = _feature(cells, 7)
        logged_pvalue = float(cells[14]["v"])
        association_list.append(Association(node1=node1, node2=node2, logged_pvalue=logged_pvalue))

    return CircvizOutput(items=association_list)
def submit_jobs_with_user_data(params_array):
    """Submit TCGA and/or user-data jobs for each parameter object.

    For every parameter object, user_feature_handler decides which kinds of
    job apply:

    * When its result has a true 'include_tcga', a TCGA job is submitted via
      submit_tcga_job and the returned item is collected.
    * When its result has a non-empty 'user_studies', a UserFeatureProvider is
      created; if that provider is queryable for the cohorts, its job
      reference is collected too.

    Args:
        params_array: Iterable of objects exposing ``feature_id`` and
            ``cohort_id_array`` attributes.

    Returns:
        A list holding, in submission order, the items returned by
        submit_tcga_job and, for user-data jobs, dicts with the keys
        'feature_id', 'provider', 'ready' (False) and 'job_reference'.
    """
    service = authorize_credentials_with_Google()
    submitted = []
    cohort_settings = settings.GET_BQ_COHORT_SETTINGS()

    # Submit jobs
    for params in params_array:
        feature_id = params.feature_id
        cohort_id_array = params.cohort_id_array

        user_data = user_feature_handler(feature_id, cohort_id_array)

        if user_data['include_tcga']:
            submitted.append(submit_tcga_job(params, service, cohort_settings))

        if len(user_data['user_studies']) == 0:
            continue

        converted_feature_id = user_data['converted_feature_id']
        user_feature_id = user_data['user_feature_id']
        logging.debug("user_feature_id: {0}".format(user_feature_id))
        provider = UserFeatureProvider(converted_feature_id, user_feature_id=user_feature_id)

        # Depending on the combination of cohort(s) and feature identifiers,
        # the provider may have no BigQuery query/job to run at all. Such a
        # provider is left out of the returned array.
        if not provider.is_queryable(cohort_id_array):
            logging.debug("No UserFeatureDefs for '{0}'".format(converted_feature_id))
            continue

        job_reference = provider.get_data_job_reference(
            cohort_id_array, cohort_settings.dataset_id, cohort_settings.table_id)
        logging.info("Submitted USER {job_id}: {fid} - {cohorts}".format(
            job_id=job_reference['jobId'], fid=feature_id, cohorts=str(cohort_id_array)))
        submitted.append({
            'feature_id': feature_id,
            'provider': provider,
            'ready': False,
            'job_reference': job_reference
        })

    return submitted
def get_bq_service(self):
    """Return the BigQuery service stored on this object, creating it on first use.

    When ``self.bigquery_service`` is None, the result of
    authorize_credentials_with_Google() is stored there before it is returned;
    otherwise the stored value is returned as-is.
    """
    if self.bigquery_service is not None:
        return self.bigquery_service

    self.bigquery_service = authorize_credentials_with_Google()
    return self.bigquery_service