def transform_ref_seq(self, to_type):
    '''
    Transform the reference sequence to the target type.

    Lookup order:
      1. If the reference is already of ``to_type``, ``self.ref_seq`` is
         returned unchanged.
      2. The result of ``self.get_myvariant_res()`` is searched for
         ``to_type`` through ``MyVariantUtil.extract``.
      3. When either side of the conversion is an ENST id, the
         ``EnsemblTranscriptDict`` table is queried as a fallback.

    :param to_type: type of reference sequence to convert to
    :return: the reference sequence expressed as ``to_type``, or None when
        no conversion could be found
    '''
    if self.ref_type == to_type:
        return self.ref_seq

    # All transformations: try myvariant first.
    # MyVariantInfo(url='http://myvariant.info/v1')
    mv_res = self.get_myvariant_res()
    if mv_res:
        res = MyVariantUtil.extract(mv_res, to_type)
        if res:
            return res

    # Fall back: EnsemblTranscriptDict. The table is only queried by its
    # ENST_ID / Ref_Seq columns, so it can only help when one side of the
    # conversion is an ENST id.
    from_enst = self.ref_type == GeneVariant.REF_TYPE_ENST
    if not (from_enst or to_type == GeneVariant.REF_TYPE_ENST):
        return None

    if from_enst:
        query = "SELECT Ref_Seq FROM EnsemblTranscriptDict WHERE ENST_ID = %s"
        result_key = 'Ref_Seq'
    else:
        query = "SELECT ENST_ID FROM EnsemblTranscriptDict WHERE Ref_Seq = %s"
        result_key = 'ENST_ID'

    cancer_db = config.connect_db(config.CANCER_DB_KEY)
    cursor = cancer_db.cursor()
    try:
        cursor.execute(query, (self.ref_seq, ))
        row = cursor.fetchone()
    finally:
        # Release the cursor even when the query raises.
        cursor.close()

    if row:
        # Rows are accessed with ``.get``, i.e. a dict-style cursor is
        # expected here.
        to_ref_seq = row.get(result_key, None)
        # Only accept the mapped value when it really has the requested type.
        if to_ref_seq and GeneReference.get_ref_type(to_ref_seq) == to_type:
            return to_ref_seq
    return None
def search_variant_db(variant, only_count=False, start=0, limit=0):
    '''
    Search the COSMICMutantExport table for records matching a variant.

    Three strategies are tried in order; the search stops at the first one
    that produces hits:
      1. ``ID_Mutation`` equal to the ``cosmic.cosmic_id`` extracted from
         ``variant.get_myvariant_res()``.
      2. ``Mutation_Genome_Position`` starting with ``<chr>:<pos>``.
      3. ``Accession_Number`` (ENST first, then NM) combined with
         ``Mutation_CDS`` (from INFO_TYPE_C), then with ``Mutation_AA``
         (from INFO_TYPE_P).

    NOTE(review): another ``search_variant_db(variant)`` definition (gnomAD
    lookup) is visible later in this source. If both live in the same
    module the later definition replaces this one - confirm they belong to
    different modules.

    :param variant: GeneVariant to look up; any other object yields the
        empty result
    :param only_count: when True return the number of matches instead of rows
    :param start: forwarded to ``util_query.add_limit``
    :param limit: forwarded to ``util_query.add_limit``
    :return: the fetched rows (``[]`` when nothing matched), or an integer
        count (``0`` when nothing matched) if ``only_count`` is True
    '''
    if not isinstance(variant, GeneVariant):
        return [] if not only_count else 0

    select_from = "SELECT " + ("COUNT(*) count" if only_count else "*") \
        + " FROM COSMICMutantExport WHERE "

    cancer_db = config.connect_db(config.CANCER_DB_KEY)
    cursor = cancer_db.cursor()

    def run(condition, base_params):
        # Execute one COSMICMutantExport query with the paging applied.
        query, params = util_query.add_limit(select_from + condition,
                                             base_params, start, limit)
        cursor.execute(query, params)
        return cursor.fetchall()

    def has_hits(rows):
        # A COUNT(*) query yields a row even for zero matches, so the count
        # itself has to be inspected when only_count is set.
        if not rows:
            return False
        return not only_count or rows[0]['count'] > 0

    res = None
    try:
        # 1. try myvariant directly
        mv_res = variant.get_myvariant_res()
        if mv_res:
            cosmic_id = MyVariantUtil.extract(mv_res, 'cosmic.cosmic_id')
            if cosmic_id:
                res = run("ID_Mutation=%s", (cosmic_id,))

        # 2. try chromosome position search
        if not has_hits(res):
            chr_pos = variant.transform_variant(GeneVariant.TRANSFORM_CHR_POS)
            # The transformation may yield nothing; skip this strategy then
            # instead of failing on the subscript.
            if chr_pos:
                mutation_genome_pos = \
                    str(map_chr_to_num(chr_pos[0])) + ':' + chr_pos[1]
                # NOTE(review): the pattern is a bare prefix, so a position
                # such as 1234 also matches 12345...; confirm whether a
                # delimiter should follow the position.
                res = run("Mutation_Genome_Position LIKE %s",
                          (mutation_genome_pos + '%',))

        # 3. try accession number + mutation description
        if not has_hits(res):
            cds = variant.transform_variant(variant.ref_type,
                                            variant.INFO_TYPE_C)
            protein = variant.transform_variant(variant.ref_type,
                                                variant.INFO_TYPE_P)
            var_infos = [cds[1] if cds else None,
                         map_aa_3to1(protein[1]) if protein else None]
            info_attrs = ['Mutation_CDS', 'Mutation_AA']
            ref_seqs = [variant.transform_ref_seq(GeneVariant.REF_TYPE_ENST),
                        variant.transform_ref_seq(GeneVariant.REF_TYPE_NM)]
            found = False
            for info_attr, var_info in zip(info_attrs, var_infos):
                if var_info is None:
                    # This description could not be derived for the variant.
                    continue
                condition = "Accession_Number=%s AND " + info_attr + " LIKE %s"
                for ref_seq in ref_seqs:
                    if not ref_seq:
                        continue
                    res = run(condition, (ref_seq, var_info,))
                    if has_hits(res):
                        found = True
                        break
                if found:
                    break
    finally:
        # Release the cursor even when one of the lookups raises.
        cursor.close()

    if res:
        return res if not only_count else res[0]['count']
    return [] if not only_count else 0
def search_variant_db(variant):
    '''
    Look up a variant in the per-chromosome ``gnomad_variants_info_sites_*``
    table.

    The variant is transformed to (chr, pos, ref, alt); the parts joined with
    ``-`` form the ``Variant_ID`` searched for, and the chromosome selects
    the table.

    NOTE(review): an earlier ``search_variant_db`` definition (COSMIC lookup)
    is visible in this source. If both live in the same module this one
    replaces it - confirm they belong to different modules.

    :param variant: GeneVariant to look up
    :return: the first matching row, or None when ``variant`` is not a
        GeneVariant, cannot be transformed, or has no match
    '''
    import re

    if not isinstance(variant, GeneVariant):
        return None

    chr_pos_ref_alt = variant.transform_variant(
        GeneVariant.TRANSFORM_CHR_POS_REF_ALT)
    if not chr_pos_ref_alt:
        return None

    gnomad_id = '-'.join(chr_pos_ref_alt)
    chr_num = map_chr_to_str(chr_pos_ref_alt[0])

    # Table names cannot be bound as query parameters, so the suffix is
    # checked to be a plain identifier fragment before it is concatenated
    # into the SQL text.
    if not re.match(r'[0-9A-Za-z_]+\Z', chr_num):
        return None
    table_name = 'gnomad_variants_info_sites_' + chr_num

    query = "SELECT * FROM " + table_name + " WHERE Variant_ID=%s "
    params = (gnomad_id, )

    cancer_db = config.connect_db(config.CANCER_DB_KEY)
    cursor = cancer_db.cursor()
    try:
        cursor.execute(query, params)
        return cursor.fetchone()
    finally:
        # Release the cursor even when the query raises.
        cursor.close()
def search_gene_db(reference, only_count=False, start=0, limit=0):
    '''
    Search COSMICMutantExport by gene name.

    A reference that is not already of gene type is converted with
    ``reference.transform_ref_seq(GeneVariant.REF_TYPE_GENE)`` first.

    :param reference: GeneReference to look up; any other object yields the
        empty result
    :param only_count: when True return the number of matches instead of rows
    :param start: forwarded to ``util_query.add_limit``
    :param limit: forwarded to ``util_query.add_limit``
    :return: the fetched rows (``[]`` when nothing matched), or an integer
        count (``0`` when nothing matched) if ``only_count`` is True
    '''
    res = None
    if isinstance(reference, GeneReference):
        if reference.ref_type != GeneVariant.REF_TYPE_GENE:
            gene = reference.transform_ref_seq(GeneVariant.REF_TYPE_GENE)
        else:
            gene = reference.ref_seq

        if gene:
            base_query = "SELECT " + ("COUNT(*) count" if only_count else "*") \
                + " FROM COSMICMutantExport WHERE Gene_Name LIKE %s"
            base_params = (gene,)
            query, params = util_query.add_limit(base_query, base_params,
                                                 start, limit)

            cancer_db = config.connect_db(config.CANCER_DB_KEY)
            cursor = cancer_db.cursor()
            try:
                cursor.execute(query, params)
                res = cursor.fetchall()
            finally:
                # Release the cursor even when the query raises.
                cursor.close()

    if res:
        return res if not only_count else res[0]['count']
    return [] if not only_count else 0
def search_transcript_db(reference, only_count=False, start=0, limit=0):
    '''
    Search COSMICMutantExport by transcript accession number.

    The reference is converted to an ENST id and to an NM id; the ENST id is
    queried first and the NM id is used only when the ENST lookup produced
    no hits.

    :param reference: GeneReference to look up; any other object yields the
        empty result
    :param only_count: when True return the number of matches instead of rows
    :param start: forwarded to ``util_query.add_limit``
    :param limit: forwarded to ``util_query.add_limit``
    :return: the fetched rows (``[]`` when nothing matched), or an integer
        count (``0`` when nothing matched) if ``only_count`` is True
    '''
    res = None
    if isinstance(reference, GeneReference):
        base_query = "SELECT " + ("COUNT(*) count" if only_count else "*") \
            + " FROM COSMICMutantExport WHERE Accession_Number LIKE %s"

        cancer_db = config.connect_db(config.CANCER_DB_KEY)
        cursor = cancer_db.cursor()
        try:
            ref_seq1 = reference.transform_ref_seq(GeneVariant.REF_TYPE_ENST)
            ref_seq2 = reference.transform_ref_seq(GeneVariant.REF_TYPE_NM)
            for ref_seq in (ref_seq1, ref_seq2):
                if not ref_seq:
                    continue
                base_params = (ref_seq,)
                query, params = util_query.add_limit(base_query, base_params,
                                                     start, limit)
                cursor.execute(query, params)
                res = cursor.fetchall()
                # A COUNT(*) query yields a row even for zero matches, so a
                # non-empty result alone is not a hit: a zero count must
                # fall through to the next accession number.
                if res and not (only_count and res[0]['count'] == 0):
                    break
        finally:
            # Release the cursor even when a lookup raises.
            cursor.close()

    if res:
        return res if not only_count else res[0]['count']
    return [] if not only_count else 0