def check_login(form_data):
    """Check the submitted user name and password against the database.

    Args:
        form_data: Mapping with a ``'user'`` entry and, when the user exists,
            a ``'password'`` entry.

    Returns:
        A ``(success, message)`` tuple. ``message`` is one of ``'ok'``,
        ``'password error'``, ``'user not active'`` or ``'user not found'``.
    """
    db = DB()
    # Encode once; both lookups are formatted with the same UTF-8 value.
    encoded_user = unicode(form_data['user']).encode('utf-8')

    # NOTE(review): both statements are built with str.format, so the user
    # name reaches the SQL text unescaped -- confirm whether DB.execute can
    # take bound parameters instead.
    user_row = db.execute(get_user_cmd.format(encoded_user), get_all=False)
    if not user_row:
        return False, 'user not found'

    submitted = form_data['password']
    stored, active_flag = db.execute(
        get_passwd_cmd.format(encoded_user), get_all=False)

    # A wrong password is reported before the account state is considered.
    if stored != submitted:
        return False, 'password error'
    if active_flag == 'Y':
        return True, 'ok'
    return False, 'user not active'
def get_locus_result(genename, blast_results):
    """Collect locus and functional annotation details for one gene.

    NOTE: a second ``get_locus_result`` is defined further down in this
    module; being bound later, that definition is the one callers get.

    Args:
        genename: Gene identifier matched against ``locus.GENE_ID``.
        blast_results: Mapping that may carry ``'cds_seq'`` and ``'pro_seq'``
            entries; each defaults to ``'NA'`` when absent.

    Returns:
        A dict that always holds ``'gene_cds_seq'`` and ``'gene_pro_seq'`` and,
        when the gene is found, ``'gene_identification'``,
        ``'gene_attributes'`` and ``'gene_annotation'`` as well.
    """
    cds_sequences = blast_results.get('cds_seq', 'NA')
    protein_sequences = blast_results.get('pro_seq', 'NA')

    db = DB()
    query = (
        "select l.*, f.BLAST_Hit_Accession, f.Description, f.Pfam_ID, "
        "f.Interpro_ID, f.GO_ID from locus l left join func f on "
        "l.GENE_ID=f.GENE_ID where l.GENE_ID='{0}'; "
    ).format(genename)
    row = db.execute(query, get_all=False)

    locus_result = {}
    if row:
        # Columns 1-4 are read as the locus fields; everything from column 5
        # on is the five selected func columns.
        _gene_id, chromosome, start, end = row[1:5]
        accession, product_name, pfam, interpro, go_term = row[5:]

        locus_result['gene_identification'] = {
            'Gene Product Name': product_name,
            'Locus Name': genename,
        }
        locus_result['gene_attributes'] = {
            'Chromosome': chromosome,
            "Gene Postion": '{start} - {end}'.format(start=start, end=end),
        }
        locus_result['gene_annotation'] = {
            'header': [
                'Accession', 'Description', 'Pfam_ID', 'Interpro_ID', 'GO_ID'
            ],
            'body': [accession, product_name, pfam, interpro, go_term],
        }

    locus_result['gene_cds_seq'] = cds_sequences
    locus_result['gene_pro_seq'] = protein_sequences
    return locus_result
def run_blast_result(genename):
    """Extract CDS and protein sequences for every transcript of a gene.

    Clears the previous output files in ``BLAST_OUT_PATH``, looks up the
    transcripts mapped to ``genename`` in ``geneTrans_map``, runs
    ``blastdbcmd`` once per transcript against the CDS database and once
    against the protein database, then hands the transcript list to
    ``get_blast_result``.

    Args:
        genename: Gene identifier matched against ``geneTrans_map.GENE``.

    Returns:
        Whatever ``get_blast_result`` returns for the transcript list, or
        ``{}`` when the gene has no mapped transcripts.

    Raises:
        OSError: If ``BLAST_OUT_PATH`` cannot be listed or the ``blastdbcmd``
            executable cannot be started.
    """
    # Always drop the previous search results first. Same reach as the shell
    # glob ``rm DIR/*``: hidden entries and sub-directories are left alone.
    for entry in os.listdir(BLAST_OUT_PATH):
        if entry.startswith('.'):
            continue
        stale_path = os.path.join(BLAST_OUT_PATH, entry)
        if os.path.isdir(stale_path) and not os.path.islink(stale_path):
            continue
        try:
            os.remove(stale_path)
        except OSError:
            # Cleanup is best effort: a file that cannot be removed must not
            # block the new search.
            continue

    # Look up the transcripts that belong to the gene.
    # NOTE(review): the statement is built with str.format; confirm whether
    # DB.execute can take bound parameters instead.
    db = DB()
    rows = db.execute(
        "select GENE_TRANS from geneTrans_map where GENE='{gene}'".format(
            gene=genename))
    transcripts = [row[0] for row in rows]
    if not transcripts:
        return {}

    # (database file, output file prefix) for the two extractions per
    # transcript, in the order they are run.
    targets = (
        (os.path.join(BASE_DB_DIR, BLAST_CDS_DB), 'gene.cds.'),
        (os.path.join(BASE_DB_DIR, BLAST_PROTEIN_DB), 'gene.protein.'),
    )
    for gene_trans in transcripts:
        for db_path, prefix in targets:
            # An argument list (no shell) keeps transcript ids and paths from
            # being interpreted as shell syntax.
            subprocess.call([
                'blastdbcmd',
                '-entry', gene_trans,
                '-db', db_path,
                '-line_length', '100',
                '-out', os.path.join(BLAST_OUT_PATH, prefix + gene_trans),
            ])

    return get_blast_result(transcripts)
def run_snp_variations(group_info, user):
    """Run the SNP variation script for two groups, zip and mail the result.

    Args:
        group_info: Mapping of group name to the value handed to
            ``create_group_info``. The first two keys, in the mapping's
            iteration order, select the two groups; all keys joined with
            ``'vs'`` name the output files.
        user: User name used to look up the notification e-mail address.

    Returns:
        The string ``'done'``.

    Raises:
        IndexError: If ``group_info`` holds fewer than two groups.
        OSError: If the ``python`` or ``zip`` executable cannot be started.
    """
    # list() keeps the keys indexable on Python 3, where keys() is a view.
    group_name = list(group_info.keys())
    groupA = group_info[group_name[0]]
    groupB = group_info[group_name[1]]

    comparison = 'vs'.join(group_name)
    table_name = comparison + '_table'
    results_dir = os.path.join(basedir, 'app', 'static', 'variation_results')

    create_group_info(groupA, groupB, filename=comparison)

    # Argument lists (no shell) keep caller-supplied group names from being
    # interpreted as shell syntax.
    subprocess.call([
        'python', os.path.join(SNP_SCRIPT_DIR, SCRIPT_FILE),
        '-i', INPUT_TABLE,
        '-o', os.path.join(results_dir, table_name),
        '-g', os.path.join(SNP_SCRIPT_DIR, comparison),
        '-d', '5',
    ])

    # NOTE(review): chdir changes the working directory of the whole process
    # and is kept only because later code may rely on it; passing ``cwd=`` to
    # the zip call would be the contained alternative.
    os.chdir(results_dir)
    subprocess.call(['zip', table_name + '.zip', table_name])
    # The unzipped table is intentionally left in place next to the archive.

    # NOTE(review): the statement is built with str.format; confirm whether
    # DB.execute can take bound parameters instead.
    db = DB()
    results = db.execute(
        "select email from user where username='{0}'".format(user))
    # Only notify when the user exists and has an address on file.
    if results and results[0][0]:
        to = results[0][0]
        send_mail(to, 'Snp Variation Results', 'mail/variation_results',
                  user=user, filename=table_name)
    return 'done'
def _wrap_sequence(sequence, width):
    """Split ``sequence`` into newline-terminated rows of at most ``width`` characters."""
    return ''.join(
        sequence[start:start + width] + '\n'
        for start in range(0, len(sequence), width)
    )


def fetch_blast_result(genename):
    """Fetch the protein and CDS sequences whose id matches a gene.

    Queries ``pep_tb`` and ``cds_tb`` for rows whose ``GENE_ID`` starts with
    the pattern produced by ``wildcard_gene(genename)``.

    Args:
        genename: Gene identifier passed through ``wildcard_gene``.

    Returns:
        ``{}`` when neither table has a match; otherwise a dict with
        ``'pro_seq'`` and ``'cds_seq'``, each mapping ``GENE_ID`` to its
        sequence. Sequences longer than 125 characters are broken into
        newline-terminated rows of at most 125 characters; shorter ones are
        returned unchanged.
    """
    MAX_ROW_LEN = 125
    db = DB()
    # NOTE(review): the statement is built with str.format; confirm whether
    # DB.execute can take bound parameters instead.
    command = "select GENE_ID,VAL from {table} where GENE_ID like '{gene}%'"
    gene_pattern = wildcard_gene(genename)
    pep_results = db.execute(command.format(table='pep_tb', gene=gene_pattern))
    cds_results = db.execute(command.format(table='cds_tb', gene=gene_pattern))
    if len(pep_results) == 0 and len(cds_results) == 0:
        return {}

    def pretty(rows):
        # Chunk boundaries come from range(), so the tail of a sequence is
        # kept regardless of how the interpreter divides integers.
        return {
            gene_id: (_wrap_sequence(seq, MAX_ROW_LEN)
                      if len(seq) > MAX_ROW_LEN else seq)
            for gene_id, seq in rows
        }

    return {'pro_seq': pretty(pep_results), 'cds_seq': pretty(cds_results)}
def batch_query_gene(genes, max_input=1000):
    """Look up locus descriptions for a batch of genes.

    Args:
        genes: String of gene identifiers separated by commas and/or
            whitespace. Empty tokens (for example from ``"a, b"`` or a
            trailing comma) are ignored.
        max_input: Largest number of identifiers accepted in one call.

    Returns:
        One list per requested gene, in request order: the gene id followed
        by the description columns, with ``""`` where the gene has no match.
        ``[]`` when no identifier was given, when more than ``max_input``
        were given, or when the query matches nothing.
    """
    # Treat commas and whitespace alike; split() drops the empty tokens.
    gene_list = genes.replace(',', ' ').split()
    # An empty list would render as ``in ()``, which is not valid SQL.
    if not gene_list or len(gene_list) > max_input:
        return []

    # NOTE(review): the IN list is spliced into the SQL text (the tuple's
    # repr doubles as the SQL list); confirm whether DB.execute can take
    # bound parameters instead.
    if len(gene_list) == 1:
        # A one-element tuple would render with a trailing comma.
        _search = "('{0}')".format(gene_list[0])
    else:
        _search = tuple([str(each) for each in gene_list])

    db = DB()
    cmd = (
        "select l.*, f.Description, f.Pfam_Description, "
        "f.Interpro_Description, f.GO_Description from locus l left join "
        "func f on l.GENE_ID=f.GENE_ID where l.GENE_ID in {0}; "
    ).format(_search)
    result = db.execute(cmd)
    if not result:
        return []

    # Keep the gene id (column 1) and the description columns (5 onwards).
    found = pd.DataFrame([(each[1],) + each[5:] for each in result])
    requested = pd.DataFrame(gene_list)
    # Both frames label the gene id column 0, so the left merge joins on it
    # and keeps one row per requested gene, in request order.
    merged = pd.merge(requested, found, how='left').fillna("")
    return merged.values.tolist()
def fetch_expression_data(gene_id, samples, table="iwgsc_refseq"):
    """Fetch the values of the given sample columns for one gene.

    Args:
        gene_id: Value matched against the ``gene`` column.
        samples: Iterable of column names to select.
        table: Table to read from.

    Returns:
        The selected values of the first matching row as a list, or ``[]``
        when no sample was requested or no row matches.
    """
    sample_str = ','.join(samples)
    # Without columns the statement would read ``select  from ...``; there is
    # nothing to fetch, so skip the database entirely.
    if not sample_str:
        return []

    # NOTE(review): column names, table and gene id are spliced into the SQL
    # text; confirm they are validated upstream.
    cmd = "select {samples} from {table} where gene='{gene}'".format(
        samples=sample_str, table=table, gene=gene_id)
    db = DB()
    results = db.execute(cmd)
    if len(results) == 0:
        return []
    return list(results[0])
def get_locus_result(genename, blast_results):
    """Collect locus, orthologue, annotation and expression data for a gene.

    Args:
        genename: Gene identifier matched exactly against ``locus.GENE_ID``
            and, through ``wildcard_gene``, against
            ``tissue_expression.Gene_id``.
        blast_results: Mapping that may carry ``'cds_seq'`` and ``'pro_seq'``
            entries; each defaults to ``'NA'`` when absent.

    Returns:
        A dict that always holds ``'tissue_expression'``, ``'gene_cds_seq'``
        and ``'gene_pro_seq'`` and, when the gene is found in ``locus``,
        ``'orthologous_gene'``, ``'gene_identification'``,
        ``'gene_attributes'`` and ``'gene_annotation'`` as well.
    """
    cds_sequences = blast_results.get('cds_seq', 'NA')
    protein_sequences = blast_results.get('pro_seq', 'NA')

    db = DB()
    locus_result = {}
    locus_query = (
        "select l.*, f.Description, f.Pfam_Description, "
        "f.Interpro_Description, f.GO_Description from locus l left join "
        "func f on l.GENE_ID=f.GENE_ID where l.GENE_ID='{0}'; "
    ).format(genename)
    locus_row = db.execute(locus_query, get_all=False)

    if locus_row:
        ortho_query = (
            "select l.GENE_ID, o.* from locus l left join ortho o on "
            "l.GENE_ID=o.GENE_ID where l.GENE_ID='{0}';"
        ).format(genename)
        ortho_row = db.execute(ortho_query, get_all=False)
        ortho_body = []
        if ortho_row:
            # The first three columns are skipped; missing (None) entries are
            # dropped rather than formatted.
            ortho_body = [
                printPretty(value)
                for value in ortho_row[3:]
                if value is not None
            ]
        locus_result['orthologous_gene'] = {
            'header': ['Arabidopsis_thaliana', 'Hordeum_vulgare',
                       'Oryza_sativa', 'Triticum_aestivum', 'Zea_mays'],
            'body': ortho_body,
        }

        _gene_id, chromosome, start, end = locus_row[1:5]
        product_name, pfam_desc, interpro_desc, go_desc = locus_row[5:]
        locus_result['gene_identification'] = {
            'Gene Product Name': product_name,
            'Locus Name': genename,
        }
        locus_result['gene_attributes'] = {
            'Chromosome': chromosome,
            "Gene Postion": '{start} - {end}'.format(start=start, end=end),
        }
        locus_result['gene_annotation'] = {
            'header': ['Description', 'Pfam_Description',
                       'Interpro_Description', 'GO_Description'],
            'body': [product_name, pfam_desc, interpro_desc, go_desc],
        }

    # Match both the 01G and 02G forms of an id such as TraesCS1A02G000100,
    # hence LIKE with the pattern from wildcard_gene instead of equality.
    expression_rows = db.execute(
        "select * from tissue_expression where Gene_id like '{0}'".format(
            wildcard_gene(genename)))
    if expression_rows:
        # Only the first matching row is used; its first two columns are
        # skipped.
        expression = [float(value) for value in expression_rows[0][2:]]
    else:
        expression = []
    locus_result['tissue_expression'] = expression

    locus_result['gene_cds_seq'] = cds_sequences
    locus_result['gene_pro_seq'] = protein_sequences
    return locus_result
def get_expr_table(table, gene_ids, groupA, groupB, map_groupA, map_groupB):
    """Read the position and sample columns of ``table`` for several genes.

    Args:
        table: Table to query.
        gene_ids: Gene identifiers, queried one at a time in order.
        groupA: List of column names selected for the first group.
        groupB: List of column names selected for the second group.
        map_groupA: List of header labels returned in place of ``groupA``.
        map_groupB: List of header labels returned in place of ``groupB``.

    Returns:
        ``(header, rows)`` where ``header`` is the four fixed column names
        followed by ``map_groupA`` and ``map_groupB`` and ``rows`` holds one
        list per gene. If a gene has no row, ``(gene, '')`` is returned
        instead, for the first such gene.
    """
    db = DB()
    fixed_columns = ['GENE_ID', 'CHR', 'POS_START', 'POS_END']
    column_sql = ','.join(fixed_columns + groupA + groupB)
    query_template = "select {columns} from {table} where GENE_ID='{gene_id}';"

    rows = []
    for gene in gene_ids:
        query = query_template.format(
            columns=column_sql, table=table, gene_id=gene)
        row = db.execute(query, get_all=False)
        if not row:
            # Signal the missing gene to the caller and stop.
            return (gene, '')
        rows.append(list(row))

    return fixed_columns + map_groupA + map_groupB, rows
def clean_db_cache(table):
    """Delete every row of ``table``.

    Args:
        table: Name of the table to empty; it is placed into the statement
            as-is.
    """
    connection = DB()
    statement = "delete from {table};".format(table=table)
    connection.execute(statement)
def fetch_sample(table, fixed_column_num):
    """Return the column names of ``table`` after its leading fixed columns.

    Args:
        table: Table name looked up in ``information_schema.COLUMNS``.
        fixed_column_num: Number of leading columns to skip.

    Returns:
        The column names from position ``fixed_column_num`` onwards, in
        table-definition order.
    """
    # The slice below is positional, so the rows must come back in definition
    # order; information_schema does not promise that without ORDER BY.
    # NOTE(review): there is no table_schema filter, so a table of the same
    # name in another schema would contribute its columns too -- confirm.
    cmd = (
        "select COLUMN_NAME from information_schema.COLUMNS "
        "where table_name='{table}' order by ORDINAL_POSITION;"
    ).format(table=table)
    db = DB()
    column_names = [each[0] for each in db.execute(cmd)]
    return column_names[fixed_column_num:]