def create_user(email, password):
    """Insert a new row into the ``users`` table.

    The password is never stored as given: it is passed through
    ``generate_password_hash`` first.  The statement runs inside the
    application context and is committed on the connection returned by
    ``get_db()``.
    """
    insert_stmt = "INSERT INTO `users` (`email`, `password`) VALUES (%s, %s)"
    with app.app_context():
        connection = get_db()
        with connection.cursor() as cur:
            params = (email, generate_password_hash(password))
            cur.execute(insert_stmt, params)
        connection.commit()
def create_cell_line():
    """Create a cell line or overwrite the fields of an existing one.

    The cell line is read as a JSON object from the request and merged into
    the table stored under the Redis key ``cell_lines`` (a JSON dict of
    column lists).  A row whose ``ID`` equals the submitted ``ID`` has every
    submitted field overwritten; otherwise a new row is added.

    Returns:
        ``{'status': 'success'}`` with code 200, or
        ``{'status': 'error', 'error': ...}`` with code 500 when the request
        carries no JSON object or the table cannot be written back.
    """
    from main import get_db

    rdb = get_db()
    new_cell_line = request.get_json()
    if not isinstance(new_cell_line, dict):
        return make_response({'status': 'error',
                              'error': 'no data received'}, 500)

    stored = rdb.get('cell_lines')
    if stored is not None:
        df = pd.DataFrame(json.loads(stored))
    else:
        df = pd.DataFrame(columns=list(new_cell_line))

    # TODO: handle the case where the name exists but the ID is different

    # in case we added new fields
    for key in new_cell_line:
        if key not in df.columns:
            df[key] = ''

    # a table without an ID column cannot contain the cell line yet
    matches = None
    if 'ID' in df.columns:
        matches = df['ID'] == new_cell_line.get('ID')

    if matches is not None and matches.any():
        # if exists - overwrite every submitted field, not just one of them
        for key, value in new_cell_line.items():
            df.loc[matches, key] = value
    else:
        # DataFrame.append was removed in pandas 2.0; concat works everywhere
        df = pd.concat([df, pd.DataFrame([new_cell_line])],
                       ignore_index=True)

    try:
        rdb.set('cell_lines', json.dumps(df.to_dict('list')))
    except Exception as e:
        return make_response({'status': 'error', 'error': str(e)}, 500)
    return make_response({'status': 'success'}, 200)
def change_password(self, new_password):
    """Store a new password hash for this user and mark the account activated.

    The hash is written to the ``users`` Redis hash under the user's e-mail
    and the e-mail is removed from the ``not_activated_users`` set.  The
    attributes of this object are updated only after both Redis writes went
    through, so a failed write does not leave the object ahead of the store.

    Args:
        new_password: The new plain-text password.
    """
    from main import get_db

    rdb = get_db()
    password_hash = sha256_crypt.hash(new_password)
    # hset with one field does what the deprecated hmset({field: value}) did
    rdb.hset('users', self.email, password_hash)
    rdb.srem('not_activated_users', self.email)
    self.password = password_hash
    self.activated = True
def update_rack():
    """Queue a request to place a cell line on one or more rack positions.

    The request JSON is expected to carry ``tower``, ``Rack`` and ``pos``
    (one position string or a list of them); ``status`` defaults to
    ``'pending'``.  For every position a row is added to the table stored
    under the Redis key ``to_approve``.  Pending requests already targeting
    the same tower/rack/position are replaced, and only pending requests are
    carried over when the table is saved.  When the tower table shows a
    current occupant for a position, its ``ID`` and ``Responsible person``
    are recorded as ``prev_cell_line`` / ``prev_responsible`` on that row.

    Returns:
        ``{'status': 'success'}`` with code 200, or an error payload with
        code 500 when the table cannot be saved.
    """
    from main import get_db

    data = request.get_json()
    if 'status' not in data:
        data['status'] = 'pending'

    rdb = get_db()
    stored_requests = rdb.get('to_approve')
    if stored_requests is None:
        to_approve = pd.DataFrame(columns=[
            'tower', 'pos', 'Rack', 'x', 'y', 'Responsible person', 'Date',
            'Comments', 'cell_line', 'prev_cell_line', 'prev_responsible',
            'prev_comments', 'prev_date', 'status'
        ])
    else:
        to_approve = pd.DataFrame(json.loads(stored_requests))
    to_approve = to_approve.loc[to_approve['status'] == 'pending']

    tower_data = rdb.get(data.get('tower'))
    if tower_data is not None:
        tower_data = pd.DataFrame(json.loads(tower_data))

    pos = data.get('pos')
    if isinstance(pos, str):
        pos = [pos]

    # if there is already something on those positions, then drop it ...
    to_overwrite = to_approve.loc[
        (to_approve['Rack'] == data.get('Rack'))
        & (to_approve['tower'] == data.get('tower'))
        & (to_approve['pos'].isin(pos))]
    if len(to_overwrite) != 0:
        to_approve = to_approve.drop(to_overwrite.index)

    # ... and add the new data
    new_rows = []
    for p in pos:
        # Copy per position: sharing one dict let the prev_* fields of an
        # occupied position leak into the rows of later, empty positions.
        cur_data = dict(data)
        cur_data['pos'] = p
        cur_data['y'] = p[0]
        cur_data['x'] = int(p[1:])
        if tower_data is not None:
            current_pos_data = tower_data.loc[
                (tower_data['Rack'].astype(int) == int(data.get('Rack', 0)))
                & (tower_data['pos'] == p)]
            if len(current_pos_data) != 0:
                occupant = current_pos_data.iloc[0]
                cur_data['prev_cell_line'] = occupant['ID']
                cur_data['prev_responsible'] = occupant['Responsible person']
        new_rows.append(cur_data)

    if new_rows:
        # DataFrame.append was removed in pandas 2.0; concat works everywhere
        to_approve = pd.concat([to_approve, pd.DataFrame(new_rows)],
                               ignore_index=True)

    # save to db
    try:
        rdb.set('to_approve', json.dumps(to_approve.to_dict('list')))
    except Exception as e:
        return make_response({'status': 'error', 'error': str(e)}, 500)
    return make_response({'status': 'success'}, 200)
def get_reads_per_position(project_id):
    """Render one read-count series per (gene, sample) pair of a project.

    The msgpack table stored under ``<project_id>_reads_per_position`` is
    split by gene and sample.  For each non-empty combination a series with
    one point per position ``1..gene_length`` is built, initialised to zero
    counts and filled in from the table's ``start``, ``counts`` and
    ``reads_info`` columns.  Gene lengths come from the JSON stored under
    ``<project_id>_rrna_genes`` or, when that key is missing, from built-in
    defaults.

    Returns a plain message string when the project has no stored table,
    otherwise the rendered ``reads_per_position.html`` template.
    """
    # this import has to be here
    from main import get_db

    rdb = get_db()
    packed = rdb.get("{}_reads_per_position".format(project_id))
    if packed is None:
        return "NO DATA for project {}".format(project_id)
    reads = pd.read_msgpack(packed)

    stored_lengths = rdb.get('{}_rrna_genes'.format(project_id))
    if stored_lengths is None:
        gene_lengths = {
            'RNA18S5': 1869,
            'RNA28S5': 5070,
            'RNA5-8S5': 153,
        }
    else:
        gene_lengths = json.loads(stored_lengths)

    plot_names, plot_series, categories = [], {}, {}
    samples = reads.get('sample').unique()
    for gene, raw_length in gene_lengths.items():
        gene_length = int(raw_length)
        positions = range(1, gene_length + 1)
        for sample in samples:
            subset = reads.loc[(reads['gene'] == gene)
                               & (reads['sample'] == sample)]
            if subset.empty:
                continue

            plot_name = "{}_{}".format(gene, sample)
            plot_names.append(plot_name)
            categories[plot_name] = list(positions)

            # every position starts out with zero reads and no description
            series_df = pd.DataFrame(columns=['x', 'y', 'reads_info'],
                                     index=positions)
            series_df['x'] = positions
            series_df['y'] = 0
            series_df['reads_info'] = ''

            for _, row in subset.iterrows():
                position = row['start']
                series_df.loc[position, 'x'] = position
                series_df.loc[position, 'y'] = row['counts']
                # turn the separators into HTML bullet lines for the tooltip
                series_df.loc[position, 'reads_info'] = row[
                    'reads_info'].replace(',', '<br> • ').replace(
                        '. ', '<br> • ')

            plot_series[plot_name] = {
                'name': plot_name,
                'data': series_df.to_dict('records'),
            }

    return render_template("reads_per_position.html",
                           plot_names=plot_names,
                           plot_series=plot_series,
                           categories=categories,
                           project_id=project_id)
def write_to_db(temperature):
    """Insert one temperature reading stamped with ``datetime.now()``.

    The row goes into the ``temperatures`` table and is committed at once.
    """
    insert_stmt = """
        INSERT INTO temperatures (read_datetime, temperature)
        VALUES (?, ?)
        """
    connection = get_db()
    reading = (datetime.datetime.now(), temperature)
    connection.execute(insert_stmt, reading)
    connection.commit()
def export_data():
    """Export confirmed and requested rack positions as one CSV string.

    Rows stored per tower (each Redis key named in the ``towers`` set) are
    joined with the ``cell_lines`` table on ``ID`` and labelled
    ``'confirmed'``.  Rows of the ``to_approve`` table are joined on
    ``cell_line`` == ``ID`` and keep their stored status.  Confirmed rows
    come first.  Missing Redis keys are treated as empty tables instead of
    raising.

    Returns:
        ``{'status': 'success', 'csv_content': <';'-separated CSV>}``, 200.
    """
    from main import get_db

    columns = [
        'ID', 'Cell line', 'Rack', 'Tower', 'Position',
        'Media (Freezing Medium)', 'transfected plasmid', 'selection', 'Typ',
        'Date', 'Responsible person', 'Biosafety level S1/S2', 'Comments',
        'Mycoplasma checked', 'Source', 'status'
    ]
    rdb = get_db()
    parts = []

    raw_cell_lines = rdb.get('cell_lines')
    if raw_cell_lines is not None:
        cell_lines = pd.DataFrame(json.loads(raw_cell_lines)).fillna('')

        # confirmed positions, tower by tower
        tower_frames = []
        for member in rdb.smembers('towers'):
            tower = member.decode('utf-8')
            raw_tower = rdb.get(tower)
            if raw_tower is None:
                continue
            df = pd.DataFrame(json.loads(raw_tower)).fillna('')
            df['Tower'] = tower
            tower_frames.append(df)
        if tower_frames:
            # DataFrame.append was removed in pandas 2.0; concat replaces it
            confirmed = pd.merge(pd.concat(tower_frames), cell_lines, on='ID')
            confirmed = confirmed.rename(columns={'pos': 'Position'})
            confirmed['status'] = 'confirmed'
            parts.append(confirmed[columns])

        # requested positions keep the status stored with the request
        raw_requests = rdb.get('to_approve')
        if raw_requests is not None:
            requested = pd.DataFrame(json.loads(raw_requests)).fillna('')
            requested = pd.merge(requested, cell_lines,
                                 left_on='cell_line', right_on='ID')
            requested = requested.rename(columns={'tower': 'Tower',
                                                  'pos': 'Position'})
            parts.append(requested[columns])

    if parts:
        full_df = pd.concat(parts, ignore_index=True)
    else:
        # nothing stored yet: export just the header line
        full_df = pd.DataFrame(columns=columns)

    content = full_df[columns].to_csv(sep=";", index=False)
    return make_response({'status': 'success', 'csv_content': content}, 200)
def delete_cell_line():
    """Remove a cell line and everything that refers to it.

    The request body is JSON with a ``cell_line_id``.  Rows for that id are
    removed from the ``to_approve`` table, from every tower table (keys
    named in the ``towers`` set) and from the ``cell_lines`` table.

    Returns:
        A success payload, or ``{'status': 'error', ...}`` when the request
        body is empty.  Both use HTTP code 200.
    """
    from main import get_db

    rdb = get_db()
    payload = request.get_data()
    # get_data() yields empty bytes, not None, for an empty body
    if not payload:
        return make_response({
            'status': 'error',
            'error': 'no data received'
        }, 200)
    cell_line_id = json.loads(payload.decode('utf-8')).get('cell_line_id')

    # pending/processed requests
    stored_requests = rdb.get('to_approve')
    if stored_requests is not None:
        to_approve = pd.DataFrame(json.loads(stored_requests))
    else:
        to_approve = pd.DataFrame(columns=[
            'tower', 'pos', 'Rack', 'x', 'y', 'Responsible person', 'Date',
            'Comments', 'cell_line', 'prev_cell_line', 'prev_responsible',
            'prev_comments', 'prev_date', 'status'
        ])
    found = to_approve.loc[to_approve['cell_line'] == cell_line_id]
    to_approve = to_approve.drop(found.index)
    rdb.set('to_approve', json.dumps(to_approve.to_dict('list')))

    # confirmed positions; a tower is rewritten only when it changed
    for member in rdb.smembers('towers'):
        tower = member.decode('utf-8')
        stored_tower = rdb.get(tower)
        if stored_tower is None:
            continue
        df = pd.DataFrame(json.loads(stored_tower))
        found = df.loc[df['ID'] == cell_line_id]
        if len(found) != 0:
            df = df.drop(found.index)
            rdb.set(tower, json.dumps(df.to_dict('list')))

    # the cell line itself
    stored_cell_lines = rdb.get('cell_lines')
    if stored_cell_lines is not None:
        cell_lines = pd.DataFrame(json.loads(stored_cell_lines))
        found = cell_lines.loc[cell_lines['ID'] == cell_line_id]
        cell_lines = cell_lines.drop(found.index)
        rdb.set('cell_lines', json.dumps(cell_lines.to_dict('list')))

    return make_response(
        {
            'status': 'success',
            'info': 'Cell line has been removed from the DB. All associated positions have been cleared'
        }, 200)
def delete_user():
    """Delete the user named by ``email`` in the request JSON.

    The e-mail is removed from the ``users`` hash and from the
    ``not_activated_users`` set.

    Returns:
        ``{'status': 'success'}`` with code 200, or an error payload with
        code 500 when no e-mail was supplied (instead of failing inside the
        Redis client).
    """
    from main import get_db

    data = request.get_json() or {}
    email = data.get('email')
    if not email:
        return make_response({'status': 'error',
                              'error': '<p>Email is empty</p>'}, 500)

    rdb = get_db()
    rdb.hdel('users', email)
    rdb.srem('not_activated_users', email)
    return make_response({'status': 'success'}, 200)
def get_user_details():
    """Render the user-details page; admins also get the list of users.

    A user is an admin when ``current_user.email`` is listed in the
    ``APP_ADMINS`` config entry.  An unset entry means "no admins" rather
    than an error.
    """
    from main import get_db

    app_admins = current_app.config.get('APP_ADMINS') or ()
    if current_user.email not in app_admins:
        return render_template('user_details.html')

    rdb = get_db()
    # field names of the 'users' hash are the e-mail addresses
    users = [user.decode('utf-8') for user in (rdb.hgetall('users') or {})]
    return render_template('user_details.html', is_admin=True, users=users)
def get_translational_efficiency(project_id):
    """Render the translational-efficiency plot page of a project.

    Two msgpack tables are read from Redis: ``<project_id>_rpkm_rp`` and
    ``<project_id>_rpkm_rna``.  On GET only the sample list is shown.  On
    POST one series per selected sample is built with
    ``x = log2(rna)`` and ``y = log2(rna) / log2(rp)``, optionally limited
    to ``min_y <= y <= max_y``.  Infinite and missing values are dropped.

    ``min_y`` / ``max_y`` fall back to -100 / 100 when the form fields are
    absent or not valid integers.
    """
    from main import get_db

    rdb = get_db()
    rp_packed = rdb.get("{}_rpkm_rp".format(project_id))
    rna_packed = rdb.get("{}_rpkm_rna".format(project_id))
    if rp_packed is None or rna_packed is None:
        return render_template(
            "translational_efficiency.html",
            samples=[],
            error="No data for project: {}".format(project_id))

    rp_df = pd.read_msgpack(rp_packed)
    rna_df = pd.read_msgpack(rna_packed)
    # every column except the gene name is a sample
    list_of_samples = [col for col in rp_df.columns if col != 'gene_name']

    if request.method == "GET":
        return render_template("translational_efficiency.html",
                               samples=list_of_samples)

    selected_samples = request.form.getlist('selected_samples')
    if not selected_samples:
        return render_template("translational_efficiency.html",
                               samples=list_of_samples,
                               error="No samples selected")

    apply_filter = request.form.get('apply_filter') == "True"
    # type=int makes the form return the default on an unparsable value
    min_y = request.form.get('min_y', -100, type=int)
    max_y = request.form.get('max_y', 100, type=int)

    gene_names = rp_df['gene_name'].tolist()
    plot_series = []
    for sample in selected_samples:
        df = pd.DataFrame(columns=['gene_name', 'x', 'y'])
        df['gene_name'] = gene_names
        # plain lists: rows are matched by position, not by index label
        df['rpkm_rna'] = rna_df[sample].astype(float).tolist()
        df['rpkm_rp'] = rp_df[sample].astype(float).tolist()
        df['log2(rp)'] = np.log2(df['rpkm_rp'])
        df['log2(rna)'] = np.log2(df['rpkm_rna'])
        df['x'] = df['log2(rna)']
        df['y'] = df['log2(rna)'] / df['log2(rp)']

        if apply_filter:
            df = df.loc[(df['y'] >= min_y) & (df['y'] <= max_y)]

        # log2(0) gives -inf; such points cannot be plotted or serialised
        df = df.replace([np.inf, -np.inf], np.nan).dropna()

        plot_series.append({
            'name': sample,
            'data': df.to_dict('records'),
        })

    return render_template("translational_efficiency.html",
                           samples=list_of_samples,
                           selected_samples=selected_samples,
                           apply_filter=apply_filter,
                           min_y=min_y,
                           max_y=max_y,
                           plot_series=plot_series)
def get_projects():
    """Render the project overview page.

    Project ids are the members of the Redis set ``projects``.  For each id
    the JSON stored under ``project_info_<id>`` is attached when that key
    exists.
    """
    from main import get_db

    rdb = get_db()
    projects = [member.decode('utf-8') for member in rdb.smembers('projects')]

    project_info = {}
    for project_id in projects:
        raw_info = rdb.get("project_info_{}".format(project_id))
        if raw_info is None:
            continue
        project_info[project_id] = json.loads(raw_info.decode('utf-8'))

    return render_template('projects.html',
                           projects=projects,
                           project_info=project_info)
def get_by_id(self, email):
    """Load a user by id; for now the e-mail doubles as the id.

    Args:
        email: E-mail address used as field name in the ``users`` hash.

    Returns:
        A ``User`` built from the stored password value, or ``None`` when
        the ``users`` hash has no (non-empty) entry for ``email``.
    """
    from main import get_db

    rdb = get_db()
    password = rdb.hget('users', email) or ''
    if not password:
        return None
    # Ask Redis for membership instead of comparing the str e-mail with the
    # raw members of smembers(): sibling code decodes those members as bytes,
    # against which a str never matches, so every user looked activated.
    activated = not rdb.sismember('not_activated_users', email)
    return User(email, email, password, activated=activated)
def get_psite_dotplot(project_id):
    """Render the P/A/E-site dot plot page of a project.

    GET shows the selection form filled from the Redis sets
    ``aa_dotplot_<project_id>`` and ``contrasts_<project_id>``.  POST
    validates the selection and renders the series computed by
    ``get_plot_series``.
    """
    from main import get_db

    rdb = get_db()
    amino_acids = rdb.smembers('aa_dotplot_{}'.format(project_id)) or []
    amino_acids = [name.decode('utf-8') for name in amino_acids]
    contrasts = rdb.smembers('contrasts_{}'.format(project_id)) or []
    contrasts = [c.decode('utf-8') for c in contrasts]

    if request.method == 'GET':
        return render_template('psite_dotplot.html',
                               amino_acids=amino_acids,
                               contrasts=contrasts)

    # if POST
    selected = request.form.getlist('selected_contrasts')
    if len(selected) == 0:
        return render_template('psite_dotplot.html',
                               error='Please select contrasts',
                               contrasts=contrasts,
                               amino_acids=amino_acids)

    aa = request.form.get('amino_acid')
    if aa is None or aa == 'select':
        return render_template('psite_dotplot.html',
                               error='Please select amino acid',
                               contrasts=contrasts,
                               amino_acids=amino_acids)

    norm = request.form.get('norm', 'tpm')
    genes_highlight = request.form.get('genes_highlight')
    # type=float makes the form return the default on an unparsable value
    fc = request.form.get('fc_highlight', 0.0, type=float)

    res = get_plot_series(project_id, aa, selected, fc, genes_highlight, norm)
    if not isinstance(res, dict):
        # get_plot_series answers with an already rendered error page when
        # site data is missing; pass it on instead of indexing into it
        return res

    return render_template('psite_dotplot.html',
                           amino_acids=amino_acids,
                           contrasts=contrasts,
                           selected_aa=aa,
                           selected_contrasts=selected,
                           p_series=res['p_series'],
                           a_series=res['a_series'],
                           e_series=res['e_series'],
                           norm=norm,
                           fc_highlight=fc,
                           genes_highlight=genes_highlight,
                           all_genes=res['all_genes'])
def search_projects():
    """Return the project ids that contain the request body as a substring.

    Matching is case-insensitive.  The ids are the members of the Redis set
    ``projects``.
    """
    from main import get_db

    to_search = request.get_data()
    if to_search is None:
        return make_response({'status': 'error', 'error': 'No input received'})
    needle = to_search.decode('utf-8').lower()

    rdb = get_db()
    project_ids = [raw.decode('utf-8') for raw in rdb.smembers('projects')]
    matching = [pid for pid in project_ids if needle in pid.lower()]

    return make_response({
        'status': 'success',
        'matching_projects': matching
    }, 200)
def get_cell_line_info():
    """List every position that holds or is requested for a cell line.

    The request body is JSON with a ``cell_line_id``.  The result is a flat
    list of ``{'tower', 'Rack', 'pos', 'status'}`` records: first the rows
    of the ``to_approve`` table whose ``cell_line`` or ``prev_cell_line``
    matches, then the matching rows of every tower table.  Tower rows
    without a stored status are reported as ``'confirmed'``.

    Returns:
        ``{'status': 'success', 'results': [...]}``, or an error payload
        when the request body is empty.  Both use HTTP code 200.
    """
    from main import get_db

    rdb = get_db()
    payload = request.get_data()
    # get_data() yields empty bytes, not None, for an empty body
    if not payload:
        return make_response({
            'status': 'error',
            'error': 'no data received'
        }, 200)
    cell_line_id = json.loads(payload.decode('utf-8')).get('cell_line_id')

    stored_requests = rdb.get('to_approve')
    if stored_requests is not None:
        to_approve = pd.DataFrame(json.loads(stored_requests))
    else:
        to_approve = pd.DataFrame(columns=[
            'tower', 'pos', 'Rack', 'x', 'y', 'Responsible person', 'Date',
            'Comments', 'cell_line', 'prev_cell_line', 'prev_responsible',
            'prev_comments', 'prev_date', 'status'
        ])

    location_columns = ['tower', 'Rack', 'pos', 'status']
    requested = to_approve.loc[
        (to_approve['cell_line'] == cell_line_id)
        | (to_approve['prev_cell_line'] == cell_line_id)]
    results = requested[location_columns].to_dict('records')

    for member in rdb.smembers('towers'):
        tower = member.decode('utf-8')
        stored_tower = rdb.get(tower)
        if stored_tower is None:
            continue
        df = pd.DataFrame(json.loads(stored_tower))
        # copy, so the column assignments below do not hit a view of df
        stored_here = df.loc[df['ID'] == cell_line_id].copy()
        if len(stored_here) == 0:
            continue
        stored_here['tower'] = tower
        if 'status' not in stored_here.columns:
            stored_here['status'] = 'confirmed'
        # extend, not append: results has to stay a flat list of records
        results.extend(stored_here[location_columns].to_dict('records'))

    return make_response({'status': 'success', 'results': results}, 200)
def create_user():
    """Create a not-yet-activated user from the request JSON.

    Expects ``email`` and ``password``.  The password hash is stored in the
    ``users`` hash and the e-mail is added to ``not_activated_users``.

    Returns:
        ``{'status': 'success'}`` with code 200, or an error payload with
        code 500 when a field is empty or the user already exists.  Error
        payloads carry the message under both ``error`` and
        ``error_message``, because the original code used one key for
        validation errors and the other for "already exists".
    """
    from html import escape

    from main import get_db

    def error_response(message):
        return make_response({'status': 'error',
                              'error': message,
                              'error_message': message}, 500)

    data = request.get_json() or {}
    email = data.get('email')
    password = data.get('password')

    error_message = ""
    if not email:
        error_message += "<p>Email is empty</p>"
    if not password:
        error_message += "<p>Password is empty</p>"
    if error_message:
        return error_response(error_message)

    rdb = get_db()
    if rdb.hexists('users', email):
        # the e-mail comes from the client and ends up in HTML: escape it
        return error_response(
            '<p>User <b>{}</b> already exists</p>'.format(escape(str(email))))

    # hset with one field does what the deprecated hmset({field: value}) did
    rdb.hset('users', email, sha256_crypt.hash(password))
    rdb.sadd('not_activated_users', email)

    # TODO: notify the user by e-mail about the new account and the one-time
    # password; an earlier SMTP attempt was rejected by the mail provider.

    return make_response({'status': 'success'}, 200)
def check_queue(ip, port):
    """Close finished jobs, notify their owners and start queued jobs.

    Jobs with status ``'R'`` whose recorded pid no longer exists are marked
    ``'F'`` when ``pathway.pdb`` is present in the job folder (the file is
    attached to the mail) and ``'E'`` otherwise.  Queued jobs (``'Q'``) are
    then spawned through ``client`` until ``NUMJOBS_CONCUR`` jobs are
    running.  One mail is sent per finished job; a failed send is logged and
    does not stop the remaining ones.

    Args:
        ip: Address passed on to ``client`` when spawning a job.
        port: Port passed on to ``client`` when spawning a job.
    """
    app = Flask(__name__)
    app.config['SERVER_NAME'] = conf.SERVER_NAME
    app.add_url_rule('/pathfinder/download/<uuid>', 'download', download)

    with app.app_context():
        db = get_db()
        cur = db.cursor()

        cur.execute('select * from job where status == ?', ('R',))
        numjobs = 0
        messages = []
        for uuid, email, _date, _status in cur.fetchall():
            job_folder = get_job_folder(uuid)
            # finished?
            with open(os.path.join(job_folder, 'run.pid'), 'r') as pid_file:
                pid = int(pid_file.read())
            if pid_exists(pid):
                numjobs += 1
                continue

            msg = Message('[PathFinder] Your job is finished.',
                          sender='*****@*****.**',
                          recipients=[email, '*****@*****.**'])
            pathway = os.path.join(job_folder, 'pathway.pdb')
            if os.path.exists(pathway):
                cur.execute('update job set status = ? where uuid = ?',
                            ('F', uuid))
                msg.body = render_template('email.tpl', uuid=uuid,
                                           has_error=False)
                with open(pathway) as pdb_file:
                    msg.attach('pathway.pdb', 'text/pdb', pdb_file.read())
            else:
                cur.execute('update job set status = ? where uuid = ?',
                            ('E', uuid))
                msg.body = render_template('email.tpl', uuid=uuid,
                                           has_error=True)
            # keep every message: a single variable kept only the last one
            messages.append(msg)

        if numjobs < NUMJOBS_CONCUR:
            cur.execute('select * from job where status == ?', ('Q',))
            for uuid, email, _date, _status in cur.fetchall():
                newpid = client(ip, port, "SPAWN:%s" % uuid)
                pid_path = os.path.join(get_job_folder(uuid), 'run.pid')
                with open(pid_path, 'w') as pid_file:
                    pid_file.write(newpid)
                cur.execute('update job set status = ? where uuid = ?',
                            ('R', uuid))
                numjobs += 1
                if numjobs >= NUMJOBS_CONCUR:
                    break

        db.commit()
        db.close()

        if messages:
            mail = Mail(app)
            for msg in messages:
                try:
                    mail.send(msg)
                except Exception:
                    # notification is best effort; the job state is saved
                    app.logger.exception('Could not send job notification')
def get_user(self, email, password):
    """Return the user for ``email`` when ``password`` is correct.

    The password is verified with ``sha256_crypt`` against the hash stored
    in the ``users`` hash.  ``None`` is returned for an unknown e-mail or a
    wrong password.  The user counts as activated unless the e-mail is a
    member of ``not_activated_users``.
    """
    from main import get_db

    rdb = get_db()
    stored_hash = (rdb.hget('users', email) or b'').decode('utf-8')
    pending_activation = [
        member.decode('utf-8')
        for member in rdb.smembers('not_activated_users')
    ]

    if stored_hash == '' or not sha256_crypt.verify(password, stored_hash):
        return None

    # for now we use email as an id
    return User(email,
                email,
                password,
                activated=email not in pending_activation)
def get_periodicity_heatmap(project_id):
    """Render the periodicity heatmaps of every sample of a project.

    The msgpack table stored under ``<project_id>_periodicity_heatmap`` is
    split per sample into four record lists: distance from start / stop,
    each for the 5' and the 3' read end.

    Returns a plain message string when nothing is stored for the project,
    otherwise the rendered ``periodicity_heatmap.html`` template.
    """
    from main import get_db

    rdb = get_db()
    result = rdb.get('{}_periodicity_heatmap'.format(project_id))
    if not result:
        return "No data for dataset {} found".format(project_id)

    full_df = pd.read_msgpack(result)
    # stored columns: ['length', 'dist', 'count', 'region', 'end', 'sample']
    full_df.columns = ['y', 'x', 'value', 'region', 'end', 'sample']
    # javascript doesn't like dots.  DataFrame.replace({'sample': '.'}, '_')
    # only touched cells that were exactly '.', so replace inside the names.
    full_df['sample'] = full_df['sample'].str.replace('.', '_', regex=False)
    # javascript doesn't like single quotes
    full_df['end'] = full_df['end'].str.replace("' ", 'p ')
    full_df = full_df.sort_values(by=['x', 'y'])

    samples = list(full_df['sample'].unique())

    # row masks that do not depend on the sample
    from_start = full_df['region'] == "Distance from start (nt)"
    from_stop = full_df['region'] == "Distance from stop (nt)"
    end_5p = full_df['end'] == "5p end"
    end_3p = full_df['end'] == "3p end"

    start_3p_plots = {}
    start_5p_plots = {}
    stop_3p_plots = {}
    stop_5p_plots = {}
    for sample in samples:
        of_sample = full_df['sample'] == sample
        start_5p_plots[sample] = full_df.loc[
            from_start & end_5p & of_sample].to_dict('records')
        start_3p_plots[sample] = full_df.loc[
            from_start & end_3p & of_sample].to_dict('records')
        stop_5p_plots[sample] = full_df.loc[
            from_stop & end_5p & of_sample].to_dict('records')
        stop_3p_plots[sample] = full_df.loc[
            from_stop & end_3p & of_sample].to_dict('records')

    return render_template("periodicity_heatmap.html",
                           project_id=project_id,
                           start_5p_plots=start_5p_plots,
                           stop_5p_plots=stop_5p_plots,
                           start_3p_plots=start_3p_plots,
                           stop_3p_plots=stop_3p_plots,
                           samples=samples)
def get_teams():
    """Write candidate duplicate team names to the file ``teams``.

    Collects ``(sport, team)`` pairs from the winner and loser of every
    document, where the sport is the first word of ``description`` and the
    team name has gone through ``sub_common_names``.  Pairs that look like
    bet descriptions rather than teams are discarded.  Every pair whose team
    name is a proper substring of another team name is written as one line,
    e.g. 'Tottenham' next to 'Tottenham Hotspur', so that the two can be
    consolidated.
    """
    db = get_db()
    d = set()
    for side in ('winner', 'loser'):
        result = db.find({}, {"_id": 0, side + '.team': 1, 'description': 1})
        for i in result:
            d.add((i['description'].split(' ')[0],
                   sub_common_names(i[side]['team'])))

    # fragments that mark a name as a bet description, not a team
    disclude = ['v. ', 'vs. ', ' v ', '@', ' or ', ' OR ', ' AND ',
                'or Fewer', 'or Lower', 'or More', '+', 'Any Other', ',',
                '1st ', '2nd ', '3rd', '1-', '(USA)', ' - ', 'Touchdown',
                'Strikeout']
    d = {
        item for item in d
        if item[1] not in ('Yes', 'No')
        and not any(fragment in item[1] for fragment in disclude)
    }

    # the with-block closes the file even when a write fails
    with open("teams", "w") as f:
        for sport, team in d:
            for s2, t2 in d:
                if team in t2 and not team == t2:
                    j = sport + '|' + team + '-------' + s2 + '|' + t2
                    # Drop non-ASCII characters but go back to text: on
                    # Python 3 formatting the bytes object wrote "b'...'".
                    line = j.encode('ascii', 'ignore').decode('ascii')
                    f.write("%s\n" % line)
def _site_series(site_df, contrast, sample, control, fc, top_genes,
                 search_genes, genes_highlight):
    """Build the chart series of one site table for one contrast; return (series, genes)."""
    df = site_df[['gene', 'Aa', 'codon', sample, control]].dropna()
    genes = df['gene'].unique().tolist()

    df = df.groupby(['gene']).agg({
        sample: 'sum',
        control: 'sum'
    }).reset_index()
    df['x'] = np.log2(df[sample]).round(3)
    df['y'] = np.log2(df[control]).round(3)
    df[sample] = df[sample].round(3)
    df[control] = df[control].round(3)
    # log2 fold change; only used to split the rows, dropped from the output
    df[contrast] = np.log2(df[sample] / df[control])

    searched = df['gene'].isin(search_genes)
    highlighted = df['gene'].isin(top_genes)
    fold_change = df[contrast].abs()

    def rows(mask):
        return df.loc[mask].drop(contrast, axis=1)

    search_df = rows(searched)
    above_df = rows((fold_change >= fc) & ~searched)
    top_df = rows(highlighted & ~searched)
    rest_df = rows((fold_change < fc) & ~highlighted & ~searched)

    series = [{
        'name': contrast,
        'data': rest_df.to_dict('records'),
        'turboThreshold': len(rest_df)
    }]
    if len(above_df) > 0:
        series.append({
            'name': 'Above threshold',
            'data': above_df.to_dict('records'),
            'turboThreshold': len(above_df),
            'color': 'rgba(223, 83, 83, .5)'
        })
    if len(top_df) > 0:
        series.append({
            'name': genes_highlight,
            'data': top_df.to_dict('records'),
            'turboThreshold': len(top_df),
            'color': '#00cc99'
        })
    if len(search_df) > 0:
        series.append({
            'name': 'Selected genes',
            'data': search_df.to_dict('records'),
            'color': '#ffcc00',
            'marker': {
                'radius': 5
            }
        })
    return series, genes


def get_plot_series(project_id,
                    aa,
                    selected_contrasts,
                    fc,
                    genes_highlight,
                    norm,
                    search_genes=None):
    """Compute the P-, A- and E-site dot plot series for an amino acid.

    Args:
        project_id: Project whose ``*_dotplot_<project_id>_<aa>`` tables are
            read from Redis.
        aa: Amino acid the tables are stored for.
        selected_contrasts: Contrast names of the form
            ``<sample>__vs__<control>``.
        fc: Threshold on ``abs(log2(sample / control))``; genes at or above
            it form the 'Above threshold' series.
        genes_highlight: ``'do_not_highlight'``, ``None`` or ``'top<N>'``.
        norm: Prefix of the value columns (``<norm>_<sample>``).
        search_genes: Genes shown as their own 'Selected genes' series.

    Returns:
        A dict with ``p_series``, ``a_series`` and ``e_series`` (each
        mapping a contrast to its list of series) and ``all_genes`` (a set).
        When the table of one site is missing, an already rendered
        ``psite_dotplot.html`` error page is returned instead of the dict.
    """
    from main import get_db

    if search_genes is None:
        search_genes = []

    rdb = get_db()
    amino_acids = rdb.smembers('aa_dotplot_{}'.format(project_id)) or []
    # loop variable deliberately not called "aa": that is a parameter
    amino_acids = [name.decode('utf-8') for name in amino_acids]
    contrasts = rdb.smembers('contrasts_{}'.format(project_id)) or []
    contrasts = [name.decode('utf-8') for name in contrasts]

    raw_tables = []
    for letter, prefix in (('P', 'psite'), ('A', 'asite'), ('E', 'esite')):
        raw = rdb.get('{}_dotplot_{}_{}'.format(prefix, project_id, aa))
        if raw is None:
            return render_template(
                'psite_dotplot.html',
                error='No {}-site data for {}'.format(letter, aa),
                contrasts=contrasts,
                amino_acids=amino_acids)
        raw_tables.append(raw)
    psite, asite, esite = [
        pd.DataFrame(json.loads(raw.decode('utf-8'))) for raw in raw_tables
    ]

    top_genes = []
    if genes_highlight is not None and genes_highlight != 'do_not_highlight':
        n = int(genes_highlight.replace('top', ''))
        top_df = rdb.get('{}_top1000'.format(aa))
        if top_df is not None:
            top_df = json.loads(top_df)
            top_genes = pd.DataFrame(top_df).loc[:n]['gene'].tolist()

    p_series = {}
    a_series = {}
    e_series = {}
    all_genes = set()
    sites = ((psite, p_series), (asite, a_series), (esite, e_series))
    for c in selected_contrasts:
        sample, control = c.split('__vs__')
        sample = '{}_{}'.format(norm, sample)
        control = '{}_{}'.format(norm, control)
        for site_df, series_by_contrast in sites:
            series_by_contrast[c], genes = _site_series(
                site_df, c, sample, control, fc, top_genes, search_genes,
                genes_highlight)
            all_genes.update(genes)

    return {
        'p_series': p_series,
        'a_series': a_series,
        'e_series': e_series,
        'all_genes': all_genes
    }
__author__ = 'thales'

import main


def insert_into_database(collection, data):
    """Insert the document ``data`` into ``collection``."""
    collection.insert(data)


if __name__ == '__main__':
    # Copy every Toyota document of the autos collection into a fresh
    # ``toyotaAutos`` collection.  The print calls use the single-argument
    # form, which is valid on both Python 2 and Python 3.
    db = main.get_db()

    # Start with clean collection
    toyota_autos_collection = db.toyotaAutos
    if toyota_autos_collection is not None:
        print('dropping collection: {}'.format(toyota_autos_collection))
        toyota_autos_collection.drop()

    # Show number in collection before insert, should be 0
    num_toyota_autos = toyota_autos_collection.find().count()
    print("num_toyota_autos before = " + str(num_toyota_autos))

    autos = main.get_autos_collection().find({"manufacturer_label": "Toyota"})

    # Insert query result into new collection
    for a in autos:
        insert_into_database(toyota_autos_collection, a)

    # Show number in collection after insert
    num_toyota_autos = toyota_autos_collection.find().count()
    print("num_toyota_autos after = " + str(num_toyota_autos))
def approve_decline():
    """Approve, decline or cancel one request of the ``to_approve`` table.

    The request body is JSON with ``action`` (``'approve'``, ``'decline'``
    or ``'cancel'``) plus ``tower``, ``Rack`` and ``pos`` identifying the
    request.

    * approve: when the position is empty in the tower table and the request
      names no previous cell line, the requested cell line is added to the
      tower; when the position is occupied, the occupant is removed.  The
      request is marked ``'approved'``.
    * decline: the request is marked ``'declined'``.
    * cancel: the request is deleted; the ``to_approve`` key is removed when
      no request is left.

    Returns:
        A success or error payload.  All of them use HTTP code 200.
    """
    from main import get_db

    def reply(status, key, text):
        return make_response({'status': status, key: text}, 200)

    rdb = get_db()
    payload = request.get_data()
    # get_data() yields empty bytes, not None, for an empty body
    if not payload:
        return reply('error', 'error', 'no data received')
    data = json.loads(payload.decode('utf-8'))
    action = data.get('action')

    stored_requests = rdb.get('to_approve')
    if stored_requests is None:
        return reply('error', 'error', 'No records in the database')
    requests = pd.DataFrame(json.loads(stored_requests))

    req = requests.loc[(requests['tower'] == data.get('tower'))
                       & (requests['Rack'] == data.get('Rack'))
                       & (requests['pos'] == data.get('pos'))]
    if len(req) == 0:
        return reply('error', 'error', 'Cant find a record in the database')

    if action == 'approve':
        tower_columns = [
            'ID', 'Rack', 'Date', 'Responsible person', 'Comments', 'pos',
            'x', 'y'
        ]
        tower_data = rdb.get(req['tower'].tolist()[0])
        if tower_data is None:
            tower_df = pd.DataFrame(columns=tower_columns)
        else:
            tower_df = pd.DataFrame(json.loads(tower_data))

        pos = tower_df.loc[
            (tower_df['Rack'].astype(str) == str(data.get('Rack', '0')))
            & (tower_df['pos'] == data.get('pos'))]
        if len(pos) == 0:
            prev_cell_line = req['prev_cell_line'].tolist()[0]
            # a missing value comes back from JSON as NaN, which is truthy
            if prev_cell_line and not pd.isnull(prev_cell_line):
                # requested from a position
                return reply(
                    'error', 'error',
                    'You cant request from a current positon, because it is empty'
                )
            # added to a new position
            to_append = req[[
                'cell_line', 'Rack', 'Date', 'Responsible person',
                'Comments', 'pos', 'x', 'y'
            ]].rename(columns={'cell_line': 'ID'})
            # DataFrame.append was removed in pandas 2.0; concat replaces it
            tower_df = pd.concat([tower_df, to_append], ignore_index=True)
        else:
            # remove from pos
            tower_df = tower_df.drop(pos.index)

        rdb.set(data.get('tower'), json.dumps(tower_df.to_dict('list')))
        # change status
        requests.loc[req.index, 'status'] = 'approved'
        rdb.set('to_approve', json.dumps(requests.to_dict('list')))
        return reply('success', 'info', 'Request has been approved')

    if action == 'decline':
        requests.loc[req.index, 'status'] = 'declined'
        rdb.set('to_approve', json.dumps(requests.to_dict('list')))
        # TODO: give the tube back to the cell line ('tubes_available') once
        # that counter exists.  The former load-and-store of 'cell_lines'
        # changed nothing and failed when the key was missing.
        return reply('success', 'info', 'Request has been declined')

    if action == 'cancel':
        # TODO: see 'decline' regarding 'tubes_available'
        # remove from requests
        requests = requests.drop(req.index)
        if len(requests) == 0:
            rdb.delete('to_approve')
        else:
            rdb.set('to_approve', json.dumps(requests.to_dict('list')))
        return reply('success', 'info', 'Request has been cancelled')

    # this response used to be built but not returned, so an unknown action
    # was reported as a success
    return reply('error', 'error', 'Unknown action "{}"'.format(action))
def search():
    """Search cell lines by ID or name and return the hits as an HTML table.

    The raw request body is the search text. It is matched
    case-insensitively, as a plain substring, against the ``ID`` and
    ``Cell line`` columns of the stored ``cell_lines`` table. Matching cell
    lines are then looked up in every tower table (reported with status
    ``confirmed``) and in the ``to_approve`` requests (reported with status
    ``pending``).

    Returns:
        A response built with ``make_response`` whose payload holds
        ``status`` and, on success, ``html_result`` (the table markup).
    """
    from html import escape
    from main import get_db

    badge_classes = {
        'pending': 'badge badge-warning',
        'approved': 'badge badge-success',
        'confirmed': 'badge badge-success',
        'declined': 'badge badge-danger',
    }

    rdb = get_db()
    to_search = request.get_data()
    if to_search is None:
        return make_response({'status': 'error', 'error': 'No input received'})
    to_search = to_search.decode('utf-8').upper()

    to_approve = rdb.get('to_approve')
    if to_approve is not None:
        to_approve = pd.DataFrame(json.loads(to_approve))
    else:
        # Use the full request schema so that the column selection further
        # down also works when nothing is stored yet.
        to_approve = pd.DataFrame(columns=[
            'tower', 'pos', 'Rack', 'x', 'y', 'Responsible person', 'Date',
            'Comments', 'cell_line', 'prev_cell_line', 'prev_responsible',
            'prev_comments', 'prev_date', 'status'
        ])

    cell_lines = rdb.get('cell_lines')
    if cell_lines is None:
        return make_response({
            'status': 'error',
            'error': 'No cell lines in the database'
        })
    cell_lines = pd.DataFrame(json.loads(cell_lines)).fillna('')

    # search by ID or name; regex=False keeps characters such as "(" or "+"
    # in the user's text from being interpreted as a pattern
    found = cell_lines.loc[
        cell_lines['ID'].str.upper().str.contains(to_search, regex=False)
        | cell_lines['Cell line'].str.upper().str.contains(to_search,
                                                           regex=False)]
    cell_line_ids = found['ID'].tolist()

    frames = []
    towers = [tower.decode('utf-8') for tower in rdb.smembers('towers')]
    for tower in towers:
        data = rdb.get(tower)
        if data is None:
            continue
        df = pd.merge(pd.DataFrame(json.loads(data)), found, on='ID')
        if len(df) == 0:
            continue
        df['tower'] = tower
        df['status'] = 'confirmed'
        frames.append(df)

    pending = to_approve.loc[
        to_approve['cell_line'].isin(cell_line_ids)
        | to_approve['prev_cell_line'].isin(cell_line_ids)].copy()
    # requests without a new cell line are shown under the previous one
    blank = pending['cell_line'] == ''
    pending.loc[blank, 'cell_line'] = pending.loc[blank, 'prev_cell_line']
    pending = pd.merge(pending, cell_lines, left_on='cell_line',
                       right_on='ID')
    pending['status'] = 'pending'
    frames.append(pending)

    results_df = pd.concat(frames, ignore_index=True).fillna('')
    results_df = results_df[[
        'ID', 'Cell line', 'tower', 'Rack', 'pos', 'Responsible person',
        'Date', 'status'
    ]].copy()
    results_df['Rack'] = results_df['Rack'].astype(int)
    # pending rows were added last, so they win over confirmed ones
    results_df = results_df.drop_duplicates(['tower', 'Rack', 'pos'],
                                            keep='last')

    parts = [
        '<table class="table table-hover table-sm" id="table_search"><tr>'
    ]
    for column in results_df.columns:
        parts.append('<th>{}</th>'.format(column))
    parts.append('<th></th><th></th></tr>')

    for _, row in results_df.iterrows():
        # every value comes from the database, so escape it before it is
        # placed into markup
        row_id = escape('{}_{}_{}'.format(row['tower'], row['Rack'],
                                          row['pos']))
        if row['status'] == 'pending':
            parts.append('<tr class="table-warning" id="{}">'.format(row_id))
        else:
            parts.append('<tr id="{}">'.format(row_id))

        for column in results_df:
            css_class = column.replace(' ', '_')
            value = escape(str(row[column]))
            if column == 'status':
                parts.append(
                    '<td class="{}"><span class="{}">{}</span></td>'.format(
                        css_class, badge_classes.get(row['status'], ''),
                        value))
            else:
                parts.append('<td class="{}">{}</td>'.format(
                    css_class, value))

        parts.append(
            '<td><button type="button" class="btn btn-sm btn-outline-primary" id="edit_search">Edit</button></td>'
        )
        if row['status'] != 'pending':
            parts.append(
                '<td><button type="button" class="btn btn-sm btn-outline-secondary request_search" id="request_search">Request</button></td>'
            )
        else:
            parts.append('<td></td>')
        parts.append('</tr>')
    parts.append('</table>')

    return make_response({
        'status': 'success',
        'html_result': ''.join(parts)
    }, 200)
def get_heatmap(project_id):
    """Render the clustered expression heatmap page for a project.

    On GET the page is rendered with the list of available samples only.
    On POST the form selects one or two sample groups and a gene filter:

    * ``filter1 == 'list_of_genes'`` - only the genes typed into
      ``list_of_genes`` are plotted, for the samples of the first group;
    * otherwise genes are ordered by their variance across the first group
      (ascending for ``'least'``, descending for anything else) and the top
      ``number_of_genes1`` are kept (0 keeps all);
    * with a second group, genes are additionally ranked by the summed
      pairwise variance between both groups, and the genes of the first
      selection that also appear in that ranking are removed.

    Args:
        project_id: identifier used to build the ``cpm_coding_<id>`` /
            ``cpm_non_coding_<id>`` database keys.

    Returns:
        The rendered ``heatmap.html`` template.
    """
    from main import get_db

    def _clustered_plot(header, frame):
        """Cluster the rows of *frame* and return the heatmap data as JSON."""
        cluster = Cluster()
        cluster.read_data(rows=[header] + frame.values.tolist(), header=True)
        cluster.cluster_data()
        return json.dumps(Dendrogram(cluster).create_cluster_heatmap())

    rdb = get_db()
    coding_key = 'cpm_coding_{}'.format(project_id)
    if not rdb.exists(coding_key):
        return render_template(
            "heatmap.html",
            no_data=True,
            error="No data for the project {}".format(project_id))

    # NOTE(review): pd.read_msgpack was removed in pandas 1.0; this code
    # therefore needs an older pandas - confirm the pinned version.
    df = pd.read_msgpack(rdb.get(coding_key))
    samples = sorted(list(df.columns))
    samples.remove('gene_name')

    if request.method == "GET":
        return render_template("heatmap.html",
                               samples=samples,
                               first_group=[],
                               second_group=[])

    first_group = request.form.getlist('first_group')
    if not first_group:
        return render_template("heatmap.html",
                               samples=samples,
                               error="No samples selected")

    second_group = request.form.getlist('second_group')
    filter1 = request.form.get('filter1')
    filter2 = request.form.get('filter2')
    # a missing or blank field means "no limit" instead of raising
    number_of_genes1 = int(request.form.get('number_of_genes1') or 0)
    number_of_genes2 = int(request.form.get('number_of_genes2') or 0)
    list_of_genes = request.form.get('list_of_genes', '').split()
    include_non_coding = request.form.get('include_non_coding', "") == "True"

    # the coding table is already loaded; only swap it when asked to
    if include_non_coding:
        non_coding = rdb.get('cpm_non_coding_{}'.format(project_id))
        if non_coding is None:
            return render_template(
                "heatmap.html",
                samples=samples,
                error="No data for the project {}".format(project_id))
        df = pd.read_msgpack(non_coding)

    # values echoed back to the form by every result page
    form_state = {
        'samples': samples,
        'first_group': first_group,
        'list_of_genes': list_of_genes,
        'number_of_genes1': number_of_genes1,
        'number_of_genes2': number_of_genes2,
        'filter1': filter1,
        'filter2': filter2,
        'include_non_coding': include_non_coding,
    }
    first_columns = ['gene_name'] + first_group

    if filter1 == 'list_of_genes':
        # Select the same columns the header announces and keep the genes
        # in the order the user listed them.
        hits = [
            df.loc[df['gene_name'] == gene, first_columns]
            for gene in list_of_genes
        ]
        if not any(len(hit) for hit in hits):
            return render_template(
                "heatmap.html",
                error="None of the listed genes were found",
                **form_state)
        df1 = pd.concat(hits, ignore_index=True)
        return render_template("heatmap.html",
                               plot_data=_clustered_plot(first_columns, df1),
                               csv_data=df1.to_csv(sep=",",
                                                   header=True,
                                                   index=False),
                               **form_state)

    # select samples (copy: the rounding below must not write to a view)
    df1 = df[first_columns].copy()
    for sample in first_group:
        df1[sample] = df1[sample].round(2)

    # sort by variance across the first group
    variance = df1[first_group].var(axis=1)
    df1 = df1.reindex(
        variance.sort_values(ascending=(filter1 == 'least')).index)

    # select top genes (or all if number_of_genes is 0)
    if number_of_genes1 != 0:
        df1 = df1[:number_of_genes1]

    if not second_group:
        return render_template("heatmap.html",
                               plot_data=_clustered_plot(first_columns, df1),
                               csv_data=df1.to_csv(sep=",",
                                                   header=True,
                                                   index=False),
                               **form_state)

    # two groups: rank genes by the variance of every cross-group pair
    all_columns = ['gene_name'] + first_group + second_group
    df2 = df[all_columns]
    pair_variance = None
    for sample1 in first_group:
        for sample2 in second_group:
            current = df[[sample1, sample2]].var(axis=1)
            pair_variance = (current if pair_variance is None else
                             pair_variance + current)
    # scaled by the size of the second group; the constant does not affect
    # the ordering used below
    pair_variance = pair_variance / len(second_group)

    ranked = df2.reindex(
        pair_variance.sort_values(ascending=(filter2 == 'least')).index)
    if number_of_genes2 != 0:
        ranked = ranked[:number_of_genes2]

    first_genes = set(df1['gene_name'].tolist())
    common_genes = first_genes & set(ranked['gene_name'].tolist())
    our_genes = first_genes - common_genes
    if len(our_genes) == 0:
        error = "No common genes found between 2 groups. Try to increase the number of genes"
        return render_template("heatmap.html",
                               second_group=second_group,
                               error=error,
                               **form_state)

    final_df = df2.loc[df2['gene_name'].isin(our_genes)].copy()
    for sample in first_group + second_group:
        final_df[sample] = final_df[sample].round(2)

    return render_template("heatmap.html",
                           second_group=second_group,
                           plot_data=_clustered_plot(all_columns, final_df),
                           csv_data=final_df.to_csv(sep=",",
                                                    header=True,
                                                    index=False),
                           common_genes=len(final_df),
                           **form_state)
def get_psite_plot(project_id):
    """Render the A/P/E-site occupancy heatmap for the selected contrasts.

    On GET only the list of contrasts is rendered. On POST the stored
    ``psites_<id>``, ``asites_<id>`` and ``esites_<id>`` tables are loaded,
    rows whose ``aa`` is ``'Stp'`` or ``'Str'`` are dropped, and for every
    selected contrast ``s1__vs__s2`` the relative change
    ``(norm_s1 - norm_s2) / norm_s2`` is computed per codon or, unless the
    form field ``group_by_codon`` equals ``'codon'``, per amino acid.

    Args:
        project_id: identifier used to build the database keys.

    Returns:
        The rendered ``psite_plot.html`` template.
    """
    from main import get_db

    site_order = ('A', 'P', 'E')  # plot rows, top to bottom, per contrast
    atg_labels = {'ATG_M': 'Met', 'ATG_S': 'Str'}

    rdb = get_db()
    contrasts = rdb.smembers('contrasts_{}'.format(project_id))
    contrasts = sorted([c.decode('utf-8') for c in contrasts])
    if request.method == 'GET':
        return render_template("psite_plot.html", contrasts=contrasts)

    raw = {
        'P': rdb.get('psites_{}'.format(project_id)),
        'A': rdb.get('asites_{}'.format(project_id)),
        'E': rdb.get('esites_{}'.format(project_id)),
    }
    # all three tables are required below, so a single missing one is
    # already an error (the previous check only fired when all were absent)
    if any(raw[site] is None for site in site_order):
        return render_template(
            'psite_plot.html',
            error='No data for project {} found'.format(project_id),
            contrasts=contrasts)

    frames = {}
    for site in site_order:
        frame = pd.DataFrame(json.loads(raw[site]))
        # getting rid of stop and start codons; copy because columns are
        # added to these frames later
        frames[site] = frame.loc[~frame['aa'].isin(['Stp', 'Str'])].copy()

    # check if any contrasts selected
    selected = request.form.getlist('selected_contrasts')
    if len(selected) == 0:
        return render_template('psite_plot.html',
                               error='Please select contrasts',
                               contrasts=contrasts)

    # normalization
    norm = request.form.get('normalization', 'tpm')

    # group by codon or by amino acid
    group_by_aa = request.form.get('group_by_codon') != 'codon'
    if group_by_aa:
        for site in site_order:
            frames[site] = frames[site].groupby('aa').sum().reset_index()

    # get x categories for highcharts
    if group_by_aa:
        x_categories = frames['P']['aa'].unique().tolist()
    else:
        x_categories = [
            'GCA', 'GCC', 'GCG', 'GCT', '',
            'AGA', 'CGC', 'CGA', 'CGG', 'CGT', 'AGG', '',
            'AAC', 'AAT', '',
            'GAC', 'GAT', '',
            'TGC', 'TGT', '',
            'CAA', 'CAG', '',
            'GAA', 'GAG', '',
            'GGA', 'GGC', 'GGG', 'GGT', '',
            'CAC', 'CAT', '',
            'ATA', 'ATC', 'ATT', '',
            'CTA', 'CTC', 'CTG', 'CTT', 'TTA', 'TTG', '',
            'AAA', 'AAG', '',
            'ATG_M', '',  # 'ATG_S', '',  # methionine & start codon
            'TTC', 'TTT', '',
            'CCA', 'CCC', 'CCG', 'CCT', '',
            'AGC', 'AGT', 'TCA', 'TCC', 'TCG', 'TCT', '',
            # 'TAA', 'TAG', 'TGA', '',  # skip Stop codons
            'ACA', 'ACC', 'ACG', 'ACT', '',
            'TGG', '',
            'TAC', 'TAT', '',
            'GTA', 'GTC', 'GTG', 'GTT'
        ]

    max_fc = None
    min_fc = None
    for contrast in selected:
        s1, s2 = contrast.split('__vs__')
        norm_s1 = '{}_{}'.format(norm, s1)
        norm_s2 = '{}_{}'.format(norm, s2)
        cols = ['aa', s1, s2, norm_s1, norm_s2]
        if not group_by_aa:
            cols = cols + ['codon']
        cols = cols + [contrast]

        for site in site_order:
            frame = frames[site]
            # calculating fc as (sample - control) / control
            frame[contrast] = (frame[norm_s1] - frame[norm_s2]) / frame[norm_s2]
            # round values
            frame[cols] = frame[cols].round(3)

        lows = [frames[site][contrast].min() for site in ('P', 'A', 'E')]
        highs = [frames[site][contrast].max() for site in ('P', 'A', 'E')]
        min_fc = min(lows) if min_fc is None else min(min_fc, *lows)
        max_fc = max(highs) if max_fc is None else max(max_fc, *highs)

    # symmetric colour scale around zero
    max_fc = max(abs(min_fc), abs(max_fc))
    min_fc = -1 * max_fc
    middle_val = 0

    def _rows_for(frame, category):
        """Return the rows of *frame* that belong to one x category."""
        if group_by_aa:
            return frame.loc[frame['aa'] == category]
        if category in atg_labels:
            return frame.loc[(frame['codon'] == 'ATG')
                             & (frame['aa'] == atg_labels[category])]
        return frame.loc[frame['codon'] == category]

    plot_series = []
    for c, contrast in enumerate(selected):
        s1, s2 = contrast.split('__vs__')
        cols = [
            'aa', s1, s2, '{}_{}'.format(norm, s1), '{}_{}'.format(norm, s2),
            contrast
        ]
        if not group_by_aa:
            cols = cols + ['codon']

        for i, cat in enumerate(x_categories):
            if cat == '':
                # gap between codon groups
                plot_series += [{}]
                continue

            # select cols for current contrast (copies, they get new columns)
            current = {
                site: _rows_for(frames[site], cat)[cols].copy()
                for site in site_order
            }

            if group_by_aa:
                codon, aa = '', cat
            elif cat in atg_labels:
                codon, aa = 'ATG', atg_labels[cat]
            else:
                codon = cat
                # Take the amino acid from whichever table has the codon;
                # reading it from the P-site table alone raised IndexError
                # when that table had no row for it.
                aa = next((rows.iloc[0]['aa']
                           for rows in (current['P'], current['A'],
                                        current['E']) if len(rows) != 0), '')

            # getting plot series
            for offset, site in enumerate(site_order):
                rows = current[site]
                if len(rows) == 0:
                    plot_series += [{
                        'x': i,
                        'y': offset + c * 4,
                        'codon': codon,
                        'aa': aa,
                        'site': site,
                        'value': 0
                    }]
                else:
                    rows['value'] = rows[contrast]
                    rows['contrast'] = contrast
                    rows['x'] = i
                    rows['y'] = offset + c * 4
                    rows['site'] = site
                    plot_series += rows.to_dict('records')

            # NOTE(review): the nesting of this statement could not be
            # recovered with certainty from the source; it is kept per
            # category - confirm against the original layout.
            if group_by_aa:
                plot_series += [{}]

    y_categories = []
    for contrast in selected:
        y_categories += [
            'A-site ({})'.format(contrast), 'P-site ({})'.format(contrast),
            'E-site ({})'.format(contrast), ''
        ]

    group_by_codon = not group_by_aa
    return render_template('psite_plot.html',
                           psite_series=plot_series,
                           contrasts=contrasts,
                           y_categories=y_categories,
                           x_categories=x_categories,
                           min_fc=min_fc,
                           max_fc=max_fc,
                           middle_val=middle_val,
                           group_by_codon=group_by_codon,
                           norm=norm,
                           selected_contrasts=selected,
                           dataset_id=project_id)
def get_volcano_plot(project_id):
    """Render the volcano plot (fold change vs. -log10 p-value) page.

    On GET only the list of contrasts is rendered. On POST the form fields
    ``contrast``, ``left``, ``right`` and ``bottom`` are read; ``left`` and
    ``right`` are fold-change thresholds and ``bottom`` is a p-value
    threshold. Genes are split into five series: the ``asp_top200`` and
    ``pro_top200`` gene sets, genes at or below ``left``, genes at or above
    ``right`` (both also requiring ``pvalue <= bottom``), and the rest.

    Args:
        project_id: identifier used to build the database keys.

    Returns:
        The rendered ``volcano_plot.html`` template; invalid input is
        reported through the template's ``error`` variable.
    """
    from main import get_db

    def _error_page(message):
        return render_template("volcano_plot.html",
                               contrasts=contrasts,
                               error=message)

    def _series(name, frame, color):
        """Build one chart series from the complete rows of *frame*."""
        return {
            'name': name,
            'data': list(frame.dropna().T.to_dict().values()),
            'turboThreshold': len(frame),
            'color': color,
            'marker': {
                'symbol': 'circle',
                'radius': 5,
            },
        }

    rdb = get_db()
    contrasts = rdb.smembers('contrasts_{}'.format(project_id))
    contrasts = sorted([c.decode('utf-8') for c in contrasts])
    if request.method == 'GET':
        return render_template("volcano_plot.html", contrasts=contrasts)

    contrast = request.form.get('contrast')
    if contrast is None:
        return _error_page('Contrast is not selected')

    # Validate the thresholds before taking logarithms: a missing field,
    # non-numeric text or a value <= 0 used to raise instead of being
    # reported to the user.
    try:
        left = float(request.form.get('left'))
        right = float(request.form.get('right'))
        bottom = float(request.form.get('bottom'))
    except (TypeError, ValueError):
        return _error_page('Thresholds must be numbers')
    if left <= 0 or right <= 0 or bottom <= 0:
        return _error_page('Thresholds must be positive numbers')

    # dash lines, in the units of the plot axes
    left_line = round(math.log2(left), 3)
    right_line = round(math.log2(right), 3)
    bottom_line = -round(math.log10(bottom), 3)

    data = rdb.get('volcano_{}_{}'.format(project_id, contrast))
    if data is None:
        return _error_page('No data found for contrast: {}'.format(contrast))

    df = pd.DataFrame(json.loads(data))
    df = df.round(decimals=3)
    # NOTE(review): pvalue has just been rounded to 3 decimals, so a numeric
    # p-value below 0.0005 becomes 0.0 and math.log10 raises ValueError -
    # confirm the range of the stored p-values.
    df['-log10(pval)'] = -1 * df['pvalue'].apply(lambda x: math.log10(x))
    df['fc'] = 2 ** df['log2FoldChange']
    df = df.drop('padj', axis=1)
    # Positional rename: assumes the stored columns, after dropping padj,
    # are fold change, p-value and gene name in that order - TODO confirm.
    df.columns = ['x', 'pvalue', 'gene', 'y', 'fc']
    df = df.round(decimals=3)

    # thresholds
    in_asp = df['gene'].isin(asp_top200)
    in_pro = df['gene'].isin(pro_top200)
    asp_df = df.loc[in_asp]
    pro_df = df.loc[in_pro]
    left_df = df.loc[(df['fc'] <= left) & (df['pvalue'] <= bottom) & ~in_asp
                     & ~in_pro]
    right_df = df.loc[(df['fc'] >= right) & (df['pvalue'] <= bottom)
                      & ~in_asp & ~in_pro]
    # cells that belong to one of the other series are masked here and the
    # affected rows are removed by dropna() when the series is built
    bottom_df = df[~df.isin(left_df) & ~df.isin(right_df) & ~df.isin(asp_df)
                   & ~df.isin(pro_df)]

    # series
    plot_series = [
        _series(contrast, bottom_df, 'grey'),
        _series(contrast, left_df, 'blue'),
        _series(contrast, right_df, 'red'),
        _series('ASP', asp_df, '#99ffcc'),
        _series('PRO', pro_df, '#ff6699'),
    ]

    return render_template('volcano_plot.html',
                           contrasts=contrasts,
                           selected_contrast=contrast,
                           plot_series=plot_series,
                           right=right_line,
                           left=left_line,
                           bottom=bottom_line,
                           selected_thresholds={
                               'left': left,
                               'right': right,
                               'bottom': bottom,
                           })
import web
from main import get_session, get_render, get_db

# Module-level handles shared by every handler class below.
session = get_session()
render = get_render()
db = get_db()


class login:
    """Handler for the login page (GET shows the form, POST checks it)."""

    def GET(self):
        # Render the login form inside the base layout.
        return render.base(render.login(), session)

    def POST(self):
        input = web.input()
        # The submitted values are passed as query variables rather than
        # being concatenated into the SQL text.
        # NOTE(review): the password is compared exactly as submitted, which
        # implies it is stored unhashed in `auth` - confirm and consider
        # hashing.
        result = db.query("SELECT * FROM auth WHERE username = $username AND password = $password", {
            'username': input.username,
            'password': input.password
        })
        if result:
            # A matching row exists: remember the user and go to the index.
            session.username = input.username
            return web.seeother("/")
        else:
            # No match: show the login form again.
            return render.base(render.login(), session)


class register:
    """Handler for the registration page."""

    def GET(self):
        # Render the registration form inside the base layout.
        return render.base(render.register(), session)

    def POST(self):
        input = web.input()
        # NOTE(review): the password is inserted exactly as submitted, and
        # nothing here checks whether the username already exists - confirm
        # whether the table enforces uniqueness.
        db.insert("auth", username = input.username, password = input.password)
        return web.seeother("/login")


# The body of this class lies outside the visible part of the file.
class logout:
def client():
    """Yield a test client for ``app``.

    ``get_db()`` is called once inside an application context before the
    client is handed to the caller.
    """
    with app.test_client() as test_client:
        with app.app_context():
            get_db()
        yield test_client
def get_ma_plot(project_id):
    """Render the MA plot (mean expression vs. log2 fold change) page.

    On GET only the list of contrasts is rendered. On POST the table stored
    under ``ma_plot_all_<project>_<contrast>`` is loaded, optionally
    filtered by the form fields ``pval``, ``fc``, ``min_counts`` and
    ``max_counts`` (only when ``apply_filters`` equals ``"true"``), and
    passed to the template as one chart series.

    Args:
        project_id: identifier used to build the database keys.

    Returns:
        The rendered ``ma_plot.html`` template.
    """
    from main import get_db

    rdb = get_db()
    contrasts = rdb.smembers("contrasts_{}".format(project_id))
    contrasts = sorted([c.decode('utf-8') for c in contrasts])
    if request.method == "GET":
        # no contrasts means there is nothing to plot
        return render_template("ma_plot.html",
                               contrasts=contrasts,
                               no_data=len(contrasts) == 0)

    contrast = request.form.get('contrast')
    apply_filters = request.form.get('apply_filters') == "true"
    pval = request.form.get('pval')
    fc = request.form.get('fc')
    min_counts = request.form.get('min_counts')
    max_counts = request.form.get('max_counts')
    # the raw form values are echoed back to the page
    filters = {
        'pval': pval,
        'fc': fc,
        'min_counts': min_counts,
        'max_counts': max_counts,
    }

    bi_df = rdb.get('ma_plot_all_{}_{}'.format(project_id, contrast))
    if not bi_df:
        return render_template(
            "ma_plot.html",
            error="No data for the contrast {}".format(contrast),
            selected_contrast=contrast,
            contrasts=contrasts,
            apply_filters=apply_filters,
            filters=filters)

    # NOTE(review): pd.read_msgpack was removed in pandas 1.0 - confirm the
    # pinned pandas version.
    df = pd.read_msgpack(bi_df)
    # columns: ['baseMean', 'log2FoldChange', 'lfcSE', 'stat', 'pvalue', 'padj', 'transcript']
    df = df.rename({'baseMean': 'x', 'log2FoldChange': 'y'}, axis='columns')

    def _given(value):
        # a field that is absent from the form or left blank is not applied
        return value is not None and value != ''

    # Filter while the columns are still numeric: replacing NaN with ''
    # first made the comparisons below fail on mixed str/float columns.
    # Rows with a missing value in a filtered column are dropped by the
    # comparison.
    if apply_filters:
        if _given(pval):
            df = df.loc[df["pvalue"] <= float(pval)]
        if _given(fc):
            limit = float(fc)
            df = df.loc[(df["y"] <= limit) & (df["y"] >= -1 * limit)]
        if _given(min_counts):
            df = df.loc[df["x"] >= int(min_counts)]
        if _given(max_counts):
            df = df.loc[df["x"] <= int(max_counts)]

    df = df.fillna('')

    plot_series = {
        'name': contrast.replace('__', ' '),
        'data': df.to_dict('records')
    }
    return render_template("ma_plot.html",
                           contrasts=contrasts,
                           selected_contrast=contrast,
                           plot_series=plot_series,
                           apply_filters=apply_filters,
                           genes=len(df),
                           filters=filters)
def get_liquid_nitrogen():
    """Render the liquid-nitrogen storage overview page.

    For every tower (from the ``towers`` set and from the stored requests)
    and every rack a 10 x 10 grid of positions (rows A-J, columns 1-10) is
    built. Each position is one of:

    * value 2, yellow - a pending request exists for it;
    * value 1, red - the tower table holds an entry for it;
    * value 0, white - empty.

    The page also receives the cell-line table, the pending requests, up to
    ten of the current user's most recent requests, and the user list.

    Returns:
        The rendered ``liquid_nitrogen.html`` template.
    """
    from main import get_db

    y_pos = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
    rdb = get_db()
    towers = [tower.decode('utf-8') for tower in rdb.smembers('towers')]
    series = {}

    to_approve = rdb.get('to_approve')
    if to_approve is not None:
        to_approve = pd.DataFrame(json.loads(to_approve))
    else:
        to_approve = pd.DataFrame(columns=[
            'tower', 'pos', 'Rack', 'x', 'y', 'Responsible person', 'Date',
            'Comments', 'cell_line', 'prev_cell_line', 'prev_responsible',
            'prev_comments', 'prev_date', 'status'
        ])
    to_approve = to_approve.fillna('')
    towers = set(towers + to_approve['tower'].tolist())

    user_requests = to_approve.loc[to_approve['Responsible person'] ==
                                   current_user.email]
    user_requests = user_requests[[
        'tower', 'Rack', 'pos', 'cell_line', 'prev_cell_line', 'Comments',
        'Date', 'Responsible person', 'status'
    ]]
    user_requests = user_requests[::-1]  # reverse order
    if len(user_requests) > 10:
        user_requests = user_requests[:10]

    to_approve = to_approve.loc[to_approve['status'] == 'pending']

    for tower in towers:
        data = rdb.get(tower)
        tower_requests = to_approve.loc[to_approve['tower'] == tower]
        requested_racks = list(tower_requests['Rack'].astype(str).unique())
        racks = []
        if data is not None:
            df = pd.DataFrame(json.loads(data))
            df = df.fillna('null')
            racks += set(
                list(df['Rack'].astype(str).unique()) + requested_racks)
        else:
            # Only this tower's requests define its racks; taking the racks
            # of every pending request added racks of other towers here.
            racks += requested_racks
            df = pd.DataFrame(columns=[
                'Comments', 'Date', 'ID', 'Rack', 'Responsible person',
                'pos', 'x', 'y'
            ])

        for rack in racks:
            # rack-level filtering does not depend on the position, so it
            # is done once per rack instead of once per cell
            rack_requests = tower_requests.loc[
                tower_requests['Rack'].astype(str) == rack]
            rack_entries = df.loc[df['Rack'].astype(str) == rack]

            rack_series = []
            for y in y_pos:
                for x in range(1, 11):
                    approved = rack_requests.loc[
                        (rack_requests['y'] == y)
                        & (rack_requests['x'].astype(int) == x)]
                    df1 = rack_entries.loc[
                        (rack_entries['y'] == y)
                        & (rack_entries['x'].astype(int) == x)]
                    if len(approved) != 0:
                        first = approved.iloc[0]
                        rack_series.append({
                            'pos': '{}{}'.format(y, x),
                            'Rack': rack,
                            'x': x - 1,
                            'y': y_pos.index(y),
                            'value': 2,  # means to approve
                            'color': '#ffcc00',  # yellow
                            'Responsible person': first['Responsible person'],
                            'ID': first['cell_line'],
                            'Date': first['Date'],
                            'status': 'to_confirm',
                            'prev_cell_line': first['prev_cell_line'],  # cell_line_IDs
                            'prev_responsible': first['prev_responsible'],
                            'prev_comments': first['prev_comments'],
                            'prev_date': first['prev_date'],
                        })
                    elif len(df1) != 0:
                        # copy: the columns below must not be written to a
                        # slice of the tower table
                        df1 = df1.copy()
                        df1['pos'] = df1['y'].astype(str) + df1['x'].astype(
                            str)
                        df1['y'] = y_pos.index(y)
                        df1['x'] = df1['x'].astype(int) - 1
                        df1['color'] = '#F4796E'  # red
                        df1['value'] = 1  # means confirmed
                        rack_series.append(df1.iloc[0].to_dict())
                    else:
                        rack_series.append({
                            'pos': '{}{}'.format(y, x),
                            'Rack': rack,
                            'x': x - 1,
                            'y': y_pos.index(y),
                            'value': 0,  # means empty
                            'color': '#FFFFFF',
                            'Tower': tower,
                        })
            key = '{}_Rack{}'.format(tower, rack)
            series[key] = rack_series

    cell_lines = rdb.get('cell_lines')
    if cell_lines is None:
        return render_template('liquid_nitrogen.html', error='No data found')
    cell_lines = pd.DataFrame(json.loads(cell_lines))

    if 'tubes_available' in cell_lines.columns:
        cell_lines['tubes_available'] = cell_lines['tubes_available'].fillna(
            0)
        # Blank strings (used as filler when a column is added later) and
        # other non-numeric values count as zero instead of raising.
        tubes = pd.to_numeric(cell_lines['tubes_available'],
                              errors='coerce').fillna(0)
        available_cell_lines = cell_lines.loc[tubes.astype(int) != 0]
        available_cell_lines = available_cell_lines[[
            'ID', 'Cell line', 'tubes_available'
        ]]
        available_cell_lines = available_cell_lines.to_dict('records')
    else:
        # the table has no tube counts at all
        available_cell_lines = []

    cell_lines = cell_lines.fillna('')
    cell_lines.index = cell_lines['ID']
    cell_lines = cell_lines.to_dict('index')

    cell_lines_dropdown = [{'value': 'add_new', 'text': 'Add new'}]
    cell_lines_dropdown.extend({
        'value': key,
        'text': key,
    } for key in cell_lines)

    liquid_nitrogen_admins = current_app.config.get('LIQUID_NITROGEN_ADMINS')
    if liquid_nitrogen_admins is None:
        liquid_nitrogen_admins = []

    users = rdb.hgetall('users')
    users = [] if users is None else users.keys()
    users = [user.decode('utf-8') for user in users]

    to_approve = to_approve[[
        'tower', 'Rack', 'pos', 'cell_line', 'prev_cell_line', 'Comments',
        'Date', 'Responsible person'
    ]]
    to_approve_data = to_approve.to_dict('records')
    user_requests = user_requests.to_dict('records')

    # admins and regular users get the same data; only the flag differs
    return render_template('liquid_nitrogen.html',
                           series=series,
                           cell_lines_dropdown=cell_lines_dropdown,
                           cell_lines=json.dumps(cell_lines).replace(
                               """\xa0""", " "),
                           to_approve=to_approve_data,
                           admin=current_user.email in liquid_nitrogen_admins,
                           users=users,
                           current_user=current_user.email,
                           available_cell_lines=available_cell_lines,
                           user_requests=user_requests)
def setUp(self):
    """Store the database handle on the test case and reset ``object_to_remove``."""
    with app.app_context():
        from main import get_db
        handle = get_db()
        self.db = handle
    self.object_to_remove = []