示例#1
0
def create_user(email, password):
    """Insert a new row into the ``users`` table.

    The password is passed through ``generate_password_hash`` before it is
    stored. The statement is parameterised and the transaction is committed
    after the cursor block has exited.
    """
    insert_stmt = "INSERT INTO `users` (`email`, `password`) VALUES (%s, %s)"
    with app.app_context():
        connection = get_db()
        with connection.cursor() as cur:
            hashed = generate_password_hash(password)
            cur.execute(insert_stmt, (email, hashed))
        connection.commit()
示例#2
0
def create_cell_line():
    """Create a cell line record or overwrite the one with the same ``ID``.

    The record is read from the JSON request body and merged into the table
    stored as JSON under the ``cell_lines`` key, which is then written back.

    Returns:
        A ``{'status': 'success'}`` response (200), or a
        ``{'status': 'error', 'error': ...}`` response (500) when the table
        cannot be saved.
    """
    from main import get_db
    rdb = get_db()
    new_cell_line = request.get_json()
    cell_lines = rdb.get('cell_lines')
    if cell_lines is not None:
        df = pd.DataFrame(json.loads(cell_lines))
    else:
        df = pd.DataFrame(columns=list(new_cell_line.keys()))
    # TODO: handle a record whose name already exists under a different ID.

    # in case we added new fields
    for key in new_cell_line:
        if key not in df.columns:
            df[key] = ''

    existing = df['ID'] == new_cell_line.get('ID')
    if existing.any():
        # Overwrite every submitted field. Previously only the field left
        # over from the loop above (the last key) was written.
        for key, value in new_cell_line.items():
            df.loc[existing, key] = value
    else:
        # DataFrame.append was removed in pandas 2.0; concat works in all
        # versions.
        df = pd.concat([df, pd.DataFrame([new_cell_line])],
                       ignore_index=True)
    try:
        rdb.set('cell_lines', json.dumps(df.to_dict('list')))
    except Exception as e:
        return make_response({'status': 'error', 'error': str(e)}, 500)

    return make_response({'status': 'success'}, 200)
 def change_password(self, new_password):
     """Store a new password hash for this user and mark the user activated.

     The hash is written to the ``users`` hash under the user's e-mail and
     the e-mail is removed from the ``not_activated_users`` set.
     """
     from main import get_db
     rdb = get_db()
     self.password = sha256_crypt.hash(new_password)
     # HSET with a single field replaces the deprecated HMSET call.
     rdb.hset('users', self.email, self.password)
     rdb.srem('not_activated_users', self.email)
     self.activated = True
示例#4
0
def update_rack():
    """Queue a change for one or more positions of a rack.

    Reads the change from the JSON request body (``status`` defaults to
    ``'pending'``), loads the table stored under ``to_approve`` and keeps
    only its pending rows. Any pending row for the same tower, rack and
    position is replaced. One row is added per requested position, with
    ``y`` taken from the first character of the position label and ``x``
    from the remaining digits. When the tower table already holds a record
    at that position, its ``ID`` and ``Responsible person`` are stored as
    ``prev_cell_line`` and ``prev_responsible``.

    Returns:
        A ``{'status': 'success'}`` response (200), or a
        ``{'status': 'error', 'error': ...}`` response (500) when the table
        cannot be saved.
    """
    from main import get_db
    data = request.get_json()
    if 'status' not in data:
        data['status'] = 'pending'
    rdb = get_db()
    to_approve = rdb.get('to_approve')
    if to_approve is None:
        to_approve = pd.DataFrame(columns=[
            'tower', 'pos', 'Rack', 'x', 'y', 'Responsible person', 'Date',
            'Comments', 'cell_line', 'prev_cell_line', 'prev_responsible',
            'prev_comments', 'prev_date', 'status'
        ])
    else:
        to_approve = pd.DataFrame(json.loads(to_approve))

    to_approve = to_approve.loc[to_approve['status'] == 'pending']
    tower = data.get('tower')
    tower_data = rdb.get(tower)
    if tower_data is not None:
        tower_data = pd.DataFrame(json.loads(tower_data))

    pos = data.get('pos')
    if isinstance(pos, str):
        pos = [pos]

    # if there is already a pending request for these positions, drop it ...
    overwritten = ((to_approve['Rack'] == data.get('Rack'))
                   & (to_approve['tower'] == data.get('tower'))
                   & (to_approve['pos'].isin(pos)))
    to_approve = to_approve.loc[~overwritten]

    # ... and add the new data
    new_rows = []
    for p in pos:
        # Copy the payload for each position. Reusing one dict carried the
        # prev_* values of an earlier position over to later positions that
        # have no existing record.
        cur_data = dict(data)
        cur_data['pos'] = p
        cur_data['y'] = p[0]
        cur_data['x'] = int(p[1:])
        if tower_data is not None:
            current_pos_data = tower_data.loc[
                (tower_data['Rack'].astype(int) == int(data.get('Rack', 0)))
                & (tower_data['pos'] == p)]
            if len(current_pos_data) != 0:
                previous = current_pos_data.iloc[0]
                cur_data['prev_cell_line'] = previous['ID']
                cur_data['prev_responsible'] = previous['Responsible person']
        new_rows.append(cur_data)

    if new_rows:
        # DataFrame.append was removed in pandas 2.0; concat works in all
        # versions.
        to_approve = pd.concat([to_approve, pd.DataFrame(new_rows)],
                               ignore_index=True)
    # save to db
    try:
        rdb.set('to_approve', json.dumps(to_approve.to_dict('list')))
    except Exception as e:
        return make_response({'status': 'error', 'error': str(e)}, 500)

    return make_response({'status': 'success'}, 200)
def get_reads_per_position(project_id):
    """Render per-position read counts for each gene/sample pair of a project.

    Loads the msgpack-encoded table stored under
    ``<project_id>_reads_per_position``. For every gene/sample combination
    that has rows, one plot series is built with an entry for each position
    from 1 to the gene length. Gene lengths come from
    ``<project_id>_rrna_genes`` when that key exists, otherwise from the
    built-in defaults.

    Returns:
        The rendered ``reads_per_position.html`` template, or a plain
        "NO DATA" string when the table is missing.
    """
    # this import has to be here
    from main import get_db
    rdb = get_db()

    packed = rdb.get("{}_reads_per_position".format(project_id))
    if packed is None:
        return "NO DATA for project {}".format(project_id)
    df = pd.read_msgpack(packed)

    stored_lengths = rdb.get('{}_rrna_genes'.format(project_id))
    if stored_lengths is None:
        gene_lengths = {
            'RNA18S5': 1869,
            'RNA28S5': 5070,
            'RNA5-8S5': 153,
        }
    else:
        gene_lengths = json.loads(stored_lengths)

    plot_names = []
    plot_series = {}
    categories = {}

    samples = df.get('sample').unique()
    for gene in gene_lengths.keys():
        gene_length = int(gene_lengths.get(gene))
        for sample in samples:
            rows = df.loc[(df['gene'] == gene) & (df['sample'] == sample)]
            if rows.empty:
                continue

            # Start from a zero-count entry for every position of the gene.
            positions = range(1, gene_length + 1)
            series_df = pd.DataFrame(columns=['x', 'y', 'reads_info'],
                                     index=positions)
            series_df['x'] = positions
            series_df['y'] = 0
            series_df['reads_info'] = ''

            plot_name = "{}_{}".format(gene, sample)
            plot_names.append(plot_name)
            categories[plot_name] = list(positions)

            # Fill in the positions that actually have reads.
            for _, row in rows.iterrows():
                position = row['start']
                info = row['reads_info'].replace(',', '<br> • ')
                info = info.replace('. ', '<br> • ')
                series_df.loc[position, 'x'] = position
                series_df.loc[position, 'y'] = row['counts']
                series_df.loc[position, 'reads_info'] = info

            plot_series[plot_name] = {
                'name': plot_name,
                'data': series_df.to_dict('records')
            }

    return render_template("reads_per_position.html",
                           plot_names=plot_names,
                           plot_series=plot_series,
                           categories=categories,
                           project_id=project_id)
示例#6
0
def write_to_db(temperature):
    """Insert one temperature reading stamped with ``datetime.now()``."""
    insert_sql = """
    INSERT INTO temperatures (read_datetime, temperature)
    VALUES (?, ?)
    """
    connection = get_db()
    reading = (datetime.datetime.now(), temperature)
    connection.execute(insert_sql, reading)
    connection.commit()
示例#7
0
def export_data():
    """Export confirmed and pending rack positions as semicolon-separated CSV.

    Confirmed positions come from the per-tower tables (one key per member
    of the ``towers`` set), pending ones from the ``to_approve`` table. Both
    are joined with the ``cell_lines`` table on the cell line ID. Confirmed
    rows come first and get the status ``'confirmed'``.

    Keys that are missing from the database are treated as empty tables, so
    an empty database yields a header-only CSV instead of an exception.

    Returns:
        A ``{'status': 'success', 'csv_content': <csv text>}`` response (200).
    """
    from main import get_db
    rdb = get_db()

    export_columns = [
        'ID', 'Cell line', 'Rack', 'Tower', 'Position',
        'Media (Freezing Medium)', 'transfected plasmid', 'selection', 'Typ',
        'Date', 'Responsible person', 'Biosafety level S1/S2', 'Comments',
        'Mycoplasma checked', 'Source', 'status'
    ]

    parts = []
    raw_cell_lines = rdb.get('cell_lines')
    # Without cell lines there is nothing to join against, so nothing to
    # export beyond the header.
    if raw_cell_lines is not None:
        cell_lines = pd.DataFrame(json.loads(raw_cell_lines)).fillna('')

        # Pending requests, renamed to the export column names.
        pending = None
        raw_pending = rdb.get('to_approve')
        if raw_pending is not None:
            pending = pd.DataFrame(json.loads(raw_pending)).fillna('')
            pending = pd.merge(pending,
                               cell_lines,
                               left_on='cell_line',
                               right_on='ID')
            pending = pending[[
                'ID', 'Cell line', 'Rack', 'tower', 'pos',
                'Media (Freezing Medium)', 'transfected plasmid',
                'selection', 'Typ', 'Date', 'Responsible person',
                'Biosafety level S1/S2', 'Comments', 'Mycoplasma checked',
                'Source', 'status'
            ]]
            pending.columns = export_columns

        # Confirmed positions from every tower that has stored data.
        tower_frames = []
        for member in rdb.smembers('towers'):
            tower = member.decode('utf-8')
            raw_tower = rdb.get(tower)
            if raw_tower is None:
                continue
            df = pd.DataFrame(json.loads(raw_tower)).fillna('')
            df['Tower'] = tower
            tower_frames.append(df)

        if tower_frames:
            # pd.concat replaces DataFrame.append (removed in pandas 2.0).
            confirmed = pd.merge(pd.concat(tower_frames), cell_lines, on='ID')
            confirmed['Position'] = confirmed['pos']
            confirmed = confirmed.drop(['pos', 'x', 'y'], axis='columns')
            confirmed['status'] = 'confirmed'
            parts.append(confirmed)
        if pending is not None:
            parts.append(pending)

    if parts:
        full_df = pd.concat(parts, ignore_index=True)
    else:
        full_df = pd.DataFrame(columns=export_columns)
    full_df = full_df[export_columns]
    content = full_df.to_csv(sep=";", index=False)
    return make_response({'status': 'success', 'csv_content': content}, 200)
示例#8
0
def delete_cell_line():
    """Delete a cell line and every position that refers to it.

    The request body is JSON with a ``cell_line_id``. Matching rows are
    removed from the ``to_approve`` table, from each tower table (one key
    per member of the ``towers`` set) and from the ``cell_lines`` table.

    Returns:
        A ``{'status': 'success', 'info': ...}`` response, or a
        ``{'status': 'error', ...}`` response when the request has no body
        (both with code 200).
    """
    from main import get_db
    rdb = get_db()
    data = request.get_data()
    # get_data() returns empty bytes, never None, when there is no body.
    if not data:
        return make_response({
            'status': 'error',
            'error': 'no data received'
        }, 200)
    data = json.loads(data.decode('utf-8'))
    cell_line_id = data.get('cell_line_id')

    to_approve = rdb.get('to_approve')
    if to_approve is not None:
        to_approve = pd.DataFrame(json.loads(to_approve))
    else:
        to_approve = pd.DataFrame(columns=[
            'tower', 'pos', 'Rack', 'x', 'y', 'Responsible person', 'Date',
            'Comments', 'cell_line', 'prev_cell_line', 'prev_responsible',
            'prev_comments', 'prev_date', 'status'
        ])
    to_approve = to_approve.loc[to_approve['cell_line'] != cell_line_id]
    rdb.set('to_approve', json.dumps(to_approve.to_dict('list')))

    for member in rdb.smembers('towers'):
        tower = member.decode('utf-8')
        tower_data = rdb.get(tower)
        if tower_data is None:
            continue
        df = pd.DataFrame(json.loads(tower_data))
        matches = df['ID'] == cell_line_id
        # Only rewrite towers that actually held the cell line.
        if matches.any():
            rdb.set(tower, json.dumps(df.loc[~matches].to_dict('list')))

    cell_lines = rdb.get('cell_lines')
    if cell_lines is not None:
        cell_lines = pd.DataFrame(json.loads(cell_lines))
        cell_lines = cell_lines.loc[cell_lines['ID'] != cell_line_id]
        rdb.set('cell_lines', json.dumps(cell_lines.to_dict('list')))

    return make_response(
        {
            'status':
            'success',
            'info':
            'Cell line has been removed from the DB. All associated positions have been cleared'
        }, 200)
示例#9
0
def delete_user():
    """Remove the user named in the JSON request body.

    Deletes the e-mail from the ``users`` hash and from the
    ``not_activated_users`` set, then reports success.
    """
    from main import get_db
    rdb = get_db()

    payload = request.get_json()
    user_email = payload.get('email')

    rdb.hdel('users', user_email)
    rdb.srem('not_activated_users', user_email)

    response_body = {'status': 'success'}
    return make_response(response_body, 200)
示例#10
0
def get_user_details():
    """Render the user details page.

    Users whose e-mail is listed in the ``APP_ADMINS`` config entry also get
    the list of all registered e-mails (the field names of the ``users``
    hash) and ``is_admin=True``.
    """
    from main import get_db
    rdb = get_db()
    # A missing APP_ADMINS entry means "no admins" instead of a TypeError.
    app_admins = current_app.config.get('APP_ADMINS') or ()
    if current_user.email not in app_admins:
        return render_template('user_details.html')

    # Only the field names are needed, so the password hashes are not fetched.
    users = [user.decode('utf-8') for user in rdb.hkeys('users')]
    return render_template('user_details.html', is_admin=True, users=users)
def get_translational_efficiency(project_id):
    """Render the translational-efficiency page for a project.

    Loads the msgpack-encoded tables ``<project_id>_rpkm_rp`` and
    ``<project_id>_rpkm_rna``. A GET request only lists the available
    samples. Any other request builds one plot series per selected sample
    with ``x = log2(rna)`` and ``y = log2(rna) / log2(rp)``, optionally
    limited to ``min_y <= y <= max_y``; infinite and missing values are
    dropped.
    """
    from main import get_db
    rdb = get_db()
    template = "translational_efficiency.html"

    packed_rp = rdb.get("{}_rpkm_rp".format(project_id))
    packed_rna = rdb.get("{}_rpkm_rna".format(project_id))
    if packed_rp is None or packed_rna is None:
        return render_template(template, samples=[],
                               error="No data for project: {}".format(project_id))

    rp_df = pd.read_msgpack(packed_rp)
    rna_df = pd.read_msgpack(packed_rna)
    list_of_samples = list(rp_df.columns)
    list_of_samples.remove('gene_name')
    if request.method == "GET":
        return render_template(template, samples=list_of_samples)

    selected_samples = request.form.getlist('selected_samples')
    if not selected_samples:
        return render_template(template, samples=list_of_samples, error="No samples selected")

    apply_filter = request.form.get('apply_filter') == "True"
    raw_min = request.form.get('min_y', -100)
    raw_max = request.form.get('max_y', 100)
    min_y = int(raw_min)
    max_y = int(raw_max)

    plot_series = []
    for sample in selected_samples:
        gene_names = rp_df['gene_name'].tolist()
        rp_values = rp_df[sample].astype(float).tolist()
        rna_values = rna_df[sample].astype(float).tolist()

        # Column order is kept as gene_name, x, y, then the derived columns.
        df = pd.DataFrame(columns=['gene_name', 'x', 'y'])
        df['gene_name'] = gene_names
        df['rpkm_rna'] = rna_values
        df['rpkm_rp'] = rp_values
        df['log2(rp)'] = np.log2(df['rpkm_rp'])
        df['log2(rna)'] = np.log2(df['rpkm_rna'])
        df['x'] = df['log2(rna)']
        df['y'] = df['log2(rna)'] / df['log2(rp)']

        if apply_filter:
            df = df.loc[(df['y'] >= min_y) & (df['y'] <= max_y)]
        df = df.replace([np.inf, -np.inf], np.nan).dropna()

        plot_series.append({
            'name': sample,
            'data': df.to_dict('records')
        })

    return render_template(template, samples=list_of_samples, selected_samples=selected_samples,
                           apply_filter=apply_filter, min_y=min_y, max_y=max_y, plot_series=plot_series)
示例#12
0
def get_projects():
    """Render the project list with the stored info of each project.

    Project ids are the members of the ``projects`` set. A project's info is
    the JSON document stored under ``project_info_<id>``; projects without
    that key are listed but have no info entry.
    """
    from main import get_db
    rdb = get_db()

    projects = [raw_id.decode('utf-8') for raw_id in rdb.smembers('projects')]

    project_info = {}
    for project_id in projects:
        stored = rdb.get("project_info_{}".format(project_id))
        if stored is None:
            continue
        project_info[project_id] = json.loads(stored.decode('utf-8'))

    return render_template('projects.html',
                           projects=projects,
                           project_info=project_info)
示例#13
0
    def get_by_id(self, email):
        """Return the ``User`` stored under ``email``, or None if unknown.

        The user counts as activated unless the e-mail is a member of the
        ``not_activated_users`` set.
        """
        # for now get user by email and use email as an id
        from main import get_db
        rdb = get_db()
        password = rdb.hget('users', email)
        if not password:
            return None

        # Let Redis do the membership test. The members returned by
        # smembers() are decoded from bytes everywhere else in this code,
        # so comparing the str e-mail against them never matched and every
        # user was reported as activated.
        activated = not rdb.sismember('not_activated_users', email)
        return User(email, email, password, activated=activated)
def get_psite_dotplot(project_id):
    """Render the P/A/E-site dot plot page for a project.

    A GET request shows the selection form. Any other request validates the
    selected contrasts and amino acid, asks ``get_plot_series`` for the
    series and renders them together with the submitted options.
    """
    from main import get_db
    rdb = get_db()
    amino_acids = rdb.smembers('aa_dotplot_{}'.format(project_id)) or []
    amino_acids = [name.decode('utf-8') for name in amino_acids]
    contrasts = rdb.smembers('contrasts_{}'.format(project_id)) or []
    contrasts = [c.decode('utf-8') for c in contrasts]

    def render_form(**extra):
        # Every variant of the page needs the two selection lists.
        return render_template('psite_dotplot.html',
                               amino_acids=amino_acids,
                               contrasts=contrasts,
                               **extra)

    if request.method == 'GET':
        return render_form()

    # if POST
    selected = request.form.getlist('selected_contrasts')
    if len(selected) == 0:
        return render_form(error='Please select contrasts')
    aa = request.form.get('amino_acid')
    if aa is None or aa == 'select':
        return render_form(error='Please select amino acid')

    norm = request.form.get('norm', 'tpm')
    genes_highlight = request.form.get('genes_highlight')

    # A missing or empty field means "no fold-change threshold".
    fc = float(request.form.get('fc_highlight') or 0)

    res = get_plot_series(project_id, aa, selected, fc, genes_highlight, norm)
    if not isinstance(res, dict):
        # get_plot_series returns a rendered error page rather than a dict
        # when the site data is missing; pass it through instead of
        # indexing into it.
        return res

    return render_form(selected_aa=aa,
                       selected_contrasts=selected,
                       p_series=res['p_series'],
                       a_series=res['a_series'],
                       e_series=res['e_series'],
                       norm=norm,
                       fc_highlight=fc,
                       genes_highlight=genes_highlight,
                       all_genes=res['all_genes'])
示例#15
0
def search_projects():
    """Return the project ids that contain the request body as a substring.

    The comparison is case-insensitive. The ids are the members of the
    ``projects`` set.
    """
    from main import get_db
    raw_query = request.get_data()
    if raw_query is None:
        return make_response({'status': 'error', 'error': 'No input received'})
    needle = raw_query.decode('utf-8').lower()

    rdb = get_db()
    project_ids = [raw_id.decode('utf-8') for raw_id in rdb.smembers('projects')]
    matching = [project_id for project_id in project_ids
                if needle in project_id.lower()]

    response_body = {
        'status': 'success',
        'matching_projects': matching
    }
    return make_response(response_body, 200)
示例#16
0
def get_cell_line_info():
    """List every rack position that refers to a cell line.

    The request body is JSON with a ``cell_line_id``. Rows of the
    ``to_approve`` table that name it as ``cell_line`` or ``prev_cell_line``
    are reported first, followed by matching rows of each tower table.

    Returns:
        A ``{'status': 'success', 'results': [...]}`` response, where each
        result has ``tower``, ``Rack``, ``pos`` and ``status``; or a
        ``{'status': 'error', ...}`` response when the request has no body
        (both with code 200).
    """
    from main import get_db
    rdb = get_db()
    data = request.get_data()
    # get_data() returns empty bytes, never None, when there is no body.
    if not data:
        return make_response({
            'status': 'error',
            'error': 'no data received'
        }, 200)
    data = json.loads(data.decode('utf-8'))

    cell_line_id = data.get('cell_line_id')
    to_approve = rdb.get('to_approve')
    if to_approve is not None:
        to_approve = pd.DataFrame(json.loads(to_approve))
    else:
        to_approve = pd.DataFrame(columns=[
            'tower', 'pos', 'Rack', 'x', 'y', 'Responsible person', 'Date',
            'Comments', 'cell_line', 'prev_cell_line', 'prev_responsible',
            'prev_comments', 'prev_date', 'status'
        ])

    result_columns = ['tower', 'Rack', 'pos', 'status']
    results = []
    requested = to_approve.loc[(to_approve['cell_line'] == cell_line_id) |
                               (to_approve['prev_cell_line'] == cell_line_id)]
    if len(requested) != 0:
        results.extend(requested[result_columns].to_dict('records'))

    for member in rdb.smembers('towers'):
        tower = member.decode('utf-8')
        tower_data = rdb.get(tower)
        if tower_data is None:
            continue
        df = pd.DataFrame(json.loads(tower_data))
        stored = df.loc[df['ID'] == cell_line_id]
        if len(stored) != 0:
            # assign() works on a copy, so the slice is not written to.
            stored = stored.assign(tower=tower)
            # extend keeps the list flat; append() used to nest a whole
            # list of records as a single element.
            results.extend(stored[result_columns].to_dict('records'))

    return make_response({'status': 'success', 'results': results}, 200)
示例#17
0
def create_user():
    """Create a not-yet-activated user from the JSON request body.

    The body must carry a non-empty ``email`` and ``password``. The password
    hash is stored in the ``users`` hash and the e-mail is added to the
    ``not_activated_users`` set.

    Returns:
        A ``{'status': 'success'}`` response (200), or an error response
        (500) when a field is missing/empty or the user already exists.
    """
    from main import get_db
    rdb = get_db()
    data = request.get_json()
    email = data.get('email')
    password = data.get('password')

    def error_response(message):
        # The two failure paths used different keys ('error' and
        # 'error_message'). Both are sent so clients of either keep working.
        return make_response(
            {
                'status': 'error',
                'error': message,
                'error_message': message
            }, 500)

    # Empty strings are rejected too, as the messages say "is empty".
    error_message = ""
    if not email:
        error_message += "<p>Email is empty</p>"
    if not password:
        error_message += "<p>Password is empty</p>"
    if error_message:
        return error_response(error_message)

    # Check for an existing user before hashing the password.
    if rdb.hexists('users', email):
        return error_response(
            '<p>User <b>{}</b> already exists</p>'.format(email))

    encrypted_password = sha256_crypt.hash(password)
    # HSET with a single field replaces the deprecated HMSET call.
    rdb.hset('users', email, encrypted_password)
    rdb.sadd('not_activated_users', email)

    # NOTE: an e-mail notification through Yahoo SMTP used to be sketched
    # here in commented-out code; the original comment said the provider
    # does not allow it, so no notification is sent.

    return make_response({'status': 'success'}, 200)
示例#18
0
def check_queue(ip, port):
    """Update job states, notify owners of finished jobs and start queued ones.

    Jobs with status ``'R'`` whose recorded pid no longer exists are marked
    ``'F'`` when ``pathway.pdb`` exists in their folder, otherwise ``'E'``,
    and a mail is prepared for each of them. Jobs with status ``'Q'`` are
    then spawned through ``client`` until ``NUMJOBS_CONCUR`` jobs are
    running. The mails are sent after the database has been committed.

    Args:
        ip: passed to ``client`` together with ``port`` to spawn a job.
        port: see ``ip``.
    """
    app = Flask(__name__)
    app.config['SERVER_NAME'] = conf.SERVER_NAME
    app.add_url_rule('/pathfinder/download/<uuid>', 'download', download)

    with app.app_context():
        db = get_db()
        cur = db.cursor()
        cur.execute('select * from job where status == ?', ('R',))
        numjobs = 0
        mail = None
        # One message per finished job. Previously each finished job
        # overwrote the message of the one before, so only the last was sent.
        outbox = []
        for row in cur.fetchall():
            uuid, email, date, status = row
            job_folder = get_job_folder(uuid)
            # finished?
            with open(os.path.join(job_folder, 'run.pid'), 'r') as pid_file:
                pid = int(pid_file.read())
            if pid_exists(pid):
                numjobs += 1
                continue

            if mail is None:
                mail = Mail(app)
            msg = Message('[PathFinder] Your job is finished.', sender='*****@*****.**', recipients=[email, '*****@*****.**'])

            pathway_path = os.path.join(job_folder, 'pathway.pdb')
            if os.path.exists(pathway_path):
                cur.execute('update job set status = ? where uuid = ?', ('F',uuid))
                msg.body = render_template('email.tpl', uuid=uuid, has_error=False)
                with open(pathway_path) as pathway_file:
                    msg.attach('pathway.pdb', 'text/pdb', pathway_file.read())
            else:
                cur.execute('update job set status = ? where uuid = ?', ('E',uuid))
                msg.body = render_template('email.tpl', uuid=uuid, has_error=True)
            outbox.append(msg)

        if numjobs < NUMJOBS_CONCUR:
            cur.execute('select * from job where status == ?', ('Q',))
            for row in cur.fetchall():
                uuid, email, date, status = row
                newpid = client(ip, port, "SPAWN:%s" % uuid)
                with open(os.path.join(get_job_folder(uuid), 'run.pid'), 'w') as pid_file:
                    pid_file.write(newpid)
                cur.execute('update job set status = ? where uuid = ?', ('R',uuid))
                numjobs += 1
                if numjobs >= NUMJOBS_CONCUR: break

        db.commit()
        db.close()

        for msg in outbox:
            # Sending stays best-effort, but failures are now logged.
            try:
                mail.send(msg)
            except Exception:
                app.logger.exception('Could not send job notification mail')
示例#19
0
    def get_user(self, email, password):
        """Return the ``User`` for valid credentials, otherwise None.

        The password is verified against the hash stored under ``email`` in
        the ``users`` hash. The user counts as activated unless the e-mail
        is a member of the ``not_activated_users`` set.
        """
        from main import get_db
        rdb = get_db()
        stored_hash = (rdb.hget('users', email) or b'').decode('utf-8')
        pending_activation = [
            member.decode('utf-8')
            for member in rdb.smembers('not_activated_users')
        ]

        # Unknown user or wrong password.
        if stored_hash == '' or not sha256_crypt.verify(password, stored_hash):
            return None

        # for now we use email as an id
        is_activated = email not in pending_activation
        return User(email, email, password, activated=is_activated)
def get_periodicity_heatmap(project_id):
    """Render the periodicity heat maps of a project.

    Loads the msgpack-encoded table stored under
    ``<project_id>_periodicity_heatmap`` and splits it per sample into four
    record lists: 5' and 3' read ends, each relative to start and to stop.

    Returns:
        The rendered ``periodicity_heatmap.html`` template, or a plain
        "No data" string when the table is missing.
    """
    from main import get_db
    rdb = get_db()

    result = rdb.get('{}_periodicity_heatmap'.format(project_id))
    if not result:
        return "No data for dataset {} found".format(project_id)

    full_df = pd.read_msgpack(result)
    # original columns: ['length', 'dist', 'count', 'region', 'end', 'sample']
    full_df.columns = ['y', 'x', 'value', 'region', 'end', 'sample']
    # javascript doesn't like dots. The earlier
    # replace({'sample': '.'}, '_') only matched cells equal to '.', so
    # dots inside sample names were left in place. A regex replace
    # substitutes every dot.
    full_df = full_df.replace({'sample': {r'\.': '_'}}, regex=True)
    full_df['end'] = full_df['end'].str.replace("' ", 'p ') # javascript doesn't like single quotes
    full_df = full_df.sort_values(by=['x', 'y'])
    samples = list(full_df['sample'].unique())

    region_labels = {
        'start': "Distance from start (nt)",
        'stop': "Distance from stop (nt)",
    }
    # (region, read end) -> {sample: records}
    plots = {(region, end): {}
             for region in ('start', 'stop')
             for end in ('5p', '3p')}
    for sample in samples:
        of_sample = full_df['sample'] == sample
        for (region, end), per_sample in plots.items():
            selected = full_df.loc[(full_df['region'] == region_labels[region]) &
                                   (full_df['end'] == end + " end") &
                                   of_sample]
            per_sample[sample] = selected.to_dict('records')

    return render_template("periodicity_heatmap.html", project_id=project_id,
                           start_5p_plots=plots[('start', '5p')],
                           stop_5p_plots=plots[('stop', '5p')],
                           start_3p_plots=plots[('start', '3p')],
                           stop_3p_plots=plots[('stop', '3p')],
                           samples=samples)
示例#21
0
def get_teams():
    """Write candidate duplicate team names to the file ``teams``.

    Collects (first word of the description, normalised team name) pairs
    for winners and losers, drops names that contain any of the excluded
    fragments or are 'Yes'/'No', and writes one line for every pair of
    entries where one team name is a proper substring of the other,
    i.e. put together 'Tottenham' and 'Tottenham Hotspur'.
    """
    db = get_db()
    d = set()

    for side in ('winner', 'loser'):
        result = db.find({}, {"_id": 0, side + '.team': 1, 'description': 1})
        for i in result:
            d.add((i['description'].split(' ')[0],
                   sub_common_names(i[side]['team'])))

    disclude = ['v. ', 'vs. ', ' v ', '@', ' or ', ' OR ', ' AND ',
                'or Fewer', 'or Lower', 'or More', '+', 'Any Other', ',',
                '1st ', '2nd ', '3rd', '1-', '(USA)', ' - ', 'Touchdown',
                'Strikeout']

    d = {
        item for item in d
        if item[1] not in ('Yes', 'No')
        and not any(fragment in item[1] for fragment in disclude)
    }

    # The with-block closes the file even when a write fails.
    with open("teams", "w") as f:
        for sport, team in d:
            for s2, t2 in d:
                if team in t2 and not team == t2:
                    j = sport + '|' + team + '-------' + s2 + '|' + t2
                    # Strip non-ASCII characters but write text: formatting
                    # the encoded bytes with %s writes "b'...'" on Python 3.
                    ascii_line = j.encode('ascii', 'ignore').decode('ascii')
                    f.write("%s\n" % ascii_line)
def _build_site_series(site_df, contrast, sample, control, fc, top_genes,
                       search_genes, genes_highlight):
    """Build the plot series of one ribosome site table for one contrast.

    Returns a ``(series, genes)`` tuple: the list of series dicts and the
    list of genes that have values for both ``sample`` and ``control``.
    """
    df = site_df[['gene', 'Aa', 'codon', sample, control]].dropna()
    genes = df['gene'].unique().tolist()
    df = df.groupby(['gene']).agg({
        sample: 'sum',
        control: 'sum'
    }).reset_index()
    df['x'] = np.log2(df[sample]).round(3)
    df['y'] = np.log2(df[control]).round(3)
    df[sample] = df[sample].round(3)
    df[control] = df[control].round(3)
    # log2 fold change; used only to split the genes and dropped afterwards
    df[contrast] = np.log2(df[sample] / df[control])

    is_searched = df['gene'].isin(search_genes)
    is_top = df['gene'].isin(top_genes)
    searched = df.loc[is_searched].drop(contrast, axis=1)
    above_fc = df.loc[(df[contrast].abs() >= fc)
                      & ~is_searched].drop(contrast, axis=1)
    top = df.loc[is_top & ~is_searched].drop(contrast, axis=1)
    rest = df.loc[(df[contrast].abs() < fc) & ~is_top
                  & ~is_searched].drop(contrast, axis=1)

    series = [{
        'name': contrast,
        'data': rest.to_dict('records'),
        'turboThreshold': len(rest)
    }]
    if len(above_fc) > 0:
        series.append({
            'name': 'Above threshold',
            'data': above_fc.to_dict('records'),
            'turboThreshold': len(above_fc),
            'color': 'rgba(223, 83, 83, .5)'
        })
    if len(top) > 0:
        series.append({
            'name': genes_highlight,
            'data': top.to_dict('records'),
            'turboThreshold': len(top),
            'color': '#00cc99'
        })
    if len(searched) > 0:
        series.append({
            'name': 'Selected genes',
            'data': searched.to_dict('records'),
            'color': '#ffcc00',
            'marker': {
                'radius': 5
            }
        })
    return series, genes


def get_plot_series(project_id,
                    aa,
                    selected_contrasts,
                    fc,
                    genes_highlight,
                    norm,
                    search_genes=None):
    """Build the P-, A- and E-site dot plot series for the given contrasts.

    Args:
        project_id: project whose ``psite_``/``asite_``/``esite_dotplot``
            tables are read.
        aa: amino acid the tables are stored under.
        selected_contrasts: names of the form ``<sample>__vs__<control>``.
        fc: absolute log2 fold change from which a gene is put into the
            'Above threshold' series.
        genes_highlight: ``'do_not_highlight'``, None, or ``'top<N>'`` to
            highlight the first N genes of the ``<aa>_top1000`` table.
        norm: prefix of the value columns, joined as ``<norm>_<sample>``.
        search_genes: genes shown as a separate 'Selected genes' series.

    Returns:
        A dict with ``p_series``, ``a_series`` and ``e_series`` (each
        mapping a contrast to its list of series) and ``all_genes`` (a
        set). When one of the three tables is missing, a rendered
        ``psite_dotplot.html`` error page is returned instead.
    """
    from main import get_db
    rdb = get_db()
    if search_genes is None:
        search_genes = []
    amino_acids = rdb.smembers('aa_dotplot_{}'.format(project_id)) or []
    amino_acids = [name.decode('utf-8') for name in amino_acids]
    contrasts = rdb.smembers('contrasts_{}'.format(project_id)) or []
    contrasts = [c.decode('utf-8') for c in contrasts]

    psite = rdb.get('psite_dotplot_{}_{}'.format(project_id, aa))
    if psite is None:
        return render_template('psite_dotplot.html',
                               error='No P-site data for {}'.format(aa),
                               contrasts=contrasts,
                               amino_acids=amino_acids)

    asite = rdb.get('asite_dotplot_{}_{}'.format(project_id, aa))
    if asite is None:
        return render_template('psite_dotplot.html',
                               error='No A-site data for {}'.format(aa),
                               contrasts=contrasts,
                               amino_acids=amino_acids)

    esite = rdb.get('esite_dotplot_{}_{}'.format(project_id, aa))
    if esite is None:
        return render_template('psite_dotplot.html',
                               error='No E-site data for {}'.format(aa),
                               contrasts=contrasts,
                               amino_acids=amino_acids)

    site_tables = (
        ('p_series', pd.DataFrame(json.loads(psite.decode('utf-8')))),
        ('a_series', pd.DataFrame(json.loads(asite.decode('utf-8')))),
        ('e_series', pd.DataFrame(json.loads(esite.decode('utf-8')))),
    )

    top_genes = []
    if genes_highlight is not None and genes_highlight != 'do_not_highlight':
        n = int(genes_highlight.replace('top', ''))
        top_df = rdb.get('{}_top1000'.format(aa))
        if top_df is not None:
            # head(n) yields exactly n genes; the label-inclusive .loc[:n]
            # used before returned n + 1.
            top_genes = pd.DataFrame(
                json.loads(top_df)).head(n)['gene'].tolist()

    series = {name: {} for name, _ in site_tables}
    all_genes = []
    for c in selected_contrasts:
        sample, control = c.split('__vs__')
        sample = '{}_{}'.format(norm, sample)
        control = '{}_{}'.format(norm, control)
        for name, site_df in site_tables:
            site_series, genes = _build_site_series(site_df, c, sample,
                                                    control, fc, top_genes,
                                                    search_genes,
                                                    genes_highlight)
            series[name][c] = site_series
            all_genes += genes

    return {
        'p_series': series['p_series'],
        'a_series': series['a_series'],
        'e_series': series['e_series'],
        'all_genes': set(all_genes)
    }
示例#23
0
__author__ = 'thales'

import main


def insert_into_database(collection, data):
    """Store ``data`` in ``collection`` through the collection's ``insert`` method."""
    collection.insert(data)

if __name__ == '__main__':
    db = main.get_db()

    # Start with clean collection
    toyota_autos_collection = db.toyotaAutos
    if toyota_autos_collection is not None:
        print 'dropping collection:', toyota_autos_collection
        toyota_autos_collection.drop()

    # Show number in collection before insert, should be 0
    num_toyota_autos = toyota_autos_collection.find().count()
    print "num_toyota_autos before = " + str(num_toyota_autos)

    autos = main.get_autos_collection().find({"manufacturer_label": "Toyota"})
    # Insert query result into new collection
    for a in autos:
        insert_into_database(toyota_autos_collection, a)

    # Show number in collection after insert
    num_toyota_autos = toyota_autos_collection.find().count()
    print "num_toyota_autos after = " + str(num_toyota_autos)
示例#24
0
def approve_decline():
    """Approve, decline or cancel a pending request for a tower position.

    The request body is JSON with the keys ``action`` (``'approve'``,
    ``'decline'`` or ``'cancel'``), ``tower``, ``Rack`` and ``pos``. The
    matching row is looked up in the ``to_approve`` record of the database:

    * approve: if the position is free and the request has no
      ``prev_cell_line``, the requested cell line is stored at the position;
      if the position is occupied, its entry is removed. The request is then
      marked ``'approved'``.
    * decline: the request is marked ``'declined'``.
    * cancel: the request is removed, and the ``to_approve`` key is deleted
      once no requests are left.

    Returns:
        A ``make_response`` result with a ``{'status': ...}`` payload; errors
        are reported in the payload and still use HTTP status 200.
    """
    from main import get_db

    def error(message):
        return make_response({'status': 'error', 'error': message}, 200)

    def success(info):
        return make_response({'status': 'success', 'info': info}, 200)

    def save_requests(frame):
        rdb.set('to_approve', json.dumps(frame.to_dict('list')))

    def rewrite_cell_lines():
        # NOTE: the bookkeeping of available tubes is currently disabled, so
        # the 'cell_lines' record is simply written back as it was read.
        cell_lines = rdb.get('cell_lines')
        if cell_lines is None:
            return
        cell_df = pd.DataFrame(json.loads(cell_lines))
        rdb.set('cell_lines', json.dumps(cell_df.to_dict('list')))

    rdb = get_db()
    data = request.get_data()
    # An empty body is falsy without being None, so test truthiness.
    if not data:
        return error('no data received')
    data = json.loads(data.decode('utf-8'))
    action = data.get('action')

    stored_requests = rdb.get('to_approve')
    if stored_requests is None:
        return error('No records in the database')
    pending = pd.DataFrame(json.loads(stored_requests))
    req = pending.loc[(pending['tower'] == data.get('tower'))
                      & (pending['Rack'] == data.get('Rack'))
                      & (pending['pos'] == data.get('pos'))]
    if len(req) == 0:
        return error('Cant find a record in the database')

    if action == 'approve':
        tower_columns = [
            'ID', 'Rack', 'Date', 'Responsible person', 'Comments', 'pos', 'x',
            'y'
        ]
        # req was filtered on data['tower'], so this is the request's tower
        tower = data.get('tower')
        tower_data = rdb.get(tower)
        if tower_data is None:
            tower_df = pd.DataFrame(columns=tower_columns)
        else:
            tower_df = pd.DataFrame(json.loads(tower_data))

        occupied = tower_df.loc[
            (tower_df['Rack'].astype(str) == str(data.get('Rack', '0')))
            & (tower_df['pos'] == data.get('pos'))]
        if len(occupied) == 0:
            # a request that takes a cell line out cannot target a free slot
            if req['prev_cell_line'].tolist()[0]:
                return error(
                    'You cant request from a current positon, because it is empty'
                )
            # the requested cell line becomes the ID stored at the position
            to_append = req[['cell_line'] + tower_columns[1:]].rename(
                columns={'cell_line': 'ID'})
            tower_df = pd.concat([tower_df, to_append], ignore_index=True)
        else:
            # the position is occupied: approving removes the stored entry
            tower_df = tower_df.drop(occupied.index)

        rdb.set(tower, json.dumps(tower_df.to_dict('list')))
        pending.loc[req.index, 'status'] = 'approved'
        save_requests(pending)
        return success('Request has been approved')

    if action == 'decline':
        pending.loc[req.index, 'status'] = 'declined'
        save_requests(pending)
        rewrite_cell_lines()
        return success('Request has been declined')

    if action == 'cancel':
        rewrite_cell_lines()
        pending = pending.drop(req.index)
        if len(pending) == 0:
            rdb.delete('to_approve')
        else:
            save_requests(pending)
        return success('Request has been cancelled')

    # Previously this response was built but never returned, so an unknown
    # action was reported as a success.
    return error('Unknown action "{}"'.format(action))
示例#25
0
def search():
    """Search cell lines by ID or name and return the hits as an HTML table.

    The raw request body is the search text; it is upper-cased and matched
    as a literal substring against the upper-cased ``ID`` and ``Cell line``
    columns of the ``cell_lines`` record. Matching entries stored in any
    tower are listed with status ``confirmed``; matching rows of the
    ``to_approve`` record are listed with status ``pending``. Rows are
    de-duplicated on (tower, Rack, pos), keeping the last one.

    Returns:
        A ``make_response`` result whose payload carries ``status`` and the
        rendered table in ``html_result``.
    """
    from html import escape
    from main import get_db

    rdb = get_db()
    to_search = request.get_data()
    if to_search is None:
        return make_response({'status': 'error', 'error': 'No input received'})
    to_search = to_search.decode('utf-8').upper()

    to_approve = rdb.get('to_approve')
    if to_approve is not None:
        to_approve = pd.DataFrame(json.loads(to_approve))
    else:
        to_approve = pd.DataFrame(columns=['cell_line', 'prev_cell_line'])

    cell_lines = rdb.get('cell_lines')
    if cell_lines is None:
        # nothing registered yet: search an empty table instead of crashing
        cell_lines = pd.DataFrame(columns=['ID', 'Cell line'])
    else:
        cell_lines = pd.DataFrame(json.loads(cell_lines))
    cell_lines = cell_lines.fillna('')

    # search by ID or name; regex=False so that characters such as "(" or
    # "+" in the user's text are matched literally instead of raising
    found = cell_lines.loc[
        cell_lines['ID'].str.upper().str.contains(to_search, regex=False)
        | cell_lines['Cell line'].str.upper().str.contains(to_search,
                                                           regex=False)]
    cell_line_ids = found['ID'].tolist()

    frames = []
    towers = [tower.decode('utf-8') for tower in rdb.smembers('towers')]
    for tower in towers:
        data = rdb.get(tower)
        if data is None:
            continue
        df = pd.merge(pd.DataFrame(json.loads(data)), found, on='ID')
        if len(df) == 0:
            continue
        df['tower'] = tower
        df['status'] = 'confirmed'
        frames.append(df)

    # copy: the selection is modified below
    found2 = to_approve.loc[
        to_approve['cell_line'].isin(cell_line_ids)
        | to_approve['prev_cell_line'].isin(cell_line_ids)].copy()

    # requests without a new cell line refer to the previous one
    no_new_line = found2['cell_line'] == ''
    found2.loc[no_new_line, 'cell_line'] = found2.loc[no_new_line,
                                                      'prev_cell_line']

    found2 = pd.merge(found2, cell_lines, left_on='cell_line', right_on='ID')
    found2['status'] = 'pending'
    frames.append(found2)

    results_df = pd.concat(frames, ignore_index=True)

    columns = [
        'ID', 'Cell line', 'tower', 'Rack', 'pos', 'Responsible person',
        'Date', 'status'
    ]
    # reindex (rather than plain selection) so that an empty result without
    # tower columns yields an empty table instead of a KeyError
    results_df = results_df.reindex(columns=columns).fillna('')
    results_df['Rack'] = results_df['Rack'].astype(int)

    results_df = results_df.drop_duplicates(['tower', 'Rack', 'pos'],
                                            keep='last')

    badge_classes = {
        'pending': 'badge badge-warning',
        'approved': 'badge badge-success',
        'confirmed': 'badge badge-success',
        'declined': 'badge badge-danger',
    }

    parts = [
        '<table class="table table-hover table-sm" id="table_search"><tr>'
    ]
    for column in columns:
        parts.append('<th>{}</th>'.format(column))
    parts.append('<th></th><th></th></tr>')

    for _, row in results_df.iterrows():
        # stored values are escaped before being placed into the markup
        row_id = escape('{}_{}_{}'.format(row['tower'], row['Rack'],
                                          row['pos']))
        if row['status'] == 'pending':
            parts.append(
                '<tr class="table-warning" id="{}">'.format(row_id))
        else:
            parts.append('<tr id="{}">'.format(row_id))

        for column in columns:
            css_class = column.replace(' ', '_')
            value = escape(str(row[column]))
            if column == 'status':
                parts.append(
                    '<td class="{}"><span class="{}">{}</span></td>'.format(
                        css_class, badge_classes.get(row['status'], ''),
                        value))
            else:
                parts.append('<td class="{}">{}</td>'.format(
                    css_class, value))

        parts.append(
            '<td><button type="button" class="btn btn-sm btn-outline-primary" id="edit_search">Edit</button></td>'
        )
        if row['status'] != 'pending':
            parts.append(
                '<td><button type="button" class="btn btn-sm btn-outline-secondary request_search" id="request_search">Request</button></td>'
            )
        else:
            parts.append('<td></td>')
        # close every row (the closing tag used to be emitted only once,
        # after the loop)
        parts.append('</tr>')

    parts.append('</table>')
    html_result = ''.join(parts)

    return make_response({
        'status': 'success',
        'html_result': html_result
    }, 200)
def get_heatmap(project_id):
    """Render the clustered heatmap page for a project.

    GET shows the sample selection form. POST reads the form and builds a
    heatmap in one of three ways:

    * ``filter1 == 'list_of_genes'`` with a non-empty gene list: the listed
      genes, in the given order, for the samples of ``first_group``;
    * only ``first_group`` selected: genes ranked by their variance across
      the group (ascending when ``filter1 == 'least'``, else descending),
      limited to ``number_of_genes1`` rows unless that is 0;
    * both groups selected: genes ranked by the summed pairwise variance
      between the groups are limited to ``number_of_genes2``; the genes
      plotted are those selected for ``first_group`` that are not in that
      second selection.

    Args:
        project_id: identifier used to build the database keys
            ``cpm_coding_<id>`` and ``cpm_non_coding_<id>``.

    Returns:
        The rendered ``heatmap.html`` template.
    """
    from main import get_db
    rdb = get_db()
    coding_key = 'cpm_coding_{}'.format(project_id)
    if not rdb.exists(coding_key):
        return render_template(
            "heatmap.html",
            no_data=True,
            error="No data for the project {}".format(project_id))

    df = pd.read_msgpack(rdb.get(coding_key))
    samples = sorted(list(df.columns))
    samples.remove('gene_name')

    if request.method == "GET":
        return render_template("heatmap.html",
                               samples=samples,
                               first_group=[],
                               second_group=[])

    first_group = request.form.getlist('first_group')
    if not first_group:
        return render_template("heatmap.html",
                               samples=samples,
                               error="No samples selected")

    second_group = request.form.getlist('second_group')

    filter1 = request.form.get('filter1')
    filter2 = request.form.get('filter2')

    # a missing or empty field means "no limit" instead of raising in int()
    number_of_genes1 = int(request.form.get('number_of_genes1') or 0)
    number_of_genes2 = int(request.form.get('number_of_genes2') or 0)

    list_of_genes = request.form.get('list_of_genes', '').split()
    include_non_coding = request.form.get('include_non_coding', "") == "True"

    if include_non_coding:
        data = rdb.get('cpm_non_coding_{}'.format(project_id))
        if data is None:
            return render_template(
                "heatmap.html",
                samples=samples,
                error="No data for the project {}".format(project_id))
        df = pd.read_msgpack(data)

    # template variables shared by every result page below
    context = dict(samples=samples,
                   first_group=first_group,
                   list_of_genes=list_of_genes,
                   number_of_genes1=number_of_genes1,
                   number_of_genes2=number_of_genes2,
                   filter1=filter1,
                   filter2=filter2,
                   include_non_coding=include_non_coding)

    first_columns = ['gene_name'] + first_group

    if filter1 == 'list_of_genes' and list_of_genes:
        # Collect ALL listed genes before clustering (the clustering used to
        # run inside the loop and returned after the first gene), and keep
        # only the columns named in the header.
        df1 = pd.concat([
            df.loc[df['gene_name'] == gene, first_columns]
            for gene in list_of_genes
        ], ignore_index=True)
        if df1.empty:
            return render_template(
                "heatmap.html",
                error="None of the listed genes were found",
                **context)
        plot_data, csv_data = _heatmap_plot_and_csv(first_columns, df1)
        return render_template("heatmap.html",
                               plot_data=plot_data,
                               csv_data=csv_data,
                               **context)

    # select samples (copy: the values are rounded in place below)
    df1 = df[first_columns].copy()

    # round to 2 decimals
    for sample in first_group:
        df1[sample] = df1[sample].round(2)

    # sort by variance across the selected samples
    variance = df1[first_group].var(axis=1)
    df1 = df1.reindex(
        variance.sort_values(ascending=(filter1 == 'least')).index)

    # select top genes (or all if number_of_genes is 0)
    if number_of_genes1 != 0:
        df1 = df1[:number_of_genes1]

    if not second_group:
        plot_data, csv_data = _heatmap_plot_and_csv(first_columns, df1)
        return render_template("heatmap.html",
                               plot_data=plot_data,
                               csv_data=csv_data,
                               **context)

    # if second group
    all_columns = ['gene_name'] + first_group + second_group
    df2 = df[all_columns]

    # pairwise comparisons: sum the variance of every (sample1, sample2) pair
    pair_variance = None
    for sample1 in first_group:
        for sample2 in second_group:
            current = df[[sample1, sample2]].var(axis=1)
            pair_variance = (current if pair_variance is None else
                             pair_variance + current)
    # scaling by a constant does not change the ranking below
    pair_variance = pair_variance / len(second_group)

    # sort by variance
    ranked = df2.reindex(
        pair_variance.sort_values(ascending=(filter2 == 'least')).index)
    if number_of_genes2 != 0:
        ranked = ranked[:number_of_genes2]

    # genes selected for the first group that are not in the second selection
    first_genes = set(df1['gene_name'].tolist())
    common_genes = first_genes & set(ranked['gene_name'].tolist())
    our_genes = first_genes - common_genes

    if len(our_genes) == 0:
        error = "No common genes found between 2 groups. Try to increase the number of genes"
        return render_template("heatmap.html",
                               second_group=second_group,
                               error=error,
                               **context)

    final_df = df2.loc[df2['gene_name'].isin(our_genes)].copy()
    for sample in first_group + second_group:
        final_df[sample] = final_df[sample].round(2)

    plot_data, csv_data = _heatmap_plot_and_csv(all_columns, final_df)

    return render_template("heatmap.html",
                           second_group=second_group,
                           plot_data=plot_data,
                           csv_data=csv_data,
                           common_genes=len(final_df),
                           **context)


def _heatmap_plot_and_csv(header, frame):
    """Cluster ``frame`` and return ``(plot_data_json, csv_text)`` for it."""
    # input for clustering: header + rows
    cluster = Cluster()
    cluster.read_data(rows=[header] + frame.values.tolist(), header=True)
    cluster.cluster_data()
    plot_data = json.dumps(Dendrogram(cluster).create_cluster_heatmap())
    csv_data = frame.to_csv(sep=",", header=True, index=False)
    return plot_data, csv_data
def get_psite_plot(project_id):
    """Render the A/P/E-site fold-change plot for the selected contrasts.

    GET shows the contrast selection form. POST loads the ``psites_<id>``,
    ``asites_<id>`` and ``esites_<id>`` records, drops the rows whose ``aa``
    is ``'Stp'`` or ``'Str'``, optionally sums the rows per amino acid, and
    computes for every selected contrast ``s1__vs__s2`` the value
    ``(norm_s1 - norm_s2) / norm_s2`` on the ``<norm>_<sample>`` columns.
    Each x category (amino acid or codon) yields one point per site and
    contrast; categories without rows yield a placeholder with value 0.

    Args:
        project_id: identifier used to build the database keys.

    Returns:
        The rendered ``psite_plot.html`` template.
    """
    from main import get_db
    rdb = get_db()
    contrasts = rdb.smembers('contrasts_{}'.format(project_id))
    contrasts = sorted([c.decode('utf-8') for c in contrasts])
    if request.method == 'GET':
        return render_template("psite_plot.html", contrasts=contrasts)

    raw = [
        rdb.get('{}_{}'.format(prefix, project_id))
        for prefix in ('psites', 'asites', 'esites')
    ]
    # all three datasets are parsed below, so any missing one is an error
    # (the check used to fire only when all three were missing)
    if any(item is None for item in raw):
        return render_template(
            'psite_plot.html',
            error='No data for project {} found'.format(project_id),
            contrasts=contrasts)

    # if POST
    p_df, a_df, e_df = [pd.DataFrame(json.loads(item)) for item in raw]

    # getting rid of stop and start codons (copy: columns are added below)
    p_df = p_df.loc[~p_df['aa'].isin(['Stp', 'Str'])].copy()
    a_df = a_df.loc[~a_df['aa'].isin(['Stp', 'Str'])].copy()
    e_df = e_df.loc[~e_df['aa'].isin(['Stp', 'Str'])].copy()

    # check if any contrasts selected
    selected = request.form.getlist('selected_contrasts')
    if len(selected) == 0:
        return render_template('psite_plot.html',
                               error='Please select contrasts',
                               contrasts=contrasts)

    # normalization
    norm = request.form.get('normalization', 'tpm')

    # group by codon or by amino acid
    group_by_aa = request.form.get('group_by_codon') != 'codon'

    # get x categories for highcharts
    if group_by_aa:
        p_df = p_df.groupby('aa').sum().reset_index()
        a_df = a_df.groupby('aa').sum().reset_index()
        e_df = e_df.groupby('aa').sum().reset_index()
        x_categories = p_df['aa'].unique().tolist()
    else:
        x_categories = _psite_codon_categories()

    max_fc = None
    min_fc = None
    for contrast in selected:
        s1, s2 = contrast.split('__vs__')
        sample_col = '{}_{}'.format(norm, s1)
        control_col = '{}_{}'.format(norm, s2)
        cols = ['aa', s1, s2, sample_col, control_col, contrast]
        if not group_by_aa:
            cols = cols + ['codon']

        for frame in (p_df, a_df, e_df):
            # calculating fc as (sample - control) / control
            frame[contrast] = ((frame[sample_col] - frame[control_col]) /
                               frame[control_col])
            # round values
            frame[cols] = frame[cols].round(3)

        lows = [frame[contrast].min() for frame in (p_df, a_df, e_df)]
        highs = [frame[contrast].max() for frame in (p_df, a_df, e_df)]
        min_fc = min(lows) if min_fc is None else min(min_fc, *lows)
        max_fc = max(highs) if max_fc is None else max(max_fc, *highs)

    # symmetric colour range around 0
    max_fc = max(abs(min_fc), abs(max_fc))
    min_fc = -1 * max_fc
    middle_val = 0

    # plot order of the sites; the position in this tuple is the y offset
    # inside the block of four rows reserved for each contrast
    sites = (('A', a_df), ('P', p_df), ('E', e_df))

    plot_series = []
    for c, contrast in enumerate(selected):
        s1, s2 = contrast.split('__vs__')
        cols = [
            'aa', s1, s2, '{}_{}'.format(norm, s1), '{}_{}'.format(norm, s2),
            contrast
        ]
        if not group_by_aa:
            cols = cols + ['codon']
        for i, cat in enumerate(x_categories):
            if cat == '':
                plot_series.append({})
                continue

            rows = {
                site: _psite_rows(frame, cat, group_by_aa)
                for site, frame in sites
            }

            if group_by_aa:
                codon = ''
                aa = cat
            elif cat in _PSITE_ATG_CATEGORIES:
                codon = 'ATG'
                aa = _PSITE_ATG_CATEGORIES[cat]
            else:
                codon = cat
                # take the amino acid from the first site that has rows for
                # this codon; reading the P-site rows unconditionally raised
                # IndexError when they were empty
                aa = ''
                for site in ('P', 'A', 'E'):
                    if len(rows[site]) > 0:
                        aa = rows[site].iloc[0]['aa']
                        break

            # getting plot series
            for offset, (site, _) in enumerate(sites):
                y = offset + c * 4
                # select cols for current contrast
                current = rows[site][cols].copy()
                if len(current) == 0:
                    plot_series.append({
                        'x': i,
                        'y': y,
                        'codon': codon,
                        'aa': aa,
                        'site': site,
                        'value': 0
                    })
                else:
                    current['value'] = current[contrast]
                    current['contrast'] = contrast
                    current['x'] = i
                    current['y'] = y
                    current['site'] = site
                    plot_series += current.to_dict('records')
        if group_by_aa:
            plot_series.append({})

    y_categories = []
    for contrast in selected:
        y_categories += [
            'A-site ({})'.format(contrast), 'P-site ({})'.format(contrast),
            'E-site ({})'.format(contrast), ''
        ]

    group_by_codon = not group_by_aa
    return render_template('psite_plot.html',
                           psite_series=plot_series,
                           contrasts=contrasts,
                           y_categories=y_categories,
                           x_categories=x_categories,
                           min_fc=min_fc,
                           max_fc=max_fc,
                           middle_val=middle_val,
                           group_by_codon=group_by_codon,
                           norm=norm,
                           selected_contrasts=selected,
                           dataset_id=project_id)


# x categories that stand for the ATG codon restricted to one 'aa' label
_PSITE_ATG_CATEGORIES = {'ATG_M': 'Met', 'ATG_S': 'Str'}


def _psite_codon_categories():
    """Return the codon x-axis categories, with '' between codon groups."""
    groups = [
        ['GCA', 'GCC', 'GCG', 'GCT'],
        ['AGA', 'CGC', 'CGA', 'CGG', 'CGT', 'AGG'],
        ['AAC', 'AAT'],
        ['GAC', 'GAT'],
        ['TGC', 'TGT'],
        ['CAA', 'CAG'],
        ['GAA', 'GAG'],
        ['GGA', 'GGC', 'GGG', 'GGT'],
        ['CAC', 'CAT'],
        ['ATA', 'ATC', 'ATT'],
        ['CTA', 'CTC', 'CTG', 'CTT', 'TTA', 'TTG'],
        ['AAA', 'AAG'],
        ['ATG_M'],  # 'ATG_S' (start codon) is left out
        ['TTC', 'TTT'],
        ['CCA', 'CCC', 'CCG', 'CCT'],
        ['AGC', 'AGT', 'TCA', 'TCC', 'TCG', 'TCT'],
        # 'TAA', 'TAG', 'TGA' (stop codons) are left out
        ['ACA', 'ACC', 'ACG', 'ACT'],
        ['TGG'],
        ['TAC', 'TAT'],
        ['GTA', 'GTC', 'GTG', 'GTT'],
    ]
    categories = []
    for group in groups:
        if categories:
            categories.append('')
        categories.extend(group)
    return categories


def _psite_rows(frame, category, group_by_aa):
    """Return the rows of ``frame`` that belong to one x-axis category."""
    if group_by_aa:
        return frame.loc[frame['aa'] == category]
    if category in _PSITE_ATG_CATEGORIES:
        return frame.loc[(frame['codon'] == 'ATG')
                         & (frame['aa'] == _PSITE_ATG_CATEGORIES[category])]
    return frame.loc[frame['codon'] == category]
def get_volcano_plot(project_id):
    """Render the volcano plot of one contrast of a project.

    GET shows the selection form. POST reads the fold-change thresholds
    ``left`` and ``right`` and the p-value threshold ``bottom`` from the
    form, loads the ``volcano_<project>_<contrast>`` record and splits its
    rows into five series: genes listed in the module-level ``asp_top200``
    and ``pro_top200`` collections (defined outside this function), the
    remaining genes at or beyond the left / right threshold with a p-value
    at or below ``bottom``, and everything else.

    Args:
        project_id: identifier used to build the database keys.

    Returns:
        The rendered ``volcano_plot.html`` template.
    """
    from main import get_db
    rdb = get_db()
    contrasts = rdb.smembers('contrasts_{}'.format(project_id))
    contrasts = sorted([c.decode('utf-8') for c in contrasts])
    if request.method == 'GET':
        return render_template("volcano_plot.html", contrasts=contrasts)

    # dash lines; float() fails on a missing or non-numeric field and the
    # logarithms fail on values <= 0, so report those as a form error
    try:
        left = float(request.form.get('left'))
        right = float(request.form.get('right'))
        bottom = float(request.form.get('bottom'))

        left_line = round(math.log2(left), 3)
        right_line = round(math.log2(right), 3)
        bottom_line = -round(math.log10(bottom), 3)
    except (TypeError, ValueError):
        return render_template("volcano_plot.html", contrasts=contrasts,
                               error='Thresholds must be positive numbers')

    contrast = request.form.get('contrast')
    if contrast is None:
        return render_template("volcano_plot.html", contrasts=contrasts, error='Contrast is not selected')

    data = rdb.get('volcano_{}_{}'.format(project_id, contrast))
    if data is None:
        return render_template("volcano_plot.html", contrasts=contrasts, error='No data found for contrast: {}'.format(contrast))

    df = pd.DataFrame(json.loads(data))

    # Derive the plotted values from the unrounded data: rounding first
    # turned every p-value below 0.0005 into 0 and log10(0) raised. A
    # p-value that is not positive gives NaN and is dropped from the series.
    df['-log10(pval)'] = df['pvalue'].apply(
        lambda p: -math.log10(p) if p > 0 else float('nan'))
    df['fc'] = 2 ** df['log2FoldChange']
    df = df.drop('padj', axis=1)
    # NOTE: positional rename - relies on the column order of the stored data
    df.columns = ['x', 'pvalue', 'gene', 'y', 'fc']
    df = df.round(decimals=3)

    # thresholds
    is_asp = df['gene'].isin(asp_top200)
    is_pro = df['gene'].isin(pro_top200)
    highlighted = is_asp | is_pro
    significant = df['pvalue'] <= bottom
    is_left = (df['fc'] <= left) & significant & ~highlighted
    is_right = (df['fc'] >= right) & significant & ~highlighted

    asp_df = df.loc[is_asp]
    pro_df = df.loc[is_pro]
    left_df = df.loc[is_left]
    right_df = df.loc[is_right]
    bottom_df = df.loc[~(is_left | is_right | highlighted)]

    def build_series(name, frame, color):
        return {
            'name': name,
            'data': frame.dropna().to_dict('records'),
            'turboThreshold': len(frame),
            'color': color,
            'marker': {
                'symbol': 'circle',
                'radius': 5,
            },
        }

    # series
    plot_series = [
        build_series(contrast, bottom_df, 'grey'),
        build_series(contrast, left_df, 'blue'),
        build_series(contrast, right_df, 'red'),
        build_series('ASP', asp_df, '#99ffcc'),
        build_series('PRO', pro_df, '#ff6699'),
    ]

    return render_template('volcano_plot.html', contrasts=contrasts, selected_contrast=contrast, plot_series=plot_series,
                           right=right_line, left=left_line, bottom=bottom_line, selected_thresholds={
                               'left': left,
                               'right': right,
                               'bottom': bottom,
                           })
示例#29
0
import web
from main import get_session, get_render, get_db
# Objects returned by main's get_session / get_render / get_db, created once
# at import time and shared by the handler classes below.
session = get_session()
render = get_render()
db = get_db()

class login:
    """Request handler for the login page (GET shows the form, POST signs in)."""

    def GET(self):
        """Render the login form wrapped in the base layout."""
        return render.base(render.login(), session)

    def POST(self):
        """Validate the submitted credentials.

        On a match the username is stored in the session and the client is
        redirected to "/"; otherwise the login form is rendered again.
        """
        # Named ``form`` rather than ``input`` so the builtin is not shadowed.
        form = web.input()
        # Values are passed as query variables ($name placeholders), not
        # interpolated into the SQL text.
        # NOTE(security): the submitted password is compared directly with the
        # stored column value, i.e. no hashing is applied here. Switching to a
        # password hash needs a coordinated change of the code that writes the
        # ``auth`` table and a migration of existing rows.
        matches = db.query(
            "SELECT * FROM auth WHERE username = $username AND password = $password",
            {
                'username': form.username,
                'password': form.password,
            })
        if not matches:
            return render.base(render.login(), session)

        session.username = form.username
        # web.seeother is an HTTP error/redirect class in web.py; it is meant
        # to be raised so the framework sends an empty-bodied 303 response.
        raise web.seeother("/")

class register:
    """Request handler for the registration page."""

    def GET(self):
        """Render the registration form wrapped in the base layout."""
        return render.base(render.register(), session)

    def POST(self):
        """Insert the submitted account into ``auth`` and redirect to /login."""
        # Named ``form`` rather than ``input`` so the builtin is not shadowed.
        form = web.input()
        # NOTE(security): the password is written exactly as submitted (no
        # hashing). Hashing it here requires the login check and any existing
        # rows to be migrated at the same time, so it is only flagged.
        db.insert("auth", username=form.username, password=form.password)
        # web.seeother is an HTTP error/redirect class in web.py; it is meant
        # to be raised so the framework sends an empty-bodied 303 response.
        raise web.seeother("/login")

class logout:
示例#30
0
def client():
    """Yield the app's test client after calling get_db() in an app context.

    The application context is only held open for the get_db() call; it has
    already been exited by the time the test client is yielded.
    """
    with app.test_client() as test_client:
        with app.app_context():
            # Called for its side effects only; the returned handle is unused.
            get_db()
        yield test_client
示例#31
0
def _ma_parse_filter(raw, cast):
    """Convert a raw form value with ``cast``; missing/blank means no filter.

    Returns None when ``raw`` is None or only whitespace, otherwise
    ``cast(raw)`` (which raises ValueError for malformed numbers).
    """
    if raw is None or not raw.strip():
        return None
    return cast(raw)


def get_ma_plot(project_id):
    """Render the MA plot page for ``project_id``.

    GET renders the page with the list of available contrasts (``no_data`` is
    True when there are none). Any other method reads the selected contrast
    and the optional filters from the submitted form, loads the stored frame
    for that contrast and renders it as a single plot series.

    Form fields read: ``contrast``, ``apply_filters`` (enabled only when it is
    the string "true"), ``pval``, ``fc``, ``min_counts``, ``max_counts``.
    The raw filter strings are echoed back to the template unchanged.
    """
    from main import get_db
    rdb = get_db()
    contrasts = sorted(
        c.decode('utf-8')
        for c in rdb.smembers("contrasts_{}".format(project_id)))

    if request.method == "GET":
        return render_template("ma_plot.html",
                               contrasts=contrasts,
                               no_data=not contrasts)

    contrast = request.form.get('contrast')
    apply_filters = request.form.get('apply_filters') == "true"
    # Keep the raw strings: they are handed back to the template as submitted.
    filters = {
        name: request.form.get(name)
        for name in ('pval', 'fc', 'min_counts', 'max_counts')
    }

    def render_error(message):
        # Same template arguments for every error path.
        return render_template("ma_plot.html",
                               error=message,
                               selected_contrast=contrast,
                               contrasts=contrasts,
                               apply_filters=apply_filters,
                               filters=filters)

    bi_df = rdb.get('ma_plot_all_{}_{}'.format(project_id, contrast))
    if not bi_df:
        return render_error("No data for the contrast {}".format(contrast))

    # NOTE: pandas.read_msgpack was removed in pandas 1.0, so this line needs
    # an older pandas; the stored format is left untouched here.
    df = pd.read_msgpack(bi_df)
    # columns: ['baseMean', 'log2FoldChange', 'lfcSE', 'stat', 'pvalue', 'padj', 'transcript']
    df = df.rename({'baseMean': 'x', 'log2FoldChange': 'y'}, axis='columns')

    if apply_filters:
        try:
            pval = _ma_parse_filter(filters['pval'], float)
            fc = _ma_parse_filter(filters['fc'], float)
            min_counts = _ma_parse_filter(filters['min_counts'], int)
            max_counts = _ma_parse_filter(filters['max_counts'], int)
        except ValueError:
            return render_error("Filter values must be numeric")

        # The filters run while missing values are still NaN: comparing NaN
        # yields False (row dropped), whereas comparing the '' placeholder
        # with a number raises TypeError on Python 3.
        if pval is not None:
            df = df.loc[df["pvalue"] <= pval]
        if fc is not None:
            # Keeps rows whose y lies within [-fc, fc].
            df = df.loc[(df["y"] <= fc) & (df["y"] >= -1 * fc)]
        if min_counts is not None:
            df = df.loc[df["x"] >= min_counts]
        if max_counts is not None:
            df = df.loc[df["x"] <= max_counts]

    # Replace the remaining NaN values only now, for the template payload.
    df = df.fillna('')

    plot_series = {
        'name': contrast.replace('__', ' '),
        'data': df.to_dict('records')
    }
    return render_template("ma_plot.html",
                           contrasts=contrasts,
                           selected_contrast=contrast,
                           plot_series=plot_series,
                           apply_filters=apply_filters,
                           genes=len(df),
                           filters=filters)
示例#32
0
def _ln_rack_cells(tower, rack, pending, stored, y_pos):
    """Build the heatmap cells of one rack (rows of ``y_pos`` x columns 1-10).

    ``pending`` holds the pending requests already restricted to this tower
    and rack; ``stored`` holds the saved records already restricted to this
    rack. For every position a pending request wins over a stored record,
    and a position with neither becomes an empty cell.
    """
    # Cast the column numbers once per rack instead of once per cell.
    pending_x = pending['x'].astype(int)
    stored_x = stored['x'].astype(int)

    cells = []
    for row, y in enumerate(y_pos):
        for x in range(1, 11):
            requested = pending.loc[(pending['y'] == y) & (pending_x == x)]
            occupied = stored.loc[(stored['y'] == y) & (stored_x == x)]
            if len(requested) != 0:
                first = requested.iloc[0]
                cells.append({
                    'pos': '{}{}'.format(y, x),
                    'Rack': rack,
                    'x': x - 1,
                    'y': row,
                    'value': 2,  # means to approve
                    'color': '#ffcc00',  # yellow
                    'Responsible person': first['Responsible person'],
                    'ID': first['cell_line'],
                    'Date': first['Date'],
                    'status': 'to_confirm',
                    'prev_cell_line': first['prev_cell_line'],  # cell_line_IDs
                    'prev_responsible': first['prev_responsible'],
                    'prev_comments': first['prev_comments'],
                    'prev_date': first['prev_date'],
                })
            elif len(occupied) != 0:
                # Work on a copy so the column assignments below neither touch
                # the tower frame nor trigger pandas' chained-assignment
                # warning.
                occupied = occupied.copy()
                occupied['pos'] = (occupied['y'].astype(str) +
                                   occupied['x'].astype(str))
                occupied['y'] = row
                occupied['x'] = occupied['x'].astype(int) - 1
                occupied['color'] = '#F4796E'  # red
                occupied['value'] = 1  # means confirmed
                cells.append(occupied.iloc[0].to_dict())
            else:
                cells.append({
                    'pos': '{}{}'.format(y, x),
                    'Rack': rack,
                    'x': x - 1,
                    'y': row,
                    'value': 0,  # means empty
                    'color': '#FFFFFF',
                    'Tower': tower,
                })
    return cells


def get_liquid_nitrogen():
    """Render the liquid nitrogen storage page.

    Builds one heatmap series per tower/rack (key ``"<tower>_Rack<rack>"``)
    from the records stored under each tower key and the pending entries of
    ``to_approve``, plus the cell line dropdown, the available cell lines, the
    current user's ten most recent requests and the pending requests.

    Rack colors and values:
    empty: white - 0, full: red - 1, to approve: yellow - 2

    Renders the template with ``error='No data found'`` when no ``cell_lines``
    entry exists. ``admin`` is True when the current user's email is listed in
    the ``LIQUID_NITROGEN_ADMINS`` config value.
    """
    from main import get_db
    y_pos = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
    request_columns = [
        'tower', 'Rack', 'pos', 'cell_line', 'prev_cell_line', 'Comments',
        'Date', 'Responsible person'
    ]
    rdb = get_db()
    towers = [tower.decode('utf-8') for tower in rdb.smembers('towers')]

    raw_requests = rdb.get('to_approve')
    if raw_requests is not None:
        to_approve = pd.DataFrame(json.loads(raw_requests))
    else:
        to_approve = pd.DataFrame(columns=[
            'tower', 'pos', 'Rack', 'x', 'y', 'Responsible person', 'Date',
            'Comments', 'cell_line', 'prev_cell_line', 'prev_responsible',
            'prev_comments', 'prev_date', 'status'
        ])
    to_approve = to_approve.fillna('')
    # Towers named in any request (whatever its status) are shown as well.
    towers = set(towers + to_approve['tower'].tolist())

    # The current user's requests of any status, newest first, at most ten.
    own_requests = to_approve.loc[to_approve['Responsible person'] ==
                                  current_user.email]
    user_requests = own_requests[request_columns + ['status']][::-1][:10]
    user_requests = user_requests.to_dict('records')

    # Only pending requests are drawn on the racks and listed for approval.
    to_approve = to_approve.loc[to_approve['status'] == 'pending']

    series = {}
    for tower in towers:
        tower_pending = to_approve.loc[to_approve['tower'] == tower]
        pending_racks = tower_pending['Rack'].astype(str)

        data = rdb.get(tower)
        if data is not None:
            df = pd.DataFrame(json.loads(data)).fillna('null')
        else:
            df = pd.DataFrame(columns=[
                'Comments', 'Date', 'ID', 'Rack', 'Responsible person', 'pos',
                'x', 'y'
            ])
        stored_racks = df['Rack'].astype(str)

        # Racks of this tower only, whether or not the tower has stored data
        # (pending racks of other towers must not leak into this tower).
        racks = list(dict.fromkeys(list(stored_racks) + list(pending_racks)))
        for rack in racks:
            key = '{}_Rack{}'.format(tower, rack)
            series[key] = _ln_rack_cells(
                tower, rack,
                tower_pending.loc[pending_racks == rack],
                df.loc[stored_racks == rack],
                y_pos)

    cell_lines = rdb.get('cell_lines')
    if cell_lines is None:
        return render_template('liquid_nitrogen.html', error='No data found')

    cell_lines = pd.DataFrame(json.loads(cell_lines))
    cell_lines['tubes_available'] = cell_lines['tubes_available'].fillna(0)
    # Blank or otherwise non-numeric counts are treated as 0 tubes instead of
    # failing the integer cast; the stored column itself is left as it is.
    tube_counts = pd.to_numeric(cell_lines['tubes_available'],
                                errors='coerce').fillna(0).astype(int)
    available_cell_lines = cell_lines.loc[
        tube_counts != 0,
        ['ID', 'Cell line', 'tubes_available']].to_dict('records')

    cell_lines = cell_lines.fillna('')
    cell_lines.index = cell_lines['ID']
    cell_lines = cell_lines.to_dict('index')
    cell_lines_dropdown = [{'value': 'add_new', 'text': 'Add new'}]
    cell_lines_dropdown.extend({
        'value': key,
        'text': key,
    } for key in cell_lines)

    liquid_nitrogen_admins = current_app.config.get('LIQUID_NITROGEN_ADMINS')
    if liquid_nitrogen_admins is None:
        liquid_nitrogen_admins = []

    users = rdb.hgetall('users')
    users = [] if users is None else users.keys()
    users = [user.decode('utf-8') for user in users]

    to_approve_data = to_approve[request_columns].to_dict('records')

    # NOTE(review): json.dumps escapes non-ASCII characters by default
    # (ensure_ascii=True), so replacing a raw "\xa0" afterwards looks like a
    # no-op. Left unchanged so the payload stays identical - confirm intent.
    cell_lines_json = json.dumps(cell_lines).replace("""\xa0""", " ")

    return render_template('liquid_nitrogen.html',
                           series=series,
                           cell_lines_dropdown=cell_lines_dropdown,
                           cell_lines=cell_lines_json,
                           to_approve=to_approve_data,
                           admin=current_user.email in liquid_nitrogen_admins,
                           users=users,
                           current_user=current_user.email,
                           available_cell_lines=available_cell_lines,
                           user_requests=user_requests)
示例#33
0
 def setUp(self):
     """Prepare per-test state inside an application context.

     Stores the handle returned by ``get_db()`` on ``self.db`` and starts
     with an empty ``self.object_to_remove`` list (presumably filled by the
     tests with objects to delete afterwards - confirm against tearDown).
     """
     with app.app_context():
         # Imported locally, as in the other snippets that use main.get_db.
         from main import get_db
         self.db = get_db()
         self.object_to_remove = []