def get_unique_users():
    """
    Count the number of unique mapping users.

    Cached weekly.
    """
    # Projection keeps the query cheap: only the e-mail field is fetched.
    user_records = query_item('user', projection=['user_email'])
    return len(user_records)
def reports(self):
    """
    Fetch every trait belonging to this user and group the rows by
    report slug, newest first.

    Returns:
        defaultdict(list): report_slug -> list of trait rows, each list
        ordered by created_on descending.
    """
    # Note this requires a composite index defined very precisely.
    rows = query_item('trait',
                      filters=[('user_id', '=', self.user_id)],
                      order=['user_id', '-created_on'])
    # Re-sort client-side by creation time (newest first) to be safe.
    rows = sorted(rows, key=lambda r: r['created_on'], reverse=True)
    grouped = defaultdict(list)
    for row in rows:
        grouped[row['report_slug']].append(row)
    # Generate report objects
    return grouped
def validate_report_name_unique(form, field):
    """
    Checks to ensure that the report name submitted is unique.

    Raises:
        ValidationError: if a trait with the same slug already exists,
        or if the backend query fails with BadRequest.
    """
    slug = slugify(form.report_name.data)
    try:
        existing = query_item('trait', filters=[('report_slug', '=', slug)])
    except BadRequest:
        raise ValidationError("Backend Error")
    if existing:
        raise ValidationError("That report name is not available. Choose a unique report name")
def get_latest_public_mappings():
    """
    Returns the 5 most recent public, completed mappings.
    """
    filters = [('is_public', '=', True),
               ('status', '=', 'complete')]
    traits = list(query_item('trait',
                             filters=filters,
                             projection=('report_slug', 'trait_name', 'created_on',),
                             limit=5))
    for trait in traits:
        # created_on appears to be stored in microseconds; dividing by 1e6
        # yields seconds for arrow.get — TODO confirm against the writer.
        trait['created_on'] = arrow.get(int(trait['created_on']) / 1e6)
    return traits
def report_data(report_slug):
    """
    Serve the raw trait data of a report as a downloadable TSV.

    Looks the report up by slug first, then falls back to the secret
    hash (used for privately shared reports). Aborts with 404 when
    neither lookup matches.
    """
    trait_set = query_item('trait', filters=[('report_slug', '=', report_slug)])
    if not trait_set:
        # Fall back: the URL segment may be a secret hash, not a slug.
        trait_set = query_item('trait', filters=[('secret_hash', '=', report_slug)])
    if not trait_set:
        flash('Cannot find report', 'danger')
        return abort(404)
    first_trait = trait_set[0]
    headers = {"Content-disposition": "attachment; filename=%s.tsv" % report_slug}
    return Response(first_trait['trait_data'],
                    mimetype="text/csv",
                    headers=headers)
def mapping_interval(report_name, trait_name, peak):
    """
    Return (as JSON) the variants within a mapping interval for one
    report/trait, restricted to the 25 most correlated genes and at
    most 500 rows.

    Args:
        report_name: report identifier (joined with trait_name to form
            the 'report_trait' key).
        trait_name: trait identifier.
        peak: peak label with '_' in place of ':' (URL-safe form).

    Returns:
        JSON response with one list per column, ``null`` when the
        interval has no variants, or ``(message, 404)`` when the
        report/trait cannot be found.
    """
    try:
        trait = query_item('trait',
                           filters=[('report_trait', '=', f"{report_name}:{trait_name}")])[0]
    except IndexError:
        # BUG FIX: original referenced undefined `report_slug` here, which
        # raised NameError instead of returning the intended 404 message.
        err = f"Report - Trait not found: {report_name}:{trait_name}"
        logger.error(err)
        return err, 404
    trait = trait_m(trait.key.name)
    interval_summary = trait.get_gs_as_dataset("interval_variants.tsv.gz").fillna("")
    # Peaks arrive URL-safe ('chrom_pos'); stored form uses ':'.
    interval_summary = interval_summary[interval_summary.peak == peak.replace("_", ":")]
    interval_summary = interval_summary.loc[:, ("CHROM", "POS", "REF", "ALT",
                                                "impact", "effect", "aa_change",
                                                "gene_name", "gene_id",
                                                "corrected_spearman_cor_p")]
    interval_summary['color'] = interval_summary.impact.apply(lambda x: impact_colors[x])
    try:
        interval_summary['name'] = interval_summary.apply(
            lambda x: f"{x.gene_name} ({x.gene_id}) - {x.effect}\n{x.aa_change}",
            axis=1)
    except ValueError:
        # DataFrame.apply over an empty frame raises ValueError here:
        # no variants in this interval, so return null to the client.
        # (Removed a dead, unused `columns = (...)` assignment that the
        # original left in this branch.)
        return jsonify(None)
    # Take top 25 most correlated genes.
    top_genes = list(interval_summary.groupby('gene_id')
                                     .corrected_spearman_cor_p
                                     .apply(lambda x: max(x))
                                     .nlargest(25)
                                     .reset_index()
                                     .gene_id.values)
    # Cap the payload at 500 rows even within the top genes.
    interval_summary = interval_summary[interval_summary['gene_id'].isin(top_genes)][:500]
    out = {k: list(interval_summary[k]) for k in interval_summary.columns.values}
    return jsonify(out)
def get_mappings_summary():
    """
    Generates the cumulative sum of reports and traits mapped.

    Cached daily.

    Returns:
        pd.DataFrame with columns created_on (ISO date), reports, and
        traits, where reports/traits are running cumulative totals.
    """
    all_traits = pd.DataFrame.from_dict(query_item('trait'))
    # Drop the last 6 characters of the raw timestamp (sub-second part,
    # presumably microseconds — TODO confirm) and keep the ISO date.
    all_traits.created_on = all_traits.apply(
        lambda r: arrow.get(str(r['created_on'])[:-6]).date().isoformat(),
        axis=1)
    traits_per_day = all_traits.groupby('created_on').size().reset_index(name='traits')
    reports_per_day = (all_traits[['report_slug', 'created_on']]
                       .drop_duplicates()
                       .groupby('created_on')
                       .size()
                       .reset_index(name='reports'))
    summary = (pd.merge(reports_per_day, traits_per_day, how='outer')
                 .fillna(0)
                 .sort_values('created_on'))
    summary.reports = summary.reports.cumsum()
    summary.traits = summary.traits.cumsum()
    return summary
def public_mapping():
    """
    Render the listing of public mapping reports.

    Reads an optional ``query`` search term from the request args and
    fetches all mappings flagged public.
    """
    query = request.args.get("query")
    title = "Public Mappings"
    pub_mappings = query_item('mapping', filters=[('is_public', '=', True)])
    # Pass template context explicitly instead of **locals(): locals()
    # silently forwards every local variable, which is fragile under
    # refactoring and hides what the template actually depends on.
    return render_template('public_mapping.html',
                           query=query,
                           title=title,
                           pub_mappings=pub_mappings)
def report_view(report_slug, trait_name=None, rerun=None):
    """
    This view will handle logic of handling legacy reports and v2 reports.

    Resolves a report by slug (falling back to secret hash), optionally
    deletes and re-runs a trait's mapping, enforces access control for
    non-public reports, then renders the version-specific template
    (reports/v1.html or reports/v2.html) with the assembled context.
    """
    trait_set = query_item('trait', filters=[('report_slug', '=', report_slug)])
    # Get first report if available.
    try:
        trait = trait_set[0]
    except IndexError:
        # No match by slug — retry treating the URL segment as a secret hash
        # (the privately shared link form).
        try:
            trait_set = query_item('trait', filters=[('secret_hash', '=', report_slug)])
            trait = trait_set[0]
        except IndexError:
            flash('Cannot find report', 'danger')
            return abort(404)
    # Enable reruns
    if rerun:
        # Narrow to the requested trait, delete its existing trait and
        # mapping entities, then kick off a fresh mapping task.
        trait_set = [x for x in trait_set if x['trait_name'] == trait_name]
        for n, existing_trait in enumerate(trait_set):
            logger.info(n)
            logger.info(existing_trait.key)
            delete_item(existing_trait)
        # NOTE(review): elsewhere in this file trait_m is called with a key
        # name (e.g. trait_m(cur_trait.key.name)); here it receives the
        # entity itself — confirm trait_m accepts both forms.
        trait = trait_m(trait_set[0])
        mapping_items = query_item('mapping',
                                   filters=[('report_slug', '=', report_slug),
                                            ('trait_slug', '=', trait_name)])
        for existing_mapping in mapping_items:
            delete_item(existing_mapping)
        trait.status = "Rerunning"
        # Running the task will save it.
        trait.run_task()
        return redirect(
            url_for('mapping.report_view',
                    report_slug=report_slug,
                    trait_name=trait_name))
    # Verify user has permission to view report
    user = session.get('user')
    if not trait.get('is_public'):
        if user:
            user_id = user.get('user_id')
        else:
            user_id = None
        # Access is allowed when the URL segment is the secret hash, or
        # when the logged-in user owns the report.
        if trait['secret_hash'] != report_slug and user_id != trait['user_id']:
            flash('You do not have access to that report', 'danger')
            return abort(404)
    if not trait_name:
        logger.error("Trait name not found")
        # Redirect to the first trait
        return redirect(
            url_for('mapping.report_view',
                    report_slug=report_slug,
                    trait_name=trait_set[0]['trait_name']))
    try:
        # Resolve REPORT --> TRAIT
        # Fetch trait and convert to trait object.
        cur_trait = [x for x in trait_set if x['trait_name'] == trait_name][0]
        trait = trait_m(cur_trait.key.name)
        # Overlay the datastore entity's fields onto the trait object.
        trait.__dict__.update(cur_trait)
        logger.info(trait)
    except IndexError:
        return abort(404)
    # Base template context shared by both report versions.
    VARS = {'title': trait.report_name,
            'subtitle': trait_name,
            'trait_name': trait_name,
            'report_slug': report_slug,
            'trait': trait,
            'trait_set': trait_set,
            'BIOTYPES': BIOTYPES,
            'TABLE_COLORS': TABLE_COLORS,
            'n_peaks': 0}
    # Set status to error if the container is stopped and status is not set to complete.
    if trait.container_status() == 'STOPPED' and trait.status != "complete":
        trait.status = 'error'
        trait.save()
    if trait.status == 'complete':
        if trait.REPORT_VERSION == 'v1':
            """
            VERSION 1
            """
            # v1 phenotype table: column 1 holds isotypes, column 3 the values.
            phenotype_data = trait.get_gs_as_dataset("tables/phenotype.tsv")
            isotypes = list(phenotype_data.iloc[:, 1].dropna().values)
            phenotype_data = list(phenotype_data.iloc[:, 3].values)
            VARS.update({'phenotype_data': phenotype_data,
                         'isotypes': isotypes})
            if trait.is_significant:
                interval_summary = trait.get_gs_as_dataset("tables/interval_summary.tsv.gz") \
                    .rename(index=str, columns={'gene_w_variants': 'genes w/ variants'})
                try:
                    # Annotate each variant with its gene's maximum absolute
                    # correlation per interval, then sort strongest-first.
                    variant_correlation = trait.get_gs_as_dataset(
                        "tables/variant_correlation.tsv.gz")
                    max_corr = variant_correlation.groupby(
                        ['gene_id', 'interval']).apply(lambda x: max(abs(x.correlation)))
                    max_corr = max_corr.reset_index().rename(
                        index=str, columns={0: 'max_correlation'})
                    variant_correlation = pd.merge(variant_correlation, max_corr,
                                                   on=['gene_id', 'interval']) \
                        .sort_values(['max_correlation', 'gene_id'], ascending=False)
                except (urllib.error.HTTPError, pd.errors.EmptyDataError):
                    # File missing or empty: render with no correlations.
                    variant_correlation = []
                peak_summary = trait.get_gs_as_dataset(
                    "tables/peak_summary.tsv.gz")
                peak_summary['interval'] = peak_summary.apply(
                    lambda row: f"{row.chrom}:{row.interval_start}-{row.interval_end}",
                    axis=1)
                first_peak = peak_summary.iloc[0]
                VARS.update({'peak_summary': peak_summary,
                             'first_peak': first_peak,
                             'n_peaks': len(peak_summary),
                             'variant_correlation': variant_correlation,
                             'interval_summary': interval_summary})
        elif trait.REPORT_VERSION == "v2":
            """
            VERSION 2
            """
            # If the mapping is complete:
            # Phenotype plot
            phenotype_plot = plotly_distplot(trait._trait_df, trait_name)
            VARS.update({'phenotype_plot': phenotype_plot})
            # Fetch datafiles for complete runs
            VARS.update({'n_peaks': 0})
            if trait.is_significant:
                peak_summary = trait.get_gs_as_dataset("peak_summary.tsv.gz")
                try:
                    # Split "chrom:start-end" into its components and attach
                    # them to the first peak row for the template.
                    first_peak = peak_summary.loc[0]
                    chrom, interval_start, interval_end = re.split(
                        ":|\-", first_peak['interval'])
                    first_peak.chrom = chrom
                    first_peak.pos = int(first_peak['peak_pos'].split(":")[1])
                    first_peak.interval_start = int(interval_start)
                    first_peak.interval_end = int(interval_end)
                except:
                    # NOTE(review): bare except silently hides any parse
                    # error — consider narrowing to (KeyError, ValueError).
                    first_peak = None
                try:
                    variant_correlation = trait.get_gs_as_dataset(
                        "interval_variants.tsv.gz")
                except (pd.errors.EmptyDataError):
                    variant_correlation = pd.DataFrame()
                interval_summary = trait.get_gs_as_dataset("interval_summary.tsv.gz") \
                    .rename(index=str, columns={'gene_w_variants': 'genes w/ variants'})
                peak_marker_data = trait.get_gs_as_dataset(
                    "peak_markers.tsv.gz")
                peak_summary = trait.get_gs_as_dataset("peak_summary.tsv.gz")
                VARS.update({'pxg_plot': pxg_plot(peak_marker_data, trait_name),
                             'interval_summary': interval_summary,
                             'variant_correlation': variant_correlation,
                             'peak_summary': peak_summary,
                             'n_peaks': len(peak_summary),
                             'isotypes': list(trait._trait_df.ISOTYPE.values),
                             'first_peak': first_peak})
    # To handle report data, functions specific
    # to the version will be required.
    report_template = f"reports/{trait.REPORT_VERSION}.html"
    return render_template(report_template, **VARS)