def main():
    """Entry point: build the main supplementary gene-score table.

    Produces the table containing GeVIR, LOEUF and VIRLoF scores
    (Supplementary Table 2), which is a prerequisite for the
    "draw_web_gene_scores" method in figures.py.
    """
    db = MongoDB()

    # enrichment_offset drives the AD/AR fold-enrichment calculation:
    # it is ~5% of the gene set size divided by 20 bins
    # (e.g. 18,352 / 20 ~= 918; 19,361 / 20 ~= 968).
    export_gene_scores(db, enrichment_offset=AD_AR_ENRICHMENT_OFFSET, include_gene_groups=True)
def main():
    """Entry point: build the "variant_regions" collection and GeVIR scores.

    Creates Supplementary Tables 7 and 8.
    """
    db = MongoDB()

    # Build "variant_regions" from gnomAD variant data.
    # This step does NOT need to be rerun just to recreate GeVIR scores.
    calculate_all_transcripts_regions(db)

    # Build GeVIR scores; requires the "variant_regions" collection.
    # If the regions collection was created with INCLUDE_GNOMAD_OUTLIERS=True,
    # only the two calls below need rerunning with INCLUDE_GNOMAD_OUTLIERS=False
    # to produce gene scores for a dataset without outliers.
    for skip_gerp in (True, False):
        create_gevir_scores(db, no_gerp=skip_gerp)
def main():
    """Entry point: Hardy-Weinberg Equilibrium (HWE) analysis pipeline.

    For the first run, read the step comments carefully and uncomment the
    functions in order — each stage feeds the next.
    """
    db = MongoDB()

    # Stage 1: create the "variants_hwe_pop" collection, which stores data on
    # variant deviation from Hardy-Weinberg Equilibrium in each population,
    # then update it with aggregated alternative-allele frequencies (alt_af)
    # used for further filtering (high alt_af can compromise HWE analysis).
    #analyse_gnomad_variants_deviations_from_hwe(db)
    #add_alt_af_data(db)

    # Stage 2: create the temporary "variants_hwe_regions" collection and flag
    # variants in "variants_hwe_pop" that fall inside tandem repeat or
    # segmental duplication regions.
    #create_hwe_variants_regions(db)
    #update_variants_hwe_pop_with_region_data(db)

    # Stage 3: fetch Allele Balance (AB) data from gnomAD for rare variants
    # (0.001 <= AF <= RARE_HET_EXCESS_MAX_AF (0.1)) into the
    # "rare_variants_ab" collection, used for further filtering
    # (low AB may indicate sequencing errors).
    # !!! IMPORTANT !!!
    # Uses the gnomAD API (requires internet) and can take up to a couple of
    # days to run!!!
    ######create_rare_variants_ab(db)

    # Stage 4: build the heterozygous-excess (HetExc) variant dataset
    # ("rare_het_excess_variants" collection).
    #create_rare_het_excess_variants(db)

    # Stage 5: mark HetExc variants with skewed allele balance (>0.9 and 0.8).
    #update_rare_het_excess_variants_with_skeweb_ab_stats(db, remove=False)

    # Stage 6: export HetExc variant coordinates for LiftOver (manual step),
    # import the converted variants (./tables/het_exc_variants_build_38.csv)
    # back into "rare_het_excess_variants", and use the new coordinates to
    # fetch allele data from gnomAD v3 via the API.
    #export_rare_het_exc_variants_for_lift_over(db)
    #import_rare_het_exc_variants_lift_over_results(db)
    #update_rare_het_exc_variants_with_gnomad_3_data(db, clean=False)

    # Multiple-testing corrections — not used, since they proved too
    # conservative (the HBB example was found, but CFTR was not):
    #calculate_multiple_testing_adjustments(db)
    #add_multiple_testing_adjustments(db, remove=False)
    #export_adjusted_p_value_stats(db)

    #calculate_eas_inbreeding_coeff_variants(db)
    pass
def main():
    """Entry point: import all external datasets into the database."""
    db = MongoDB()

    # GERP scores per chromosomal position, stored in a separate "gerp"
    # database. NOTE: this can take a long time (per-chromosome progress is
    # shown); the final database occupies roughly 36.6 GB on disk.
    import_gerp(db)

    # Ensembl CDS / amino-acid FASTA files.
    import_ens_cds_fasta(db, ENS_CDS_FASTA, 'ens_cds_fasta')
    import_ens_cds_fasta(db, ENS_AA_FASTA, 'ens_aa_fasta')

    # gnomAD constraint scores (22/10/18).
    import_gnomad_scores(db, new_gnomad_file=False)

    # OMIM data, downloaded 11/13/18.
    import_omim(db)

    # ClinVar data, downloaded 21/08/18.
    import_clin_var(db)

    # Conservative Coding RegionS (CCRs):
    # https://s3.us-east-2.amazonaws.com/ccrs/ccr.html
    import_ccrs(db)
    count_gene_ccrs(db)

    # MacArthur lab gene lists:
    # https://github.com/macarthur-lab/gene_lists
    import_mac_arthur_gene_lists(db)

    # Mouse heterozygous-lethal knockout genes (5/02/19), obtained via
    # http://www.mousemine.org/mousemine/templates.do
    # ("Mammalian phenotypes (MP terms) --> Mouse genes and models",
    # searching for *lethal*). Alternatively, query mousemine directly with
    # query_mousemine_to_create_mouse_het_lethal_knockout_genes().
    import_mouse_het_lethal_knockout_genes(db)

    # HUGO gene nomenclature (15/11/19).
    import_hugo_genes(db)
def main():
    """Entry point stub: opens the database connection and does nothing else."""
    db = MongoDB()
import re
import requests
import pymongo
from flask import Blueprint, render_template, jsonify, request

from config import MONGO_IP
from interface.service import Assert
from common import generate_id, MongoDB

# Blueprint for the web interface; templates and static assets live in
# their own dedicated folders.
interface = Blueprint(
    'interface',
    __name__,
    static_folder='interface_static',
    template_folder='interface_templates',
)

# Module-level database handle shared by all routes.
db = MongoDB(MONGO_IP, 27017)


@interface.route('/debug')
def page_debug():
    """Render the debug page."""
    return render_template('interface_debug.html')


@interface.route('/edit/<id>')
def page_edit(id):
    """Render the edit page. The URL's <id> is resolved client-side."""
    return render_template('interface_edit.html')


@interface.route('/api/v2/load_api', methods=['POST'])
def load_api():
    """Look up a single API document matching the POSTed JSON query."""
    query = request.get_json()
    db.switch_database_collection('interface', 'api')
    document = db.find_one(query)
    return jsonify({
        'status_code': 200,
        'message': 'ok',
        'data': document,
    })
def main():
    """Entry point: compute the common (shared) gene scores."""
    db = MongoDB()
    create_common_gene_scores(db)