Example no. 1
0
def main():
	"""Export the main supplementary gene-score table (GeVIR, LOEUF, VIRLoF)."""
	database = MongoDB()
	# Writes Supplementary Table 2, which is a prerequisite for the
	# "draw_web_gene_scores" method in figures.py.
	# The AD/AR fold-enrichment offset corresponds to a 5% slice of the
	# gene set: 18,352 / 20 ~= 918 and 19,361 / 20 ~= 968.
	export_gene_scores(database, enrichment_offset=AD_AR_ENRICHMENT_OFFSET, include_gene_groups=True)
Example no. 2
0
def main():
    """Build variant-region data and derive GeVIR scores (with and without GERP)."""
    mongo = MongoDB()

    # Populate the "variant_regions" collection from gnomAD variant data.
    # Recreating GeVIR scores does not require rerunning this step.
    calculate_all_transcripts_regions(mongo)

    # Derive GeVIR scores from the "variant_regions" collection
    # (Supplementary Tables 7 and 8). If the regions collection was built
    # with INCLUDE_GNOMAD_OUTLIERS=True, only these two calls need rerunning
    # with INCLUDE_GNOMAD_OUTLIERS=False to score a dataset without outliers.
    for skip_gerp in (True, False):
        create_gevir_scores(mongo, no_gerp=skip_gerp)
Example no. 3
0
def main():
    """Pipeline driver for the heterozygous-excess (HetExc) variant analysis.

    All pipeline stages are intentionally left commented out: they are
    expensive, apparently order-dependent (later stages read collections
    created by earlier ones), one-off steps meant to be enabled selectively
    per run. See the per-stage comments below before uncommenting anything.
    """
    db = MongoDB()
    # For the first run, please carefully read the comments and uncomment all of the following functions:

    # These functions create "variants_hwe_pop" collection which stores
    # data related to variant deviation from Hardy-Weinberg Equilibrium in each population.
    # "variants_hwe_pop" collection is then updated with aggregated frequencies of alternative alleles (alt_af),
    # which are used for further variant filtering (high alt_af might compromise HWE analysis results).
    #analyse_gnomad_variants_deviations_from_hwe(db)
    #add_alt_af_data(db)

    # These functions create temporary "variants_hwe_regions" collection
    # and add flags to "variants_hwe_pop" which indicate whether variants
    # are located in tandem repeat and segmental duplication regions or not.
    #create_hwe_variants_regions(db)
    #update_variants_hwe_pop_with_region_data(db)

    # This function obtains Allele Balance (AB) data from gnomAD for rare variants (i.e. 0.001<=AF<=RARE_HET_EXCESS_MAX_AF (0.1))
    # It creates "rare_variants_ab" collection, which is used for further variant filtering (low AB might be a sign of sequencing errors).
    # !!! IMPORTANT !!!
    # It works with gnomAD API (i.e. requires internet connection) and can take up to a couple of days to run!!!
    ######create_rare_variants_ab(db)

    # This function creates a dataset ("rare_het_excess_variants" collection) of variants with heterozygous excess (HetExc)
    #create_rare_het_excess_variants(db)
    # This function marks HetExc variants with skewed allele balance (>0.9 and 0.8)
    #update_rare_het_excess_variants_with_skeweb_ab_stats(db, remove=False)

    # These functions export HetExc variant chromosomal coordinates for LiftOver conversion (this has to be done manually),
    # import converted variants (./tables/het_exc_variants_build_38.csv) back to the "rare_het_excess_variants" collection
    # and use these new coordinates to get allele data from gnomAD v3 via API.
    #export_rare_het_exc_variants_for_lift_over(db)
    #import_rare_het_exc_variants_lift_over_results(db)
    #update_rare_het_exc_variants_with_gnomad_3_data(db, clean=False)

    # Multiple Test corrections, not used since they are too conservative.
    # HBB example was found, but not CFTR:
    #calculate_multiple_testing_adjustments(db)
    #add_multiple_testing_adjustments(db, remove=False)
    #export_adjusted_p_value_stats(db)

    #calculate_eas_inbreeding_coeff_variants(db)

    # No stage is active by default; the function body is a no-op.
    pass
Example no. 4
0
def main():
    """Import all external datasets into MongoDB (one-off setup)."""
    database = MongoDB()

    # Build the auxiliary "gerp" database holding a GERP score per chromosomal
    # position. This can take a long time (progress is shown per chromosome)
    # and the final database occupies roughly 36.6 GB on disk.
    import_gerp(database)

    # Ensembl coding-sequence and amino-acid FASTA files.
    import_ens_cds_fasta(database, ENS_CDS_FASTA, 'ens_cds_fasta')
    import_ens_cds_fasta(database, ENS_AA_FASTA, 'ens_aa_fasta')

    # gnomAD scores (downloaded 22/10/18).
    import_gnomad_scores(database, new_gnomad_file=False)

    # OMIM data (downloaded 11/13/18).
    import_omim(database)

    # ClinVar data (downloaded 21/08/18).
    import_clin_var(database)

    # Conservative Coding RegionS (CCRs):
    # https://s3.us-east-2.amazonaws.com/ccrs/ccr.html
    import_ccrs(database)
    count_gene_ccrs(database)

    # MacArthur lab gene lists:
    # https://github.com/macarthur-lab/gene_lists
    import_mac_arthur_gene_lists(database)

    # Mouse heterozygous-lethal knockout genes (5/02/19), obtained from
    # http://www.mousemine.org/mousemine/templates.do via
    # "Mammalian phenotypes (MP terms) --> Mouse genes and models",
    # searching for *lethal*. Alternatively, the mousemine database can be
    # queried with query_mousemine_to_create_mouse_het_lethal_knockout_genes().
    import_mouse_het_lethal_knockout_genes(database)

    # HUGO gene data (15/11/19).
    import_hugo_genes(database)
Example no. 5
0
def main():
    """Entry point: establishes the MongoDB connection and nothing else."""
    connection = MongoDB()
Example no. 6
0
import re
import requests
import pymongo
from flask import Blueprint, render_template, jsonify, request
from config import MONGO_IP
from interface.service import Assert
from common import generate_id, MongoDB

# Flask blueprint for the interface pages and API; static assets and templates
# live in blueprint-local folders rather than the application defaults.
interface = Blueprint('interface', __name__, static_folder='interface_static', template_folder='interface_templates')
# Module-level MongoDB connection shared by all request handlers below.
db = MongoDB(MONGO_IP, 27017)


@interface.route('/debug')
def page_debug():
    """Serve the interface debug page."""
    page = render_template('interface_debug.html')
    return page


@interface.route('/edit/<id>')
def page_edit(id):
    """Serve the edit page.

    NOTE(review): the ``id`` URL segment is captured but unused here —
    presumably the page loads its data client-side; confirm. The parameter
    name must match the route variable, so it cannot be renamed even though
    it shadows the builtin.
    """
    page = render_template('interface_edit.html')
    return page


@interface.route('/api/v2/load_api', methods=['POST'])
def load_api():
    """Look up a single API document matching the POSTed JSON query."""
    query = request.get_json()
    # Point the shared connection at the interface/api collection before querying.
    db.switch_database_collection('interface', 'api')
    document = db.find_one(query)
    response = {
        'status_code': 200,
        'message': 'ok',
        'data': document,
    }
    return jsonify(response)
Example no. 7
0
def main():
    """Build the common gene-scores collection."""
    mongo = MongoDB()
    create_common_gene_scores(mongo)