def case(
    vcf,
    vcf_sv,
    vcf_cancer,
    vcf_cancer_sv,
    vcf_str,
    owner,
    ped,
    update,
    config,
    no_variants,
    peddy_ped,
    peddy_sex,
    peddy_check,
    keep_actions,
):
    """Load a case into the database.

    A case can be loaded without specifying vcf files and/or bam files.

    The case information is read from ``config`` (a YAML file handle) and/or
    ``ped``; at least one of the two must be given. The parsed case is passed
    to ``store.load_case`` together with ``update`` and ``keep_actions``.

    Args:
        vcf, vcf_sv, vcf_cancer, vcf_cancer_sv, vcf_str: variant files,
            forwarded to ``parse_case_data``.
        owner: forwarded to ``parse_case_data``.
        ped: pedigree input, forwarded to ``parse_case_data``.
        update: forwarded to ``load_case``.
        config: YAML handle with the case config, or None.
        no_variants: accepted for interface compatibility; not used here.
        peddy_ped, peddy_sex, peddy_check: forwarded to ``parse_case_data``.
        keep_actions: forwarded to ``load_case``.

    Raises:
        click.Abort: if neither ``config`` nor ``ped`` is given, if parsing
            raises SyntaxError or KeyError, or if loading the case fails.
    """
    adapter = store

    if config is None and ped is None:
        LOG.warning("Please provide either scout config or ped file")
        raise click.Abort()

    # Scout needs a config object with the necessary information.
    # If no config is used create a dictionary.
    # NOTE(review): the config file is user supplied; consider
    # yaml.SafeLoader unless configs rely on non-standard YAML tags.
    config_raw = yaml.load(config, Loader=yaml.FullLoader) if config else {}

    # Name of the input used in error messages. ``config`` is None when the
    # case is described by a ped file only, so ``config.name`` cannot be
    # used unconditionally inside the error handler below.
    source = config if config is not None else ped
    source_name = getattr(source, "name", source)

    try:
        config_data = parse_case_data(
            config=config_raw,
            ped=ped,
            owner=owner,
            vcf_snv=vcf,
            vcf_sv=vcf_sv,
            vcf_str=vcf_str,
            vcf_cancer=vcf_cancer,
            vcf_cancer_sv=vcf_cancer_sv,
            peddy_ped=peddy_ped,
            peddy_sex=peddy_sex,
            peddy_check=peddy_check,
        )
    except SyntaxError as err:
        LOG.warning(err)
        raise click.Abort()
    except KeyError as err:
        LOG.error("KEYERROR {} missing when loading '{}'".format(err, source_name))
        LOG.debug("Stack trace: {}".format(traceback.format_exc()))
        raise click.Abort()

    LOG.info("Use family %s", config_data["family"])

    try:
        adapter.load_case(config_data, update, keep_actions)
    except Exception as err:
        # Top-level CLI boundary: report the problem and abort the command.
        LOG.error("Something went wrong during loading")
        LOG.warning(err)
        raise click.Abort()
def test_parse_case_ped_file(ped_file):
    """Parsing a ped file yields the family id and every individual."""
    expected_family = '643594'
    expected_individuals = 3

    # GIVEN a pedigree with three samples
    with open(ped_file, 'r') as ped_handle:
        # WHEN the case data is parsed from the pedigree lines
        parsed = parse_case_data(ped=ped_handle, owner='cust000')

    # THEN the family id is the one from the pedigree
    family_id = parsed['family']
    assert family_id == expected_family

    # THEN all individuals are present
    individuals = parsed['samples']
    assert len(individuals) == expected_individuals
def test_parse_case_ped_file(ped_file):
    """Check family id and sample count parsed from a ped file."""
    # GIVEN a pedigree file describing three samples
    with open(ped_file, "r") as pedigree:
        # WHEN the pedigree is parsed into case data
        case_info = parse_case_data(owner="cust000", ped=pedigree)

    # THEN the family id matches the pedigree
    assert "643594" == case_info["family"]

    # THEN one entry exists per individual
    sample_count = len(case_info["samples"])
    assert sample_count == 3
def upload(self, data: dict, threshold: int = 5, force: bool = False):
    """Load analysis of a new family into Scout.

    ``threshold`` is written into ``data`` under ``rank_score_threshold``
    before the config is parsed. A case that is not yet known is loaded as
    new. A known case is only reloaded (with ``update=True``) when ``force``
    is set or the incoming analysis date is later than the stored one;
    otherwise a warning is logged and nothing is loaded.
    """
    data["rank_score_threshold"] = threshold
    parsed = parse_case_data(config=data)

    loaded_case = self.case(
        institute_id=parsed["owner"],
        display_name=parsed["family_name"],
    )

    # Unknown case: plain load.
    if not loaded_case:
        LOG.debug("load new Scout case")
        load_scout(self, parsed)
        LOG.debug("Case loaded successfully to Scout")
        return

    # Known case: reload only when forced or when the analysis is newer.
    if force or parsed["analysis_date"] > loaded_case["analysis_date"]:
        LOG.info("update existing Scout case")
        load_scout(self, parsed, update=True)
        return

    loaded_on = loaded_case["analysis_date"].date()
    LOG.warning("analysis of case already loaded: %s", loaded_on)
def upload(self, data: dict, threshold: int = 5, force: bool = False):
    """Load analysis of a new family into Scout.

    Args:
        data: raw case config; ``rank_score_threshold`` is set on it in place
            before it is parsed.
        threshold: value stored under ``rank_score_threshold``.
        force: reload an already loaded case even if the incoming analysis
            is not newer than the stored one.

    A case that does not exist yet is loaded as new. An existing case is
    reloaded with ``update=True`` when ``force`` is set or the incoming
    ``analysis_date`` is later than the stored one; otherwise only a
    warning is logged.
    """
    data['rank_score_threshold'] = threshold
    config_data = parse_case_data(config=data)
    existing_case = self.case(
        institute_id=config_data['owner'],
        display_name=config_data['family_name'],
    )
    if existing_case:
        if force or config_data['analysis_date'] > existing_case['analysis_date']:
            LOG.info("update existing Scout case")
            load_scout(self, config_data, update=True)
        else:
            existing_date = existing_case['analysis_date'].date()
            # Lazy %-style argument: only formatted if the record is emitted.
            LOG.warning("analysis of case already loaded: %s", existing_date)
    else:
        LOG.debug("load new Scout case")
        load_scout(self, config_data)
def case(context, vcf, vcf_sv, vcf_cancer, vcf_str, owner, ped, update, config,
         no_variants, peddy_ped, peddy_sex, peddy_check):
    """Load a case into the database.

    The case is described by a scout config and/or a ped file; vcf and bam
    files are optional. The command aborts through ``context.abort()`` when
    neither input is given, when parsing raises SyntaxError, or when loading
    the case raises.
    """
    db_adapter = context.obj['adapter']

    # At least one description of the case is required.
    if ped is None and config is None:
        LOG.warning("Please provide either scout config or ped file")
        context.abort()

    # Start from the YAML config when there is one, otherwise from an
    # empty mapping.
    if config:
        raw_config = yaml.load(config, Loader=yaml.FullLoader)
    else:
        raw_config = {}

    parse_kwargs = dict(
        config=raw_config,
        ped=ped,
        owner=owner,
        vcf_snv=vcf,
        vcf_sv=vcf_sv,
        vcf_str=vcf_str,
        vcf_cancer=vcf_cancer,
        peddy_ped=peddy_ped,
        peddy_sex=peddy_sex,
        peddy_check=peddy_check,
    )
    try:
        case_config = parse_case_data(**parse_kwargs)
    except SyntaxError as parse_error:
        LOG.warning(parse_error)
        context.abort()

    LOG.info("Use family %s" % case_config['family'])

    try:
        db_adapter.load_case(case_config, update)
    except Exception as load_error:
        LOG.error("Something went wrong during loading")
        LOG.warning(load_error)
        context.abort()
def case(context, vcf, vcf_sv, vcf_cancer, vcf_str, owner, ped, update, config,
         no_variants, peddy_ped, peddy_sex, peddy_check):
    """Load a case into the database.

    A case can be loaded without specifying vcf files and/or bam files.

    The case information is read from ``config`` (a YAML file handle) and/or
    ``ped``; at least one of the two must be given. The command is aborted
    through ``context.abort()`` when no input is given, when parsing raises
    SyntaxError, or when loading the case raises.

    Args:
        context: click context; ``context.obj['adapter']`` is the database
            adapter used for loading.
        vcf, vcf_sv, vcf_cancer, vcf_str: variant files, forwarded to
            ``parse_case_data``.
        owner: forwarded to ``parse_case_data``.
        ped: pedigree input, forwarded to ``parse_case_data``.
        update: forwarded to ``adapter.load_case``.
        config: YAML handle with the case config, or None.
        no_variants: accepted for interface compatibility; not used here.
        peddy_ped, peddy_sex, peddy_check: forwarded to ``parse_case_data``.
    """
    adapter = context.obj['adapter']

    if config is None and ped is None:
        LOG.warning("Please provide either scout config or ped file")
        context.abort()

    # Scout needs a config object with the necessary information.
    # If no config is used create a dictionary.
    # An explicit Loader is required: calling yaml.load() without one is
    # deprecated since PyYAML 5.1 and a TypeError in PyYAML 6. SafeLoader
    # only builds plain YAML data types from the user-supplied file.
    config_raw = yaml.load(config, Loader=yaml.SafeLoader) if config else {}

    try:
        config_data = parse_case_data(
            config=config_raw,
            ped=ped,
            owner=owner,
            vcf_snv=vcf,
            vcf_sv=vcf_sv,
            vcf_str=vcf_str,
            vcf_cancer=vcf_cancer,
            peddy_ped=peddy_ped,
            peddy_sex=peddy_sex,
            peddy_check=peddy_check,
        )
    except SyntaxError as err:
        LOG.warning(err)
        context.abort()

    LOG.info("Use family %s", config_data['family'])

    try:
        adapter.load_case(config_data, update)
    except Exception as err:
        # Top-level CLI boundary: report the problem and abort the command.
        LOG.warning(err)
        context.abort()
def case(
    vcf,
    vcf_sv,
    vcf_cancer,
    vcf_cancer_sv,
    vcf_str,
    owner,
    ped,
    update,
    config,
    no_variants,
    peddy_ped,
    peddy_sex,
    peddy_check,
    keep_actions,
):
    """Load a case into the database.

    A case can be loaded without specifying vcf files and/or bam files.

    The case information is read from ``config`` (a YAML file handle) and/or
    ``ped``; at least one of the two must be given. If the parsed data has
    no valid ``genome_build`` (37 or 38) the user is prompted for one. The
    parsed case is passed to ``store.load_case`` together with ``update``
    and ``keep_actions``.

    Args:
        vcf, vcf_sv, vcf_cancer, vcf_cancer_sv, vcf_str: variant files,
            forwarded to ``parse_case_data``.
        owner: forwarded to ``parse_case_data``.
        ped: pedigree input, forwarded to ``parse_case_data``.
        update: forwarded to ``load_case``.
        config: YAML handle with the case config, or None.
        no_variants: accepted for interface compatibility; not used here.
        peddy_ped, peddy_sex, peddy_check: forwarded to ``parse_case_data``.
        keep_actions: forwarded to ``load_case``.

    Raises:
        click.Abort: if neither ``config`` nor ``ped`` is given, or if
            parsing or loading the case raises.
    """
    adapter = store

    if config is None and ped is None:
        LOG.warning("Please provide either scout config or ped file")
        raise click.Abort()

    # Scout needs a config object with the necessary information.
    # If no config is used create a dictionary.
    config_raw = yaml.load(config, Loader=yaml.SafeLoader) if config else {}

    # Name of the input used in error messages. ``config`` is None when the
    # case is described by a ped file only, so ``config.name`` cannot be
    # used unconditionally inside the error handlers below.
    source = config if config is not None else ped
    source_name = getattr(source, "name", source)

    try:
        config_data = parse_case_data(
            config=config_raw,
            ped=ped,
            owner=owner,
            vcf_snv=vcf,
            vcf_sv=vcf_sv,
            vcf_str=vcf_str,
            vcf_cancer=vcf_cancer,
            vcf_cancer_sv=vcf_cancer_sv,
            peddy_ped=peddy_ped,
            peddy_sex=peddy_sex,
            peddy_check=peddy_check,
        )
    except SyntaxError as err:
        LOG.error(
            "SyntaxError {}: missing when loading '{}' {}".format(
                err, source_name, traceback.format_exc()
            )
        )
        raise click.Abort()
    except KeyError as err:
        LOG.error(
            "KeyError {} when loading '{}' {}".format(err, source_name, traceback.format_exc())
        )
        raise click.Abort()

    # Ask interactively when the parsed data carries no usable genome build.
    if config_data.get("genome_build") not in [37, 38, "37", "38"]:
        config_data["genome_build"] = int(
            click.prompt(
                "Please enter a valid genome build for this case",
                type=click.Choice(["37", "38"]),
            )
        )

    LOG.info("Use family %s", config_data["family"])

    try:
        adapter.load_case(config_data, update, keep_actions)
    except SyntaxError as err:
        LOG.error(
            "SyntaxError {} missing when loading '{}' {}".format(
                err, source_name, traceback.format_exc()
            )
        )
        raise click.Abort()
    except KeyError as err:
        LOG.error(
            "KeyError {} when loading '{}' {}".format(err, source_name, traceback.format_exc())
        )
        raise click.Abort()
    except Exception:
        # Top-level CLI boundary: log the full traceback and abort.
        LOG.error("Unhandled Exception: {}".format(traceback.format_exc()))
        raise click.Abort()
def setup_scout(
    adapter,
    institute_id="cust000",
    user_name="Clark Kent",
    user_mail="*****@*****.**",
    api_key=None,
    demo=False,
    resource_files=None,
):
    """Set up a working scout instance.

    WARNING: If the instance is populated, all collections whose name does
    not start with "system" will be deleted.

    Builds and inserts an institute and an admin user, then loads cytobands,
    genes and transcripts for builds 37 and 38, HPO data and finally the
    indexes.

    There are multiple sources of information that are used by scout and
    that need to exist for scout to work properly.

    Genes:
         Scout uses HGNC as the source for gene identifiers and Ensembl as
         the source for coordinates. Additional information about disease
         connections for genes is fetched from OMIM. The link between HPO
         terms and genes is fetched from HPO.
         For more details check the documentation.

    Args:
        adapter: database adapter; its ``db`` attribute and its
            ``add_institute``, ``add_user``, ``load_panel``, ``load_case``
            and ``load_indexes`` methods are used here.
        institute_id: used both as internal id and as display name of the
            institute, and listed as the admin user's institute.
        user_name: name of the admin user.
        user_mail: e-mail of the admin user; also used as the user ``_id``
            and as the institute's sanger recipient.
        api_key: passed to ``fetch_mim_files`` when the genemap lines were
            not read from local files.
        demo: when truthy, ``resource_files`` is replaced by ``demo_files``
            and a demo gene panel and case are loaded.
        resource_files: optional mapping of resource names to paths. Keys
            read here: ``mim2gene_path``, ``genemap_path``,
            ``hpogenes_path``, ``hgnc_path``, ``exac_path``,
            ``genes37_path``/``genes38_path``,
            ``transcripts37_path``/``transcripts38_path``,
            ``hpoterms_path``, ``hpo_to_genes_path``, ``hpo_disease_path``.

    Raises:
        Exception: whatever ``fetch_mim_files`` raises is logged and
            re-raised.
    """
    # NOTE(review): handles returned by get_file_handle are never closed
    # explicitly in this function.

    LOG.info("Check if there was a database, delete if existing")
    existing_database = False
    # Drop every collection except those whose name starts with "system"
    for collection_name in adapter.db.list_collection_names():
        if collection_name.startswith("system"):
            continue
        LOG.info("Deleting collection %s", collection_name)
        adapter.db.drop_collection(collection_name)
        existing_database = True

    if existing_database:
        LOG.info("Database deleted")

    institute_obj = build_institute(
        internal_id=institute_id,
        display_name=institute_id,
        sanger_recipients=[user_mail],
    )

    adapter.add_institute(institute_obj)

    user_obj = dict(
        _id=user_mail,
        email=user_mail,
        name=user_name,
        roles=["admin"],
        institutes=[institute_id],
    )

    adapter.add_user(user_obj)

    resource_files = resource_files or {}
    # In demo mode any caller supplied resource_files are replaced
    if demo:
        resource_files = demo_files
    mim2gene_lines = None
    genemap_lines = None
    mim2gene_path = resource_files.get("mim2gene_path")
    genemap_path = resource_files.get("genemap_path")
    # Local files are only read when both paths are given
    if genemap_path and mim2gene_path:
        mim2gene_lines = [line for line in get_file_handle(mim2gene_path)]
        genemap_lines = [line for line in get_file_handle(genemap_path)]

    # Otherwise get the lines through fetch_mim_files, which needs an api key.
    # Without files and without key both stay None.
    if (genemap_lines is None) and api_key:
        try:
            mim_files = fetch_mim_files(api_key, mim2genes=True, genemap2=True)
        except Exception as err:
            LOG.warning(err)
            raise err
        mim2gene_lines = mim_files["mim2genes"]
        genemap_lines = mim_files["genemap2"]

    # For each of the following resources: read the local file if a path
    # is configured, otherwise call the corresponding fetch function
    if resource_files.get("hpogenes_path"):
        hpo_gene_lines = [
            line for line in get_file_handle(resource_files.get("hpogenes_path"))
        ]
    else:
        hpo_gene_lines = fetch_genes_to_hpo_to_disease()

    if resource_files.get("hgnc_path"):
        hgnc_lines = [
            line for line in get_file_handle(resource_files.get("hgnc_path"))
        ]
    else:
        hgnc_lines = fetch_hgnc()

    if resource_files.get("exac_path"):
        exac_lines = [
            line for line in get_file_handle(resource_files.get("exac_path"))
        ]
    else:
        exac_lines = fetch_exac_constraint()

    # Load cytobands into cytoband collection
    for genome_build, cytobands_path in cytoband_files.items():
        load_cytobands(cytobands_path, genome_build, adapter)

    builds = ["37", "38"]
    for build in builds:
        # Resource keys are build specific, e.g. "genes37_path"
        genes_path = "genes{}_path".format(build)
        if resource_files.get(genes_path):
            ensembl_genes = get_file_handle(resource_files[genes_path])
        else:
            ensembl_genes = fetch_ensembl_genes(build=build)

        hgnc_genes = load_hgnc_genes(
            adapter=adapter,
            ensembl_lines=ensembl_genes,
            hgnc_lines=hgnc_lines,
            exac_lines=exac_lines,
            mim2gene_lines=mim2gene_lines,
            genemap_lines=genemap_lines,
            hpo_lines=hpo_gene_lines,
            build=build,
        )

        # Create a map from ensembl ids to gene objects
        # (the name ensembl_genes is reused; it held the gene lines above)
        ensembl_genes = {}
        for gene_obj in hgnc_genes:
            ensembl_id = gene_obj["ensembl_id"]
            ensembl_genes[ensembl_id] = gene_obj

        tx_path = "transcripts{}_path".format(build)
        if resource_files.get(tx_path):
            ensembl_transcripts = get_file_handle(resource_files[tx_path])
        else:
            ensembl_transcripts = fetch_ensembl_transcripts(build=build)
        # Load the transcripts for a certain build
        # NOTE(review): the return value is assigned but not used afterwards
        transcripts = load_transcripts(adapter, ensembl_transcripts, build, ensembl_genes)

    # Optional local HPO files; a handle stays None when no path is configured
    hpo_terms_handle = None
    if resource_files.get("hpoterms_path"):
        hpo_terms_handle = get_file_handle(resource_files["hpoterms_path"])

    hpo_to_genes_handle = None
    if resource_files.get("hpo_to_genes_path"):
        hpo_to_genes_handle = get_file_handle(resource_files["hpo_to_genes_path"])

    # NOTE(review): hpo_disease_handle is opened but never passed on;
    # load_hpo below receives genemap_lines as disease_lines - confirm intent
    hpo_disease_handle = None
    if resource_files.get("hpo_disease_path"):
        hpo_disease_handle = get_file_handle(resource_files["hpo_disease_path"])

    load_hpo(
        adapter=adapter,
        disease_lines=genemap_lines,
        hpo_lines=hpo_terms_handle,
        hpo_gene_lines=hpo_to_genes_handle,
    )

    # If demo we load a gene panel and some case information
    if demo:
        parsed_panel = parse_gene_panel(
            path=panel_path,
            institute="cust000",
            panel_id="panel1",
            version=1.0,
            display_name="Test panel",
        )
        adapter.load_panel(parsed_panel)

        # The demo case config is read from load_path
        case_handle = get_file_handle(load_path)
        case_data = yaml.load(case_handle, Loader=yaml.FullLoader)

        config_data = parse_case_data(config=case_data)
        adapter.load_case(config_data)

    LOG.info("Creating indexes")
    adapter.load_indexes()
    LOG.info("Scout instance setup successful")