def __init__(self, config):
    """Connect to the Scout Mongo database and set up the scout CLI subprocess wrapper."""
    scout_settings = config["scout"]
    # NOTE(review): serverSelectionTimeoutMS is in milliseconds, so 20 gives the
    # driver only 20 ms to find a server — confirm this should not be e.g. 20000.
    client = MongoClient(scout_settings["database"], serverSelectionTimeoutMS=20)
    super(ScoutAPI, self).__init__(client[scout_settings["database_name"]])
    self.process = Process(
        binary=scout_settings["binary_path"], config=scout_settings["config_path"]
    )
class HermesApi:
    """Class to communicate with hermes"""

    def __init__(self, config: dict):
        # Subprocess wrapper around the hermes command line binary.
        self.process = Process(config["hermes"]["binary_path"])

    def convert_deliverables(
        self, deliverables_file: Path, pipeline: str, analysis_type: Optional[str] = None
    ) -> CGDeliverables:
        """Convert deliverables file in raw pipeline format to CG format with hermes"""
        LOG.info("Converting pipeline deliverables to CG deliverables")
        convert_command = [
            "convert",
            "deliverables",
            "--pipeline",
            pipeline,
            str(deliverables_file),
        ]
        if analysis_type:
            convert_command += ["--analysis-type", analysis_type]
        self.process.run_command(convert_command)
        # hermes prints the converted deliverables as JSON on stdout.
        return CGDeliverables.parse_raw(self.process.stdout)

    def create_housekeeper_bundle(
        self,
        bundle_name: str,
        deliverables: Path,
        pipeline: str,
        analysis_type: Optional[str],
        created: Optional[datetime],
    ) -> hk_models.InputBundle:
        """Convert pipeline deliverables to housekeeper bundle ready to be inserted into hk"""
        cg_deliverables = self.convert_deliverables(
            deliverables_file=deliverables, pipeline=pipeline, analysis_type=analysis_type
        )
        return self.get_housekeeper_bundle(
            deliverables=cg_deliverables, created=created, bundle_name=bundle_name
        )

    @staticmethod
    def get_housekeeper_bundle(
        deliverables: CGDeliverables, bundle_name: str, created: Optional[datetime] = None
    ) -> hk_models.InputBundle:
        """Convert a deliverables object to a housekeeper object"""
        file_dicts = [file_info.dict() for file_info in deliverables.files]
        bundle_info = {"name": bundle_name, "files": file_dicts}
        # 'created' is optional; only include it when the caller supplied a timestamp.
        if created:
            bundle_info["created"] = created
        return hk_models.InputBundle(**bundle_info)
def __init__(self, config):
    """Build the scout CLI subprocess wrapper from the 'scout' section of the config."""
    scout_settings = config["scout"]
    self.process = Process(
        binary=scout_settings["binary_path"], config=scout_settings["config_path"]
    )
class ScoutAPI:
    """Interface to Scout.

    Drives the `scout` command line tool through a Process wrapper; no direct
    database access happens in this class.
    """

    def __init__(self, config):
        # Paths to the scout executable and its config file, read from the
        # 'scout' section of the app config.
        binary_path = config["scout"]["binary_path"]
        config_path = config["scout"]["config_path"]
        self.process = Process(binary=binary_path, config=config_path)

    def upload(self, scout_load_config: Path, threshold: int = 5, force: bool = False):
        """Load analysis of a new family into Scout.

        Args:
            scout_load_config: path to a scout load-config YAML file
            threshold: rank score threshold.
                NOTE(review): accepted but never used in this implementation —
                confirm whether it should be applied to the load command.
            force: load even when an existing case is at least as new
        """
        with open(scout_load_config, "r") as stream:
            data = yaml.safe_load(stream)
        scout_load_config_object: ScoutLoadConfig = ScoutLoadConfig(**data)
        existing_case: Optional[ScoutExportCase] = self.get_case(
            case_id=scout_load_config_object.family
        )
        load_command = ["load", "case", str(scout_load_config)]
        if existing_case:
            if force or scout_load_config_object.analysis_date > existing_case.analysis_date:
                # Re-loading an existing case requires the --update flag.
                load_command.append("--update")
                LOG.info("update existing Scout case")
            else:
                # Existing case is not older and no force flag: skip the upload.
                existing_date = existing_case.analysis_date.date()
                LOG.warning("analysis of case already loaded: %s", existing_date)
                return
        LOG.debug("load new Scout case")
        self.process.run_command(load_command)
        LOG.debug("Case loaded successfully to Scout")

    def update_alignment_file(self, case_id: str, sample_id: str, alignment_path: Path):
        """Update alignment file for individual in case"""
        parameters = [
            "update",
            "individual",
            "--case-id",
            case_id,
            "--ind-id",
            sample_id,
            "--alignment-path",
            str(alignment_path),
        ]
        self.process.run_command(parameters=parameters)

    def export_panels(self, panels: List[str], build: str = GENOME_BUILD_37) -> List[str]:
        """Pass through to export of a list of gene panels.

        Return list of lines in bed format
        """
        export_panels_command = ["export", "panel", "--bed"]
        for panel_id in panels:
            export_panels_command.append(panel_id)
        if build:
            export_panels_command.extend(["--build", build])
        try:
            self.process.run_command(export_panels_command)
            if not self.process.stdout:
                # Command succeeded but produced no output: nothing to return.
                return []
        except CalledProcessError:
            LOG.info("Could not find panels")
            return []
        return [line for line in self.process.stdout_lines()]

    def get_genes(self, panel_id: str, build: Optional[str] = None) -> list:
        """Fetch panel genes.

        Args:
            panel_id (str): unique id for the panel
            build (str): version of the panel. If 'None' latest version will be returned

        Returns:
            panel genes: panel genes list of dicts with 'hgnc_id' and 'hgnc_symbol'
        """
        # This can be run from CLI with `scout export panels <panel1> `
        export_panel_command = ["export", "panel", panel_id]
        if build:
            export_panel_command.extend(["--build", build])
        try:
            self.process.run_command(export_panel_command)
            if not self.process.stdout:
                return []
        except CalledProcessError:
            LOG.info("Could not find panel %s", panel_id)
            return []
        panel_genes = []
        for gene_line in self.process.stdout_lines():
            # Skip header lines in the exported panel.
            if gene_line.startswith("#"):
                continue
            gene_info = gene_line.strip().split("\t")
            # Lines without both hgnc id and symbol are ignored.
            if not len(gene_info) > 1:
                continue
            panel_genes.append({"hgnc_id": int(gene_info[0]), "hgnc_symbol": gene_info[1]})
        return panel_genes

    def get_causative_variants(self, case_id: str) -> List[Variant]:
        """Get causative variants for a case"""
        # These commands can be run with `scout export variants`
        get_causatives_command = ["export", "variants", "--json", "--case-id", case_id]
        try:
            self.process.run_command(get_causatives_command)
            if not self.process.stdout:
                return []
        except CalledProcessError:
            LOG.warning("Could not find case %s in scout", case_id)
            return []
        variants: List[Variant] = []
        for variant_info in json.loads(self.process.stdout):
            variants.append(Variant(**variant_info))
        return variants

    def get_case(self, case_id: str) -> Optional[ScoutExportCase]:
        """Fetch a case from Scout; returns None when the case does not exist."""
        cases: List[ScoutExportCase] = self.get_cases(case_id=case_id)
        if not cases:
            return None
        return cases[0]

    def get_cases(
        self,
        case_id: Optional[str] = None,
        reruns: bool = False,
        finished: bool = False,
        status: Optional[str] = None,
        days_ago: Optional[int] = None,
    ) -> List[ScoutExportCase]:
        """Interact with cases existing in the database.

        Note: case_id, status and finished are mutually exclusive filters
        (first one set wins); reruns and days_ago combine with any of them.
        """
        # These commands can be run with `scout export cases`
        get_cases_command = ["export", "cases", "--json"]
        if case_id:
            get_cases_command.extend(["--case-id", case_id])
        elif status:
            get_cases_command.extend(["--status", status])
        elif finished:
            get_cases_command.append("--finished")
        if reruns:
            LOG.info("Fetching cases that are reruns")
            get_cases_command.append("--reruns")
        if days_ago:
            get_cases_command.extend(["--within-days", str(days_ago)])
        try:
            self.process.run_command(get_cases_command)
            if not self.process.stdout:
                return []
        except CalledProcessError:
            LOG.info("Could not find cases")
            return []
        cases = []
        for case_export in json.loads(self.process.stdout):
            LOG.info("Validating case %s", case_export.get("_id"))
            case_obj = ScoutExportCase(**case_export)
            cases.append(case_obj)
        return cases

    def get_solved_cases(self, days_ago: int) -> List[ScoutExportCase]:
        """Get cases solved within chosen timespan

        Args:
            days_ago (int): Maximum days ago a case has been solved

        Return:
            cases (list): list of cases
        """
        return self.get_cases(status="solved", days_ago=days_ago)

    def upload_delivery_report(self, report_path: str, case_id: str, update: bool = False) -> None:
        """Load a delivery report into a case in the database

        If the report already exists the function will exit.
        If the user want to load a report that is already in the database
        'update' has to be 'True'

        Args:
            report_path (string): Path to delivery report
            case_id (string): Case identifier
            update (bool): If an existing report should be replaced

        Returns:
            updated_case(dict)
        """
        # This command can be run with `scout load delivery-report <CASE-ID> <REPORT-PATH>`
        upload_command = ["load", "delivery-report", case_id, report_path]
        if update:
            upload_command.append("--update")
        try:
            LOG.info("Uploading delivery report %s to case %s", report_path, case_id)
            self.process.run_command(upload_command)
        except CalledProcessError:
            # NOTE(review): failures here are only logged, while the fusion/splice
            # uploads below raise ScoutUploadError — confirm this is intended.
            LOG.warning("Something went wrong when uploading delivery report")

    def upload_fusion_report(
        self, case_id: str, report_path: str, research: bool, update: bool
    ) -> None:
        """Load a fusion report into a case in the database

        Args:
            report_path (string): Path to delivery report
            case_id (string): Case identifier
            research (bool): Research report
            update (bool): If an existing report should be replaced

        Returns:
            Nothing

        Raises:
            ScoutUploadError: when the scout command fails
        """
        # This command can be run with
        # `scout load gene-fusion-report [-r] <case_id> <path/to/research_gene_fusion_report.pdf>`
        upload_command = ["load", "gene-fusion-report"]
        if research:
            upload_command.append("--research")
        if update:
            upload_command.append("--update")
        upload_command.extend([case_id, report_path])
        try:
            LOG.info("Uploading fusion report %s to case %s", report_path, case_id)
            self.process.run_command(upload_command)
        except CalledProcessError:
            raise ScoutUploadError("Something went wrong when uploading fusion report")

    def upload_splice_junctions_bed(self, file_path: str, case_id: str, customer_sample_id):
        """Load a splice junctions bed file into a case in the database

        Args:
            file_path (string): Path to delivery report
            case_id (string): Case identifier
            customer_sample_id (bool): Customers sample identifier

        Returns:
            updated_case(dict)

        Raises:
            ScoutUploadError: when the scout command fails
        """
        # This command can be run with
        # `scout update individual -c <case_id> -n <customer_sample_id> splice_junctions_bed
        # <path/to/junction_file.bed>`
        upload_command = [
            "update",
            "individual",
            "-c",
            case_id,
            "-n",
            customer_sample_id,
            "splice_junctions_bed",
            file_path,
        ]
        try:
            LOG.info("Uploading splice junctions bed file %s to case %s", file_path, case_id)
            self.process.run_command(upload_command)
        except CalledProcessError:
            raise ScoutUploadError("Something went wrong when uploading splice junctions bed file")

    def upload_rna_coverage_bigwig(self, file_path: str, case_id: str, customer_sample_id: str):
        """Load a rna coverage bigwig file into a case in the database

        Args:
            file_path (string): Path to delivery report
            case_id (string): Case identifier
            customer_sample_id (bool): Customers sample identifier

        Returns:
            updated_case(dict)

        Raises:
            ScoutUploadError: when the scout command fails
        """
        # This command can be run with
        # `scout update individual -c <case_id> -n <customer_sample_id> rna_coverage_bigwig
        # <path/to/coverage_file.bigWig>`
        upload_command = [
            "update",
            "individual",
            "-c",
            case_id,
            "-n",
            customer_sample_id,
            "rna_coverage_bigwig",
            file_path,
        ]
        try:
            LOG.info("Uploading rna coverage bigwig file %s to case %s", file_path, case_id)
            self.process.run_command(upload_command)
        except CalledProcessError:
            raise ScoutUploadError("Something went wrong when uploading rna coverage bigwig file")
class ScoutAPI(MongoAdapter):
    """Interface to Scout.

    Talks to the Scout Mongo database directly through the MongoAdapter base
    class, and to the scout CLI through a Process wrapper.
    """

    def __init__(self, config):
        # NOTE(review): serverSelectionTimeoutMS is in milliseconds, so 20 gives the
        # driver only 20 ms to find a server — confirm this should not be e.g. 20000.
        client = MongoClient(config["scout"]["database"], serverSelectionTimeoutMS=20)
        super(ScoutAPI, self).__init__(client[config["scout"]["database_name"]])
        binary_path = config["scout"]["binary_path"]
        config_path = config["scout"]["config_path"]
        self.process = Process(binary=binary_path, config=config_path)

    def upload(self, data: dict, threshold: int = 5, force: bool = False):
        """Load analysis of a new family into Scout.

        Args:
            data: raw case config; 'rank_score_threshold' is injected from `threshold`
            threshold: rank score threshold stored on the case
            force: update an existing case even when it is not older than the new analysis
        """
        data["rank_score_threshold"] = threshold
        config_data = parse_case_data(config=data)
        existing_case = self.case(
            institute_id=config_data["owner"], display_name=config_data["family_name"]
        )
        if existing_case:
            if not (force or config_data["analysis_date"] > existing_case["analysis_date"]):
                # Existing case is not older and no force flag: skip the upload.
                existing_date = existing_case["analysis_date"].date()
                LOG.warning("analysis of case already loaded: %s", existing_date)
                return
            LOG.info("update existing Scout case")
            load_scout(self, config_data, update=True)
            LOG.debug("Case loaded successfully to Scout")
            # Bug fix: return here — previously the code fell through and loaded
            # the case a second time without the update flag.
            return
        LOG.debug("load new Scout case")
        load_scout(self, config_data)
        LOG.debug("Case loaded successfully to Scout")

    def update_alignment_file(self, case_id: str, sample_id: str, alignment_path: Path):
        """Update alignment file for individual in case"""
        parameters = [
            "update",
            "individual",
            "--case-id",
            case_id,
            "--ind-id",
            sample_id,
            "--alignment-path",
            str(alignment_path),
        ]
        self.process.run_command(parameters=parameters)

    def export_panels(self, panels: List[str], versions=None):
        """Pass through to export of a list of gene panels."""
        return scout_export_panels(self, panels, versions)

    def get_genes(self, panel_id: str, version: str = None) -> list:
        """Fetch panel genes.

        Args:
            panel_id (str): unique id for the panel
            version (str): version of the panel. If 'None' latest version will be returned

        Returns:
            panel genes: panel genes list
        """
        gene_panel = self.gene_panel(panel_id=panel_id, version=version)
        return gene_panel.get("genes")

    def get_cases(
        self,
        case_id=None,
        institute=None,
        reruns=None,
        finished=None,
        causatives=None,
        research_requested=None,
        is_research=None,
        status=None,
    ):
        """Interact with cases existing in the database.

        When `case_id` is given only that case is looked up; otherwise the
        remaining filters are passed through to the adapter's `cases` query.
        """
        models = []
        if case_id:
            case_obj = self.case(case_id=case_id)
            if case_obj:
                models.append(case_obj)
        else:
            models = self.cases(
                collaborator=institute,
                reruns=reruns,
                finished=finished,
                has_causatives=causatives,
                research_requested=research_requested,
                is_research=is_research,
                status=status,
            )
        return models

    def get_causative_variants(self, case_id=None, collaborator=None):
        """Get causative variants for a case"""
        causative_ids = self.get_causatives(institute_id=collaborator, case_id=case_id)
        return [self.variant(causative_id) for causative_id in causative_ids]

    def get_solved_cases(self, days_ago):
        """Get cases solved within chosen timespan

        Args:
            days_ago (int): Maximum days ago a case has been solved

        Return:
            cases (list): list of cases
        """
        days_datetime = dt.datetime.now() - dt.timedelta(days=days_ago)
        # Look up 'mark_causative' events added since specified number days ago
        event_query = {
            "category": "case",
            "verb": "mark_causative",
            "created_at": {"$gte": days_datetime},
        }
        recent_events = self.event_collection.find(event_query)
        # Find what cases these events concern
        solved_cases = list({event["case"] for event in recent_events})
        # Find these cases in the database
        return self.case_collection.find({"_id": {"$in": solved_cases}})

    def upload_delivery_report(self, report_path: str, case_id: str, update: bool = False):
        """Load a delivery report into a case in the database

        If the report already exists the function will exit.
        If the user want to load a report that is already in the database
        'update' has to be 'True'

        Args:
            report_path (string): Path to delivery report
            case_id (string): Case identifier
            update (bool): If an existing report should be replaced

        Returns:
            updated_case(dict)
        """
        return load_delivery_report(
            adapter=self, case_id=case_id, report_path=report_path, update=update
        )
def __init__(self, config: dict):
    """Set up a subprocess wrapper around the hermes binary."""
    hermes_binary = config["hermes"]["binary_path"]
    self.process = Process(hermes_binary)
def __init__(self, config: dict):
    """Create the genotype CLI process wrapper; dry-run mode is off by default."""
    genotype_settings = config["genotype"]
    self.process = Process(
        binary=genotype_settings["binary_path"], config=genotype_settings["config_path"]
    )
    self.dry_run = False
class GenotypeAPI:
    """Interface with Genotype app.

    The config should contain a 'genotype' key:

        { 'database': 'mysql://localhost:3306/database' }
    """

    def __init__(self, config: dict):
        self.process = Process(
            binary=config["genotype"]["binary_path"],
            config=config["genotype"]["config_path"],
        )
        # When True, commands are only logged by the Process wrapper, not executed.
        self.dry_run = False

    def set_dry_run(self, dry_run: bool) -> None:
        """Set the dry run state"""
        self.dry_run = dry_run

    def upload(self, bcf_path: str, samples_sex: dict, force: bool = False) -> None:
        """Upload genotypes for a family of samples.

        Args:
            bcf_path: path to the BCF file with the family's genotypes
            samples_sex: mapping sample_id -> {"pedigree": sex, "analysis": sex}
            force: pass --force to the load command
        """
        upload_parameters = ["load", str(bcf_path)]
        if force:
            upload_parameters.append("--force")
        LOG.info("loading VCF genotypes for sample(s): %s", ", ".join(samples_sex.keys()))
        self.process.run_command(parameters=upload_parameters, dry_run=self.dry_run)
        for sample_id, sexes in samples_sex.items():
            # This is the sample sex specified by the customer
            self.update_sample_sex(sample_id, sexes["pedigree"])
            # This is the predicted sex based on variant calls from the pipeline
            self.update_analysis_sex(sample_id, sex=sexes["analysis"])

    def update_sample_sex(self, sample_id: str, sex: str) -> None:
        """Update the sex for a sample in the genotype tool"""
        sample_sex_parameters = ["add-sex", sample_id, "-s", sex]
        LOG.debug("Set sex for sample %s to %s", sample_id, sex)
        self.process.run_command(parameters=sample_sex_parameters, dry_run=self.dry_run)

    def update_analysis_sex(self, sample_id: str, sex: str) -> None:
        """Update the predicted sex for a sample based on genotype analysis in the genotype tool"""
        analysis_sex_parameters = ["add-sex", sample_id, "-a", "sequence", sex]
        LOG.debug(
            "Set predicted sex for sample %s to %s for the sequence analysis", sample_id, sex
        )
        self.process.run_command(parameters=analysis_sex_parameters, dry_run=self.dry_run)

    def _export(self, parameters: list) -> str:
        """Run an export command and return its stdout.

        Raises:
            CaseNotFoundError: when the command produced no output
        """
        self.process.run_command(parameters=parameters, dry_run=self.dry_run)
        output = self.process.stdout
        # If sample not in genotype db, stdout of genotype command will be empty.
        if not output:
            raise CaseNotFoundError("samples not found in genotype db")
        return output

    def export_sample(self, days: int = 0) -> str:
        """Export sample info."""
        return self._export(["export-sample", "-d", str(days)])

    def export_sample_analysis(self, days: int = 0) -> str:
        """Export analysis."""
        return self._export(["export-sample-analysis", "-d", str(days)])

    def __str__(self):
        return f"GenotypeAPI(dry_run: {self.dry_run})"
def __init__(self, config: dict):
    """Store the vogue binary/config paths and build the CLI process wrapper."""
    super(VogueAPI, self).__init__()
    vogue_settings = config["vogue"]
    self.vogue_config = vogue_settings["config_path"]
    self.vogue_binary = vogue_settings["binary_path"]
    self.process = Process(binary=self.vogue_binary, config=self.vogue_config)
class VogueAPI:
    """API for vogue

    Wraps the vogue command line tool; every public method builds a parameter
    list and hands it to the Process wrapper.
    """

    def __init__(self, config: dict):
        super(VogueAPI, self).__init__()
        self.vogue_config = config["vogue"]["config_path"]
        self.vogue_binary = config["vogue"]["binary_path"]
        self.process = Process(binary=self.vogue_binary, config=self.vogue_config)

    def _run_and_log(self, parameters: list) -> None:
        """Run a vogue command and log its stderr output as it executes."""
        self.process.run_command(parameters=parameters)
        # Execute command and print its stdout+stderr as it executes
        for line in self.process.stderr_lines():
            LOG.info("vogue output: %s", line)

    def load_genotype_data(self, genotype_dict: dict) -> None:
        """Load genotype data from a dict."""
        self._run_and_log(["load", "genotype", "-s", json.dumps(genotype_dict)])

    def load_apptags(self, apptag_list: list) -> None:
        """Add observations from a VCF."""
        self._run_and_log(["load", "apptag", json.dumps(apptag_list)])

    def load_samples(self, days: int) -> None:
        """Running vogue load samples."""
        self._run_and_log(["load", "sample", "-d", str(days)])

    def load_flowcells(self, days: int) -> None:
        """Running vogue load flowcells."""
        self._run_and_log(["load", "flowcell", "-d", str(days)])

    def load_reagent_labels(self, days: int) -> None:
        """Running vogue load reagent_labels."""
        self._run_and_log(["load", "reagent_labels", "-d", str(days)])

    def load_bioinfo_raw(self, load_bioinfo_inputs: dict) -> None:
        """Running vogue load bioinfo raw."""
        load_bioinfo_raw_call = [
            "load",
            "bioinfo",
            "raw",
            "--sample-list",
            load_bioinfo_inputs["samples"],
            "--analysis-result",
            load_bioinfo_inputs["analysis_result_file"],
            "--analysis-type",
            load_bioinfo_inputs["analysis_type"],
            "--analysis-case",
            load_bioinfo_inputs["analysis_case_name"],
            "--workflow-version",
            load_bioinfo_inputs["analysis_workflow_version"],
            "--case-analysis-type",
            load_bioinfo_inputs["case_analysis_type"],
            "--analysis-workflow",
            load_bioinfo_inputs["analysis_workflow_name"],
        ]
        self.process.run_command(parameters=load_bioinfo_raw_call)

    def load_bioinfo_process(self, load_bioinfo_inputs: dict, cleanup_flag: bool) -> None:
        """Running load bioinfo process."""
        load_bioinfo_process_call = [
            "load",
            "bioinfo",
            "process",
            "--analysis-type",
            load_bioinfo_inputs["analysis_type"],
            "--analysis-case",
            load_bioinfo_inputs["analysis_case_name"],
            "--analysis-workflow",
            load_bioinfo_inputs["analysis_workflow_name"],
            "--workflow-version",
            load_bioinfo_inputs["analysis_workflow_version"],
            "--case-analysis-type",
            load_bioinfo_inputs["case_analysis_type"],
        ]
        if cleanup_flag:
            load_bioinfo_process_call.append("--cleanup")
        self.process.run_command(parameters=load_bioinfo_process_call)

    def load_bioinfo_sample(self, load_bioinfo_inputs: dict) -> None:
        """Running load bioinfo sample."""
        load_bioinfo_sample_call = [
            "load",
            "bioinfo",
            "sample",
            "--analysis-case",
            load_bioinfo_inputs["analysis_case_name"],
        ]
        self.process.run_command(parameters=load_bioinfo_sample_call)