Пример #1
0
 def output(self):
     """
     Return the Luigi Task output target for ``mgi_markers_parquet``.

     The dataset name is appended directly to ``self.output_path`` with no
     separator added here
     (e.g. impc/dr15.2/parquet/mgi_markers_parquet).
     """
     config = ImpcConfig()
     target_path = f"{self.output_path}mgi_markers_parquet"
     return config.get_target(target_path)
Пример #2
0
 def output(self):
     """
     Return the Luigi Task output target for ``genotype_phenotype_parquet``.

     The dataset name is appended directly to ``self.output_path`` with no
     separator added here
     (e.g. impc/dr15.2/output/genotype_phenotype_parquet).
     """
     config = ImpcConfig()
     target_path = f"{self.output_path}genotype_phenotype_parquet"
     return config.get_target(target_path)
Пример #3
0
 def output(self):
     """
     Return the Luigi Task output target for the open stats dataset.

     ``self.output_path`` is first given a trailing "/" if it lacks one, and
     the normalised value is written back to the task attribute. The dataset
     name then depends on the task settings:

     * ``extract_windowed_data == "true"`` selects
       ``open_stats_parquet_with_windowing_data``;
     * otherwise ``raw_data_in_output == "include"`` selects
       ``open_stats_parquet_with_raw_data``;
     * in every other case ``open_stats_parquet`` is used.
     """
     base_path = self.output_path
     if not base_path.endswith("/"):
         base_path += "/"
     self.output_path = base_path

     # The windowing flag takes precedence over the raw-data flag.
     if self.extract_windowed_data == "true":
         dataset_name = "open_stats_parquet_with_windowing_data"
     elif self.raw_data_in_output == "include":
         dataset_name = "open_stats_parquet_with_raw_data"
     else:
         dataset_name = "open_stats_parquet"
     return ImpcConfig().get_target(f"{self.output_path}{dataset_name}")
Пример #4
0
 def output(self):
     """
     Return the Luigi Task output target for
     ``colonies_tracking_clean_parquet``.

     The dataset name is appended directly to ``self.output_path`` with no
     separator added here
     (e.g. impc/dr15.2/parquet/colonies_tracking_clean_parquet).
     """
     config = ImpcConfig()
     target_path = f"{self.output_path}colonies_tracking_clean_parquet"
     return config.get_target(target_path)
Пример #5
0
 def output(self):
     """
     Return the Luigi Task output target for ``mgi_phenotype_parquet``.

     ``self.output_path`` is given a trailing "/" if it lacks one, and the
     normalised value is written back to the task attribute before the
     dataset name is appended.
     """
     base_path = self.output_path
     if not base_path.endswith("/"):
         base_path += "/"
     self.output_path = base_path
     return ImpcConfig().get_target(f"{self.output_path}mgi_phenotype_parquet")
Пример #6
0
 def output(self):
     """
     Return the Luigi Task output target for ``product_report_raw_parquet``.

     The dataset name is appended directly to ``self.output_path`` with no
     separator added here
     (e.g. impc/dr15.2/parquet/product_report_raw_parquet).
     """
     config = ImpcConfig()
     target_path = f"{self.output_path}product_report_raw_parquet"
     return config.get_target(target_path)
Пример #7
0
 def output(self):
     """
     Return the Luigi Task output target for
     ``impc_ontology_metadata_parquet``.

     The dataset name is appended directly to ``self.output_path`` with no
     separator added here
     (e.g. impc/dr15.2/parquet/impc_ontology_metadata_parquet).
     """
     config = ImpcConfig()
     target_path = f"{self.output_path}impc_ontology_metadata_parquet"
     return config.get_target(target_path)
Пример #8
0
 def output(self):
     """
     Return the Luigi Task output target for the cleaned specimen parquet.

     The dataset name is ``<specimen_type>_specimen_clean_parquet``, built
     from ``self.specimen_type`` and appended directly to
     ``self.output_path`` with no separator added here
     (e.g. impc/dr15.2/parquet/mouse_specimen_clean_parquet).
     """
     config = ImpcConfig()
     dataset_name = f"{self.specimen_type}_specimen_clean_parquet"
     return config.get_target(f"{self.output_path}{dataset_name}")
Пример #9
0
 def output(self):
     """
     Return the Luigi Task output target for ``gene_diseases_json``.

     The target lives under an ``impc_web_api`` sub-directory; a "/" is
     always inserted after ``self.output_path``
     (e.g. impc/dr15.2/parquet/impc_web_api/gene_diseases_json).
     """
     config = ImpcConfig()
     target_path = f"{self.output_path}/impc_web_api/gene_diseases_json"
     return config.get_target(target_path)
Пример #10
0
 def output(self):
     """
     Return the Luigi Task output target for
     ``specimen_level_experiment_cross_ref_parquet``.

     The dataset name is appended directly to ``self.output_path`` with no
     separator added here (e.g.
     impc/dr15.2/parquet/specimen_level_experiment_cross_ref_parquet).
     """
     config = ImpcConfig()
     dataset_name = "specimen_level_experiment_cross_ref_parquet"
     return config.get_target(f"{self.output_path}{dataset_name}")
Пример #11
0
 def output(self):
     """
     Return the Luigi Task output target for ``stats_analysis_out_parquet``.

     The dataset name is appended directly to ``self.output_path`` with no
     separator added here
     (e.g. impc/dr16.0/parquet/stats_analysis_out_parquet).
     """
     config = ImpcConfig()
     target_path = f"{self.output_path}stats_analysis_out_parquet"
     return config.get_target(target_path)
Пример #12
0
 def output(self):
     """
     Return the Luigi Task output target for ``gene_histopath_json``.

     The target lives under an ``impc_web_api`` sub-directory; a "/" is
     always inserted after ``self.output_path``
     (e.g. impc/dr15.2/parquet/impc_web_api/gene_histopath_json).
     """
     config = ImpcConfig()
     target_path = f"{self.output_path}/impc_web_api/gene_histopath_json"
     return config.get_target(target_path)
Пример #13
0
 def output(self):
     """
     Return the Luigi Task output target for the raw IMITS entity parquet.

     ``self.output_path`` is given a trailing "/" if it lacks one, and the
     normalised value is written back to the task attribute. The dataset
     name is ``imits_<entity_type>_raw_parquet`` with ``self.entity_type``
     lower-cased.
     """
     base_path = self.output_path
     if not base_path.endswith("/"):
         base_path += "/"
     self.output_path = base_path

     dataset_name = f"imits_{self.entity_type.lower()}_raw_parquet"
     return ImpcConfig().get_target(f"{self.output_path}{dataset_name}")
Пример #14
0
 def output(self):
     """
     Return the Luigi Task output target for the normalized entity parquet.

     ``self.output_path`` is given a trailing "/" if it lacks one, and the
     normalised value is written back to the task attribute. The dataset
     name is ``<entity_type>_normalized_parquet``, using ``self.entity_type``
     exactly as given.
     """
     base_path = self.output_path
     if not base_path.endswith("/"):
         base_path += "/"
     self.output_path = base_path

     dataset_name = f"{self.entity_type}_normalized_parquet"
     return ImpcConfig().get_target(f"{self.output_path}{dataset_name}")
Пример #15
0
 def app_options(self):
     """
     Generate the options passed to the PySpark job.

     The returned list holds, in order: the OBO ontology input path, the
     deploy mode read from ImpcConfig, and the path of this task's output
     target.
     """
     ontology_input = self.obo_ontology_input_path
     deploy_mode = ImpcConfig().deploy_mode
     output_location = self.output().path
     return [ontology_input, deploy_mode, output_location]
Пример #16
0
 def output(self):
     """
     Return the Luigi Task output target for
     ``line_level_experiment_cross_ref_parquet``
     (e.g. impc/dr15.2/parquet/line_level_experiment_cross_ref_parquet).

     ``self.output_path`` is given a trailing "/" if it lacks one, and the
     normalised value is written back to the task attribute before the
     dataset name is appended.
     """
     base_path = self.output_path
     if not base_path.endswith("/"):
         base_path += "/"
     self.output_path = base_path

     dataset_name = "line_level_experiment_cross_ref_parquet"
     return ImpcConfig().get_target(f"{self.output_path}{dataset_name}")
Пример #17
0
def extract_ontology_terms(spark_session: SparkSession) -> DataFrame:
    """
    Load the ontologies listed in ONTOLOGIES and return their terms as a
    Spark DataFrame.

    Ontologies are only fetched when the deploy mode is "local" or "client";
    in any other mode no terms are collected and the DataFrame is read from
    an empty list.

    :param spark_session: session used to parallelize the parsed terms and
        to read them back as JSON with ONTOLOGY_SCHEMA
    :return: DataFrame read from the parsed terms in FAILFAST mode
    """
    parsed_terms = []
    if ImpcConfig().deploy_mode in ["local", "client"]:
        for descriptor in ONTOLOGIES:
            ontology_file = f"{descriptor['id']}.{descriptor['format']}"
            print(f"Processing {ontology_file}")
            ontology: Ontology = pronto.Ontology.from_obo_library(
                ontology_file)

            # First relationship whose id is "part_of", or None when the
            # ontology does not define one.
            part_of_rel = next(
                (rel for rel in ontology.relationships()
                 if rel.id == "part_of"),
                None,
            )
            if part_of_rel is not None:
                part_of_rel.transitive = False
                print("Starting to compute super classes from part_of")
                # Register every part_of object known to this ontology as a
                # superclass; this must happen before the ancestors of the
                # top level terms are collected below.
                for term in ontology.terms():
                    for part_parent in term.objects(part_of_rel):
                        if part_parent.id in ontology.keys():
                            term.superclasses().add(part_parent)
                print("Finished to compute super classes from part_of")

            top_level_terms = [
                ontology[term_id]
                for term_id in descriptor["top_level_terms"]
            ]
            # Ancestors of the top level terms, excluding the terms themselves.
            top_level_ancestors = {
                ancestor
                for top_level_term in top_level_terms
                for ancestor in top_level_term.superclasses(with_self=False)
            }

            # Terms without a name are skipped.
            parsed_terms.extend([
                _parse_ontology_term(term, top_level_terms,
                                     top_level_ancestors, part_of_rel)
                for term in ontology.terms() if term.name is not None
            ])
            print(f"Finished processing {ontology_file}")

    terms_rdd = spark_session.sparkContext.parallelize(parsed_terms)
    return spark_session.read.json(terms_rdd,
                                   schema=ONTOLOGY_SCHEMA,
                                   mode="FAILFAST")
Пример #18
0
 def output(self):
     """
     Return the Luigi Task output target for ``impress_parameter_parquet``.

     ``self.output_path`` is given a trailing "/" if it lacks one, and the
     normalised value is written back to the task attribute before the
     dataset name is appended.
     """
     base_path = self.output_path
     if not base_path.endswith("/"):
         base_path += "/"
     self.output_path = base_path
     return ImpcConfig().get_target(
         f"{self.output_path}impress_parameter_parquet")
Пример #19
0
 def output(self):
     """
     Return the Luigi Task output target for ``gene_bundle_parquet``.

     The dataset name is appended directly to ``self.output_path`` with no
     separator added here.
     """
     config = ImpcConfig()
     target_path = f"{self.output_path}gene_bundle_parquet"
     return config.get_target(target_path)
Пример #20
0
 def output(self):
     """
     Return the Luigi Task output target for ``allele_ref_parquet``.

     The dataset name is appended directly to ``self.output_path`` with no
     separator added here
     (e.g. impc/dr16.0/parquet/allele_ref_parquet).
     """
     config = ImpcConfig()
     target_path = f"{self.output_path}allele_ref_parquet"
     return config.get_target(target_path)
 def output(self):
     """
     Return the Luigi Task output target for the ``pain-xml/`` directory.

     A "/" is always inserted between ``self.output_path`` and ``pain-xml/``.
     """
     config = ImpcConfig()
     target_path = f"{self.output_path}/pain-xml/"
     return config.get_target(target_path)
Пример #22
0
 def output(self):
     """
     Return the Luigi Task output target for ``impc_images_core_parquet``.

     ``self.output_path`` is given a trailing "/" if it lacks one, and the
     normalised value is written back to the task attribute before the
     dataset name is appended.
     """
     base_path = self.output_path
     if not base_path.endswith("/"):
         base_path += "/"
     self.output_path = base_path
     return ImpcConfig().get_target(
         f"{self.output_path}impc_images_core_parquet")
Пример #23
0
 def output(self):
     """
     Return the Luigi Task output target for
     ``flatten_observations_parquet``.

     ``self.output_path`` is given a trailing "/" if it lacks one, and the
     normalised value is written back to the task attribute before the
     dataset name is appended.
     """
     base_path = self.output_path
     if not base_path.endswith("/"):
         base_path += "/"
     self.output_path = base_path
     return ImpcConfig().get_target(
         f"{self.output_path}flatten_observations_parquet")
Пример #24
0
 def output(self):
     """
     Return the Luigi Task output target for ``mp_chooser_json``.

     ``self.output_path`` is given a trailing "/" if it lacks one, and the
     normalised value is written back to the task attribute before the
     dataset name is appended.
     """
     base_path = self.output_path
     if not base_path.endswith("/"):
         base_path += "/"
     self.output_path = base_path
     return ImpcConfig().get_target(f"{self.output_path}mp_chooser_json")
Пример #25
0
 def output(self):
     """
     Return the Luigi Task output target for the fixed name ``done!``.

     The target does not depend on any attribute of the task.
     """
     config = ImpcConfig()
     return config.get_target("done!")
Пример #26
0
 def output(self):
     """
     Return the Luigi Task output target for ``batch_query_parquet``.

     The dataset name is appended directly to ``self.output_path`` with no
     separator added here.
     """
     config = ImpcConfig()
     target_path = f"{self.output_path}batch_query_parquet"
     return config.get_target(target_path)
Пример #27
0
 def output(self):
     """
     Return the Luigi Task output target for ``images_pipeline_input_csv``.

     ``self.output_path`` is given a trailing "/" if it lacks one, and the
     normalised value is written back to the task attribute before the
     dataset name is appended.
     """
     base_path = self.output_path
     if not base_path.endswith("/"):
         base_path += "/"
     self.output_path = base_path
     return ImpcConfig().get_target(
         f"{self.output_path}images_pipeline_input_csv")