示例#1
0
    def build(self):
        """Create the phenoscoring db: run setup, then fill its tables.

        Returns early (with None) when setup() hands back no db path,
        which is its signal that the build can be skipped.
        """

        dbpath, config = self.setup()
        if dbpath is None:
            # setup reported there is no db to populate
            return

        # resolve the three input files before any table is touched
        obo_file = check_file(config.obo, dbpath, "obo")
        reference_file = check_file(
            config.reference_phenotypes, dbpath, "reference_phenotypes")
        frequency_file = check_file(
            config.phenotype_frequencies, dbpath, "phenotype_frequencies")

        log = self.logger.msg1

        log("Loading ontology")
        ontology = MinimalObo(obo_file, True)

        log("Preparing phenotype frequencies")
        fill_phenotype_frequency_table(dbpath, frequency_file)

        # the concise table is filled from the reference file, the
        # complete table from the ontology and the config
        log("Preparing references")
        fill_concise_reference_table(dbpath, reference_file)
        fill_complete_reference_table(dbpath, ontology, config)

        self._end()
示例#2
0
    def test_detects_missing(self):
        """check_file raises when neither the primary nor the fallback exists."""

        missing_primary = join("tests", "not-a-dir", "small.obo")
        missing_fallback = join("tests", "also-not-a-dir", "small.obo")

        # callable form of assertRaises: the call itself must raise
        self.assertRaises(Exception,
                          check_file, missing_primary, missing_fallback)
示例#3
0
    def setUpClass(cls):
        """Build an MGI test db, then load model descriptions and phenotypes."""

        cfg = MGITestConfig()
        cfg.action = "build"
        cls.dbfile = cfg.db
        cfg.obo = check_file(cfg.obo, cfg.db)

        # build a new database from the prepared configuration
        pipeline = Phenoscoring(cfg)
        cls.pipeline = pipeline
        pipeline.build()

        # register models through _update only, so no scores are computed
        pipeline._update(check_file(cfg.model_descriptions, cfg.db),
                         check_file(cfg.model_phenotypes, cfg.db))
示例#4
0
    def setUpClass(cls):
        """Start from a removed db, build it anew and add model definitions."""

        config = IMPCTestConfig()
        cls.config = config
        dbfile = config.db
        cls.dbfile = dbfile

        # clear out any db left over from an earlier run
        remove_db(dbfile)

        config.scale_oo_scores = False
        config.obo = check_file(config.obo, dbfile)
        cls.desc_file = check_file(config.model_descriptions, dbfile)
        cls.phen_file = check_file(config.model_phenotypes, dbfile)

        # build the db, then run the pipeline's update step
        pipeline = Phenoscoring(config)
        cls.pipeline = pipeline
        pipeline.build()
        pipeline.update()
示例#5
0
 def setUpClass(cls):
     """Build a db from the complete test config and prepare shared fixtures.

     Fixtures stored on the class: the pipeline, the ontology, two
     per-term default dicts, reference priors, two reference sets that
     have learned the ontology, and a few hand-made representations.
     """

     cfg = CompleteTestConfig()
     cfg.null_prior = 0.2
     cls.dbfile = cfg.db
     pipeline = Phenoscoring(cfg)
     cls.pipeline = pipeline
     pipeline.build()

     # load the ontology that the built db is based on
     cls.obo = MinimalObo(check_file(cfg.obo, cfg.db, "obo"), True)

     # dummy per-term values: one dict of 0.2, one dict of zeros
     cls.obodefaults = {term: 0.2 for term in cls.obo.ids()}
     cls.obozeros = {term: 0 for term in cls.obo.ids()}

     cls.ref_priors = get_ref_priors(cfg.db)
     cls.rs, cls.rs2 = get_refsets(cfg.db, ref_priors=cls.ref_priors)
     for refset in (cls.rs, cls.rs2):
         refset.learn_obo(cls.obo)

     # representations for testing individual configurations
     y3 = Representation(name="Y3")
     cls.y3model = y3.set("Y:003", 0.8)
     ref_a = Representation(name="refA")
     cls.refA = ref_a.set("Y:002", 1)
     cls.refA.defaults(cls.obozeros)
     ref_b = Representation(name="refB")
     cls.refB = ref_b.set("Y:002", 1)
     cls.refB.defaults(cls.obozeros)
示例#6
0
    def test_uses_fallback(self):
        """A missing primary path makes check_file return the fallback."""

        fallback = join("tests", "testdata", "small.obo")
        missing = join("tests", "not-a-dir", "small.obo")

        chosen = check_file(missing, fallback, required=None)
        self.assertEqual(chosen, fallback)
示例#7
0
    def export_representations(self):
        """Export representations of references and of models.

        Loads the ontology into self.obo first, then delegates to the
        reference exporter and the model exporter.
        """

        dbpath, config = self._start()

        self.logger.msg1("Loading ontology")
        self.obo = MinimalObo(check_file(config.obo, dbpath, "obo"), True)

        # references first, then models
        self._export_reference_representations()
        self._export_model_representations(config)

        self._end()
示例#8
0
    def update(self):
        """Load model descriptions and phenotypes into the db, then score.

        Scores are computed for the models listed under
        summary["new_phenotypes"], but only when the update reported no
        incorrect ids and config.skip_compute is not set.
        """

        dbpath, config = self._start()
        self.config.obo = check_file(config.obo, dbpath, "obo")

        # both model files are optional for an update
        optional = dict(allow_none=True)
        desc_file = check_file(config.model_descriptions, dbpath,
                               "model_descriptions", **optional)
        phen_file = check_file(config.model_phenotypes, dbpath,
                               "model_phenotypes", **optional)

        summary = self._update(desc_file, phen_file)

        ids_ok = len(summary["incorrect_ids"]) == 0
        if ids_ok and not config.skip_compute:
            self._compute(models=summary["new_phenotypes"])

        self._end()
示例#9
0
    def remove(self):
        """Delete from the db the models listed in the descriptions file.

        Model ids are read from the "id" column of the file given by
        config.model_descriptions.
        """

        dbpath, config = self._start()

        # unlike in update(), the descriptions file may not be None here
        desc_file = check_file(config.model_descriptions, dbpath,
                               "model_descriptions", allow_none=False)

        log = self.logger.msg1
        log("Reading model ids")
        model_ids = values_in_column(desc_file, "id")

        log("Deleting models: " + str(len(model_ids)))
        delete_models(dbpath, model_ids)

        self._end()
示例#10
0
    def explain(self):
        """Run a verbose inference calculation for model/reference pairs.

        config.model and config.reference are comma-separated lists that
        are paired up by position. Data are loaded through a single
        compute packet, and each pair is scored with verbose output.

        Returns a string: either the per-pair JSON outputs joined into one
        bracketed, comma-separated list, or an error message when
        config.explain is neither "general" nor "specific".

        Raises an Exception when the numbers of models and references
        differ.
        """

        self.logger.verbose = False
        dbpath, config = self._start()

        if config.explain not in ("specific", "general"):
            return "--explain must be 'general' or 'specific'"
        config.obo = check_file(config.obo, dbpath, "obo")

        # several model/reference pairs may be requested at once
        model_ids = config.model.split(",")
        reference_ids = config.reference.split(",")
        num_pairs = len(model_ids)
        if num_pairs != len(reference_ids):
            raise Exception("incompatible number of models and references")

        # one packet, sized to hold every requested pair
        packets = prep_compute_packets(self.config,
                                       references=reference_ids,
                                       models=model_ids,
                                       partition_size=num_pairs)
        packet = packets[0]
        packet.prep()

        # general refset unless the specific one is asked for
        refset = packet.general_refset
        if config.explain == "specific":
            refset = packet.specific_refset
        refset.learn_obo(MinimalObo(config.obo))

        explanations = []
        for model_id, reference_id in zip(model_ids, reference_ids):
            chain = refset.inference_chain(packet.models[model_id],
                                           reference_id,
                                           verbose=True,
                                           fp_penalty=config.fp_penalty)
            explanations.append(chain.to_json(nodata=config.explain_nodata))

        return "[" + ",".join(explanations) + "]"
示例#11
0
    def setUpClass(cls):
        """Build an MGI test db and add a few model score rows by hand."""

        cfg = MGITestConfig()
        cfg.scale_oo_scores = False
        cls.dbfile = cfg.db
        cfg.obo = check_file(cfg.obo, cfg.db)
        pipeline = Phenoscoring(cfg)
        cls.pipeline = pipeline
        pipeline.build()

        # rows to insert: (model id, reference id, two numeric values);
        # every row is added with the same "stamp" value
        manual_rows = [
            ("model:1", "DISEASE:1", 0.95, 0.98),
            ("model:2", "DISEASE:1", 0.94, 0.96),
            ("model:3", "DISEASE:1", 0.24, 0.96),
            ("model:4", "DISEASE:2", 0.92, 0.95),
            ("model:5", "DISEASE:2", 0.86, 0.85),
            ("model:6", "DISEASE:3", 0.96, 0.95),
        ]
        score_table = ModelScoreTable(cfg.db)
        for model_id, reference_id, first_value, second_value in manual_rows:
            score_table.add(model_id, reference_id, "stamp",
                            first_value, second_value)
        score_table.save()
示例#12
0
                    help="mgi model descriptions")
parser.add_argument("--mgi_phen", action="store", required=True,
                    help="mgi model phenotypes")

parser.add_argument("--output", action="store", required=True,
                    help="prefix for output files")


# ##################################################################
# Execute the program if module is used as an executable


# Script entry point: validate the four input files, then load the IMPC
# and MGI models and phenotypes into memory.
if __name__ == "__main__":

    config = parser.parse_args()
    # Pass every input path through check_file and store the returned path
    # back on the config. No fallback is given; the `required` value is the
    # argument's own name (presumably used as a label in errors -- TODO confirm).
    config.impc_desc = check_file(config.impc_desc, required="impc_desc")
    config.impc_phen = check_file(config.impc_phen, required="impc_phen")
    config.mgi_desc = check_file(config.mgi_desc, required="mgi_desc")
    config.mgi_phen = check_file(config.mgi_phen, required="mgi_phen")
    # one timestamp, passed to all four loaders below
    timestamp = now_timestamp()

    # load impc and mgi models into memory
    impc_models = get_file_models(config.impc_desc, timestamp)
    impc_phenotypes = get_file_phenotypes(config.impc_phen, timestamp)
    mgi_models = get_file_models(config.mgi_desc, timestamp)
    mgi_phenotypes = get_file_phenotypes(config.mgi_phen, timestamp)
    
    # get all allele_ids from impc and mgi
    def model_alleles(models):
        """scan a set of models and get a set of allele_id"""
        result = set()
示例#13
0
# ##################################################################
# Execute the program if module is used as an executable

# Script entry point: parse the command line and dispatch on config.action.
if __name__ == "__main__":

    config = parser.parse_args()
    # pair of rates from the command line (presumably true-positive and
    # false-positive rates -- TODO confirm), handed to prep_MGI below
    tprfpr = (config.tpr, config.fpr)
    # short aliases for the entity-filtering helpers
    fe = filter_entities
    fe_cat = filter_entities_cat
    threshold = config.threshold

    if config.action == "MGI":
        # action to parse mouse phenotype models from MGI

        # NOTE(review): both return values are discarded, so these calls
        # act only as checks; config.input and config.obo are used as given
        check_file(config.input, required="input")
        check_file(config.obo)
        obo = MinimalObo(config.obo)
        models = prep_MGI(config.input, tprfpr, obo)
        # write out all models and subsets
        genotype_models = fe_cat(models, set(["genotype"]))
        marker_models = fe_cat(models, set(["marker"]))
        write_models(genotype_models, config.output + "-genotype-universal")
        write_models(marker_models, config.output + "-marker-universal")
        # compute and write priors based on certain types of models
        # (config.priors is a comma-separated list of categories)
        categories = set(config.priors.split(","))
        priors, num_models = get_priors_from_models(models,
                                                    categories,
                                                    obo,
                                                    dark=config.dark_count)
        print("Number of models used to inform prior: " + str(num_models))
示例#14
0
    def test_detects_required(self):
        """check_file raises when a required file has no candidate paths."""

        # neither a primary nor a fallback path is given
        self.assertRaises(Exception, check_file, None, None, required=True)
示例#15
0
    def test_allow_none(self):
        """With allow_none=True, check_file may hand back None."""

        # no paths at all, but None is explicitly permitted
        outcome = check_file(None, None, required="aaa", allow_none=True)
        self.assertEqual(outcome, None)
示例#16
0
    def test_check(self):
        """An existing primary path is returned unchanged."""

        existing = join("tests", "testdata", "small.obo")
        self.assertEqual(check_file(existing, None, required=None), existing)