Example #1
    def predict(self, reactions, verbose=False):
        """Make predictions from the voting for a set of provided reactions."""
        if self.built:
            resDict = {}

            num_models = self.statsmodel_set.all().count()
            num_finished = 0
            overall_start_time = datetime.datetime.now()
            for model in self.statsmodel_set.all():
                visitorOptions = json.loads(self.modelVisitorOptions)
                modelVisitor = getattr(visitorModules[self.modelVisitorLibrary], self.modelVisitorTool)(
                    statsModel=model, **visitorOptions)
                if verbose:
                    print "statsModel {}, saved at {}, predicting...".format(model.pk, model.outputFile)
                predictions = modelVisitor.predict(reactions, verbose=verbose)
                if verbose:
                    print "\t...finished predicting. Storing predictions...",
                newResDict = self._storePredictionComponents(
                    predictions, model)

                # Update the overall result-dictionary with these new counts.
                for reaction, responseDict in newResDict.items():
                    for response, outcomeDict in responseDict.items():
                        for outcome, count in outcomeDict.items():
                            if reaction not in resDict:
                                resDict[reaction] = {}
                            if response not in resDict[reaction]:
                                resDict[reaction][response] = {}

                            if outcome not in resDict[reaction][response]:
                                # Initialize to zero so the first vote isn't counted twice.
                                resDict[reaction][response][outcome] = 0
                            resDict[reaction][response][outcome] += count

                if verbose:
                    print "predictions stored."
                    for response in self.outcomeDescriptors:
                        predDesc = response.predictedDescriptorType.objects.get(
                            modelContainer=self, statsModel=model, predictionOf=response)
                        conf_mtrx = predDesc.getConfusionMatrix(
                            reactions=reactions)

                        print "Confusion matrix for {}:".format(predDesc.heading)
                        print confusionMatrixString(conf_mtrx)
                        print "Accuracy: {:.3}".format(accuracy(conf_mtrx))
                        print "BCR: {:.3}".format(BCR(conf_mtrx))
                        print "Matthews: {:.3}".format(Matthews(conf_mtrx))
                    num_finished += 1
                    end_time = datetime.datetime.now()
                    elapsed = (end_time - overall_start_time)
                    expected_finish = overall_start_time + datetime.timedelta(
                        seconds=elapsed.total_seconds() * num_models / float(num_finished))
                    print "{}. Predictions from {} of {} models.".format(end_time, num_finished, num_models)
                    print "Elapsed prediction time: {}. Expected completion time: {}".format(elapsed, expected_finish)

            return self._storePredictions(resDict)
        else:
            raise RuntimeError(
                'A model container cannot be used to make predictions before the build method has been called')
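
The triple-nested loop above implements the voting step: each component model contributes a count for every (reaction, response, outcome) triple, and the counts are merged into one overall dictionary. A minimal standalone sketch of the same aggregation, using collections.defaultdict so the explicit key checks disappear (the names here are illustrative, not from the source project):

from collections import defaultdict

def merge_vote_counts(res_dict, new_counts):
    """Accumulate per-(reaction, response, outcome) vote counts."""
    for reaction, response_dict in new_counts.items():
        for response, outcome_dict in response_dict.items():
            for outcome, count in outcome_dict.items():
                res_dict[reaction][response][outcome] += count
    return res_dict

# A three-level defaultdict means missing keys start at zero.
votes = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
merge_vote_counts(votes, {'rxn1': {'outcome': {True: 1}}})
merge_vote_counts(votes, {'rxn1': {'outcome': {True: 1, False: 2}}})
assert votes['rxn1']['outcome'][True] == 2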
Example #2
def display_model_results(container, reactions=None, heading=""):
    """
    Display confusion matrices for a model container.

    The optional heading specifies a prefix for the summary statistics
    (useful when multiple model containers are built by a single script).
    """
    overall_conf_mtrcs = container.getOverallConfusionMatrices(
        reactions=reactions)
    if not overall_conf_mtrcs:
        print "No model results to display"
        return
    if len(overall_conf_mtrcs) != 1:
        raise NotImplementedError('Can only handle one response')
    for descriptor_header, conf_mtrx in overall_conf_mtrcs:
        acc = accuracy(conf_mtrx)
        bcr = BCR(conf_mtrx)
        matthews = Matthews(conf_mtrx)
        print "Confusion matrix for {}:".format(descriptor_header)
        print confusionMatrixString(conf_mtrx)
        print "Accuracy: {:.3}".format(acc)
        print "BCR: {:.3}".format(bcr)
        print "Matthews: {:.3}".format(matthews)
    conf_mtrcs = container.getComponentConfusionMatrices(reactions=reactions)

    sum_acc = 0.0
    sum_bcr = 0.0
    sum_matthews = 0.0
    count = 0

    for model_mtrcs in conf_mtrcs:
        if len(model_mtrcs) != 1:
            raise NotImplementedError('Can only handle one response')
        for descriptor_header, conf_mtrx in model_mtrcs:
            acc = accuracy(conf_mtrx)
            bcr = BCR(conf_mtrx)
            matthews = Matthews(conf_mtrx)
            print "Confusion matrix for {}:".format(descriptor_header)
            print confusionMatrixString(conf_mtrx)
            print "Accuracy: {:.3}".format(acc)
            print "BCR: {:.3}".format(bcr)
            print "Matthews: {:.3}".format(matthews)

            # This only works for a single response.
            # TODO: make this work for multiple responses.
            sum_acc += acc
            sum_bcr += bcr
            sum_matthews += matthews
            count += 1

    print "{} Average accuracy: {:.3}".format(heading, sum_acc / count)
    print "{} Average BCR: {:.3}".format(heading, sum_bcr / count)
    print "{} Average Matthews: {:.3}".format(heading, sum_matthews / count)
Example #3
def display_model_results(container, reactions=None, heading=""):
    """
    Display confusion matrices for a model container.

    The optional heading specifies a prefix for the summary statistics
    (useful when multiple model containers are built by a single script).
    """
    overall_conf_mtrcs = container.getOverallConfusionMatrices(
        reactions=reactions)
    if not overall_conf_mtrcs:
        print("No model results to display")
        return
    if len(overall_conf_mtrcs) != 1:
        raise NotImplementedError('Can only handle one response')
    for descriptor_header, conf_mtrx in overall_conf_mtrcs:
        acc = accuracy(conf_mtrx)
        bcr = BCR(conf_mtrx)
        matthews = Matthews(conf_mtrx)
        print("Confusion matrix for {}:".format(descriptor_header))
        print(confusionMatrixString(conf_mtrx))
        print("Accuracy: {:.3}".format(acc))
        print("BCR: {:.3}".format(bcr))
        print("Matthews: {:.3}".format(matthews))
    conf_mtrcs = container.getComponentConfusionMatrices(reactions=reactions)

    sum_acc = 0.0
    sum_bcr = 0.0
    sum_matthews = 0.0
    count = 0

    for model_mtrcs in conf_mtrcs:
        if len(model_mtrcs) != 1:
            raise NotImplementedError('Can only handle one response')
        for descriptor_header, conf_mtrx in model_mtrcs:
            acc = accuracy(conf_mtrx)
            bcr = BCR(conf_mtrx)
            matthews = Matthews(conf_mtrx)
            print("Confusion matrix for {}:".format(descriptor_header))
            print(confusionMatrixString(conf_mtrx))
            print("Accuracy: {:.3}".format(acc))
            print("BCR: {:.3}".format(bcr))
            print("Matthews: {:.3}".format(matthews))

            # This only works for a single response.
            # TODO: make this work for multiple responses.
            sum_acc += acc
            sum_bcr += bcr
            sum_matthews += matthews
            count += 1

    print("{} Average accuracy: {:.3}".format(heading, sum_acc / count))
    print("{} Average BCR: {:.3}".format(heading, sum_bcr / count))
    print("{} Average Matthews: {:.3}".format(heading, sum_matthews / count))
Example #4
    def build(self, verbose=False):
        """
        Take all options confirmed so far and generate a full model set.

        Train a multitude of models using the selected external libraries, then save the
        relevant information to the database (see the statsmodel class).

        Run the tests for each model using the test sets of data, then save that information.

        """
        if self.built:
            raise RuntimeError(
                "Cannot build a model that has already been built.")

        if verbose:
            print "Starting building at {}".format(datetime.datetime.now())

        # Set up the prediction results dictionary:
        # reaction -> response -> outcome -> vote count.
        resDict = {}

        num_models = self.statsmodel_set.all().count()
        num_finished = 0
        overall_start_time = datetime.datetime.now()
        for statsModel in self.statsmodel_set.all():
            visitorOptions = json.loads(self.modelVisitorOptions)
            modelVisitor = getattr(visitorModules[self.modelVisitorLibrary], self.modelVisitorTool)(
                statsModel=statsModel, **visitorOptions)
            # Train the model.
            statsModel.startTime = datetime.datetime.now()
            fileName = os.path.join(settings.MODEL_DIR, '{}_{}_{}_{}.model'.format(
                self.pk, statsModel.pk, self.modelVisitorLibrary, self.modelVisitorTool))
            statsModel.outputFile = fileName
            if verbose:
                print "{} statsModel {}, saving to {}, training...".format(statsModel.startTime, statsModel.pk, fileName)
            modelVisitor.train(verbose=verbose)
            statsModel.endTime = datetime.datetime.now()
            if verbose:
                print "\t...Trained. Finished at {}. Saving statsModel...".format(statsModel.endTime),
            statsModel.save()
            if verbose:
                print "saved"

            # Test the model.
            for testSet in statsModel.testSets.all():
                if testSet.reactions.all().count() != 0:
                    if verbose:
                        print "Predicting test set..."
                    predictions = modelVisitor.predict(
                        testSet.reactions.all(), verbose=verbose)
                    if verbose:
                        print "\t...finished predicting. Storing predictions...",
                    newResDict = self._storePredictionComponents(
                        predictions, statsModel)

                    # Update the overall result-dictionary with these new
                    # counts.
                    for reaction, responseDict in newResDict.items():
                        for response, outcomeDict in responseDict.items():
                            for outcome, count in outcomeDict.items():
                                if reaction not in resDict:
                                    resDict[reaction] = {}
                                if response not in resDict[reaction]:
                                    resDict[reaction][response] = {}

                                if outcome not in resDict[reaction][response]:
                                    # Initialize to zero so the first vote isn't counted twice.
                                    resDict[reaction][response][outcome] = 0
                                resDict[reaction][response][outcome] += count

                    if verbose:
                        print "predictions stored."
                        for response in self.outcomeDescriptors:
                            predDesc = response.predictedDescriptorType.objects.get(
                                modelContainer=self, statsModel=statsModel, predictionOf=response)
                            conf_mtrx = predDesc.getConfusionMatrix()

                            print "Confusion matrix for {}:".format(predDesc.heading)
                            print confusionMatrixString(conf_mtrx)
                            print "Accuracy: {:.3}".format(accuracy(conf_mtrx))
                            print "BCR: {:.3}".format(BCR(conf_mtrx))

                elif verbose:
                    print "Test set is empty."

            if verbose:
                num_finished += 1
                end_time = datetime.datetime.now()
                elapsed = (end_time - overall_start_time)
                expected_finish = overall_start_time + datetime.timedelta(
                    seconds=elapsed.total_seconds() * num_models / float(num_finished))
                print "{}. {} of {} models built.".format(end_time, num_finished, num_models)
                print "Elapsed model building time: {}. Expected completion time: {}".format(elapsed, expected_finish)

        if resDict:
            if verbose:
                print "Storing overall model predictions...",
            self._storePredictions(resDict)
            if verbose:
                print "Predictions stored"

        self.built = True
        if verbose:
            overall_end_time = datetime.datetime.now()
            print "Finished at {}".format(overall_end_time)
Example #5
    def predict(self, reactions, verbose=False):
        """Make predictions from the voting for a set of provided reactions."""
        if self.built:
            resDict = {}

            num_models = self.statsmodel_set.all().count()
            num_finished = 0
            overall_start_time = datetime.datetime.now()
            for model in self.statsmodel_set.all():
                visitorOptions = json.loads(self.modelVisitorOptions)
                modelVisitor = getattr(
                    visitorModules[self.modelVisitorLibrary],
                    self.modelVisitorTool)(statsModel=model, **visitorOptions)
                if verbose:
                    logger.info(
                        "statsModel {}, saved at {}, predicting...".format(
                            model.pk, model.outputFile))
                predictions = modelVisitor.predict(reactions, verbose=verbose)
                if verbose:
                    logger.info(
                        "\t...finished predicting. Storing predictions...")
                newResDict = self._storePredictionComponents(
                    predictions, model)

                # Update the overall result-dictionary with these new counts.
                for reaction, responseDict in newResDict.items():
                    for response, outcomeDict in responseDict.items():
                        for outcome, count in outcomeDict.items():
                            if reaction not in resDict:
                                resDict[reaction] = {}
                            if response not in resDict[reaction]:
                                resDict[reaction][response] = {}

                            if outcome not in resDict[reaction][response]:
                                # Initialize to zero so the first vote isn't counted twice.
                                resDict[reaction][response][outcome] = 0
                            resDict[reaction][response][outcome] += count

                if verbose:
                    logger.info("predictions stored.")
                    for response in self.outcomeDescriptors:
                        predDesc = response.predictedDescriptorType.objects.get(
                            modelContainer=self,
                            statsModel=model,
                            predictionOf=response)
                        conf_mtrx = predDesc.getConfusionMatrix(
                            reactions=reactions)

                        logger.info("Confusion matrix for {}:".format(
                            predDesc.heading))
                        logger.info(confusionMatrixString(conf_mtrx))
                        logger.info("Accuracy: {:.3}".format(
                            accuracy(conf_mtrx)))
                        logger.info("BCR: {:.3}".format(BCR(conf_mtrx)))
                        logger.info("Matthews: {:.3}".format(
                            Matthews(conf_mtrx)))
                    num_finished += 1
                    end_time = datetime.datetime.now()
                    elapsed = (end_time - overall_start_time)
                    expected_finish = overall_start_time + datetime.timedelta(
                        seconds=elapsed.total_seconds() * num_models / float(num_finished))
                    logger.info("{}. Predictions from {} of {} models.".format(
                        end_time, num_finished, num_models))
                    logger.info(
                        "Elapsed prediction time: {}. Expected completion time: {}"
                        .format(elapsed, expected_finish))

            return self._storePredictions(resDict)
        else:
            raise RuntimeError(
                'A model container cannot be used to make predictions before the build method has been called'
            )
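
This variant reports through a module-level logger rather than print statements. A minimal setup using only the standard library (the configuration shown is illustrative; a Django project would normally configure logging through the LOGGING setting instead):

import logging

logger = logging.getLogger(__name__)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(name)s %(levelname)s: %(message)s")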
Example #6
    def build(self, verbose=False):
        """
        Take all options confirmed so far and generate a full model set.

        Train a multitude of models using the selected external libraries, then save the
        relevant information to the database (see the statsmodel class).

        Run the tests for each model using the test sets of data, then save that information.

        """
        if self.built:
            raise RuntimeError(
                "Cannot build a model that has already been built.")

        if verbose:
            logger.info("Starting building at {}".format(
                datetime.datetime.now()))

        # Set up the prediction results dictionary:
        # reaction -> response -> outcome -> vote count.
        resDict = {}

        num_models = self.statsmodel_set.all().count()
        num_finished = 0
        overall_start_time = datetime.datetime.now()
        for statsModel in self.statsmodel_set.all():
            visitorOptions = json.loads(self.modelVisitorOptions)
            modelVisitor = getattr(visitorModules[self.modelVisitorLibrary],
                                   self.modelVisitorTool)(
                                       statsModel=statsModel, **visitorOptions)
            # Train the model.
            statsModel.startTime = datetime.datetime.now()
            # This filename handling may be unnecessary.
            fileName = os.path.join(
                settings.STATS_MODEL_LIBS_DIR,
                '{}_{}_{}_{}.model'.format(self.pk, statsModel.pk,
                                           self.modelVisitorLibrary,
                                           self.modelVisitorTool))
            statsModel.outputFile = fileName
            if verbose:
                logger.info(
                    "{} statsModel {}, saving to {}, training...".format(
                        statsModel.startTime, statsModel.pk, fileName))
            modelVisitor.train(verbose=verbose)
            statsModel.endTime = datetime.datetime.now()
            if verbose:
                logger.info("\t...Trained. Finished at {}. Saving statsModel...".format(
                    statsModel.endTime))
            statsModel.save()
            if verbose:
                logger.info("saved")

            # Test the model.
            for testSet in statsModel.testSets.all():
                if testSet.reactions.all().count() != 0:
                    if verbose:
                        logger.info("Predicting test set...")
                    predictions = modelVisitor.predict(testSet.reactions.all(),
                                                       verbose=verbose)
                    if verbose:
                        logger.info(
                            "\t...finished predicting. Storing predictions...")
                    newResDict = self._storePredictionComponents(
                        predictions, statsModel)

                    # Update the overall result-dictionary with these new
                    # counts.
                    for reaction, responseDict in newResDict.items():
                        for response, outcomeDict in responseDict.items():
                            for outcome, count in outcomeDict.items():
                                if reaction not in resDict:
                                    resDict[reaction] = {}
                                if response not in resDict[reaction]:
                                    resDict[reaction][response] = {}

                                if outcome not in resDict[reaction][response]:
                                    # Initialize to zero so the first vote isn't counted twice.
                                    resDict[reaction][response][outcome] = 0
                                resDict[reaction][response][outcome] += count

                    if verbose:
                        logger.info("predictions stored.")
                        for response in self.outcomeDescriptors:
                            predDesc = response.predictedDescriptorType.objects.get(
                                modelContainer=self,
                                statsModel=statsModel,
                                predictionOf=response)
                            conf_mtrx = predDesc.getConfusionMatrix()

                            logger.info("Confusion matrix for {}:".format(
                                predDesc.heading))
                            logger.info(confusionMatrixString(conf_mtrx))
                            logger.info("Accuracy: {:.3}".format(
                                accuracy(conf_mtrx)))
                            logger.info("BCR: {:.3}".format(BCR(conf_mtrx)))

                elif verbose:
                    logger.info("Test set is empty.")

            if verbose:
                num_finished += 1
                end_time = datetime.datetime.now()
                elapsed = (end_time - overall_start_time)
                expected_finish = overall_start_time + datetime.timedelta(
                    seconds=elapsed.total_seconds() * num_models / float(num_finished))
                logger.info("{}. {} of {} models built.".format(
                    end_time, num_finished, num_models))
                logger.info(
                    "Elapsed model building time: {}. Expected completion time: {}"
                    .format(elapsed, expected_finish))

        if resDict:
            if verbose:
                logger.info("Storing overall model predictions...")
            self._storePredictions(resDict)
            if verbose:
                logger.info("Predictions stored")
        self.built = True
        print("here!")
        if verbose:
            overall_end_time = datetime.datetime.now()
            logger.info("Finished at {}".format(overall_end_time))