Пример #1
0
    def test_it_is_convertable_to_a_dict_as_a_shallow_copy(self):
        """
        The manager is no dict itself, but toDict() exports one whose values
        equal what the manager returns for the same keys.
        """
        Manager = PropertiesManager()
        # The dedicated isinstance assertions name the offending type when
        # they fail, which assertTrue/assertFalse around isinstance() cannot.
        self.assertNotIsInstance(Manager, dict)

        MD = Manager.toDict()
        self.assertIsInstance(MD, dict)

        # Iterating a dict yields its keys, no .keys() call required.
        for Key in MD:
            self.assertEqual(Manager[Key], MD[Key])
Пример #2
0
    def test_it_reassings_properties(self, Services: MagicMock):
        """
        A classifier handed to pipe() replaces the one stored in the
        properties which the service lookup returns.
        """
        Properties = PropertiesManager()
        Properties.classifier = 'is_cancer'

        def locate(Key, __):
            # Only the 'properties' lookup is answered with the real manager,
            # every other service is a throwaway mock.
            if Key == 'properties':
                return Properties
            return MagicMock()

        Services.getService.side_effect = locate

        Pipeline.Factory.getInstance().pipe(
            MagicMock(),
            MagicMock(),
            MagicMock(),
            MagicMock(),
            {'classifier': 'doid'},
        )

        self.assertEqual('doid', Properties.classifier)
Пример #3
0
    def test_it_initlializes_early_stopping_callback( self ):
        """
        train() creates the early stopping callback with the patience taken
        from the training properties.
        """
        Properties = PropertiesManager()
        Properties.training[ 'patience' ] = 50

        Subject = ModelBaseSpec.StubbedFFN( Properties, MagicMock( spec = Sequential ) )
        Subject.train( MagicMock(), MagicMock() )

        # keyword arguments the stopper mock must have been created with
        Expected = {
            'monitor': 'val_loss',
            'mode': 'min',
            'verbose': 1,
            'patience': Properties.training[ 'patience' ],
        }
        self.__Stopper.assert_called_once_with( **Expected )
Пример #4
0
    def setUp(self):
        """
        Replace the four selector classes of the selector manager module by
        mocks and prepare a reference selector plus fresh properties.
        """
        # The patchers are stored next to the mocks they produce.
        self.__DP = patch(
            'biomed.vectorizer.selector.selector_manager.DependencySelector',
            spec=Selector,
        )
        self.__FP = patch(
            'biomed.vectorizer.selector.selector_manager.FactorSelector',
            spec=Selector,
        )
        self.__LVP = patch(
            'biomed.vectorizer.selector.selector_manager.LinearVectorSelector',
            spec=Selector,
        )
        self.__LLVP = patch(
            'biomed.vectorizer.selector.selector_manager.LogisticRegressionSelector',
            spec=Selector,
        )

        # Activate the patches in the order they were declared.
        self.__D, self.__F, self.__LV, self.__LR = (
            self.__DP.start(),
            self.__FP.start(),
            self.__LVP.start(),
            self.__LLVP.start(),
        )

        # Only the dependency selector gets a predefined instance.
        self.__ReferenceSelector = MagicMock(spec=Selector)
        self.__D.return_value = self.__ReferenceSelector
        self.__PM = PropertiesManager()
Пример #5
0
    def test_it_gets_the_training_evaluation_for_multi_processing( self ):
        """
        getTrainingScore() evaluates the test split with the configured batch
        size and worker count and with use_multiprocessing switched on.
        """
        Network = MagicMock( spec = Sequential )
        Features = InputData( MagicMock(), MagicMock(), MagicMock() )
        Labels = InputData( NP.zeros( ( 2, 3 ) ), MagicMock(), MagicMock() )

        Properties = PropertiesManager()
        Properties[ "training" ][ "epochs" ] = 1
        Properties[ "training" ][ "batch_size" ] = 2
        Properties[ "training" ][ "workers" ] = 3

        # presumably the loader mock stands in for the model reload done by
        # train() - it hands back the very same model mock
        self.__Loader.return_value = Network

        Subject = ModelBaseSpec.StubbedFFN( Properties, Network )
        Subject.train( Features, Labels )
        Subject.getTrainingScore( Features, Labels )

        Expected = {
            "batch_size": Properties[ "training" ][ "batch_size" ],
            "workers": Properties[ "training" ][ "workers" ],
            "use_multiprocessing": True,
            "return_dict": True,
            "verbose": 0,
        }
        Network.evaluate.assert_called_once_with( Features.Test, Labels.Test, **Expected )
Пример #6
0
    def test_it_fails_if_the_model_was_not_trained_while_predicting( self ):
        """predict() raises a RuntimeError as long as train() was not called."""
        Network = MagicMock( spec = Sequential )
        Sample = MagicMock()
        Subject = ModelBaseSpec.StubbedFFN( PropertiesManager(), Network )

        # NOTE(review): `msg` is only the text shown when the assertion fails;
        # it is not matched against the message of the raised error.
        with self.assertRaises( RuntimeError, msg = "The model has not be trained" ):
            Subject.predict( Sample )
Пример #7
0
def printResults(Predictions):
    """
    Print a summary for every prediction set and store it as a CSV file.

    For each configuration id the findings are printed to stdout and a
    ``pmid,<classifier>`` table is written to ``../results`` relative to the
    directory of this module. The file name is built from the properties
    (blind flag, classifier, model, preprocessing variant), the current time
    and the configuration id.

    Args:
        Predictions: mapping of configuration id to an indexable triple.
            Elements 0 and 1 are read by position for every index of
            element 0: a non-zero entry in element 0 marks a finding, the
            entry of element 1 is the reported value. Element 2 must offer
            a ``'pmid'`` column with ``.iloc`` access.
    """
    pm = PropertiesManager()

    def outputResults(prediction: list):
        # Collect the rows and join them once instead of growing a string
        # with `+=` for every single row.
        rows = [f"pmid,{ pm.classifier }\n"]
        found_targets = []
        found_pmids = []
        for index in range(len(prediction[0])):
            pmid = prediction[2]['pmid'].iloc[index]
            target = prediction[1][index]
            rows.append(f"{pmid},{target}\n")
            if prediction[0][index] != 0:
                found_targets.append(target)
                found_pmids.append(pmid)

        print('number of cancer predictions found_targets:',
              len(found_targets))
        counter = collections.Counter(found_targets)
        print('(doid, count):', counter.most_common())
        print('cancer found in articles with PMID:', found_pmids)
        return "".join(rows)

    results_dir = OS.path.join(OS.path.dirname(__file__), "..", "results")
    for key in Predictions:
        print("Configuration ID ", key)
        output_predictions = outputResults(Predictions[key])

        # The time stamp is taken per configuration, after its summary was
        # printed, so every file carries its own creation time.
        prefix = 'blind_' if pm.is_blind else ''
        kind = 'binary' if pm.classifier == 'is_cancer' else 'multi'
        stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        file_name = (
            f"{prefix}{kind}_{pm.model}_{pm.preprocessing['variant']}"
            f"_{stamp}_{key}.csv"
        )

        path = OS.path.abspath(OS.path.join(results_dir, file_name))
        with open(path, "w") as file:
            file.write(output_predictions)
Пример #8
0
    def test_it_predicts_with_mulitprocessing( self ):
        """
        predict() forwards the sample to the model with the configured batch
        size and worker count and with use_multiprocessing switched on.
        """
        Network = MagicMock( spec = Sequential )
        Sample = MagicMock()
        Network.predict.return_value = NP.array( [ [ 0., 0. ] ] )

        Properties = PropertiesManager()
        Properties[ "training" ][ "epochs" ] = 1
        Properties[ "training" ][ "batch_size" ] = 2
        Properties[ "training" ][ "workers" ] = 2

        Features = InputData( MagicMock(), MagicMock(), MagicMock() )
        Labels = InputData( NP.zeros( ( 2, 3 ) ), MagicMock(), MagicMock() )

        # presumably the loader mock stands in for the model reload done by
        # train() - it hands back the very same model mock
        self.__Loader.return_value = Network

        Subject = ModelBaseSpec.StubbedFFN( Properties, Network )
        Subject.train( Features, Labels )
        Subject.predict( Sample )

        Expected = {
            "batch_size": Properties.training[ 'batch_size' ],
            "workers": Properties.training[ "workers" ],
            "use_multiprocessing": True,
        }
        Network.predict.assert_called_once_with( Sample, **Expected )
Пример #9
0
    def test_it_returns_normalized_binary_classified_data( self ):
        """
        For a two column model output predict() is expected to return one
        label per row, here [ 0, 1, 0 ].
        """
        Network = MagicMock( spec = Sequential )
        Sample = MagicMock()

        Properties = PropertiesManager()
        Properties[ "training" ][ "epochs" ] = 1
        Properties[ "training" ][ "batch_size" ] = 2
        Properties[ "training" ][ "workers" ] = 1

        Features = InputData( MagicMock(), MagicMock(), MagicMock() )
        Labels = InputData( NP.zeros( ( 2, 2 ) ), MagicMock(), MagicMock() )

        # raw model output: only the second row carries a non-zero score
        RawScores = [
            [ 0.0, 0.00 ],
            [ 0.0, 0.98867947 ],
            [ 0.0, 0.00 ],
        ]
        Network.predict.return_value = NP.array( RawScores )

        self.__Loader.return_value = Network

        Subject = ModelBaseSpec.StubbedFFN( Properties, Network )
        Subject.train( Features, Labels )

        Actual = Subject.predict( Sample )
        arrayEqual( Actual, NP.array( [ 0, 1, 0 ] ) )
Пример #10
0
    def test_it_does_nothing_the_saved_model_if_it_is_not_there( self ):
        """train() does not call the remover when the path check yields False."""
        self.__Path.exists.return_value = False

        Network = MagicMock( spec = Sequential )
        Subject = ModelBaseSpec.StubbedFFN( PropertiesManager(), Network )
        Subject.train( MagicMock(), MagicMock() )

        self.__Remover.assert_not_called()
Пример #11
0
    def test_it_trains_a_model_with_class_weights_if_weights_are_given( self ):
        """
        Weights handed to the constructor are passed on to fit() as
        class_weight together with the training and validation splits.
        """
        Network = MagicMock( spec = Sequential )
        Features = InputData( MagicMock(), MagicMock(), MagicMock() )
        Labels = InputData( MagicMock(), MagicMock(), MagicMock() )
        ClassWeights = MagicMock()

        Properties = PropertiesManager()
        Properties[ "training" ][ "epochs" ] = 1
        Properties[ "training" ][ "batch_size" ] = 2
        Properties[ "training" ][ "workers" ] = 2

        Subject = ModelBaseSpec.StubbedFFN( Properties, Network, ClassWeights )
        Subject.train( Features, Labels )

        Expected = {
            "x": Features.Training,
            "y": Labels.Training,
            "class_weight": ClassWeights,
            "shuffle": True,
            "epochs": Properties[ "training" ][ "epochs" ],
            "batch_size": Properties[ "training" ][ "batch_size" ],
            "validation_data": ( Features.Validation, Labels.Validation ),
            "workers": Properties[ "training" ][ "workers" ],
            "use_multiprocessing": True,
            "callbacks": [ self.__Stopper, self.__Checkpoint ],
        }
        Network.fit.assert_called_once_with( **Expected )
Пример #12
0
        def fakeLocator(ServiceKey, Type):
            """
            Stand-in for the service lookup: returns a new PropertiesManager
            for the key "properties" and the type PropertiesManager, raises a
            RuntimeError for anything else.
            """
            if ServiceKey != "properties":
                raise RuntimeError("Unexpected ServiceKey")
            elif Type != PropertiesManager:
                raise RuntimeError("Unexpected Type")
            else:
                return PropertiesManager()
Пример #13
0
    def setUp(self):
        """
        Prepare properties and file writer mocks and patch mkdir, checkDir
        and Time of the std_evaluator module; the clock mock always formats
        to a fixed time stamp.
        """
        self.__PM = PropertiesManager()
        # one independent writer mock per output format
        self.__Simple, self.__JSON, self.__CSV = (
            MagicMock(spec=FileWriter) for _ in range(3)
        )

        # directory helpers: checkDir reports True
        self.__mkdirM = patch('biomed.evaluator.std_evaluator.mkdir')
        self.__mkdir = self.__mkdirM.start()

        self.__checkDirM = patch('biomed.evaluator.std_evaluator.checkDir')
        self.__checkDir = self.__checkDirM.start()
        self.__checkDir.return_value = True

        # frozen clock: Time.now() -> mock whose strftime() yields the stamp
        self.__TimeM = patch('biomed.evaluator.std_evaluator.Time')
        self.__Time = self.__TimeM.start()

        self.__TimeValue = '2020-07-25_14-53-36'
        self.__TimeObj = MagicMock(spec=datetime)
        self.__TimeObj.strftime.return_value = self.__TimeValue
        self.__Time.now.return_value = self.__TimeObj
Пример #14
0
        def fakeLocator(ServiceKey, Type):
            """
            Stand-in for the service lookup: accepts only keys listed in
            Dependencies together with the type registered for them and
            answers every accepted request with a new PropertiesManager.
            """
            if ServiceKey not in Dependencies:
                raise RuntimeError("Unexpected ServiceKey")
            elif Type != Dependencies[ServiceKey]:
                raise RuntimeError("Unexpected Type")
            else:
                return PropertiesManager()
Пример #15
0
    def test_it_removes_the_saved_model_if_it_is_there( self ):
        """
        train() calls the remover with the joined file name when the path
        check yields True.
        """
        SavedModel = 'op'
        self.__Path.join.return_value = SavedModel
        self.__Path.exists.return_value = True

        Network = MagicMock( spec = Sequential )
        Subject = ModelBaseSpec.StubbedFFN( PropertiesManager(), Network )
        Subject.train( MagicMock(), MagicMock() )

        self.__Remover.assert_called_once_with( SavedModel )
Пример #16
0
    def test_it_initilizes_the_properties_mananger(self, Locator: MagicMock,
                                                   M: MagicMock):
        """
        startServices() calls M exactly once and registers its result at
        the locator under the key "properties".
        """
        self.__fullfillDepenendcies(Locator)

        # whatever M produces is what has to end up in the locator
        Expected = PropertiesManager()
        M.return_value = Expected

        Services.startServices()

        M.assert_called_once()
        Locator.set.assert_any_call("properties", Expected)
Пример #17
0
    def __initPreprocessorDependencies(self):
        """
        Create the collaborators of the preprocessor: properties limited to
        one worker, two stubbed normalizer factories and two stubbed caches
        which are backed by plain dicts.
        """
        self.__PM = PropertiesManager()
        self.__PM.preprocessing["workers"] = 1

        # the dicts stay reachable so the cache content can be inspected
        self.__FakeCache, self.__FakeCache2 = {}, {}
        self.__Shared = StubbedCache(self.__FakeCache)
        self.__FileCache = StubbedCache(self.__FakeCache2)

        self.__Complex = StubbedNormalizerFactory(["n", "v", "a"])
        self.__Simple = StubbedNormalizerFactory(["s", "l", "w"])
Пример #18
0
    def test_it_loads_the_best_model( self ):
        """
        After train() the loader was called once with the joined file name
        and its result replaced the model of the network.
        """
        Properties = PropertiesManager()
        Properties.training[ 'patience' ] = 50

        Checkpoint = 'kjf'
        self.__Path.join.return_value = Checkpoint

        Reloaded = MagicMock()
        self.__Loader.return_value = Reloaded

        Subject = ModelBaseSpec.StubbedFFN( Properties, MagicMock( spec = Sequential ) )
        Subject.train( MagicMock(), MagicMock() )

        self.__Loader.assert_called_once_with( Checkpoint, custom_objects = None )
        self.assertEqual( Reloaded, Subject._Model )
Пример #19
0
    def test_it_uses_the_given_selectors(self):
        """
        For every supported selection type build() instantiates the matching
        selector class once and hands it the properties.
        """
        Selectors = {
            "dependency": self.__D,
            "factor": self.__F,
            "linearVector": self.__LV,
            "logisticRegression": self.__LR,
        }

        for SelectorKey, SelectorClass in Selectors.items():
            Properties = PropertiesManager()
            Properties.selection['type'] = SelectorKey

            # the lookup answers every request with the current properties
            ServiceGetter = MagicMock(side_effect=lambda _, __: Properties)

            MyManager = SelectorManager.Factory.getInstance(ServiceGetter)
            MyManager.build(MagicMock(), MagicMock(), MagicMock())

            SelectorClass.assert_called_once_with(Properties)
Пример #20
0
    def test_it_returns_normalized_multi_classified_data( self ):
        """
        With the 'doid' classifier predict() is expected to return one label
        per row of the model output, here [ 1, 0, 1 ].
        """
        Network = MagicMock( spec = Sequential )
        Sample = MagicMock()

        Properties = PropertiesManager()
        Properties[ "training" ][ "epochs" ] = 1
        Properties[ "training" ][ "batch_size" ] = 2
        Properties[ "training" ][ "workers" ] = 1
        Properties.classifier = 'doid'

        Features = InputData( MagicMock(), MagicMock(), MagicMock() )
        Labels = InputData( NP.zeros( ( 4, 4 ) ), MagicMock(), MagicMock() )

        # raw model output, one row per sample
        RawScores = [
            [ -0.00716622, 23 ],
            [ -23, -98.98867947 ],
            [ -42, 12 ],
        ]
        Network.predict.return_value = NP.array( RawScores )

        self.__Loader.return_value = Network

        Subject = ModelBaseSpec.StubbedFFN( Properties, Network )
        Subject.train( Features, Labels )

        Actual = Subject.predict( Sample )
        arrayEqual( Actual, NP.array( [ 1, 0, 1 ] ) )
Пример #21
0
    def test_it_initlializes_the_checkpoint_callback( self ):
        """
        train() creates the checkpoint callback for the joined file name,
        monitoring the maximum of 'val_accuracy' and saving the best only.
        """
        Target = 'mbc'
        self.__Path.join.return_value = Target

        Network = MagicMock( spec = Sequential )
        Subject = ModelBaseSpec.StubbedFFN( PropertiesManager(), Network )
        Subject.train( MagicMock(), MagicMock() )

        Expected = {
            'monitor': 'val_accuracy',
            'mode': 'max',
            'verbose': 1,
            'save_best_only': True,
        }
        self.__Checkpoint.assert_called_once_with( Target, **Expected )
    def setUp(self):
        """
        Build a four row sample corpus, patch InputData of the text mining
        controller module and mock every collaborator with default return
        values.
        """
        Columns = ['pmid', 'cancer_type', 'doid', 'is_cancer', 'text']
        Corpus = {
            'pmid': ['1a', '2a', '3a', '4a'],
            'text': [
                "My little cute Poney is a Poney",
                "My little farm is cute.",
                "My little programm is a application and runs and runs and runs.",
                "My little keyboard is to small",
            ],
            'is_cancer': [0, 1, 1, 0],
            'doid': [-1, 1, 2, -1],
            'cancer_type': ['no cancer', 'cancer', 'cancer', 'no cancer'],
        }
        self.__Data = DataFrame(Corpus, columns=Columns)

        self.__INDF = patch(
            'biomed.text_mining.text_mining_controller.InputData'
        )

        # collaborators
        self.__PM = PropertiesManager()
        self.__Encoder = MagicMock(spec=CategoriesEncoder)
        self.__FacilityManager = MagicMock(spec=FacilityManager)
        self.__Splitter = MagicMock(spec=Splitter)
        self.__Preprocessor = MagicMock(spec=Preprocessor)
        self.__Vectorizer = MagicMock(spec=Vectorizer)
        self.__Measurer = MagicMock(spec=Measurer)
        self.__MLP = MagicMock(spec=MLP)
        self.__Evaluator = MagicMock(spec=Evaluator)

        # patched InputData with the three splits as attributes
        self.__IND = self.__INDF.start()
        self.__IND.Training = MagicMock()
        self.__IND.Training.shape = (MagicMock(), MagicMock())
        self.__IND.Validation = MagicMock()
        self.__IND.Test = MagicMock()

        # default answers of the collaborators
        self.__FacilityManager.clean.return_value = self.__Data

        self.__Splitter.trainingSplit.return_value = [
            (MagicMock(), MagicMock())
        ]
        self.__Splitter.validationSplit.return_value = (
            MagicMock(),
            MagicMock(),
        )

        self.__Preprocessor.preprocessCorpus.return_value = MagicMock()

        TrainFeatures = MagicMock()
        TrainFeatures.tolist.return_value = []
        self.__Vectorizer.featureizeTrain.return_value = TrainFeatures
        self.__Vectorizer.featureizeTest.return_value = MagicMock()
        self.__Vectorizer.getSupportedFeatures.return_value = MagicMock()
        self.__Measurer.measureClassWeights.return_value = MagicMock()
Пример #23
0
    def test_it_retruns_the_evaluation_score( self ):
        """getTrainingScore() returns what the evaluate() call of the model yields."""
        Score = MagicMock()
        Network = MagicMock( spec = Sequential )
        Network.evaluate.return_value = Score
        self.__Loader.return_value = Network

        Features = InputData( MagicMock(), MagicMock(), MagicMock() )
        Labels = InputData( NP.zeros( ( 2, 3 ) ), MagicMock(), MagicMock() )

        Subject = ModelBaseSpec.StubbedFFN( PropertiesManager(), Network )
        Subject.train( Features, Labels )

        Actual = Subject.getTrainingScore( Features, Labels )
        self.assertEqual( Actual, Score )
Пример #24
0
    def test_it_returns_the_training_history( self ):
        """
        train() returns the outcome of fit(). The fit() result exposes itself
        as `.history`, so the assertion holds for the result as well as for
        its history attribute.
        """
        History = MagicMock()
        History.history = History
        Network = MagicMock( spec = Sequential )
        Network.fit.return_value = History

        Properties = PropertiesManager()
        Properties[ "training" ][ "epochs" ] = 1
        Properties[ "training" ][ "batch_size" ] = 2
        Properties[ "training" ][ "workers" ] = 2

        Subject = ModelBaseSpec.StubbedFFN( Properties, Network )
        Actual = Subject.train( MagicMock(), MagicMock() )

        self.assertEqual( Actual, History )
Пример #25
0
        def fullfill(ServiceKey: str, _):
            """
            Answer a service lookup with a freshly built stand-in: real
            properties for "properties", a spec'd mock for every other known
            key. Unknown keys end in a KeyError.
            """
            Registry = {
                "properties": PropertiesManager(),
                "preprocessor": MagicMock(spec=Preprocessor),
                "preprocessor.cache.persistent": MagicMock(spec=Cache),
                "preprocessor.cache.shared": MagicMock(spec=Cache),
                "preprocessor.normalizer.simple": MagicMock(spec=NormalizerFactory),
                "preprocessor.normalizer.complex": MagicMock(spec=NormalizerFactory),
                "vectorizer": MagicMock(spec=Vectorizer),
                "vectorizer.selector": MagicMock(spec=Selector),
                "evaluator": MagicMock(spec=Evaluator),
                "evaluator.simple": MagicMock(spec=FileWriter),
                "evaluator.json": MagicMock(spec=FileWriter),
                "evaluator.csv": MagicMock(spec=FileWriter),
                "facilitymanager": MagicMock(spec=FacilityManager),
                "splitter": MagicMock(spec=Splitter),
                "measurer": MagicMock(spec=Measurer),
                "categories": MagicMock(spec=CategoriesEncoder),
                "mlp": MagicMock(spec=MLP),
            }

            return Registry[ServiceKey]
Пример #26
0
    def generate_stats_and_write_to_file(self, preds):
        """
        Compare predictions with the expected test labels, print the report
        and append it to '../results/all_results.txt'.

        For every classifier other than "doid" the predictions are converted
        with convert_buggy_doid_to_real_binary() and compared to the binary
        labels. Nothing is written when the lengths differ.

        Args:
            preds: predictions, read by position.
        """
        pm = PropertiesManager()
        if pm.classifier == "doid":
            preds_target = self.Y_test_75_multi
        else:
            preds_target = self.Y_test_75_binary
            preds = self.convert_buggy_doid_to_real_binary(preds)

        if len(preds) != len(preds_target):
            print('Prediction lengths don\'t match')
            return

        # matches whose label is neither '0' nor '-1'
        # NOTE(review): the excluded labels are strings - confirm that the
        # predictions are strings as well.
        tp = sum(
            1
            for i in range(len(preds))
            if preds[i] == preds_target[i] and preds[i] not in ('0', '-1')
        )

        score = classification_report(preds_target, preds)
        f1_score_macro_accurate = f1_score(preds_target, preds, average='macro')
        print('classification report:\n', score,
              '\nmore accurate macro f1 score', f1_score_macro_accurate,
              '\n correctly predicted:', tp)

        # the path is relative to the current working directory
        with open('../results/all_results.txt', 'a') as report:
            report.write(f"classifier={pm.classifier}, model={pm.model}, preprocessing={pm.preprocessing['variant']}, "
                         f"ngrams={pm.tfidf_transformation_properties['ngram_range'][1]}\n"
                         f"{score}\nf1_score_macro_accurate={f1_score_macro_accurate}\ntrue positives={tp}\n\n")
Пример #27
0
 def setUp(self):
     """Provide every test with its own, freshly created PropertiesManager."""
     self.__PM = PropertiesManager()
Пример #28
0
    def test_it_fails_if_the_model_was_not_trained_while_evaluating( self ):
        """getTrainingScore() raises a RuntimeError as long as train() was not called."""
        Network = MagicMock( spec = Sequential )
        Subject = ModelBaseSpec.StubbedFFN( PropertiesManager(), Network )

        # NOTE(review): `msg` is only the text shown when the assertion fails;
        # it is not matched against the message of the raised error.
        with self.assertRaises( RuntimeError, msg = "The model has not be trained" ):
            Subject.getTrainingScore( MagicMock(), MagicMock() )
    def __fakeLocator( self, _, __ ):
        """
        Stand-in for the service lookup: ignores both arguments and returns
        new properties whose cache_dir is the path used by this spec.
        """
        Properties = PropertiesManager()
        Properties.cache_dir = NumpyArrayFileCacheSpec.__Path
        return Properties
Пример #30
0
 def setUp( self ):
     """Provide fresh properties with the number of splitting folds set to 1."""
     Properties = PropertiesManager()
     Properties.splitting[ 'folds' ] = 1
     self.__PM = Properties