示例#1
0
 def runWithoutWndchrm(self):
     print "Loading the classifier"
     classifier = data_io.load_model()
     imageCollections = data_io.get_valid_df()
     featureGetter = FeatureGetter()
     print "Getting the features"
     fileName = data_io.get_savez_name_test()
     if not self.load:  #Last features calculated from candidates
         (namesObservations, coordinates,
          valid) = Utils.calculateFeatures(fileName, featureGetter,
                                           imageCollections)
     else:
         (namesObservations, coordinates,
          valid) = Utils.loadFeatures(fileName)
     print "Making predictions"
     #valid = normalize(valid, axis=0) #askdfhashdf
     predictions = classifier.predict(valid)
     predictions = predictions.reshape(len(predictions), 1)
     print "Writing predictions to file"
     data_io.write_submission(namesObservations, coordinates, predictions)
     data_io.write_submission_nice(namesObservations, coordinates,
                                   predictions)
     print "Calculating final results"
     return Predictor.finalResults(namesObservations, predictions,
                                   coordinates)
示例#2
0
 def prepareEnvironment(self):
     # People want to save time
     testingPath = os.path.join(data_io.get_testing_folder(),
                                data_io.get_test_folder())
     testingPathOld = os.path.join(data_io.get_testing_old_folder(),
                                   data_io.get_test_folder())
     Utils.shift(data_io.get_testing_old_folder(), testingPathOld,
                 data_io.get_test_folder(), testingPath)
     os.mkdir(testingPath)
     if not self.load:
         Utils.shift('.', data_io.get_savez_name_test(),
                     data_io.get_savez_name_test(),
                     data_io.get_savez_name_test())
     if not self.loadWndchrm:
         Utils.shift('.', data_io.get_wndchrm_dataset_test(),
                     data_io.get_wndchrm_dataset_test(),
                     data_io.get_wndchrm_dataset_test())
示例#3
0
 def run(self):
     print "Preparing the environment"
     self.prepareEnvironment()
     print "Loading the classifier"
     classifier = data_io.load_model()
     imageCollections = data_io.get_valid_df()
     featureGetter = FeatureGetter()
     wndchrmWorker = WndchrmWorkerPredict()
     print "Getting the features"
     if not self.loadWndchrm:  #Last wndchrm set of features
         fileName = data_io.get_savez_name_test()
         if not self.load:  #Last features calculated from candidates
             (namesObservations, coordinates,
              _) = Utils.calculateFeatures(fileName, featureGetter,
                                           imageCollections)
         else:
             (namesObservations, coordinates,
              _) = Utils.loadFeatures(fileName)
         print "Saving images"
         imageSaver = ImageSaver(coordinates, namesObservations,
                                 imageCollections, featureGetter.patchSize)
         imageSaver.saveImages()
         print "Executing wndchrm algorithm"
         valid = wndchrmWorker.executeWndchrm(namesObservations)
     else:
         (valid, namesObservations) = wndchrmWorker.loadWndchrmFeatures()
     print "Making predictions"
     predictions = classifier.predict(valid)
     predictions = predictions.reshape(len(predictions), 1)
     print "Writing predictions to file"
     data_io.write_submission(namesObservations, coordinates, predictions)
     data_io.write_submission_nice(namesObservations, coordinates,
                                   predictions)
     print "Calculating final results"
     return Predictor.finalResults(namesObservations, predictions,
                                   coordinates)
示例#4
0
    def run(self, k=3, useOnlyRF=True):
        featureGetter = FeatureGetter()
        fileNameTrain = data_io.get_savez_name()
        fileNameTest = data_io.get_savez_name_test()
        print "Merging files..."
        (namesObservations, coordinates, dataset) = self.mergeFiles(fileNameTrain, fileNameTest)
        dataset = dataset[:,self.filterIndexes(len(dataset[0]))]        
        print "Shuffling and splitting the data"
        indexesChanged = np.arange(len(dataset))
        np.random.shuffle(indexesChanged)
        splittedNamesObs = self.getShuffledSplits(namesObservations, indexesChanged, k+1)
        splittedCoords = self.getShuffledSplits(coordinates, indexesChanged, k+1)
        splittedData = self.getShuffledSplits(dataset, indexesChanged, k+1)
        
        """Leave the last split for testing"""
        testNamesObs = splittedNamesObs[k]
        testCoords = splittedCoords[k]
        testDataset = splittedData[k]
        
        splittedNamesObs = splittedNamesObs[:k]
        splittedCoords = splittedCoords[:k]
        splittedData = splittedData[:k]
        
        del(dataset)
        del(coordinates)
        del(namesObservations)
        del(indexesChanged)

        bestModel = None
        bestFmeasure = 0
        
        for i in range(k-1,-1,-1):#i is the index of the validation
            print "Doing cross-validation for i=%d" %i    
            namesObservationsValid = splittedNamesObs[i]
            coordinatesValid = splittedCoords[i]
            datasetValid = splittedData[i]
            namesObservationsValid = np.reshape(namesObservationsValid, namesObservationsValid.shape[0])
            namesObservationsTrain = self.getTrainData(splittedNamesObs,i)
            coordinatesTrain = self.getTrainData(splittedCoords,i)
            datasetTrain = self.getTrainData(splittedData, i)
            namesObservationsTrain = np.reshape(namesObservationsTrain, namesObservationsTrain.shape[0])
            print "Getting target vector"
            (indexes, target, obs) = featureGetter.getTargetVector(coordinatesTrain, namesObservationsTrain, datasetTrain)
            print "Selecting features"
            classifier = RandomForestClassifier(n_estimators=100, verbose=2, n_jobs=1, min_samples_split=1, random_state=1, compute_importances=True)
            model = Pipeline([('scaling', MinMaxScaler()), ('classifying', classifier)])
            model.fit(obs[indexes], target[indexes])
            if not useOnlyRF:
                importances = classifier.feature_importances_
                filterImportances = np.where(importances > 0.0001)[0]
                print len(filterImportances)
                #namesObservationsTrain = np.reshape(namesObservationsTrain, namesObservationsTrain.shape[0]) 
                print "Training model"
                #classifier = RandomForestClassifier(n_estimators=500, verbose=2, n_jobs=1, min_samples_split=100, random_state=1, compute_importances=True)
                #classifier = KNeighborsClassifier()
                classifier = LinearSVC(verbose=1)
                #classifier = MLPClassifier(verbose=1)
                model = Pipeline([('scaling', MinMaxScaler()), ('classifying', classifier)])
                model.fit(obs[indexes][:,filterImportances], target[indexes])
            print "Making predictions"
            if not useOnlyRF:
                predictions = model.predict(datasetValid[:,filterImportances])
            else:
                predictions = model.predict(datasetValid)
            predictions = predictions.reshape(len(predictions), 1)
            print "Calculating validation results"
            [_, _, _, _, _, fmeasure, _] = Predictor.finalResults(namesObservationsValid, predictions, coordinatesValid)
            if fmeasure > bestFmeasure:
                bestFmeasure = fmeasure
                bestModel = model
            del(datasetTrain)
            del(datasetValid)
            del(coordinatesTrain)
            del(coordinatesValid)
            del(namesObservationsTrain)
            del(namesObservationsValid)
        
        print "Calculating final results"
        predictions = bestModel.predict(testDataset)
        print "The final score is: "
        testNamesObs = np.reshape(testNamesObs, testNamesObs.shape[0])
        Predictor.finalResults(testNamesObs, predictions, testCoords)
示例#5
0
    def run(self, k=3, patientSplit=True, useOnlyRF=True, breakin2=True):
        featureGetter = FeatureGetter()
        overallTP = 0
        overallFP = 0
        overallFN = 0
        fileNameTrain = data_io.get_savez_name()
        fileNameTest = data_io.get_savez_name_test()
        print "Merging files..."
        (namesObservations, coordinates, dataset) = self.mergeFiles(fileNameTrain, fileNameTest)

        dataset = dataset[:,self.filterIndexes(len(dataset[0]))]        
        print "Shuffling and splitting the data"
        indexesChanged = np.arange(len(dataset))
        np.random.shuffle(indexesChanged)
        if patientSplit:
            k = 12
            (splittedNamesObs, splittedCoords, splittedData) = self.getSplits(namesObservations, coordinates, dataset)
            if breakin2:
                k = 2
                (splittedNamesObs, splittedCoords, splittedData) = self.getNewSplits(splittedNamesObs, splittedCoords, splittedData)
        else:
            splittedNamesObs = self.getShuffledSplits(namesObservations, indexesChanged, k)
            splittedCoords = self.getShuffledSplits(coordinates, indexesChanged, k)
            splittedData = self.getShuffledSplits(dataset, indexesChanged, k)
        
        del(dataset)
        del(coordinates)
        del(namesObservations)
        del(indexesChanged)
        
        overallArrayTP = np.zeros(12)
        overallArrayFP = np.zeros(12)
        overallArrayFN = np.zeros(12)

        for i in range(k-1,-1,-1):#i is the index of the validation
            print "Doing cross-validation for i=%d" %i    
            namesObservationsTest = splittedNamesObs[i]
            coordinatesTest = splittedCoords[i]
            datasetTest = splittedData[i]
            namesObservationsTest = np.reshape(namesObservationsTest, namesObservationsTest.shape[0])
            namesObservationsTrain = self.getTrainData(splittedNamesObs,i)
            coordinatesTrain = self.getTrainData(splittedCoords,i)
            datasetTrain = self.getTrainData(splittedData, i)
            namesObservationsTrain = np.reshape(namesObservationsTrain, namesObservationsTrain.shape[0])
            print "Getting target vector"
    
            (indexes, target, obs) = featureGetter.getTargetVector(coordinatesTrain, namesObservationsTrain, datasetTrain)
        
            print "Selecting features"
            classifier = RandomForestClassifier(n_estimators=100, verbose=2, n_jobs=1, min_samples_split=1, random_state=1, compute_importances=True)
            model = Pipeline([('scaling', MinMaxScaler()), ('classifying', classifier)])
            model.fit(obs[indexes], target[indexes])
            if not useOnlyRF:
                importances = classifier.feature_importances_
                filterImportances = np.where(importances > 0.0001)[0]
                print len(filterImportances)
                #namesObservationsTrain = np.reshape(namesObservationsTrain, namesObservationsTrain.shape[0]) 
                print "Training model"
                #classifier = RandomForestClassifier(n_estimators=500, verbose=2, n_jobs=1, min_samples_split=100, random_state=1, compute_importances=True)
                #classifier = KNeighborsClassifier()
                classifier = LinearSVC(verbose=1)
                #classifier = MLPClassifier(verbose=1)
                model = Pipeline([('scaling', MinMaxScaler()), ('classifying', classifier)])
                model.fit(obs[indexes][:,filterImportances], target[indexes])
            print "Making predictions"
            if not useOnlyRF:
                predictions = model.predict(datasetTest[:,filterImportances])
            else:
                predictions = model.predict(datasetTest)
            predictions = predictions.reshape(len(predictions), 1)
            print "Calculating final results"
            [truePositives, falsePositives, falseNegatives, _, _, _, (arrayTP, arrayFP, arrayFN)] = Predictor.finalResults(namesObservationsTest, predictions, coordinatesTest)
            print arrayTP
            print arrayFP
            print arrayFN
            
            overallArrayTP += arrayTP
            overallArrayFP += arrayFP
            overallArrayFN += arrayFN
            overallTP += truePositives
            overallFP += falsePositives
            overallFN += falseNegatives
            del(datasetTrain)
            del(datasetTest)
            del(coordinatesTrain)
            del(coordinatesTest)
            del(namesObservationsTrain)
            del(namesObservationsTest)
        
        precision = 0 if overallTP+overallFP == 0 else (overallTP+0.0)/(overallTP+overallFP+0.0)
        recall = 0 if overallTP+overallFN == 0 else (overallTP+0.0)/(overallTP+overallFN+0.0)
        fmeasure = 0 if recall+precision == 0 else 2*(precision*recall)/(recall+precision)
        
        print "Overall results for k=%d" %k
        print overallTP
        print overallFP
        print overallFN
        print precision
        print recall
        print fmeasure
        
        for i in range(len(overallArrayTP)):
            "Results for patient number %d:"% (i+1)
            overallTP = overallArrayTP[i]
            overallFP = overallArrayFP[i]
            overallFN = overallArrayFN[i]
            precision = 0 if overallTP+overallFP == 0 else (overallTP+0.0)/(overallTP+overallFP+0.0)
            recall = 0 if overallTP+overallFN == 0 else (overallTP+0.0)/(overallTP+overallFN+0.0)
            fmeasure = 0 if recall+precision == 0 else 2*(precision*recall)/(recall+precision)
            print precision
            print recall
            print fmeasure