def adaboost_radar(domain):
    '''Run the Adaboost classifier trained for a radar data set.'''
    # Pre-trained weak-classifier list for the Malawi radar data set.
    weak_classifiers = learned_adaboost.radar_classifiers['malawi']
    # MODIS-style band indices used as the classifier inputs.
    indices = modis_utilities.compute_modis_indices(domain)
    # Weighted sum of the weak classifier responses.
    score = get_adaboost_sum(domain, indices, weak_classifiers)
    # NOTE(review): the original comment described this as "threshold at zero
    # (equal chance of flood / not flood)", but the cutoff actually used is
    # -1.0, which biases the result toward flood — confirm which is intended.
    return score.gte(-1.0)
def adaboost_learn(ignored=None, ignored2=None):
    '''Train an Adaboost classifier on a fixed set of MODIS training domains.

    Trains NUM_CLASSIFIERS_TO_TRAIN weak classifiers, each a (band, threshold,
    alpha) triple, and prints the resulting list so it can be pasted into the
    learned-classifier tables.  Both parameters are unused; they exist only so
    this function matches the calling convention of the other *_learn tools.
    '''
    EVAL_RESOLUTION = 250  # Meters per pixel for the reduceRegion evaluations below

    # Learn this many weak classifiers
    NUM_CLASSIFIERS_TO_TRAIN = 50

    # Load inputs for this domain and preprocess.
    # - Kashmore does not have a good unflooded comparison location so it is
    #   left out of the training.
    # This is a cleaned up set where all the permanent water masks are known
    # to be decent.  (Alternate radar / Skybox / lake training sets that were
    # kept here as commented-out code have been removed; see version control.)
    all_problems = ['unflooded_mississippi_2010.xml',
                    'unflooded_new_orleans_2004.xml',
                    'sf_bay_area_2011_4.xml',
                    'unflooded_bosnia_2013.xml']
    all_domains = [Domain('config/domains/modis/' + d) for d in all_problems]

    training_domains = all_domains

    # Default to the global permanent water mask as truth, but prefer a manual
    # ground-truth mask whenever the domain provides one.
    water_masks = [modis_utilities.get_permanent_water_mask() for d in training_domains]
    for i, domain in enumerate(all_domains):
        if domain.ground_truth is not None:
            water_masks[i] = domain.ground_truth

    training_images = [_create_adaboost_learning_image(d, modis_utilities.compute_modis_indices(d))
                       for d in training_domains]

    # Convert the 0/1 water masks to -1/+1 so they plug directly into the
    # Adaboost exponential weight update below.
    transformed_masks = [water_mask.multiply(2).subtract(1) for water_mask in water_masks]

    bands = safe_get_info(training_images[0].bandNames())
    print('Computing threshold ranges.')
    band_splits = __compute_threshold_ranges(training_domains, training_images, water_masks, bands)

    # Each input pixel in the training images starts with an equal weight.
    counts = [safe_get_info(training_images[i].select('b1').reduceRegion(
                  ee.Reducer.count(), training_domains[i].bounds, EVAL_RESOLUTION))['b1']
              for i in range(len(training_images))]
    count = sum(counts)
    weights = [ee.Image(1.0 / count) for _ in training_images]

    # Initialize for a pre-existing partially trained classifier.  Normally
    # empty (so this loop is a no-op); paste a saved (band, threshold, alpha)
    # list here to resume an interrupted training run.
    full_classifier = []
    for (c, t, alpha) in full_classifier:
        band_splits[c].append(t)
        band_splits[c] = sorted(band_splits[c])
        total = 0
        for i in range(len(training_images)):
            weights[i] = weights[i].multiply(
                apply_classifier(training_images[i], c, t)
                .multiply(transformed_masks[i]).multiply(-alpha).exp())
            total += safe_get_info(weights[i].reduceRegion(
                ee.Reducer.sum(), training_domains[i].bounds, EVAL_RESOLUTION))['constant']
        for i in range(len(training_images)):
            weights[i] = weights[i].divide(total)

    while len(full_classifier) < NUM_CLASSIFIERS_TO_TRAIN:
        best = None
        for band_name in bands:  # Consider one weak classifier per band
            # Find the best threshold that we can choose for this band.
            (threshold, ind, error) = _find_adaboost_optimal_threshold(
                training_domains, training_images, water_masks, band_name,
                weights, band_splits[band_name])

            print('%s found threshold %g with error %g' % (band_name, threshold, error))

            # Record the band/threshold combination whose error is farthest
            # from 0.5: classifiers that are always wrong are also useful,
            # since they receive a negative alpha.
            if (best is None) or (abs(0.5 - error) > abs(0.5 - best[0])):
                best = (error, band_name, threshold, ind)

        # Add an additional split point to search between for thresholds.
        band_splits[best[1]].insert(best[3], best[2])

        print('---> Using %s < %g. Error %g.' % (best[1], best[2], best[0]))
        # Standard Adaboost weak-classifier weight.
        alpha = 0.5 * math.log((1 - best[0]) / best[0])
        classifier = (best[1], best[2], alpha)
        full_classifier.append(classifier)
        print('---> Now have %d out of %d classifiers.' % (len(full_classifier), NUM_CLASSIFIERS_TO_TRAIN))

        # Update the per-pixel weights and renormalize them to sum to one.
        weights = [weights[i].multiply(
                       apply_classifier(training_images[i], classifier[0], classifier[1])
                       .multiply(transformed_masks[i]).multiply(-alpha).exp())
                   for i in range(len(training_images))]
        totals = [safe_get_info(weights[i].reduceRegion(
                      ee.Reducer.sum(), training_domains[i].bounds, EVAL_RESOLUTION))['constant']
                  for i in range(len(training_images))]
        total = sum(totals)
        weights = [w.divide(total) for w in weights]

    print(full_classifier)
def adaboost_learn(ignored=None, ignored2=None):
    '''Train an Adaboost classifier on a fixed set of MODIS training domains.

    NOTE(review): this is a near-duplicate of an adaboost_learn defined
    earlier in this file; the only difference was that this copy used Python 2
    print *statements* (a SyntaxError under Python 3).  The prints have been
    converted to print() calls for consistency; the duplicate definition still
    shadows the earlier one and should probably be removed entirely.

    Trains NUM_CLASSIFIERS_TO_TRAIN weak classifiers, each a (band, threshold,
    alpha) triple, and prints the resulting list.  Both parameters are unused;
    they exist only to match the calling convention of the other *_learn tools.
    '''
    EVAL_RESOLUTION = 250  # Meters per pixel for the reduceRegion evaluations below

    # Learn this many weak classifiers
    NUM_CLASSIFIERS_TO_TRAIN = 50

    # Load inputs for this domain and preprocess.
    # - Kashmore does not have a good unflooded comparison location so it is
    #   left out of the training.
    # This is a cleaned up set where all the permanent water masks are known
    # to be decent.  (Alternate radar / Skybox / lake training sets that were
    # kept here as commented-out code have been removed; see version control.)
    all_problems = ['unflooded_mississippi_2010.xml',
                    'unflooded_new_orleans_2004.xml',
                    'sf_bay_area_2011_4.xml',
                    'unflooded_bosnia_2013.xml']
    all_domains = [Domain('config/domains/modis/' + d) for d in all_problems]

    training_domains = all_domains

    # Default to the global permanent water mask as truth, but prefer a manual
    # ground-truth mask whenever the domain provides one.
    water_masks = [modis_utilities.get_permanent_water_mask() for d in training_domains]
    for i, domain in enumerate(all_domains):
        if domain.ground_truth is not None:
            water_masks[i] = domain.ground_truth

    training_images = [_create_adaboost_learning_image(d, modis_utilities.compute_modis_indices(d))
                       for d in training_domains]

    # Convert the 0/1 water masks to -1/+1 so they plug directly into the
    # Adaboost exponential weight update below.
    transformed_masks = [water_mask.multiply(2).subtract(1) for water_mask in water_masks]

    bands = safe_get_info(training_images[0].bandNames())
    print('Computing threshold ranges.')
    band_splits = __compute_threshold_ranges(training_domains, training_images, water_masks, bands)

    # Each input pixel in the training images starts with an equal weight.
    counts = [safe_get_info(training_images[i].select('b1').reduceRegion(
                  ee.Reducer.count(), training_domains[i].bounds, EVAL_RESOLUTION))['b1']
              for i in range(len(training_images))]
    count = sum(counts)
    weights = [ee.Image(1.0 / count) for _ in training_images]

    # Initialize for a pre-existing partially trained classifier.  Normally
    # empty (so this loop is a no-op); paste a saved (band, threshold, alpha)
    # list here to resume an interrupted training run.
    full_classifier = []
    for (c, t, alpha) in full_classifier:
        band_splits[c].append(t)
        band_splits[c] = sorted(band_splits[c])
        total = 0
        for i in range(len(training_images)):
            weights[i] = weights[i].multiply(
                apply_classifier(training_images[i], c, t)
                .multiply(transformed_masks[i]).multiply(-alpha).exp())
            total += safe_get_info(weights[i].reduceRegion(
                ee.Reducer.sum(), training_domains[i].bounds, EVAL_RESOLUTION))['constant']
        for i in range(len(training_images)):
            weights[i] = weights[i].divide(total)

    while len(full_classifier) < NUM_CLASSIFIERS_TO_TRAIN:
        best = None
        for band_name in bands:  # Consider one weak classifier per band
            # Find the best threshold that we can choose for this band.
            (threshold, ind, error) = _find_adaboost_optimal_threshold(
                training_domains, training_images, water_masks, band_name,
                weights, band_splits[band_name])

            print('%s found threshold %g with error %g' % (band_name, threshold, error))

            # Record the band/threshold combination whose error is farthest
            # from 0.5: classifiers that are always wrong are also useful,
            # since they receive a negative alpha.
            if (best is None) or (abs(0.5 - error) > abs(0.5 - best[0])):
                best = (error, band_name, threshold, ind)

        # Add an additional split point to search between for thresholds.
        band_splits[best[1]].insert(best[3], best[2])

        print('---> Using %s < %g. Error %g.' % (best[1], best[2], best[0]))
        # Standard Adaboost weak-classifier weight.
        alpha = 0.5 * math.log((1 - best[0]) / best[0])
        classifier = (best[1], best[2], alpha)
        full_classifier.append(classifier)
        print('---> Now have %d out of %d classifiers.' % (len(full_classifier), NUM_CLASSIFIERS_TO_TRAIN))

        # Update the per-pixel weights and renormalize them to sum to one.
        weights = [weights[i].multiply(
                       apply_classifier(training_images[i], classifier[0], classifier[1])
                       .multiply(transformed_masks[i]).multiply(-alpha).exp())
                   for i in range(len(training_images))]
        totals = [safe_get_info(weights[i].reduceRegion(
                      ee.Reducer.sum(), training_domains[i].bounds, EVAL_RESOLUTION))['constant']
                  for i in range(len(training_images))]
        total = sum(totals)
        weights = [w.divide(total) for w in weights]

    print(full_classifier)