示例#1
0
def test_impute():
    """Run a full train/impute cycle and check the expected rasters are written.

    Fits a small random forest on the module-level ``response_raster`` and
    ``explanatory_rasters``, imputes over the same explanatory rasters into
    ``TMPOUT``, and asserts that the output GeoTIFFs exist afterwards.
    """
    from pyimpute import load_training_rasters, load_targets, impute
    from sklearn.ensemble import RandomForestClassifier

    # Training matrix and labels taken from the response/explanatory rasters
    features, labels = load_training_rasters(response_raster,
                                             explanatory_rasters)

    forest = RandomForestClassifier(n_estimators=10, n_jobs=1)
    forest.fit(features, labels)

    # The same explanatory rasters double as the prediction targets
    targets, info = load_targets(explanatory_rasters)

    impute(targets, forest, info,
           outdir=TMPOUT,
           linechunk=400,
           class_prob=True,
           certainty=True)

    # One assert per file so a failure names the missing output
    for expected in ("responses.tif", "certainty.tif", "probability_90.tif"):
        assert os.path.exists(os.path.join(TMPOUT, expected))
def main():
    """Fit an ExtraTrees model on a stratified raster sample and impute current conditions.

    Steps: build the explanatory/response raster paths under ``TRAINING_DIR``,
    draw a stratified sample of the response raster, train and cross-validate
    the classifier, then write imputed rasters to ``_aez_output_current``.

    NOTE(review): the unconditional ``sys.exit()`` after the first imputation
    means the 2070s projection loop below it never runs. It is preserved
    as-is -- confirm whether that is intentional.
    """
    # Explanatory layers; each one is read from <TRAINING_DIR>/<name>/hdr.adf
    field_names = "tmin12c tmax8c p_ph_c pmean_wntrc pmean_sumrc irr_lands gt_demc grwsnc d2u2c".split()
    raster_paths = [os.path.join(TRAINING_DIR, name, "hdr.adf") for name in field_names]
    response_path = os.path.join(TRAINING_DIR, 'iso_zns3-27/hdr.adf')

    # Stratified random sample of the response raster
    sample = stratified_sample_raster(
        response_path, target_sample_size=20, min_sample_proportion=0.01)

    # Training data restricted to the sampled subset
    features, labels = load_training_rasters(response_path, raster_paths, sample)
    print(features.shape, labels.shape)

    model = ExtraTreesClassifier(n_estimators=10, n_jobs=1)
    model.fit(features, labels)
    print(model)

    # Report mean accuracy and two standard deviations, both as percentages
    n_folds = 5
    cv_scores = cross_validation.cross_val_score(model, features, labels, cv=n_folds)
    summary = (n_folds, cv_scores.mean() * 100, cv_scores.std() * 200)
    print("%d-fold Cross Validation Accuracy: %0.2f (+/- %0.2f)" % summary)

    # ... Other model assessment

    # Predict over the same rasters the model was trained on
    print("Imputing response rasters FOR CURRENT DATA")
    current_xs, current_info = load_targets(raster_paths)
    impute(current_xs, model, current_info,
           outdir="_aez_output_current",
           linechunk=400,
           class_prob=True,
           certainty=True)

    sys.exit()

    # Unreachable while the sys.exit() above is in place.
    for scenario in ['2070s']:
        print("Loading target explanatory raster data, swapping out for %s climate data" % scenario)

        scenario_dir = os.path.join(TRAINING_DIR, "../RCP85/%s/" % scenario)

        # Only the climate layers are replaced; the remaining layers keep
        # their current-data paths, and the original field order is retained.
        swapped = OrderedDict(zip(field_names, raster_paths))
        for layer in "grwsnc pmean_sumrc pmean_wntrc tmax8c tmin12c".split():
            swapped[layer] = scenario_dir + layer + "/hdr.adf"

        future_xs, future_info = load_targets(swapped.values())

        print("Imputing response rasters")
        impute(future_xs, model, future_info,
               outdir="_aez_output_%s" % scenario,
               linechunk=40,
               class_prob=True,
               certainty=True)
示例#3
0
def main():
    """Train an ExtraTrees classifier from vector training data and impute rasters.

    The training matrix is extracted from ``DF.shp`` (field ``GRIDCODE``)
    against the ``current_*.img`` explanatory rasters and cached as JSON in
    ``_cached_training.json`` so later runs can skip the extraction. After
    training and a 5-fold cross validation, responses are imputed for the
    current rasters and for each future year's ``Ensemble_rcp60_y<year>_*.img``
    rasters.
    """
    # Define the known data points or "training" data
    explanatory_fields = "d100 dd0 dd5 fday ffp gsdd5 gsp map mat_tenths mmax_tenths mmindd0 mmin_tenths mtcm_tenths mtwm_tenths sday".split()
    explanatory_rasters = [os.path.join(TRAINING_DIR, "current_" + r + ".img") for r in explanatory_fields]
    response_shapes = os.path.join(TRAINING_DIR, "DF.shp")

    # Load the training data, preferring the on-disk cache when it is usable
    cache_path = "_cached_training.json"
    try:
        # Context manager so the cache file handle is always closed
        with open(cache_path) as fh:
            cached = json.load(fh)
        train_xs = np.array(cached['train_xs'])
        train_y = np.array(cached['train_y'])
    except (IOError, ValueError, KeyError):
        # Missing cache (IOError), truncated/invalid JSON (ValueError) or a
        # cache without the expected keys (KeyError): rebuild and rewrite it.
        train_xs, train_y = load_training_vector(response_shapes,
            explanatory_rasters, response_field='GRIDCODE')
        cache = {'train_xs': train_xs.tolist(), 'train_y': train_y.tolist()}
        with open(cache_path, 'w') as fh:
            json.dump(cache, fh)

    print(train_xs.shape, train_y.shape)

    # Train the classifier
    clf = ExtraTreesClassifier(n_estimators=120, n_jobs=3)
    clf.fit(train_xs, train_y)
    print(clf)

    # Cross validate; report mean accuracy and two standard deviations in percent
    # NOTE(review): if `cross_validation` is sklearn.cross_validation, that
    # module was removed in scikit-learn 0.20 in favour of
    # sklearn.model_selection -- confirm against the file's imports.
    k = 5
    scores = cross_validation.cross_val_score(clf, train_xs, train_y, cv=k)
    print("%d-fold Cross Validation Accuracy: %0.2f (+/- %0.2f)" % (k, scores.mean() * 100, scores.std() * 200))

    # Run the model on the current data; i.e. predict current conditions
    print("Imputing response rasters FOR CURRENT DATA")
    target_xs, raster_info = load_targets(explanatory_rasters)

    impute(target_xs, clf, raster_info, outdir="_usfs_output_current",
           linechunk=400, class_prob=True, certainty=True)

    years = ['2060']
    for year in years:
        print("Loading target explanatory raster data, swapping out for %s climate data" % year)

        # Swap out for future climate rasters (same fields, same order)
        new_explanatory_rasters = [os.path.join(TRAINING_DIR, "Ensemble_rcp60_y%s_%s.img" % (year, r))
                                    for r in explanatory_fields]

        target_xs, raster_info = load_targets(new_explanatory_rasters)

        print("Imputing response rasters")
        impute(target_xs, clf, raster_info, outdir="_usfs_output_%s" % year,
               linechunk=400, class_prob=True, certainty=True)
示例#4
0
def test_impute():
    """Run a full train/impute cycle and check the expected rasters are written.

    Fits a small random forest on the module-level ``response_raster`` and
    ``explanatory_rasters``, imputes over the same explanatory rasters into
    ``TMPOUT``, and asserts that the output GeoTIFFs exist afterwards.
    """
    from pyimpute import load_training_rasters, load_targets, impute
    from sklearn.ensemble import RandomForestClassifier

    # Training matrix and labels taken from the response/explanatory rasters
    features, labels = load_training_rasters(response_raster,
                                             explanatory_rasters)

    forest = RandomForestClassifier(n_estimators=10, n_jobs=1)
    forest.fit(features, labels)

    # The same explanatory rasters double as the prediction targets
    targets, info = load_targets(explanatory_rasters)

    impute(targets,
           forest,
           info,
           outdir=TMPOUT,
           linechunk=400,
           class_prob=True,
           certainty=True)

    # One assert per file so a failure names the missing output
    for expected in ("responses.tif", "certainty.tif", "probability_90.tif"):
        assert os.path.exists(os.path.join(TMPOUT, expected))
示例#5
0
def main():
    """Train an ExtraTrees classifier from vector training data and impute rasters.

    The training matrix is extracted from ``DF.shp`` (field ``GRIDCODE``)
    against the ``current_*.img`` explanatory rasters and cached as JSON in
    ``_cached_training.json`` so later runs can skip the extraction. After
    training and a 5-fold cross validation, responses are imputed for the
    current rasters and for each future year's ``Ensemble_rcp60_y<year>_*.img``
    rasters.
    """
    # Define the known data points or "training" data
    explanatory_fields = "d100 dd0 dd5 fday ffp gsdd5 gsp map mat_tenths mmax_tenths mmindd0 mmin_tenths mtcm_tenths mtwm_tenths sday".split(
    )
    explanatory_rasters = [
        os.path.join(TRAINING_DIR, "current_" + r + ".img")
        for r in explanatory_fields
    ]
    response_shapes = os.path.join(TRAINING_DIR, "DF.shp")

    # Load the training data, preferring the on-disk cache when it is usable
    cache_path = "_cached_training.json"
    try:
        # Context manager so the cache file handle is always closed
        with open(cache_path) as fh:
            cached = json.load(fh)
        train_xs = np.array(cached['train_xs'])
        train_y = np.array(cached['train_y'])
    except (IOError, ValueError, KeyError):
        # Missing cache (IOError), truncated/invalid JSON (ValueError) or a
        # cache without the expected keys (KeyError): rebuild and rewrite it.
        train_xs, train_y = load_training_vector(response_shapes,
                                                 explanatory_rasters,
                                                 response_field='GRIDCODE')
        cache = {'train_xs': train_xs.tolist(), 'train_y': train_y.tolist()}
        with open(cache_path, 'w') as fh:
            json.dump(cache, fh)

    print(train_xs.shape, train_y.shape)

    # Train the classifier
    clf = ExtraTreesClassifier(n_estimators=120, n_jobs=3)
    clf.fit(train_xs, train_y)
    print(clf)

    # Cross validate; report mean accuracy and two standard deviations in percent
    # NOTE(review): if `cross_validation` is sklearn.cross_validation, that
    # module was removed in scikit-learn 0.20 in favour of
    # sklearn.model_selection -- confirm against the file's imports.
    k = 5
    scores = cross_validation.cross_val_score(clf, train_xs, train_y, cv=k)
    print("%d-fold Cross Validation Accuracy: %0.2f (+/- %0.2f)" %
          (k, scores.mean() * 100, scores.std() * 200))

    # Run the model on the current data; i.e. predict current conditions
    print("Imputing response rasters FOR CURRENT DATA")
    target_xs, raster_info = load_targets(explanatory_rasters)

    impute(target_xs,
           clf,
           raster_info,
           outdir="_usfs_output_current",
           linechunk=400,
           class_prob=True,
           certainty=True)

    years = ['2060']
    for year in years:
        print(
            "Loading target explanatory raster data, swapping out for %s climate data"
            % year)

        # Swap out for future climate rasters (same fields, same order)
        new_explanatory_rasters = [
            os.path.join(TRAINING_DIR, "Ensemble_rcp60_y%s_%s.img" % (year, r))
            for r in explanatory_fields
        ]

        target_xs, raster_info = load_targets(new_explanatory_rasters)

        print("Imputing response rasters")
        impute(target_xs,
               clf,
               raster_info,
               outdir="_usfs_output_%s" % year,
               linechunk=400,
               class_prob=True,
               certainty=True)