Example #1
import itertools
import random


def test_df_sorting():
    # Build the same 3-column random frame in both backends
    # (df_c / df_p, e.g. a GPU dataframe and its pandas mirror).
    df_c, df_p = create_df([
        random.sample(range(10, 300), 50),
        random.sample(range(10, 300), 50),
        random.sample(range(10, 300), 50)
    ])

    def do_sort(col, ascending):
        srt_c = df_c.sort_values(by=col, ascending=ascending)
        srt_p = df_p.sort_values(by=col, ascending=ascending)
        assert_eq(srt_c, srt_p)

    # single-column sorts, both directions
    for asc in [True, False]:
        for c in range(3):
            do_sort(c, asc)

    # multi-column sorts: every 2- and 3-column permutation
    for asc in [True, False]:
        for n in (2, 3):
            for cols in itertools.permutations(range(3), n):
                do_sort(list(cols), asc)
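The helpers create_df and assert_eq are not part of this excerpt. A minimal sketch of what the test appears to assume, taking df_c to be a GPU (cuDF) frame and df_p its pandas mirror (an assumption; any second dataframe backend would fit the same pattern):

import cudf
import pandas as pd
from pandas.testing import assert_frame_equal

def create_df(columns):
    # Hypothetical helper: build the same frame in both backends,
    # one integer-named column per input list.
    data = {i: col for i, col in enumerate(columns)}
    return cudf.DataFrame(data), pd.DataFrame(data)

def assert_eq(df_c, df_p, sort=False):
    # Hypothetical helper: compare the cuDF result against pandas.
    left, right = df_c.to_pandas(), df_p
    if sort:  # row order may legitimately differ, e.g. after a join
        left, right = left.sort_index(), right.sort_index()
    assert_frame_equal(left, right)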
Example #2
import itertools
import random


def test_df_joining():
    # Two independent 3-column random frames, each built in both backends.
    df_c_1, df_p_1 = create_df([
        random.sample(range(10, 300), 50),
        random.sample(range(10, 300), 50),
        random.sample(range(10, 300), 50)
    ])

    df_c_2, df_p_2 = create_df([
        random.sample(range(10, 300), 50),
        random.sample(range(10, 300), 50),
        random.sample(range(10, 300), 50)
    ])

    def do_join(col):
        # set_index without inplace=True, so each call joins fresh copies
        # of the original frames instead of mutating them.
        srt_c = df_c_1.set_index(col).join(df_c_2.set_index(col),
                                           lsuffix="l", rsuffix="r")
        srt_p = df_p_1.set_index(col).join(df_p_2.set_index(col),
                                           lsuffix="l", rsuffix="r")
        # Row order after a join is not guaranteed, hence sort=True.
        assert_eq(srt_c, srt_p, sort=True)

    # every 1-, 2- and 3-column join key
    for n in (1, 2, 3):
        for cols in itertools.permutations(range(3), n):
            do_join(list(cols))
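Using set_index without inplace=True keeps df_c_1 and df_c_2 intact between calls, so every key combination joins the same source frames; sort=True in assert_eq accounts for join implementations that return rows in different orders.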
Example #3
import os

import matplotlib.pyplot as plt


def plot_corr_model(y, y_pred, name='plotcorr.png'):
    """Scatter-plot predicted vs. true values for each target with a y = x reference line."""
    df = utils.create_df(y, y_pred)
    font_size = 22

    labelx = [r'$\phi_{cep}$', r'$\theta$', r'$L_p$']
    labely = [r'predicted $\phi_{cep}$', r'predicted $\theta$', r'predicted $L_p$']

    # pnt / pnp are module-level lists naming the true and predicted
    # columns of df (defined elsewhere in the source module).
    for i in range(len(pnt)):
        fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 7), dpi=200)

        ax.scatter(df[pnt[i]], df[pnp[i]], s=2)

        # y = x reference line spanning the data range
        red_line = [df[pnt[i]].min(), df[pnt[i]].max()]
        ax.plot(red_line, red_line, color='red')

        ax.set_xlabel(labelx[i], fontsize=font_size)
        ax.set_ylabel(labely[i], fontsize=font_size)

        ax.tick_params(axis='both', which='major', labelsize=15)

        plt.tight_layout()
        stem = os.path.splitext(name)[0]  # strip the extension robustly
        plt.savefig('../picture/{}{}.png'.format(stem, i))
        plt.close(fig)
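pnt and pnp are read from module scope and do not appear in this excerpt; a plausible definition, purely an assumption to make the snippet self-contained, would be:

# Hypothetical column-name lists (the real ones live elsewhere in the module):
pnt = ['phi_cep', 'theta', 'L_p']                 # true-value columns of df
pnp = ['phi_cep_pred', 'theta_pred', 'L_p_pred']  # predicted-value columns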
Example #4
k = 10  # number of cross-validation folds

# Per-fold models, data and metrics for the three training variants
# (plain FCNN, PCA-reduced inputs, augmented data).
model_fcnn, model_pca, model_aug = [], [], []
data_fcnn, data_pca, data_aug = {}, {}, {}
accuracy_fcnn, accuracy_pca, accuracy_aug = [], [], []

# Bounds for undoing the min-max scaling of the three targets.
y_min = np.array([0, 0, 0.08])
y_max = np.array([180, 75, 0.78])

# Evaluate each fold with all three model variants.
for i in range(k):
    model_fcnn.append(load_model(fcnn_dir + 'model' + str(i) +'.h5'))
    model_pca.append(load_model(pca_dir + 'model' + str(i) +'.h5'))
    model_aug.append(load_model(aug_dir + 'model' + str(i) +'.h5'))

    x, y = load_data(fcnn_dir + 'data' + str(i) + '.h5')
    data_fcnn['x'], data_fcnn['y'] = x, y
    y_pred = model_fcnn[i].predict(x)
    y_pred, y = utils.postprocessing(y_pred, y, y_min, y_max)
    accuracy_fcnn.append(utils.metric(utils.create_df(y, y_pred)))

    x, y = load_data(pca_dir + 'data' + str(i) + '.h5')
    data_pca['x'], data_pca['y'] = x, y
    y_pred = model_pca[i].predict(x)
    y_pred, y = utils.postprocessing(y_pred, y, y_min, y_max)
    accuracy_pca.append(utils.metric(utils.create_df(y, y_pred)))

    x, y = load_data(aug_dir + 'data' + str(i) + '.h5')
    data_aug['x'], data_aug['y'] = x, y
    y_pred = model_aug[i].predict(x)
    y_pred, y = utils.postprocessing(y_pred, y, y_min, y_max)
    accuracy_aug.append(utils.metric(utils.create_df(y, y_pred)))

print('fcnn')
print(get_mean_result(accuracy_fcnn))
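get_mean_result is defined elsewhere in the source. Given that utils.metric evidently returns a nested mapping indexed as [method][criterion] (Example #8 reads it with ['average']['MRE']), a minimal sketch, with all names assumed, might be:

import numpy as np

def get_mean_result(fold_metrics, met='average', crit='MRE'):
    # Hypothetical: average one scalar criterion over the k folds.
    return np.mean([m[met][crit] for m in fold_metrics])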
Example #5
    text_feature_path = args.text_feature_path
    label_des_path = args.label_des_path
    os.makedirs(save_root, exist_ok=True)
    ###################################################################################################################
    if text_edge_weight:
        save_seed_idxname = '{}/seed_{}knn_k{}_sw{}_top{}.npy'.format(
            save_root, text_dist_def, text_k, text_self_weight, text_topk)
    else:
        save_seed_idxname = '{}/seed_{}knn_k{}_sw{}negw_top{}.npy'.format(
            save_root, text_dist_def, text_k, text_self_weight, text_topk)

    textdf_file_path = os.path.join(save_root, 'data_frame.pkl')
    if os.path.exists(textdf_file_path):
        df = pd.read_pickle(textdf_file_path)
    else:
        df = utils.create_df(text_imglist_path, textdf_file_path)
    class_num = np.unique(np.array(df['y']))  # unique class labels
    if os.path.exists(save_seed_idxname):
        seed_index = np.load(save_seed_idxname)
        print('Read Seed File from {}.'.format(save_seed_idxname), flush=True)
    else:
        if text_edge_weight:
            save_textknn_filename = '{}/textknn_{}knn_k{}_sw{}'.format(
                save_root, text_dist_def, text_k, text_self_weight)
        else:
            save_textknn_filename = '{}/textknn_{}knn_k{}_sw{}negw'.format(
                save_root, text_dist_def, text_k, text_self_weight)
        # select seed and return index
        if text_k > 0:
            if use_multisource:
                df = utils.get_knn_conf_multisource(
Example #6
    def process_data(self):
        """
        Run a processing
        :return:
        """
        # ===========================================
        # CREATE PANDAS DATAFRAME FROM RAW CSV FILE
        # ===========================================
        df_raw = ut.create_df(self.raw_csv_filename)

        # ===========================================
        # GENERATE PROCESSED CSV
        # ===========================================
        outputs = self.create_ouput_files_raw_csv_processing()
        output_dwellings = outputs['output_dwellings']
        output_pois = outputs['output_pois']

        ut.sanitize_and_separate_df_pois(
            df=df_raw,
            output_file_dwelling=output_dwellings,
            output_file_pois=output_pois,
            struct_type_col=self.params['struct_type_col'],
            null_feat_cat_replacement="missing",
            cols_to_keep_df=self.params['cols_df'],
            cols_to_keep_poi=self.params['cols_poi'],
            new_col_names_df=self.params['new_names_df'],
            new_col_names_poi=self.params['new_names_poi'],
            residential_struct_category=self.params['res_struct_val'])
        # ===========================================
        # SPLIT CSV BY WARD
        # ===========================================
        if self.district:
            prov = self.province
            dist = self.district
            self.dir_with_ward_subdirs = self.ea_demarcation_dir.joinpath(
                prov, dist)

        ward_id_col = self.params['ward_id_col']
        ut.split_csv_into_wards(csv_file=output_dwellings,
                                ward_id_col=ward_id_col,
                                output_folder=self.dir_with_ward_subdirs,
                                suffix="df")
        ut.split_csv_into_wards(csv_file=output_pois,
                                ward_id_col=ward_id_col,
                                output_folder=self.dir_with_ward_subdirs,
                                suffix="poi")

        # ===========================================
        # CREATE SHP FILES
        # ===========================================
        ut.create_shp_for_each_ward(self.dir_with_ward_subdirs,
                                    crs=self.params["crs"],
                                    lon_col=self.params["lon"],
                                    lat_col=self.params["lat"])

        # =========================================================
        # APPEND HH LISTING BASED POPULATION COUNT, STRUCTURE COUNT
        # AND BUILDING COUNTS FROM SATELLITE IMAGERY
        # ==========================================================
        self.append_building_attributes_to_ward_level_ea_shp()
Example #7
# If global_color_histograms is enabled, each dataset image is converted to
# the chosen colour space and its global colour histogram is saved under the
# results folder (created if needed).
global_color_histograms = False
# Final evaluation and test toggles
performEvaluation = 0
performTest = 1
# Which of the three final methods is used: 1 = BGR, 2 = LUV, 3 = Wavelet
method = 2
# Colour space and pyramid level for the chosen method
if method == 1:
    spaceType = "BGR"
    level = 2
elif method == 2:
    spaceType = "LUV"
    level = 2

dfDataset = create_df(pathDS)
dfQuery = create_df(pathQueries)
dfQueryTest = create_df(pathQueriesTest)

if global_color_histograms:
    for i in range(len(dfDataset)):
        dfSingle = dfDataset.iloc[i]
        imgBGR = get_full_image(dfSingle, pathDS)
        imageName = dfSingle['Image']
        channel0Single, channel1Single, channel2Single = global_color_hist(
            imgBGR, spaceType, pathprep_resultDS, imageName)
        save_global_color_hist(channel0Single, channel1Single, channel2Single,
                               dfSingle, spaceType, imageName, pathResults)

if build_dataset:
    # Read Images
    ...
Example #8
os.makedirs('../result/pca_enumeration/', exist_ok=True)
path = '../result/pca_enumeration/'

y_min = np.array([0, 0, 0.08])
y_max = np.array([180, 75, 0.78])

accuracy = {}  # per-size metrics, keyed by training-set size

# Step down through training-set sizes: 100, 90, ..., 10.
for i in range(100, 2, -10):
    acc = []
    mod = []
    for j in range(3):
        mod.append(load_model(path + 'model' + str(i) + '_' + str(j) + '.h5'))
        x, y = load_data(path + 'data' + str(i) + '_' + str(j) + '.h5')

        y_pred = mod[j].predict(x)
        y_pred, y = utils.postprocessing(y_pred, y, y_min, y_max)
        acc.append(utils.metric(utils.create_df(y, y_pred)))

    accuracy[i] = acc

result = []
met = 'average'
crit = 'MRE'

for k in accuracy.keys():
    result.append([
        accuracy[k][0][met][crit], accuracy[k][1][met][crit],
        accuracy[k][2][met][crit]
    ])

df_accuracy = pd.DataFrame(result, index=list(accuracy.keys()))
df_accuracy = df_accuracy.T  # columns: training-set sizes; rows: the three repeats
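After the transpose, df_accuracy holds one column per training-set size and one row per repeat, a convenient shape for comparing the chosen criterion across sizes.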
Example #9
    # Standardise features using statistics from the training set only,
    # then project onto the top 24 principal components; the test set
    # reuses both fitted transforms (no leakage).
    scaler = StandardScaler()
    scaler.fit(x_train)

    x_train = scaler.transform(x_train)
    x_test = scaler.transform(x_test)

    pca = PCA(n_components=24)
    pca.fit(x_train)

    x_train = pca.transform(x_train)
    x_test = pca.transform(x_test)

    #x_train = x_train[:, 3:]
    #x_test = x_test[:, 3:]

    print(x_train.shape)

    x_train, x_test, x_min, x_max = utils.preprocessing(x_train, x_test)
    y_train, y_test, y_min, y_max = utils.preprocessing(y_train, y_test)

    model, history = train_nn(x_train, y_train, epochs)

    y_pred = model.predict(x_test)
    y_pred, y = utils.postprocessing(y_pred, y_test, y_min, y_max)
    print(utils.metric(utils.create_df(y, y_pred)))

    #model.save(path + 'model' + str(i) +'.h5')
    #utils.save_data(x_test, y_test, i, path)
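The standardise-then-project steps above can also be written as a scikit-learn Pipeline, which guarantees the test split only ever sees transforms fitted on the training split. A sketch of the equivalent preprocessing, with x_train/x_test as in the snippet:

from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

reduce_dims = make_pipeline(StandardScaler(), PCA(n_components=24))
x_train = reduce_dims.fit_transform(x_train)  # fit on the training split only
x_test = reduce_dims.transform(x_test)        # reuse the fitted transforms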