sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from config import get_config
from grid.create_grid import RegionGrid
from model.RegionEncoder import RegionEncoder
from model.utils import write_embeddings
import torch
import numpy as np

# Number of runs: taken from the first command-line argument when one is
# supplied, otherwise 15.
N_RUNS = int(sys.argv[1]) if len(sys.argv) > 1 else 15


# Build the region grid from the project configuration, then load its image
# data (with std_img=True) and its weighted matrix.
c = get_config()
region_grid = RegionGrid(config=c)
region_grid.load_img_data(std_img=True)
region_grid.load_weighted_mtx()

# Output location: the 'data_dir_main' and 'hyperparams_dir' config values
# concatenated, plus a trailing slash.
OUT_DIR = c['data_dir_main'] + c['hyperparams_dir'] + "/"

# hyperparameters
# -- sizes read from the loaded grid and from the config
n_nodes = len(region_grid.regions)
n_nodal_features = region_grid.feature_matrix.shape[1]
h_dim_size = int(c['hidden_dim_size'])
# -- fixed settings
h_dim_graph, h_dim_img = 64, 32
context_gcn, neg_samples_gcn = 4, 10
epochs, learning_rate = 12, .1
Exemplo n.º 2
0
else:
    task = sys.argv[1]
    estimator = sys.argv[2]
    try:
       n_epochs = int(sys.argv[3])
    except IndexError:
        n_epochs = 25


# Validate the estimator name explicitly: an `assert` is removed when Python
# runs with -O, which would let an unsupported name through silently.
_SUPPORTED_ESTIMATORS = ('xgb', 'lasso', 'rf', 'mlp', 'ridge')
if estimator not in _SUPPORTED_ESTIMATORS:
    raise ValueError("Unknown estimator '{}'; expected one of: {}".format(
        estimator, ", ".join(_SUPPORTED_ESTIMATORS)))
# Number of cross-validation folds.
n_folds = 5
print("K-Fold Learning - {}".format(estimator))


# Load the project configuration and build the region grid, then load its
# weighted matrix.
c = get_config()
region_grid = RegionGrid(config=c)
region_grid.load_weighted_mtx()

# The grid's feature matrix as a DataFrame, indexed by the values of
# idx_coor_map.
# presumably one row per region, keyed by its grid coordinate -- TODO confirm
tmp = pd.DataFrame(region_grid.feature_matrix, index = region_grid.idx_coor_map.values())

# Embedding file paths: the first two come straight from the config; the rest
# are fixed file names appended to the 'data_dir_main' config value.
autoencoder_embed = c['autoencoder_embedding_file']
regionencoder_embed = c['embedding_file']
gcn_all_embed = '{}gcn_all_embedding.txt'.format(c['data_dir_main'])
gcn_sg_embed = '{}gcn_skipgram_embedding.txt'.format(c['data_dir_main'])
gcn_flow_embed = '{}gcn_flow_embedding.txt'.format(c['data_dir_main'])
concat_embed ='{}concat_global_embedding.txt'.format(c['data_dir_main'])



if task == 'house_price':
    input_data = region_grid.load_housing_data(c['housing_data_file'])
Exemplo n.º 3
0
            # total counts for each time bucket
            sums = m[v].sum(axis=1)
            s = sums.sum()
            # create probabilities
            p = [x / s for x in sums]
            walk = numpy.random.choice(m[v].shape[0], size=1, p=p)[0]
            walk_sequence.append(walk)
            # set this vertex to walk from
            v = walk
        walks.append(walk_sequence)
    return walks


# Load the project configuration.
c = get_config()
# Build the region grid from that configuration.
region_grid = RegionGrid(config=c)


def e_distance(d, decay=1.5):
    """Map a distance to a weight via exponential decay, ``exp(-decay * d)``.

    Args:
        d: Distance value (a real number).
        decay: Decay rate applied to ``d``. Defaults to 1.5, the rate that
            was previously hard-coded, so one-argument calls are unchanged.

    Returns:
        float: ``math.exp(-d * decay)``; 1.0 for ``d == 0`` and approaching
        0 as ``d`` grows when ``decay`` is positive.
    """
    # Imported locally so the function does not rely on a file-level import.
    import math
    return math.exp(-d * decay)


# Distance matrix from the grid, with e_distance supplied as the transform.
# presumably the transform is applied to each distance -- confirm in RegionGrid
distance_matrix = region_grid.get_distance_mtx(transform=e_distance)

# Flow matrix built from the raw flow file for the configured city; the call
# returns two values here.
# NOTE(review): the meaning of `time=8` and of `t` is not visible in this file.
W, t = region_grid.create_flow_matrix(c['raw_flow_file'],
                                      region_name=c['city_name'],
                                      time=8)

# grid_size squared.
# presumably the total number of grid cells -- TODO confirm
size = region_grid.grid_size * region_grid.grid_size
Exemplo n.º 4
0
from model.utils import write_embeddings
from model.GraphConvNet import GCN
from grid.create_grid import RegionGrid

# Optional positional CLI overrides: argv[1] = epochs, argv[2] = learning rate.
# Each value falls back to its own default when absent, so passing only the
# epoch count no longer raises IndexError on the missing learning rate.
epochs = int(sys.argv[1]) if len(sys.argv) > 1 else 25
learning_rate = float(sys.argv[2]) if len(sys.argv) > 2 else .1

# Sampling settings handed to the GCN training job below.
context_gcn = 4
neg_samples_gcn = 10

# Load the project configuration and build the region grid, then load its
# weighted matrix.
c = get_config()
region_grid = RegionGrid(config=c)
region_grid.load_weighted_mtx()

# Sizes: number of regions, the configured hidden size, and the number of
# columns in the grid's feature matrix.
n_nodes = len(region_grid.regions)
h_dim_size = int(c['hidden_dim_size'])
n_nodal_features = region_grid.feature_matrix.shape[1]



# Build the GCN, run its training job on the grid and keep what it returns.
# NOTE(review): context_gcn is passed as n_neg_samples and neg_samples_gcn as
# n_pos_samples; the variable names suggest these may be swapped -- confirm
# against GCN.run_train_job.
gcn = GCN(n_features=n_nodal_features, h_dim_size=h_dim_size)
embedding = gcn.run_train_job(region_grid, n_epoch=epochs, learning_rate=learning_rate, penalty=(1, 1),
                              n_neg_samples=context_gcn, n_pos_samples=neg_samples_gcn)

if torch.cuda.is_available():
    embedding = embedding.data.cpu().numpy()
else:
Exemplo n.º 5
0
import unittest

import numpy

from config import get_config
from grid.create_grid import RegionGrid
from image.image_retrieval import get_images_for_all_no_marker

c = get_config()
# Module-level grid shared by the tests below: built with 50 as the first
# positional argument, the configured POI file opened in binary mode, and
# load_imgs=False.
# NOTE(review): the handle returned by open() is never closed here -- confirm
# whether RegionGrid closes it, or wrap the construction in a `with` block.
TEST_REGION_GRID = RegionGrid(50,
                              poi_file=open(c["poi_file"], 'rb'),
                              load_imgs=False)


class CreateGridTest(unittest.TestCase):
    def test_image_creation(self):
        """Placeholder test: only prints a marker and asserts nothing."""
        print("Get Images")
        # The get_images_for_all_no_marker call is disabled, so this test
        # currently exercises no project code.
        # get_images_for_all_no_marker(TEST_REGION_GRID, "image_2")

    def test_basic_region(self):
        """Check that the shared test grid holds exactly 2500 regions."""
        # presumably 50 x 50 cells, given the 50 passed to RegionGrid -- TODO confirm
        self.assertEqual(len(TEST_REGION_GRID.regions), 2500)

    def test_region_grid_has_proper_lat_long(self):
        region = TEST_REGION_GRID.regions['0,49']
        self.assertIsNotNone(region.points['nw'])
        self.assertIsNotNone(region.points['sw'])
        self.assertIsNotNone(region.points['ne'])
        self.assertIsNotNone(region.points['se'])

        last_y = TEST_REGION_GRID.y_space[-1]
        first_x = TEST_REGION_GRID.x_space[0]
Exemplo n.º 6
0
            colors.append(c[cat])

    plt.figure(figsize=(3, 3))
    plt.subplots_adjust(bottom=.6)
    plt.bar(range(len(d)), list(d.values()), align='center', color=colors)
    plt.xticks(range(len(d)), list(d.keys()), rotation=90, fontsize=14)
    plt.yticks(fontsize=8)
    plt.ylim(y_lim)
    plt.legend(loc='best')
    plt.savefig(fname)
    plt.clf()
    plt.close()


# Load the project configuration and build the region grid.
c = get_config()
region_grid = RegionGrid(config=c)
# Number of neighbours requested per query row.
k = 5

# H: embedding loaded from the configured embedding file.
# X: the grid's feature matrix.
H = load_embedding(c['embedding_file'])
X = region_grid.feature_matrix
# Fit a ball-tree nearest-neighbour index on H, then query it with H itself.
# NOTE(review): when the query set is the training set, each row's nearest
# neighbour is normally the row itself, which would leave k - 1 real
# neighbours -- confirm this is intended.
nbrs = NearestNeighbors(n_neighbors=k, algorithm='ball_tree').fit(H)
distances, indices = nbrs.kneighbors(H)

for id, r in region_grid.regions.items():
    r_latent_nbrs = indices[r.index, 1:]

    all_nbrs_disconnected = 0
    for nbr in r_latent_nbrs:
        nbr_coor = region_grid.idx_coor_map[nbr]
        r_nbr = region_grid.regions[nbr_coor]
        #print("r: {}, nbr: {}".format(id, nbr_coor))
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from config import get_config
from grid.create_grid import RegionGrid
import numpy as np
import matplotlib.pyplot as plt
plt.rc('text', usetex=True)
plt.rc('font', family='serif')
import geopandas as gpd

# Load the project configuration and build the region grid.
c = get_config()

region_grid = RegionGrid(config=c)

# Housing data from the configured file, and check-in counts requested with
# metric="mean".
house_price = region_grid.load_housing_data(c['housing_data_file'])
print(house_price.head())
checkin = region_grid.get_checkin_counts(metric="mean")

# 99th percentile of the 'priceSqft' column, used below as an exclusive
# upper cut-off.
trim = np.percentile(house_price['priceSqft'], 99)
print(trim)

# Keep only rows strictly below the cut-off, then write both tables as CSV
# (index omitted) to paths prefixed with the 'data_dir_main' config value.
house_price = house_price[house_price['priceSqft'] < trim]
house_price.to_csv(c['data_dir_main'] + 'zillow_house_price_trim.csv',
                   index=False)
checkin.to_csv(c['data_dir_main'] + "checkin.csv", index=False)
print(checkin.head())

#checkin.checkins.hist()
#plt.show()
#plt.clf()
if __name__ == "__main__":
    # Train the image auto-encoder on the region grid's image tensor and write
    # the resulting embedding to the configured 'tile2vec_file'.
    #
    # Optional positional CLI arguments; each falls back to its own default
    # when absent, so a partial argument list no longer raises IndexError:
    #   argv[1]  epochs         (int,   default 25)
    #   argv[2]  learning rate  (float, default .05)
    #   argv[3]  batch size     (int,   default 25)

    import numpy as np
    from grid.create_grid import RegionGrid

    n_args = len(sys.argv)
    epochs = int(sys.argv[1]) if n_args > 1 else 25
    learning_rate = float(sys.argv[2]) if n_args > 2 else .05
    # NOTE(review): batch_size is parsed but never passed to run_train_job
    # below -- confirm whether the training job should receive it.
    batch_size = int(sys.argv[3]) if n_args > 3 else 25

    # Build the grid and load its image data with std_img=True.
    c = get_config()
    region_grid = RegionGrid(config=c)
    region_grid.load_img_data(std_img=True)

    img_tensor = torch.Tensor(region_grid.img_tensor)
    h_dim_size = int(c['hidden_dim_size'])

    auto_encoder = AutoEncoder(img_dims=(50, 50), h_dim_size=h_dim_size)
    embedding = auto_encoder.run_train_job(n_epoch=epochs, img_tensor=img_tensor, lr=learning_rate)

    # Tensor.cpu() returns the tensor unchanged when it already lives on the
    # CPU, so a single expression covers both the CUDA and the CPU case.
    embedding = embedding.data.cpu().numpy()

    write_embeddings(arr=embedding, n_nodes=region_grid.n_regions, fname=c['tile2vec_file'])
Exemplo n.º 9
0
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from model.RegionEncoder import RegionEncoder
from grid.create_grid import RegionGrid, get_images_for_grid
from config import get_config
import pickle
import torch
from model.utils import write_embeddings

# Main script if region area, or grid configuration changes

c = get_config()
# init region
region_grid = RegionGrid(config=c)
# Report the bounding box. The longitude range must end at lon_max; the
# previous version printed lat_max in that slot.
print("Initializing Region: Lat [{}, {}], Lon: [{}, {}]".format(region_grid.lat_min, region_grid.lat_max,
                                                                region_grid.lon_min, region_grid.lon_max))
# pull images
get_images_for_grid(region_grid, clear_dir=True)
# load images
region_grid.load_img_data(std_img=True)
# Compute weighted edge matrix W and pickle it to the configured flow-matrix file
W = region_grid.create_flow_matrix(c['raw_flow_file'], region_name=c['city_name'])
with open(c['flow_mtx_file'], 'wb') as f:
    pickle.dump(W, f)

# Load the weighted matrix, then load the image data again.
# NOTE(review): this second load_img_data() call omits std_img=True, unlike
# the call above -- confirm it does not discard the earlier load's effect.
region_grid.load_weighted_mtx()
region_grid.load_img_data()

# hyperparameters
n_nodes = len(region_grid.regions)
Exemplo n.º 10
0
        self.input = input
        self.format = format
        self.undirected = undirected
        self.output = output
        self.number_walks = number_walks
        self.representation_size = representation_size
        self.walk_length = walk_length

        self.max_memory_data_size = 1000000000
        self.seed = 1990
        self.window_size = 5
        self.workers = 1


# Configure project: load the configuration, build the region grid, and write
# its adjacency list and edge list to the configured files.
config = get_config()
region_grid = RegionGrid(config=config)
region_grid.write_adj_list(config['adj_list_file'])
region_grid.write_edge_list(config['edge_list_file'])

# Embedding size taken from the configured hidden dimension.
h_dim_size = int(config['hidden_dim_size'])

# Parameter object for the run: reads the adjacency list written above (as an
# undirected 'adjlist' graph) and writes to the configured 'deepwalk_file'.
# presumably these mirror DeepWalk's command-line options -- TODO confirm
args = DWparams(input=config['adj_list_file'],
                format='adjlist',
                undirected=True,
                output=config['deepwalk_file'],
                number_walks=100,
                representation_size=h_dim_size,
                walk_length=40)

# Run the job with those parameters.
process(args)
Exemplo n.º 11
0
            mse = mean_squared_error(y_true=y_test, y_pred=y_hat)
            mae = mean_absolute_error(y_test, y_hat)
            errors[i, 0] = mse
            errors[i, 1] = mae

        mean_cv_err = np.round(np.mean(errors, axis=0), 4)
        std_cv_err = np.round(np.std(errors, axis=0), 4)

        return mean_cv_err[0], std_cv_err[0], mean_cv_err[1], std_cv_err[
            1], errors


if __name__ == "__main__":

    c = get_config()
    region_grid = RegionGrid(config=c)
    region_grid.load_weighted_mtx()
    region_grid.load_housing_data(c['housing_data_file'])

    y_house = region_grid.get_target_var("house_price")
    results = []

    # Adjacency Average Model
    mse, mse_std, mae, mae_std, err_adj = cv_adj_mean(
        region_grid.regions, region_grid.matrix_idx_map, y_house)
    results.append(['adjacent avg', mse, mse_std, mae, mae_std])

    y_is_valid = np.where(~np.isnan(y_house))[0]
    y_house = y_house[y_is_valid]

    # Global Avg Model