def osm_nominatim(DataLocation):
    # Example input: r'C:\Users\Wuga\Documents\DATASETS\SFREHPDATA\HousingSales2012.csv'
    geolocator = Nominatim()
    df = DO.readfile(DataLocation)
    print(df)
    # Strip apartment/unit numbers ("#...") from the street addresses
    df['Location'] = [x[:x.find('#')] if x.find('#') != -1 else x for x in df['Location']]
    print(df)
    complete = df['Location'][3] + " San Francisco"
    print(complete)
    longlat = geolocator.geocode(complete)
    print((longlat.latitude, longlat.longitude))
    transed = []
    # Nominatim rate-limits requests, so sleep between calls instead of batching them
    for x in df['Location'][:5]:
        transed.append(geolocator.geocode(x + " San Francisco", timeout=10))
        time.sleep(2)
    transed_la = [x.latitude if x is not None else 0 for x in transed]
    transed_lo = [x.longitude if x is not None else 0 for x in transed]
    cood_price = list(zip(df['Price'], transed_la, transed_lo))
    df_cood_price = pd.DataFrame(data=cood_price, columns=['Price', 'Latitude', 'Longitude'])
    df_cood_price.to_csv(Constants.filelocations.OSM_NOMINATIM_HOUSEPRICE, header=True, index=False)
    print(df_cood_price)
    return
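
The function above assumes imports along these lines, a sketch based on the other snippets in this collection (geopy's Nominatim geocoder plus pandas, time, and the project's DataOperation/Constants modules); note that recent geopy releases also require a user_agent when constructing Nominatim.

import time

import pandas as pd
from geopy.geocoders import Nominatim

import Constants
import DataOperation as DO

# Recent geopy versions require an explicit user agent, e.g.:
# geolocator = Nominatim(user_agent="sf-house-price-geocoding")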

    
Example #2
def main():
    print("Begin")
    print("-----------------------------------------------------")
    # Fetch the raw data
    DataOperation.get_init_data()
    print("-----------------------------------------------------")
    # Check the data format
    if not DataOperation.data_check():
        print("Data format check failed; please check the source document")
        return
    print("Data format check passed")
    print("-----------------------------------------------------")
    # Run the overtime-pay calculation
    OvertimePayCalculation.calculate_data()
    print("-----------------------------------------------------")
    # Write the results back out
    DataOperation.set_data()
    print("-----------------------------------------------------")
    print("End")
def train(maxlen=100, embedding_dim=128):   # main training / testing routine
    start = time.time()
    l_trainX, r_trainX, ret_labels, l_topredictX, r_topredictX = do.load_data_bi_word2vec(maxlen=maxlen,
                                                                                          words_keep=50000,
                                                                                          validation_portion=0.,
                                                                                          embedding_dim=embedding_dim,
                                                                                          ma="A")
    trainY = to_categorical(ret_labels, nb_classes=3)
    del ret_labels
    lnet = tflearn.input_data([None, maxlen, embedding_dim])
    rnet = tflearn.input_data([None, maxlen, embedding_dim])
    lnet = tflearn.gru(lnet, embedding_dim, dropout=0.8, return_seq=False, dynamic=True)
    rnet = tflearn.gru(rnet, embedding_dim, dropout=0.8, return_seq=False, dynamic=True)
    net = tflearn.layers.merge_outputs([lnet, rnet])
    net = tflearn.fully_connected(net, 3, activation='softmax')
    net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                             loss='categorical_crossentropy')
    # Training
    model = tflearn.DNN(net, tensorboard_verbose=0)
    model.fit([l_trainX, r_trainX], trainY, validation_set=0.1, show_metric=True,
              batch_size=32)
    model.save('MODELS/E_W2V_GRU_TC{}_{}.dy'.format(embedding_dim, maxlen))
    # model.load('MODELS/E_W2V_GRU_TC{}_{}.dy'.format(embedding_dim, maxlen))
    del l_trainX
    del r_trainX
    del trainY
    idx2cla = {0: 'neu', 1: 'pos', 2: 'neg'}
    filename = "Result/result_{}.csv".format(datetime.datetime.now().strftime("%Y%m%d%H%M"))
    prefix = list(open('Result/A_AFTER_NRP_200', 'r').readlines())
    f = open(filename, 'w')
    f.write('SentenceId,View,Opinion\n')
    a = [0,     5000, 10000, 15000, 20000, 25000, 30000, 35000, 40000, 45000, 50000, 55000]
    b = [5000, 10000, 15000, 20000, 25000, 30000, 35000, 40000, 45000, 50000, 55000, 65000]
    ANS = []
    for i in range(12):
        ans = model.predict([l_topredictX[a[i]:b[i]], r_topredictX[a[i]:b[i]]])
        ANS.extend([s for s in ans])
        print("ANS.LENGTH: {}".format(len(ans)))
    for i, r in enumerate(ANS):
        f.write(prefix[i].strip())
        idx = int(np.argmax(r))
        f.write(idx2cla[idx])
        k = ""
        for l in r:
            k += ',{:.4f}'.format(l)
        f.write(k)
        f.write('\n')
    f.close()
    end = time.time()
    print("TIME COST: {}".format(end-start))
    outf = vote_by_score(filename)
    add(outf)
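
train() relies on imports roughly like the following, a sketch inferred from the calls it makes (tflearn and its to_categorical helper, numpy, time, and datetime); "do" is presumably the project's data module, and vote_by_score/add are local post-processing helpers defined elsewhere in the repo.

import time
import datetime

import numpy as np
import tflearn
from tflearn.data_utils import to_categorical

import DataOperation as do  # assumed source of load_data_bi_word2vec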
Example #4
def analyze_forest(data, rate):
    file = open("_final_bigger_forest_{0}.csv".format(rate), "w")
    start_trees = 5
    num_of_trees = start_trees
    percentage = []
    data_learn, data_test = DataOperation.separate_data(DataOperation.bin_data(data, 5), 2)
    true_values = list(data_test.iloc[:, len(data_test.columns)-1])
    classifier = RandomForest.RandomForest(start_trees, data_learn, rate)

    for _ in range(30):
        results = classifier.estimate_class(data_test)
        good, bad = 0, 0
        for i in range(len(true_values)):
            if results[i] == true_values[i]:
                good += 1
            else:
                bad += 1
        percentage.append(good/(good+bad)*100)
        file.write("{0};{1}%\n".format(num_of_trees, good / (good + bad) * 100))

        for _ in range(5):
            classifier.add_new_tree()
            num_of_trees += 1
    file.close()
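
A hypothetical call pattern for analyze_forest; the CSV path and rate values below are placeholders.

import pandas as pd

data = pd.read_csv('data.csv')      # placeholder dataset path
for rate in (0.5, 0.75, 1.0):       # placeholder rates
    analyze_forest(data, rate)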
def google_nominatim(DataLocation):
    df = DO.readfile(DataLocation)
    print(df)
    # Quick sanity check that the Google geocoder responds
    g = geocoder.google('110 OTTER COVE TERRACE San Francisco')
    print(g.latlng)
    transed = []
    for x in df['Location']:
        transed.append(geocoder.google(x + " San Francisco", timeout=10).latlng)
        time.sleep(2)
    print(transed)
    nptransed = np.array(transed)
    transed_la = nptransed[:, 0]
    transed_lo = nptransed[:, 1]
    cood_price = list(zip(df['Price'], transed_la, transed_lo))
    df_cood_price = pd.DataFrame(data=cood_price, columns=['Price', 'Latitude', 'Longitude'])
    df_cood_price.to_csv(Constants.filelocations.GOOGLE_NOMINATIM_HOUSEPRICE, header=True, index=False)
    return
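
np.array(transed) above assumes every lookup returned a [lat, lng] pair; a sketch of a more defensive loop, with a [0, 0] fallback mirroring osm_nominatim:

# Defensive variant of the geocoding loop: fall back to [0, 0] when Google returns no latlng
transed = []
for x in df['Location']:
    latlng = geocoder.google(x + " San Francisco", timeout=10).latlng
    transed.append(latlng if latlng else [0, 0])
    time.sleep(2)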
Example #6
def imgClassify(inputImg):
    # Load the trained model
    model = torch.load('resnet.pkl')
    #print(model)
    # Switch to evaluation mode
    model.eval()

    # Adapt the input to the format the model expects
    BATCH_SIZE = 1
    my_data = DataOperation.MyDataset(inputImg,
                                      transform=transforms.ToTensor())
    my_loader = DataOperation.Data.DataLoader(dataset=my_data,
                                              batch_size=BATCH_SIZE)
    for batch_index, (test_x, test_y) in enumerate(my_loader):
        test_output = model(test_x)
        pred_y = torch.max(test_output, 1)[1].data.numpy()

    classfy = pred_y

    return classfy
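
For inference it is usually worth disabling gradient tracking; a minimal sketch of the same prediction step, assuming the model file and loader built in the function above:

import torch

model = torch.load('resnet.pkl')
model.eval()
with torch.no_grad():                     # no gradients needed for classification
    for batch_index, (test_x, test_y) in enumerate(my_loader):
        test_output = model(test_x)
        pred_y = torch.max(test_output, 1)[1].numpy()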
def voronoiplot(df):
    Datalist = []
    for i in df.index:
        #print(i, [df['Latitude'][i], df['Longitude'][i]])
        Datalist.append([float(df['Latitude'][i]), float(df['Longitude'][i])])
    points = np.array(Datalist)
    #print(points)
    vor = Voronoi(points)
    #voronoi_plot_2d(vor)
    region, vertices = Vorplots.voronoi_finite_polygons_2d(vor)
    print(len(region))
    geo_json, data_csv = DO.geojsonwrite(vor, region, vertices, df)
    print(data_csv)
    target = open(Constants.filelocations.VORONOI_GEOJSON, 'w')
    target.write(geo_json)
    target.close()
    data_csv.to_csv(Constants.filelocations.GEOJSON_CSV_DATA, header=True, index=False)
    return
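
For a quick visual check of the raw (unclipped) diagram, scipy ships its own plotting helper; a minimal sketch using the same points array:

from scipy.spatial import Voronoi, voronoi_plot_2d
import matplotlib.pyplot as plt

vor = Voronoi(points)        # points: the (lat, lon) array built as in voronoiplot()
voronoi_plot_2d(vor)
plt.show()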

# LOCATION='/Users/Wuga/Documents/DATA/SFREHPDATA/HousingSales2012PL_GOOGLE.csv'
# df=DO.readgeofile(LOCATION)
# train,test,train_index,test_index=DO.dataseperator(df)
# print [float(train['Latitude'][1]),float(train['Longitude'][1])] 
# voronoiplot(train)
data_with_noise = []  # synthetic [price, latitude, longitude] samples (assumed to start empty)
for i in range(400):
    position = [random.random()*10, random.random()*10]
    mu1 = [3, 3]
    mu2 = [7, 7]
    radius1 = 5
    radius2 = 3.5
    # Points inside either annulus get the higher price level
    if (InCirclue(position, mu1, radius1) and (not InCirclue(position, mu1, radius2))) or \
       (InCirclue(position, mu2, radius1) and (not InCirclue(position, mu2, radius2))):
        data_with_noise.append([7+random.random()/2, position[0], position[1]])
    else:
        data_with_noise.append([6+random.random()/2, position[0], position[1]])
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
colors = np.abs(np.array(data_with_noise)[:, 0])
data = np.array(data_with_noise)
print(colors)
#for idx, data in enumerate(data_with_noise):
#    ax.scatter(data[1], data[2], data[0], c=colors[idx]*20)
ax.plot_trisurf(data[:, 1], data[:, 2], data[:, 0], cmap=cm.jet, linewidth=0.2)
ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_zlabel('Z Label')
plt.show()
for i in range(len(data_with_noise)):
    data_with_noise[i][0] = '$' + str(data_with_noise[i][0])
df = pd.DataFrame(data_with_noise, columns=['Price', 'Latitude', 'Longitude'])
print(df)
DO.write(df, '../../sysnthetic.csv')
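
The loop above relies on an InCirclue helper that is not shown in this snippet; a plausible definition, assuming a plain Euclidean point-in-circle test:

# Assumed helper for the synthetic-data loop: Euclidean point-in-circle test
def InCirclue(position, mu, radius):
    return (position[0] - mu[0]) ** 2 + (position[1] - mu[1]) ** 2 <= radius ** 2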

        
    
Example #9
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
import numpy as np
from torchvision import models, transforms

# my own module
import DataOperation

use_cuda = torch.cuda.is_available()
# Hyper Parameters
EPOCH = 50  # number of full passes over the training data
BATCH_SIZE = 50
LR = 0.001  # learning rate

# Build the datasets from the custom MyDataset class -- note these are datasets, not loader iterators
train_data = DataOperation.MyDataset('./StampDB/',
                                     'train.txt',
                                     transform=transforms.ToTensor())
test_data = DataOperation.MyDataset('./StampDB/',
                                    'test.txt',
                                    transform=transforms.ToTensor())
# valid_data = DataOperation.MyDataset('./StampDB/', 'valid.txt', transform=transforms.ToTensor())

train_loader = Data.DataLoader(dataset=train_data,
                               batch_size=BATCH_SIZE,
                               shuffle=True)
test_loader = Data.DataLoader(dataset=test_data, batch_size=BATCH_SIZE)
# valid_loader = Data.DataLoader(dataset=valid_data,batch_size=BATCH_SIZE)

model = models.resnet50(pretrained=False)
# if using torch 0.4.x, build the adaptive average pooling layer explicitly
adp = torch.nn.AdaptiveAvgPool2d((1, 1))
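
The snippet stops after building the pooling layer; a sketch of how it is typically wired into the torchvision ResNet-50 (NUM_CLASSES is a placeholder, since the real class count depends on the stamp dataset):

# Sketch: swap in the adaptive pooling and resize the classifier head
NUM_CLASSES = 2                    # placeholder; set to the number of stamp classes
model.avgpool = adp
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
if use_cuda:
    model = model.cuda()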
Example #10
import DataOperation
import Setting
from Model import autoencoder_A, autoencoder_B


def save_model(modelA, modelB):
    modelA.save("./modelA.h5")
    modelB.save("./modelB.h5")


if __name__ == '__main__':
    for epoch in range(Setting.epochs):
        print('Epoch {} ......'.format(epoch))
        warped_A, target_A = DataOperation.get_training_data_A()
        warped_B, target_B = DataOperation.get_training_data_B()
        loss_A = autoencoder_A.train_on_batch(warped_A, target_A)
        loss_B = autoencoder_B.train_on_batch(warped_B, target_B)
        print("lossA:{},lossB:{}".format(loss_A, loss_B))
        if (epoch + 1) % 10 == 0:
            save_model(autoencoder_A, autoencoder_B)
Example #11
import numpy as np
import cv2
import DataOperation
import Setting
import matplotlib.pyplot as plt

images_A_paths = DataOperation.get_image_paths(Setting.IMAGE_PATH_A)
images_B_paths = DataOperation.get_image_paths(Setting.IMAGE_PATH_B)
print('Number of images_A is {}, number of images_B is {}'.format(
    len(images_A_paths), len(images_B_paths)))

A_Images = DataOperation.load_images(images_A_paths[:3])
B_Images = DataOperation.load_images(images_B_paths[:3])

figure = np.concatenate([A_Images, B_Images], axis=0)
print(figure.shape)
figure = figure.reshape((2, 3) + figure.shape[1:])
print(figure.shape)
figure = DataOperation.stack_image(figure)
print(figure.shape)
plt.imshow(cv2.cvtColor(figure, cv2.COLOR_RGB2BGR))
plt.show()
Example #12
import numpy as np
import cv2
import matplotlib.pyplot as plt
import keras

from PixelShuffler import PixelShuffler
import DataOperation

if __name__ == '__main__':
    autoencoder_A = keras.models.load_model(
        './modelA.h5', custom_objects={'PixelShuffler': PixelShuffler})
    autoencoder_B = keras.models.load_model(
        './modelB.h5', custom_objects={'PixelShuffler': PixelShuffler})

    warped_A, target_A = DataOperation.get_training_data_A()
    warped_B, target_B = DataOperation.get_training_data_B()

    test_A = target_A[0:3]
    test_B = target_B[0:3]

    # Stack side by side: original A - image from decoder A - image from decoder B
    figure_A = np.stack([
        test_A,
        autoencoder_A.predict(test_A),
        autoencoder_B.predict(test_A),
    ], axis=1)
    # Stack side by side: original B - image from decoder B - image from decoder A
    figure_B = np.stack([
        test_B,
        autoencoder_B.predict(test_B),
        autoencoder_A.predict(test_B),
    ], axis=1)
'''
Created on 09-23-2015

@author: Wuga
'''
import folium

import geocoder
import DataOperation as DO
import DataPreprocess as DP
import Vorplots as V
import Constants
import pandas as pd

g = geocoder.osm('dublin,ireland')
loca = g.latlng
print(loca)
LOCATION=Constants.filelocations.DUBLIN_2010
df=DO.readgeofile(LOCATION)
train,test,train_index,test_index=DO.dataseperator(df)
map_osm = folium.Map(location=loca, zoom_start=9, max_zoom=18)
train=DP.elim(train)
train = train.reset_index(drop=True)
V.voronoiplot(DP.elim(train))
map_osm.geo_json(geo_path=r'autovoronoi.json',
                 data_out='/Users/Wuga/Documents/DATA/SFREHPDATA/pricedata.json',
                 data=pd.read_csv('/Users/Wuga/Documents/DATA/SFREHPDATA/pricedata.csv'),
                 columns=['Id', 'Price'],
                 key_on='feature.id',
                 threshold_scale=[200000, 250000, 300000, 350000, 400000, 500000],
                 fill_color='YlOrRd',
                 fill_opacity=0.5,
                 line_opacity=0.5,
                 legend_name='SF house price')
map_osm.create_map(path=Constants.filelocations.MAP_HTML)
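
The geo_json/create_map calls above come from very old folium releases; on current folium the same choropleth is built roughly like this (a sketch reusing the paths from the call above):

import folium
import pandas as pd

m = folium.Map(location=loca, zoom_start=9, max_zoom=18)
folium.Choropleth(
    geo_data='autovoronoi.json',
    data=pd.read_csv('/Users/Wuga/Documents/DATA/SFREHPDATA/pricedata.csv'),
    columns=['Id', 'Price'],
    key_on='feature.id',
    fill_color='YlOrRd',
    fill_opacity=0.5,
    line_opacity=0.5,
    legend_name='SF house price',
).add_to(m)
m.save(Constants.filelocations.MAP_HTML)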