import torch

# load_train, load_test, VectorNetWithPredicting, askADE and device are
# provided by the project's other modules and are not shown in this file.


def train(epoch, learningRate, batchSize):
    r"""Train the VectorNet model and save it.

    :param epoch: number of training epochs
    :param learningRate: initial learning rate for the Adam optimizer
    :param batchSize: mini-batch size for the train and test loaders
    :return: None
    """
    train_data = load_train()
    trainset = torch.utils.data.DataLoader(train_data, batch_size=batchSize)
    test_data = load_test()
    testset = torch.utils.data.DataLoader(test_data, batch_size=batchSize)

    vectorNet = VectorNetWithPredicting(len=9, timeStampNumber=30)
    vectorNet = vectorNet.to(device)

    lossfunc = torch.nn.MSELoss()
    lr = learningRate
    optimizer = torch.optim.Adam(vectorNet.parameters(), lr=lr)

    for iterator in range(epoch):
        for data, target in trainset:
            data = data.to(device)
            target = target.to(device)
            optimizer.zero_grad()
            # The last row of each sample stores normalisation constants:
            # [0, 0, 0, 0, 0, maxX, maxY, ..., 0]
            offset = data[:, -1, :]
            data = data[:, 0:data.shape[1] - 1, :]
            outputs = vectorNet(data)  # [batch size, len * 2]
            loss = lossfunc(outputs, target)
            loss.backward()
            optimizer.step()
        print(iterator)

        # Decay the learning rate every 5 epochs and rebuild the optimizer.
        if (iterator + 1) % 5 == 0:
            lr *= 0.3
            optimizer = torch.optim.Adam(vectorNet.parameters(), lr=lr)

        # Evaluation: de-normalise predictions and targets with the stored
        # maxX/maxY, then keep the metrics of the batch with the smallest
        # distance reported by askADE.
        minADE = torch.zeros(1).to(device)
        minDis = torch.zeros(1).to(device)
        minDis[0] = float('inf')
        for data, target in testset:
            data = data.to(device)
            target = target.to(device)
            offset = data[:, -1, :]  # [0, 0, 0, 0, 0, maxX, maxY, ..., 0]
            data = data[:, 0:data.shape[1] - 1, :]
            outputs = vectorNet(data)
            for i in range(0, outputs.shape[1], 2):
                outputs[:, i] *= offset[:, 5]
                target[:, i] *= offset[:, 5]
                outputs[:, i + 1] *= offset[:, 6]
                target[:, i + 1] *= offset[:, 6]
            print(outputs)
            print(target)
            t = askADE(outputs, target)
            if minDis > t[0]:
                minDis = t[0]
                minADE = t[1]
        print('minDis=', minDis.item(), 'minADE=', minADE.item())

    torch.save(vectorNet, 'VectorNet-test.model')
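# `askADE` above is defined elsewhere in the project and is not shown here. As a
# rough, non-authoritative illustration of the kind of metric it returns (a
# distance together with an ADE-style average), the sketch below computes final
# and average displacement error for flat [x0, y0, x1, y1, ...] trajectories.
# The name `displacement_errors` and its return convention are assumptions, not
# the project's actual implementation.
import torch


def displacement_errors(outputs, target):
    """Return (final displacement error, average displacement error)."""
    # Reshape [batch, len * 2] into [batch, len, 2] coordinate pairs.
    pred = outputs.view(outputs.shape[0], -1, 2)
    gt = target.view(target.shape[0], -1, 2)
    step_dist = torch.norm(pred - gt, dim=2)  # Euclidean error per time step
    fde = step_dist[:, -1].mean()             # error at the final time step
    ade = step_dist.mean()                    # error averaged over all steps
    return fde, ade

# Shape-only example: displacement_errors(torch.rand(4, 60), torch.rand(4, 60))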
# This visualisation script assumes load_train, device and a trained `vectorNet`
# are available from earlier in the file / the project's other modules.
def viz(x, y):
    """Plot the ground-truth trajectory and the model's prediction for one sample."""
    # Ground truth: y is a flat row [x0, y0, x1, y1, ...]; .item() converts the
    # tensor entries to plain floats so matplotlib can plot them.
    listX = [0]
    listY = [0]
    for i in range(0, y.shape[1], 2):
        listX.append(y[0, i].item())
        listY.append(y[0, i + 1].item())
    print(listX)
    print(listY)
    print('---------')
    plt.plot(listX, listY, 'g', linewidth=3)  # ground truth

    # Prediction from the trained network on the same sample.
    x = x.to(device)
    myPredict = vectorNet(x)
    listX = [0]
    listY = [0]
    for i in range(0, y.shape[1], 2):
        listX.append(myPredict[0, i].item())
        listY.append(myPredict[0, i + 1].item())
    print(listX)
    print(listY)
    print('---------')
    plt.plot(listX, listY, 'yellow', linewidth=3)  # prediction
    plt.show()


if __name__ == '__main__':
    data = load_train()
    dataset = torch.utils.data.DataLoader(data, batch_size=1)
    for data, y in dataset:
        offset = data[:, -1, :]  # [0, 0, 0, 0, 0, maxX, maxY, ..., 0]
        data = data[:, 0:data.shape[1] - 1, :]
        viz(data, y)
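# How `vectorNet` gets into scope is not shown in this fragment. Since training
# saves the whole module with torch.save(vectorNet, 'VectorNet-test.model'), one
# plausible (assumed, not confirmed by the source) way to restore it before the
# loop above is:
#
#     vectorNet = torch.load('VectorNet-test.model', map_location=device)
#     vectorNet.eval()  # inference mode for visualisation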
import dataloader
import transformer
import numpy as np
import matplotlib.pyplot as plt

'''
base analysis
'''
desc_map, price_map = dataloader.load_train()

plt.scatter(desc_map['YearBuilt'], price_map['price'], s=10)
plt.title('SalePrice_YearBuilt')
plt.xlabel('YearBuilt')
plt.ylabel('SalePrice')
plt.show()

print('SalePrice mean:', np.mean(price_map['price']),
      'median:', np.median(price_map['price']),
      'std:', np.std(price_map['price']))

'''
corrcoefs analysis
'''
corrcoefs = []
keys = []
desc_map, price_map = dataloader.load_train()
price_map['price'] = transformer.normlize(price_map['price'])
for key in desc_map.keys():
    desc_map[key] = transformer.normlize(desc_map[key])
    corrcoefs.append(np.corrcoef(desc_map[key], price_map['price'])[0][1])
    keys.append(key)

zipped = zip(keys, corrcoefs)
sort_zipped = sorted(zipped, key=lambda x: (x[1], x[0]), reverse=True)
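# Quick usage of the ranking just built: the head of sort_zipped holds the
# features whose normalised values correlate most positively with SalePrice,
# the tail the most negatively. The cutoff of 10 below is an arbitrary choice
# for illustration, not part of the original analysis.
print('most positively correlated:', sort_zipped[:10])
print('most negatively correlated:', sort_zipped[-10:])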
from collections import defaultdict

# load_train and act_score come from the project's other modules (not shown).


def act_score_by_time(act, time, decay_weight=0.98):
    """Action score decayed exponentially by how long ago it happened."""
    return act_score(act) * (decay_weight ** time)


def cal_pop(train):
    """Accumulate a time-decayed popularity score per item id."""
    pop = defaultdict(float)
    for record in train:
        uid, bid, act, time = record[0], record[1], record[2], record[3]
        pop[bid] += act_score_by_time(act, time)
    # Normalise the scores so they sum to 1 (sum over values, not keys).
    pop_sum = sum(pop.values())
    for bid in pop:
        pop[bid] /= pop_sum
    return pop


def dump(dic, path):
    """Write one 'id<TAB>score' line per item."""
    fo = open(path, 'w')
    for bid in dic:
        fo.write(str(bid) + '\t' + str(dic[bid]) + '\n')
    fo.close()


if __name__ == "__main__":
    path = 'data/train.csv'
    train = load_train(path)
    pop = cal_pop(train)
    dump(pop, 'data/pop.csv')
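# Rough feel for the decay used above: with decay_weight = 0.98 an action that
# is `time` steps old keeps 0.98 ** time of its score, e.g. about 0.82 of it
# after 10 steps and about 0.36 after 50. This loop only prints the decay
# factor; it does not touch act_score or the training data.
for t in (0, 1, 5, 10, 50):
    print(t, 0.98 ** t)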
import random

# load_train, load_pref, cal_short_pref, load_pop, gen_ans, cal_topk and
# write_ans come from the project's other modules (not shown here).


def shuffle(f_submit):
    """Randomise the line order of a submission file in place."""
    f = open(f_submit)
    submit = []
    for line in f:
        submit.append(line)
    f.close()

    # Swap every line with one at a randomly chosen index.
    for i in range(0, len(submit)):
        idx = random.randint(0, len(submit) - 1)
        submit[i], submit[idx] = submit[idx], submit[i]

    f = open(f_submit, 'w')
    for i in range(0, len(submit)):
        f.write(submit[i])
    f.close()


if __name__ == "__main__":
    f_train = 'data/train.csv'
    f_pref = 'data/long_pref.csv'
    f_pop = 'data/pop.csv'
    train = load_train(f_train)
    pref = load_pref(f_pref)
    short_pref = cal_short_pref(train)
    pop = load_pop(f_pop)
    ans = gen_ans(pref, pop, short_pref)
    topk = cal_topk(train)
    write_ans(ans, topk, 'data/pop_pref_0414.txt')
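# Design note on shuffle(): swapping each position with a uniformly random index
# randomises the order but does not produce a uniform permutation. If that
# matters, random.shuffle (Fisher-Yates) is the idiomatic drop-in; the helper
# below is only an illustrative alternative, not part of the original pipeline.
def shuffle_uniform(f_submit):
    """Uniformly shuffle the lines of a submission file in place."""
    with open(f_submit) as f:
        submit = f.readlines()
    random.shuffle(submit)
    with open(f_submit, 'w') as f:
        f.writelines(submit)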